hhctrl.ocx: Parse HTML entities in the table of contents.

Robert Shearman rob at codeweavers.com
Sat May 26 03:04:20 CDT 2007


---
  dlls/hhctrl.ocx/content.c |   82 +++++++++++++++++++++++++++++++++++++++++++++
  1 files changed, 82 insertions(+), 0 deletions(-)

Take 2. With suggestions from Alexandre.
-------------- next part --------------
diff --git a/dlls/hhctrl.ocx/content.c b/dlls/hhctrl.ocx/content.c
index a34c853..ebbfe98 100644
--- a/dlls/hhctrl.ocx/content.c
+++ b/dlls/hhctrl.ocx/content.c
@@ -178,6 +178,87 @@ static const char *get_attr(const char *
     return ptr;
 }
 
+struct character_ref  /* maps one named HTML entity to its character */
+{
+    WCHAR name[7];    /* entity name without '&' and ';', NUL-terminated */
+    WCHAR character;  /* character that replaces the entity in the text */
+};
+
+/* Decode HTML character references ("&#65;", "&#x41;", "&amp;") in place.
+ * References that are malformed or unknown are left untouched; the string
+ * never grows because a reference is longer than the character it encodes. */
+static void parse_html_entities(WCHAR *text)
+{
+    static const struct character_ref char_refs[] =
+    {
+        {{'n','b','s','p',0}, 160},
+        {{'i','e','x','c','l',0}, 161},
+        {{'c','e','n','t',0}, 162},
+        {{'p','o','u','n','d',0}, 163},
+        {{'c','u','r','r','e','n',0}, 164},
+        {{'y','e','n',0}, 165},
+        {{'b','r','v','b','a','r',0}, 166},
+        {{'q','u','o','t',0}, 34},
+        {{'a','m','p',0}, 38},
+        {{'l','t',0}, 60},
+        {{'g','t',0}, 62},
+    };
+    WCHAR *start;
+
+    /* Resume after each '&' so that a decoded character is never re-parsed
+     * ("&amp;lt;" yields "&lt;") and a reference that is left untouched
+     * cannot be found again, which would loop forever. */
+    for (; (start = strchrW(text, '&')) != NULL; text = start + 1)
+    {
+        WCHAR *p = start + 1;
+        WCHAR ch = 0;
+
+        if (*p == '#')
+        {
+            WCHAR *digits;
+            int base = 10;
+            p++;
+            if ((*p == 'X') || (*p == 'x'))
+            {
+                /* hexadecimal entity: step over the marker to the digits */
+                base = 16;
+                p++;
+            }
+            digits = p;
+            while ((*p >= '0' && *p <= '9') || (base == 16 &&
+                   ((*p >= 'a' && *p <= 'f') || (*p >= 'A' && *p <= 'F'))))
+                p++;
+            if (p != digits) ch = strtolW(digits, NULL, base);
+        }
+        else
+        {
+            unsigned int i, len;
+            while (isalphaW(*p))
+                p++;
+            len = p - start - 1;
+            /* exact match only: name[len] must be the terminating NUL */
+            for (i = 0; len && i < sizeof(char_refs)/sizeof(char_refs[0]); i++)
+            {
+                if (len < sizeof(char_refs[i].name)/sizeof(WCHAR) &&
+                    !char_refs[i].name[len] &&
+                    !strncmpW(char_refs[i].name, start + 1, len))
+                {
+                    ch = char_refs[i].character;
+                    break;
+                }
+            }
+            if (len && !ch)
+                FIXME("character entity %s not found\n", debugstr_wn(start + 1, len));
+        }
+        if (!ch) continue; /* not a usable reference: leave the text as is */
+
+        if (*p == ';') p++;
+        /* replace '&' with the character, then close the gap up to 'p' */
+        *start = ch;
+        memmove(start + 1, p, (strlenW(p) + 1) * sizeof(WCHAR));
+    }
+}
+
 static void parse_obj_node_param(ContentItem *item, ContentItem *hhc_root, const char *text)
 {
     const char *ptr;
@@ -211,6 +292,7 @@ static void parse_obj_node_param(Content
     *param = hhctrl_alloc((wlen+1)*sizeof(WCHAR));
     MultiByteToWideChar(CP_ACP, 0, ptr, len, *param, wlen);
     (*param)[wlen] = 0;
+    parse_html_entities(*param);
 
     if(param == &merge) {
         SetChmPath(&item->merge, hhc_root->merge.chm_file, merge);


More information about the wine-patches mailing list