[PATCH v2 5/6] dlls/dbghelp{pdb}: ensure dbghelp's list order in hash table matches PDB's

Eric Pouech wine at gitlab.winehq.org
Wed May 25 10:43:56 CDT 2022


From: Eric Pouech <eric.pouech at gmail.com>

The PDB types can contain several times the same type definition. It seems
to appear when modifying a type (like adding new fields to a struct):
- as the PDB file (generated from first compilation) is updated (and not
  fully rewritten), the debug information for the old type is not flushed;
  a new record (for the same struct name) is emitted, and inserted
  before the old one in the hash table (bucket list).

We must hence preserve that order in bucket list: even if dbghelp's hash
table is different from pdb's internal one (ie number of buckets & bucket
lists are different), this implies that new type information must appear
before the old on in the bucket list)

This order is also important as:
- forward definition always should be resolved with the new type
  (resolution is based on name)
- search by type name must return the new type

This patch fixes builtin dbghelp to implement this behavior.

This patch uses the pdb hash table and relies on it to preserve he partial
order described above. This pdb hash table is also used for resolving
forwards (now always picking the good one).

Signed-off-by: Eric Pouech <eric.pouech at gmail.com>
---
 dlls/dbghelp/msc.c | 296 +++++++++++++++++++++++++++++++++------------
 1 file changed, 217 insertions(+), 79 deletions(-)

diff --git a/dlls/dbghelp/msc.c b/dlls/dbghelp/msc.c
index dfb5cac94aa..e881c1d234e 100644
--- a/dlls/dbghelp/msc.c
+++ b/dlls/dbghelp/msc.c
@@ -555,12 +555,21 @@ static struct symt*  codeview_get_type(unsigned int typeno, BOOL quiet)
     return symt;
 }
 
+struct hash_link
+{
+    unsigned            id;
+    struct hash_link*   next;
+};
+
 struct codeview_type_parse
 {
     struct module*      module;
     PDB_TYPES           header;
     const BYTE*         table;
-    const DWORD*        offset;
+    const DWORD*        offset;      /* typeindex => offset in 'table' */
+    BYTE*               hash_stream; /* content of stream header.hash_file */
+    struct hash_link**  hash;        /* hash_table (deserialized from PDB hash table) */
+    struct hash_link*   alloc_hash;  /* allocated hash_link (id => hash_link) */
 };
 
 static inline const void* codeview_jump_to_type(const struct codeview_type_parse* ctp, DWORD idx)
@@ -615,16 +624,6 @@ static struct symt* codeview_parse_one_type(struct codeview_type_parse* ctp,
                                             unsigned curr_type,
                                             const union codeview_type* type);
 
-static void* codeview_cast_symt(struct symt* symt, enum SymTagEnum tag)
-{
-    if (symt->tag != tag)
-    {
-        FIXME("Bad tag. Expected %d, but got %d\n", tag, symt->tag);
-        return NULL;
-    }   
-    return symt;
-}
-
 static struct symt* codeview_fetch_type(struct codeview_type_parse* ctp,
                                         unsigned typeno)
 {
@@ -661,6 +660,139 @@ static BOOL codeview_is_remapable_type(const union codeview_type* type)
     }
 }
 
+static BOOL codeview_type_is_forward(const union codeview_type* cvtype)
+{
+    cv_property_t property;
+
+    switch (cvtype->generic.id)
+    {
+    case LF_STRUCTURE_V1:
+    case LF_CLASS_V1:     property = cvtype->struct_v1.property;       break;
+    case LF_STRUCTURE_V2:
+    case LF_CLASS_V2:     property = cvtype->struct_v2.property;       break;
+    case LF_STRUCTURE_V3:
+    case LF_CLASS_V3:     property = cvtype->struct_v3.property;       break;
+    case LF_UNION_V1:     property = cvtype->union_v1.property;        break;
+    case LF_UNION_V2:     property = cvtype->union_v2.property;        break;
+    case LF_UNION_V3:     property = cvtype->union_v3.property;        break;
+    case LF_ENUM_V1:      property = cvtype->enumeration_v1.property;  break;
+    case LF_ENUM_V2:      property = cvtype->enumeration_v1.property;  break;
+    case LF_ENUM_V3:      property = cvtype->enumeration_v1.property;  break;
+    default:
+        return FALSE;
+    }
+    return property.is_forward_defn;
+}
+
+static BOOL codeview_type_extract_name(const union codeview_type* cvtype,
+                                       const char** name, unsigned* len, const char** decorated_name)
+{
+    int value, leaf_len;
+    const struct p_string* p_name = NULL;
+    const char* c_name = NULL;
+    BOOL decorated = FALSE;
+
+    switch (cvtype->generic.id)
+    {
+    case LF_STRUCTURE_V1:
+    case LF_CLASS_V1:
+        leaf_len = numeric_leaf(&value, &cvtype->struct_v1.structlen);
+        p_name = (const struct p_string*)((const unsigned char*)&cvtype->struct_v1.structlen + leaf_len);
+        break;
+    case LF_STRUCTURE_V2:
+    case LF_CLASS_V2:
+        leaf_len = numeric_leaf(&value, &cvtype->struct_v2.structlen);
+        p_name = (const struct p_string*)((const unsigned char*)&cvtype->struct_v2.structlen + leaf_len);
+        break;
+    case LF_STRUCTURE_V3:
+    case LF_CLASS_V3:
+        leaf_len = numeric_leaf(&value, &cvtype->struct_v3.structlen);
+        c_name = (const char*)&cvtype->struct_v3.structlen + leaf_len;
+        decorated = cvtype->struct_v3.property.has_decorated_name;
+        break;
+    case LF_UNION_V1:
+        leaf_len = numeric_leaf(&value, &cvtype->union_v1.un_len);
+        p_name = (const struct p_string*)((const unsigned char*)&cvtype->union_v1.un_len + leaf_len);
+        break;
+    case LF_UNION_V2:
+        leaf_len = numeric_leaf(&value, &cvtype->union_v2.un_len);
+        p_name = (const struct p_string*)((const unsigned char*)&cvtype->union_v2.un_len + leaf_len);
+        break;
+    case LF_UNION_V3:
+        leaf_len = numeric_leaf(&value, &cvtype->union_v3.un_len);
+        c_name = (const char*)&cvtype->union_v3.un_len + leaf_len;
+        decorated = cvtype->union_v3.property.has_decorated_name;
+        break;
+    case LF_ENUM_V1:
+        p_name = &cvtype->enumeration_v1.p_name;
+        break;
+    case LF_ENUM_V2:
+        p_name = &cvtype->enumeration_v2.p_name;
+        break;
+    case LF_ENUM_V3:
+        c_name = cvtype->enumeration_v3.name;
+        decorated = cvtype->union_v3.property.has_decorated_name;
+        break;
+    default:
+        return FALSE;
+    }
+    if (p_name)
+    {
+        *name = p_name->name;
+        *len = p_name->namelen;
+        *decorated_name = NULL;
+        return TRUE;
+    }
+    if (c_name)
+    {
+        *name = c_name;
+        *len = strlen(c_name);
+        *decorated_name = decorated ? (c_name + *len + 1) : NULL;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+static unsigned pdb_read_hash_value(const struct codeview_type_parse* ctp, unsigned idx)
+{
+    const void* where = ctp->hash_stream + ctp->header.hash_offset + (idx - ctp->header.first_index) * ctp->header.hash_size;
+    switch (ctp->header.hash_size)
+    {
+    case 2: return *(unsigned short*)where;
+    case 4: return *(unsigned*)where;
+    }
+    return 0;
+}
+
+static BOOL codeview_resolve_forward_type(struct codeview_type_parse* ctp, const union codeview_type* cvref,
+                                          unsigned reftype, unsigned *realtype)
+{
+    const union codeview_type* cvdecl;
+    struct hash_link* hl;
+
+    for (hl = ctp->hash[pdb_read_hash_value(ctp, reftype)]; hl; hl = hl->next)
+    {
+        if (hl->id == reftype) break; /* no better choice */
+        cvdecl = codeview_jump_to_type(ctp, hl->id);
+        if (cvdecl && !codeview_type_is_forward(cvdecl) && cvref->generic.id == cvdecl->generic.id)
+        {
+            const char* nameref, * namedecl, * decoratedref, * decorateddecl;
+            unsigned lenref, lendecl;
+
+            if (codeview_type_extract_name(cvref,  &nameref,  &lenref,  &decoratedref) &&
+                codeview_type_extract_name(cvdecl, &namedecl, &lendecl, &decorateddecl) &&
+                ((decoratedref && decorateddecl && !strcmp(decoratedref, decorateddecl)) ||
+                 (!decoratedref && !decorateddecl && lenref == lendecl && !memcmp(nameref, namedecl, lenref))))
+            {
+                TRACE("mapping forward type %.*s (%s) %x into %x\n", lenref, nameref, debugstr_a(decoratedref), reftype, hl->id);
+                *realtype = hl->id;
+                return TRUE;
+            }
+        }
+    }
+    return FALSE;
+}
+
 static struct symt* codeview_add_type_pointer(struct codeview_type_parse* ctp,
                                               unsigned int pointee_type)
 {
@@ -1001,49 +1133,6 @@ static int codeview_add_type_struct_field_list(struct codeview_type_parse* ctp,
     return TRUE;
 }
 
-static struct symt* codeview_add_type_struct(struct codeview_type_parse* ctp,
-                                             const char* name, int structlen,
-                                             enum UdtKind kind, cv_property_t property)
-{
-    struct symt_udt*    symt;
-    struct symt*        existing = NULL;
-
-    /* if we don't have an existing type, try to find one with same name
-     * FIXME: what to do when several types in different CUs have same name ?
-     */
-    void*                       ptr;
-    struct symt_ht*             type;
-    struct hash_table_iter      hti;
-
-    hash_table_iter_init(&ctp->module->ht_types, &hti, name);
-    while ((ptr = hash_table_iter_up(&hti)))
-    {
-        type = CONTAINING_RECORD(ptr, struct symt_ht, hash_elt);
-
-        if (type->symt.tag == SymTagUDT &&
-            type->hash_elt.name && !strcmp(type->hash_elt.name, name))
-        {
-            existing = &type->symt;
-            break;
-        }
-    }
-    if (existing)
-    {
-        if (!(symt = codeview_cast_symt(existing, SymTagUDT))) return NULL;
-        /* should also check that all fields are the same */
-        if (!property.is_forward_defn)
-        {
-            if (!symt->size) /* likely prior forward declaration, set UDT size */
-                symt_set_udt_size(ctp->module, symt, structlen);
-            else /* different UDT with same name, create a new type */
-                existing = NULL;
-        }
-    }
-    if (!existing) symt = symt_new_udt(ctp->module, name, structlen, kind);
-
-    return &symt->symt;
-}
-
 static struct symt* codeview_new_func_signature(struct codeview_type_parse* ctp,
                                                 enum CV_call_e call_conv)
 {
@@ -1271,54 +1360,59 @@ static struct symt* codeview_parse_remapable_type(struct codeview_type_parse* ct
     const struct p_string*      p_name;
     const char*                 c_name;
 
+    if (codeview_type_is_forward(type))
+    {
+        unsigned impl_type;
+        if (codeview_resolve_forward_type(ctp, type, curr_type, &impl_type))
+        {
+            if (!(symt = codeview_get_type(impl_type, TRUE)))
+                FIXME("forward def of %x => %x doesn't have impl\n", curr_type, impl_type);
+            return codeview_add_type(curr_type, symt) ? symt : NULL;
+        }
+        /* else forward type definition without implementation, create empty Udt */
+    }
     switch (type->generic.id)
     {
     case LF_STRUCTURE_V1:
     case LF_CLASS_V1:
         leaf_len = numeric_leaf(&value, &type->struct_v1.structlen);
         p_name = (const struct p_string*)((const unsigned char*)&type->struct_v1.structlen + leaf_len);
-        symt = codeview_add_type_struct(ctp, terminate_string(p_name), value,
-                                        type->generic.id == LF_CLASS_V1 ? UdtClass : UdtStruct,
-                                        type->struct_v1.property);
+        symt = &symt_new_udt(ctp->module, terminate_string(p_name), value,
+                             type->generic.id == LF_CLASS_V1 ? UdtClass : UdtStruct)->symt;
         break;
 
     case LF_STRUCTURE_V2:
     case LF_CLASS_V2:
         leaf_len = numeric_leaf(&value, &type->struct_v2.structlen);
         p_name = (const struct p_string*)((const unsigned char*)&type->struct_v2.structlen + leaf_len);
-        symt = codeview_add_type_struct(ctp, terminate_string(p_name), value,
-                                        type->generic.id == LF_CLASS_V2 ? UdtClass : UdtStruct,
-                                        type->struct_v2.property);
+        symt = &symt_new_udt(ctp->module, terminate_string(p_name), value,
+                             type->generic.id == LF_CLASS_V2 ? UdtClass : UdtStruct)->symt;
         break;
 
     case LF_STRUCTURE_V3:
     case LF_CLASS_V3:
         leaf_len = numeric_leaf(&value, &type->struct_v3.structlen);
         c_name = (const char*)&type->struct_v3.structlen + leaf_len;
-        symt = codeview_add_type_struct(ctp, c_name, value,
-                                        type->generic.id == LF_CLASS_V3 ? UdtClass : UdtStruct,
-                                        type->struct_v3.property);
+        symt = &symt_new_udt(ctp->module, c_name, value,
+                             type->generic.id == LF_CLASS_V3 ? UdtClass : UdtStruct)->symt;
         break;
 
     case LF_UNION_V1:
         leaf_len = numeric_leaf(&value, &type->union_v1.un_len);
         p_name = (const struct p_string*)((const unsigned char*)&type->union_v1.un_len + leaf_len);
-        symt = codeview_add_type_struct(ctp, terminate_string(p_name),
-                                        value, UdtUnion, type->union_v1.property);
+        symt = &symt_new_udt(ctp->module, terminate_string(p_name), value, UdtUnion)->symt;
         break;
 
     case LF_UNION_V2:
         leaf_len = numeric_leaf(&value, &type->union_v2.un_len);
         p_name = (const struct p_string*)((const unsigned char*)&type->union_v2.un_len + leaf_len);
-        symt = codeview_add_type_struct(ctp, terminate_string(p_name),
-                                        value, UdtUnion, type->union_v2.property);
+        symt = &symt_new_udt(ctp->module, terminate_string(p_name), value, UdtUnion)->symt;
         break;
 
     case LF_UNION_V3:
         leaf_len = numeric_leaf(&value, &type->union_v3.un_len);
         c_name = (const char*)&type->union_v3.un_len + leaf_len;
-        symt = codeview_add_type_struct(ctp, c_name,
-                                        value, UdtUnion, type->union_v3.property);
+        symt = &symt_new_udt(ctp->module, c_name, value, UdtUnion)->symt;
         break;
 
     case LF_ENUM_V1:
@@ -1356,7 +1450,7 @@ static BOOL codeview_is_top_level_type(const union codeview_type* type)
 
 static BOOL codeview_parse_type_table(struct codeview_type_parse* ctp)
 {
-    unsigned int                curr_type;
+    unsigned int                i, curr_type;
     const union codeview_type*  type;
 
     cv_current_module->first_type_index = ctp->header.first_index;
@@ -1366,12 +1460,18 @@ static BOOL codeview_parse_type_table(struct codeview_type_parse* ctp)
 
     /* phase I: only load remapable types (struct/class/union/enum), but without their content
      *          handle also forward declarations
+     *          - do it in the order generated from PDB hash table to preserve that order
+     *            (dbghelp hash table inserts new elements at the end of bucket's list)
      */
-    for (curr_type = ctp->header.first_index; curr_type < ctp->header.last_index; curr_type++)
+    for (i = 0; i < ctp->header.hash_num_buckets; i++)
     {
-        type = codeview_jump_to_type(ctp, curr_type);
-        if (codeview_is_top_level_type(type))
-            codeview_parse_remapable_type(ctp, curr_type, type);
+        struct hash_link* hl;
+        for (hl = ctp->hash[i]; hl; hl = hl->next)
+        {
+            type = codeview_jump_to_type(ctp, hl->id);
+            if (codeview_is_top_level_type(type))
+                codeview_parse_remapable_type(ctp, hl->id, type);
+        }
     }
     /* phase II: non remapable types: load (but since they can be indirectly
      *           loaded (from another type), don't load them twice)
@@ -3031,7 +3131,16 @@ static HANDLE map_pdb_file(const struct process* pcs,
     return hMap;
 }
 
+static void pdb_dispose_type_parse(struct codeview_type_parse* ctp)
+{
+    pdb_free(ctp->hash_stream);
+    free((DWORD*)ctp->offset);
+    free((DWORD*)ctp->hash);
+    free((DWORD*)ctp->alloc_hash);
+}
+
 static BOOL pdb_init_type_parse(const struct msc_debug_info* msc_dbg,
+                                const struct pdb_file_info* pdb_file,
                                 struct codeview_type_parse* ctp,
                                 BYTE* image)
 {
@@ -3039,6 +3148,10 @@ static BOOL pdb_init_type_parse(const struct msc_debug_info* msc_dbg,
     DWORD* offset;
     int i;
 
+    ctp->hash_stream = NULL;
+    ctp->offset = NULL;
+    ctp->hash = NULL;
+    ctp->alloc_hash = NULL;
     pdb_convert_types_header(&ctp->header, image);
 
     /* Check for unknown versions */
@@ -3054,6 +3167,17 @@ static BOOL pdb_init_type_parse(const struct msc_debug_info* msc_dbg,
         ERR("-Unknown type info version %d\n", ctp->header.version);
         return FALSE;
     }
+    if (ctp->header.hash_size != 2 && ctp->header.hash_size != 4)
+    {
+        ERR("-Unsupported hash of size %u\n", ctp->header.hash_size);
+        return FALSE;
+    }
+    ctp->hash_stream = pdb_read_file(pdb_file, ctp->header.hash_file);
+    if (!ctp->hash_stream)
+    {
+        ERR("-Missing hash table in PDB file\n");
+        return FALSE;
+    }
 
     ctp->module = msc_dbg->module;
     /* reconstruct the types offset...
@@ -3061,8 +3185,8 @@ static BOOL pdb_init_type_parse(const struct msc_debug_info* msc_dbg,
      * (not all the indexes are present, so it requires search in table +
      * linear search from previous index...)
      */
-    offset = HeapAlloc(GetProcessHeap(), 0, sizeof(DWORD) * (ctp->header.last_index - ctp->header.first_index));
-    if (!offset) return FALSE;
+    offset = malloc(sizeof(DWORD) * (ctp->header.last_index - ctp->header.first_index));
+    if (!offset) goto oom;
     ctp->table = ptr = image + ctp->header.type_offset;
     for (i = ctp->header.first_index; i < ctp->header.last_index; i++)
     {
@@ -3070,7 +3194,21 @@ static BOOL pdb_init_type_parse(const struct msc_debug_info* msc_dbg,
         ptr += ((const union codeview_type*)ptr)->generic.len + 2;
     }
     ctp->offset = offset;
+    ctp->hash = calloc(ctp->header.hash_num_buckets, sizeof(struct hash_link*));
+    if (!ctp->hash) goto oom;
+    ctp->alloc_hash = calloc(ctp->header.last_index - ctp->header.first_index, sizeof(struct hash_link));
+    if (!ctp->alloc_hash) goto oom;
+    for (i = ctp->header.first_index; i < ctp->header.last_index; i++)
+    {
+        unsigned hash_i = pdb_read_hash_value(ctp, i);
+        ctp->alloc_hash[i - ctp->header.first_index].id = i;
+        ctp->alloc_hash[i - ctp->header.first_index].next = ctp->hash[hash_i];
+        ctp->hash[hash_i] = &ctp->alloc_hash[i - ctp->header.first_index];
+    }
     return TRUE;
+oom:
+    pdb_dispose_type_parse(ctp);
+    return FALSE;
 }
 
 static void pdb_process_types(const struct msc_debug_info* msc_dbg,
@@ -3081,11 +3219,11 @@ static void pdb_process_types(const struct msc_debug_info* msc_dbg,
 
     if (types_image)
     {
-        if (pdb_init_type_parse(msc_dbg, &ctp, types_image))
+        if (pdb_init_type_parse(msc_dbg, pdb_file, &ctp, types_image))
         {
             /* Read type table */
             codeview_parse_type_table(&ctp);
-            HeapFree(GetProcessHeap(), 0, (DWORD*)ctp.offset);
+            pdb_dispose_type_parse(&ctp);
         }
         pdb_free(types_image);
     }
@@ -3369,7 +3507,7 @@ static BOOL pdb_process_internal(const struct process* pcs,
         pdb_process_types(msc_dbg, pdb_file);
 
         ipi_image = pdb_read_file(pdb_file, 4);
-        ipi_ok = pdb_init_type_parse(msc_dbg, &ipi_ctp, ipi_image);
+        ipi_ok = pdb_init_type_parse(msc_dbg, pdb_file, &ipi_ctp, ipi_image);
 
         /* Read global symbol table */
         globalimage = pdb_read_file(pdb_file, symbols.gsym_file);
-- 
GitLab


https://gitlab.winehq.org/wine/wine/-/merge_requests/73



More information about the wine-devel mailing list