msi: Replace the string table hash with a sorted index.

Hans Leidekker hans at codeweavers.com
Tue Dec 15 02:51:20 CST 2009


Addresses a performance bottleneck in the Visual Studio 2005 installer.
See http://bugs.winehq.org/show_bug.cgi?id=14168
---
 dlls/msi/msipriv.h |    2 +-
 dlls/msi/string.c  |  147 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 109 insertions(+), 40 deletions(-)

diff --git a/dlls/msi/msipriv.h b/dlls/msi/msipriv.h
index 0e1a23b..ce03180 100644
--- a/dlls/msi/msipriv.h
+++ b/dlls/msi/msipriv.h
@@ -662,7 +662,7 @@ enum StringPersistence
 
 extern BOOL msi_addstringW( string_table *st, UINT string_no, const WCHAR *data, int len, UINT refcount, enum StringPersistence persistence );
 
-extern UINT msi_string2idW( const string_table *st, LPCWSTR buffer, UINT *id );
+extern UINT msi_string2idW( string_table *st, LPCWSTR buffer, UINT *id );
 extern VOID msi_destroy_stringtable( string_table *st );
 extern const WCHAR *msi_string_lookup_id( const string_table *st, UINT id );
 extern HRESULT msi_init_string_table( IStorage *stg );
diff --git a/dlls/msi/string.c b/dlls/msi/string.c
index e5b5cc2..684cae0 100644
--- a/dlls/msi/string.c
+++ b/dlls/msi/string.c
@@ -40,12 +40,11 @@
 
 WINE_DEFAULT_DEBUG_CHANNEL(msidb);
 
-#define HASH_SIZE 0x101
 #define LONG_STR_BYTES 3
 
 typedef struct _msistring
 {
-    int hash_next;
+    UINT id;
     UINT persistent_refcount;
     UINT nonpersistent_refcount;
     LPWSTR str;
@@ -56,30 +55,15 @@ struct string_table
     UINT maxcount;         /* the number of strings */
     UINT freeslot;
     UINT codepage;
-    int hash[HASH_SIZE];
-    msistring *strings; /* an array of strings (in the tree) */
+    UINT sortcount;
+    BOOL valid_index;
+    msistring *strings;       /* array of strings */
+    const msistring **sorted; /* sorted index */
 };
 
-static UINT msistring_makehash( const WCHAR *str )
-{
-    UINT hash = 0;
-
-    if (str==NULL)
-        return hash;
-
-    while( *str )
-    {
-        hash ^= *str++;
-        hash *= 53;
-        hash = (hash<<5) | (hash>>27);
-    }
-    return hash % HASH_SIZE;
-}
-
 static string_table *init_stringtable( int entries, UINT codepage )
 {
     string_table *st;
-    int i;
 
     if (codepage != CP_ACP && !IsValidCodePage(codepage))
     {
@@ -92,18 +76,27 @@ static string_table *init_stringtable( int entries, UINT codepage )
         return NULL;    
     if( entries < 1 )
         entries = 1;
+
     st->strings = msi_alloc_zero( sizeof (msistring) * entries );
     if( !st->strings )
     {
         msi_free( st );
         return NULL;    
     }
+
+    st->sorted = msi_alloc( sizeof (msistring *) * entries );
+    if( !st->sorted )
+    {
+        msi_free( st->strings );
+        msi_free( st );
+        return NULL;    
+    }
+
     st->maxcount = entries;
     st->freeslot = 1;
     st->codepage = codepage;
-
-    for( i=0; i<HASH_SIZE; i++ )
-        st->hash[i] = -1;
+    st->sortcount = 0;
+    st->valid_index = FALSE;
 
     return st;
 }
@@ -119,13 +112,30 @@ VOID msi_destroy_stringtable( string_table *st )
             msi_free( st->strings[i].str );
     }
     msi_free( st->strings );
+    msi_free( st->sorted );
     msi_free( st );
 }
 
+static int string_compare( const void *a, const void *b )
+{
+    const msistring * const *s1 = a;
+    const msistring * const *s2 = b;
+    return lstrcmpW( (*s1)->str, (*s2)->str );
+}
+
+static void rebuild_index( string_table *st )
+{
+    if (st->sortcount > 1)
+        qsort( st->sorted, st->sortcount, sizeof(msistring *), string_compare );
+
+    st->valid_index = TRUE;
+}
+
 static int st_find_free_entry( string_table *st )
 {
     UINT i, sz;
     msistring *p;
+    const msistring **s;
 
     TRACE("%p\n", st);
 
@@ -146,7 +156,24 @@ static int st_find_free_entry( string_table *st )
     p = msi_realloc_zero( st->strings, sz*sizeof(msistring) );
     if( !p )
         return -1;
-    st->strings = p;
+
+    s = msi_realloc( st->sorted, sz*sizeof(msistring *) );
+    if( !s )
+    {
+        msi_free( p );
+        return -1;
+    }
+    st->sorted = s;
+
+    if( p != st->strings )
+    { 
+        for( i = 0; i < st->sortcount; i++ )
+            st->sorted[i] = &p[i + 1]; 
+
+        st->valid_index = FALSE;
+        st->strings = p;
+    }
+
     st->freeslot = st->maxcount;
     st->maxcount = sz;
     if( st->strings[st->freeslot].persistent_refcount ||
@@ -155,10 +182,51 @@ static int st_find_free_entry( string_table *st )
     return st->freeslot;
 }
 
-static void set_st_entry( string_table *st, UINT n, LPWSTR str, UINT refcount, enum StringPersistence persistence )
+static int find_insert_index( const string_table *st, const msistring *string )
+{
+    int i, c, low = 0, high = st->sortcount - 1;
+
+    while (low <= high)
+    {
+        i = (low + high) / 2;
+        c = lstrcmpW( string->str, st->sorted[i]->str );
+
+        if (c < 0)
+            high = i - 1;
+        else if (c > 0)
+            low = i + 1;
+        else
+            return -1; /* already exists */
+    }
+
+    if (c > 0) return i + 1;
+    return i;
+}
+
+static void insert_string_sorted( string_table *st, const msistring *string )
 {
-    UINT hash = msistring_makehash( str );
+    int i;
 
+    if (!st->sortcount)
+    {
+        st->sorted[st->sortcount++] = string;
+        return;
+    }
+
+    if (!st->valid_index)
+        rebuild_index( st );
+
+    i = find_insert_index( st, string );
+    if (i == -1)
+        return;
+
+    memmove( &st->sorted[i] + 1, &st->sorted[i], (st->sortcount - i) * sizeof(msistring *) );
+    st->sorted[i] = string;
+    st->sortcount++;
+}
+
+static void set_st_entry( string_table *st, UINT n, LPWSTR str, UINT refcount, enum StringPersistence persistence )
+{
     if (persistence == StringPersistent)
     {
         st->strings[n].persistent_refcount = refcount;
@@ -171,15 +239,15 @@ static void set_st_entry( string_table *st, UINT n, LPWSTR str, UINT refcount, e
     }
 
     st->strings[n].str = str;
+    st->strings[n].id = n;
 
-    st->strings[n].hash_next = st->hash[hash];
-    st->hash[hash] = n;
+    insert_string_sorted( st, &st->strings[n] );
 
     if( n < st->maxcount )
         st->freeslot = n + 1;
 }
 
-static UINT msi_string2idA( const string_table *st, LPCSTR buffer, UINT *id )
+static UINT msi_string2idA( string_table *st, LPCSTR buffer, UINT *id )
 {
     DWORD sz;
     UINT r = ERROR_INVALID_PARAMETER;
@@ -380,18 +448,19 @@ static UINT msi_id2stringA( const string_table *st, UINT id, LPSTR buffer, UINT
  *  [in] str        - string to find in the string table
  *  [out] id        - id of the string, if found
  */
-UINT msi_string2idW( const string_table *st, LPCWSTR str, UINT *id )
+UINT msi_string2idW( string_table *st, LPCWSTR str, UINT *id )
 {
-    UINT n, hash = msistring_makehash( str );
-    msistring *se = st->strings;
+    msistring **res, key, *k = &key;
+
+    if (!st->valid_index)
+        rebuild_index( st );
 
-    for (n = st->hash[hash]; n != -1; n = st->strings[n].hash_next )
+    key.str = (WCHAR *)str;
+    res = bsearch( &k, st->sorted, st->sortcount, sizeof(msistring *), string_compare );
+    if( res )
     {
-        if ((str == se[n].str) || !lstrcmpW(str, se[n].str))
-        {
-            *id = n;
-            return ERROR_SUCCESS;
-        }
+        *id = (*res)->id;
+        return ERROR_SUCCESS;
     }
 
     return ERROR_INVALID_PARAMETER;
-- 
1.6.3.3




More information about the wine-patches mailing list