msi: Add binary search to the string table.

Hans Leidekker hans at codeweavers.com
Mon Dec 14 04:54:09 CST 2009


Addresses a performance bottleneck in the Visual Studio 2005 installer.
See http://bugs.winehq.org/show_bug.cgi?id=14168
---
 dlls/msi/msipriv.h |    2 +-
 dlls/msi/string.c  |  131 ++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 123 insertions(+), 10 deletions(-)

diff --git a/dlls/msi/msipriv.h b/dlls/msi/msipriv.h
index 0e1a23b..ce03180 100644
--- a/dlls/msi/msipriv.h
+++ b/dlls/msi/msipriv.h
@@ -662,7 +662,7 @@ enum StringPersistence
 
 extern BOOL msi_addstringW( string_table *st, UINT string_no, const WCHAR *data, int len, UINT refcount, enum StringPersistence persistence );
 
-extern UINT msi_string2idW( const string_table *st, LPCWSTR buffer, UINT *id );
+extern UINT msi_string2idW( string_table *st, LPCWSTR buffer, UINT *id );
 extern VOID msi_destroy_stringtable( string_table *st );
 extern const WCHAR *msi_string_lookup_id( const string_table *st, UINT id );
 extern HRESULT msi_init_string_table( IStorage *stg );
diff --git a/dlls/msi/string.c b/dlls/msi/string.c
index e5b5cc2..3ddb157 100644
--- a/dlls/msi/string.c
+++ b/dlls/msi/string.c
@@ -49,6 +49,7 @@ typedef struct _msistring
     UINT persistent_refcount;
     UINT nonpersistent_refcount;
     LPWSTR str;
+    UINT id;
 } msistring;
 
 struct string_table
@@ -56,8 +57,11 @@ struct string_table
     UINT maxcount;         /* the number of strings */
     UINT freeslot;
     UINT codepage;
+    UINT sortcount;
+    BOOL valid_index;
     int hash[HASH_SIZE];
     msistring *strings; /* an array of strings (in the tree) */
+    const msistring **sorted; /* index */
 };
 
 static UINT msistring_makehash( const WCHAR *str )
@@ -92,15 +96,27 @@ static string_table *init_stringtable( int entries, UINT codepage )
         return NULL;    
     if( entries < 1 )
         entries = 1;
+
     st->strings = msi_alloc_zero( sizeof (msistring) * entries );
     if( !st->strings )
     {
         msi_free( st );
         return NULL;    
     }
+
+    st->sorted = msi_alloc( sizeof (msistring *) * entries );
+    if( !st->sorted )
+    {
+        msi_free( st->strings );
+        msi_free( st );
+        return NULL;    
+    }
+
     st->maxcount = entries;
     st->freeslot = 1;
     st->codepage = codepage;
+    st->sortcount = 0;
+    st->valid_index = FALSE;
 
     for( i=0; i<HASH_SIZE; i++ )
         st->hash[i] = -1;
@@ -119,13 +135,30 @@ VOID msi_destroy_stringtable( string_table *st )
             msi_free( st->strings[i].str );
     }
     msi_free( st->strings );
+    msi_free( st->sorted );
     msi_free( st );
 }
 
+static int string_compare( const void *a, const void *b )
+{
+    const msistring * const *s1 = a;
+    const msistring * const *s2 = b;
+    return lstrcmpW( (*s1)->str, (*s2)->str );
+}
+
+static void rebuild_index( string_table *st )
+{
+    if (st->sortcount > 1)
+        qsort( st->sorted, st->sortcount, sizeof(msistring *), string_compare );
+
+    st->valid_index = TRUE;
+}
+
 static int st_find_free_entry( string_table *st )
 {
     UINT i, sz;
     msistring *p;
+    const msistring **s;
 
     TRACE("%p\n", st);
 
@@ -146,7 +179,24 @@ static int st_find_free_entry( string_table *st )
     p = msi_realloc_zero( st->strings, sz*sizeof(msistring) );
     if( !p )
         return -1;
-    st->strings = p;
+
+    s = msi_realloc( st->sorted, sz*sizeof(msistring *) );
+    if( !s )
+    {
+        msi_free( p );
+        return -1;
+    }
+    st->sorted = s;
+
+    if( p != st->strings )
+    { 
+        for( i = 0; i < st->sortcount; i++ )
+            st->sorted[i] = &p[i + 1]; 
+
+        st->valid_index = FALSE;
+        st->strings = p;
+    }
+
     st->freeslot = st->maxcount;
     st->maxcount = sz;
     if( st->strings[st->freeslot].persistent_refcount ||
@@ -155,6 +205,49 @@ static int st_find_free_entry( string_table *st )
     return st->freeslot;
 }
 
+static int find_insert_index( const string_table *st, const msistring *string )
+{
+    int i, c, low = 0, high = st->sortcount - 1;
+
+    while (low <= high)
+    {
+        i = (low + high) / 2;
+        c = lstrcmpW( string->str, st->sorted[i]->str );
+
+        if (c < 0)
+            high = i - 1;
+        else if (c > 0)
+            low = i + 1;
+        else
+            return -1; /* already exists */
+    }
+
+    if (c > 0) return i + 1;
+    return i;
+}
+
+static void insert_string_sorted( string_table *st, const msistring *string )
+{
+    int i;
+
+    if (!st->sortcount)
+    {
+        st->sorted[st->sortcount++] = string;
+        return;
+    }
+
+    if (!st->valid_index)
+        rebuild_index( st );
+
+    i = find_insert_index( st, string );
+    if (i == -1)
+        return;
+
+    memmove( &st->sorted[i] + 1, &st->sorted[i], (st->sortcount - i) * sizeof(msistring *) );
+    st->sorted[i] = string;
+    st->sortcount++;
+}
+
 static void set_st_entry( string_table *st, UINT n, LPWSTR str, UINT refcount, enum StringPersistence persistence )
 {
     UINT hash = msistring_makehash( str );
@@ -171,15 +264,18 @@ static void set_st_entry( string_table *st, UINT n, LPWSTR str, UINT refcount, e
     }
 
     st->strings[n].str = str;
-
     st->strings[n].hash_next = st->hash[hash];
+    st->strings[n].id = n;
+
     st->hash[hash] = n;
 
+    insert_string_sorted( st, &st->strings[n] );
+
     if( n < st->maxcount )
         st->freeslot = n + 1;
 }
 
-static UINT msi_string2idA( const string_table *st, LPCSTR buffer, UINT *id )
+static UINT msi_string2idA( string_table *st, LPCSTR buffer, UINT *id )
 {
     DWORD sz;
     UINT r = ERROR_INVALID_PARAMETER;
@@ -380,16 +476,33 @@ static UINT msi_id2stringA( const string_table *st, UINT id, LPSTR buffer, UINT
  *  [in] str        - string to find in the string table
  *  [out] id        - id of the string, if found
  */
-UINT msi_string2idW( const string_table *st, LPCWSTR str, UINT *id )
+UINT msi_string2idW( string_table *st, LPCWSTR str, UINT *id )
 {
-    UINT n, hash = msistring_makehash( str );
-    msistring *se = st->strings;
+    if (st->freeslot / HASH_SIZE < 10)
+    {
+        UINT n, hash = msistring_makehash( str );
 
-    for (n = st->hash[hash]; n != -1; n = st->strings[n].hash_next )
+        for (n = st->hash[hash]; n != -1; n = st->strings[n].hash_next)
+        {
+            if (str == st->strings[n].str || !lstrcmpW( str, st->strings[n].str ))
+            {
+                *id = n;
+                return ERROR_SUCCESS;
+            }
+        }
+    }
+    else
     {
-        if ((str == se[n].str) || !lstrcmpW(str, se[n].str))
+        msistring **res, key, *k = &key;
+
+        if (!st->valid_index)
+            rebuild_index( st );
+
+        key.str = (WCHAR *)str;
+        res = bsearch( &k, st->sorted, st->sortcount, sizeof(msistring *), string_compare );
+        if( res )
         {
-            *id = n;
+            *id = (*res)->id;
             return ERROR_SUCCESS;
         }
     }
-- 
1.6.3.3




More information about the wine-patches mailing list