[PATCH v2 3/3] gdi32: Use a global cache for frequently used color tables for RGB value lookups.

Huw Davies huw at codeweavers.com
Wed Apr 7 04:03:33 CDT 2021


On Fri, Apr 02, 2021 at 03:54:02PM +0300, Gabriel Ivăncescu wrote:
> Usually, when there's performance issues with RGB into color table
> conversions, the same color table is used. So instead of generating it over
> and over, this caches it if it's frequently accessed (as should be the case
> when performance is an issue in the first place).
> 
> Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=50898
> Signed-off-by: Gabriel Ivăncescu <gabrielopcode at gmail.com>
> ---
>  dlls/gdi32/dibdrv/primitives.c | 89 +++++++++++++++++++++++++++++++++-
>  1 file changed, 88 insertions(+), 1 deletion(-)
> 
> diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c
> index 8947c60..6071280 100644
> --- a/dlls/gdi32/dibdrv/primitives.c
> +++ b/dlls/gdi32/dibdrv/primitives.c
> @@ -3511,12 +3511,99 @@ struct rgb_lookup_colortable_ctx
>  
>  static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx)
>  {
> -    unsigned r, g, b;
> +    /*
> +       Globally cache frequently generated maps, as most apps usually only use a few color tables often.
> +       We keep the variables separated for better cache locality when looking them up (tables are large).
> +    */
> +    static SRWLOCK global_cache_lock = SRWLOCK_INIT;
> +    static volatile DWORD global_cache_sequence;
> +
> +    /* ticks when the cached map was last accessed or generated; we discard the oldest first */
> +    static DWORD global_cache_ticks[4];
> +
> +    /* number of colors in the given cached color table */
> +    static unsigned global_cache_color_table_size[ARRAY_SIZE(global_cache_ticks)];
> +
> +    /* the actual cached color tables to lookup and see if they're cached */
> +    static RGBQUAD global_cache_color_table[ARRAY_SIZE(global_cache_ticks)][256];
> +
> +    /* the associated generated maps for the color tables above */
> +    static BYTE global_cache_map[ARRAY_SIZE(global_cache_ticks)][ARRAY_SIZE(ctx->map)];
> +
> +
> +    unsigned color_table_size = dib->color_table ? dib->color_table_size : 1 << dib->bit_count;
> +    const RGBQUAD *color_table = get_dib_color_table(dib);
> +    DWORD newest, oldest_entry, sequence;
> +    unsigned i, r, g, b;
> +
> +    /* check the global cache first */
> +    if (TryAcquireSRWLockShared(&global_cache_lock))
> +    {
> +        newest = global_cache_ticks[0];
> +        for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> +        {
> +            if (color_table_size == global_cache_color_table_size[i] &&
> +                !memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
> +            {
> +                memcpy(ctx->map, global_cache_map[i], sizeof(ctx->map));
> +
> +                /* it's a shared lock, but the exact timestamp is unimportant, so it's OK */
> +                global_cache_ticks[i] = GetTickCount();
> +
> +                ReleaseSRWLockShared(&global_cache_lock);
> +                return;
> +            }
> +            if ((LONG)(newest - global_cache_ticks[i]) < 0)
> +                newest = global_cache_ticks[i];
> +        }
> +        sequence = global_cache_sequence;
> +        ReleaseSRWLockShared(&global_cache_lock);
> +    }
> +    else
> +    {
> +        newest = global_cache_ticks[0];
> +        sequence = global_cache_sequence - 1;  /* make sure we re-check the cache */
> +    }
> +
>  
>      for (b = 4; b < 256; b += 1 << 3)
>          for (g = 4; g < 256; g += 1 << 3)
>              for (r = 4; r < 256; r += 1 << 3)
>                  ctx->map[r >> 3 | (g & ~7) << 2 | (b & ~7) << 7] = rgb_to_pixel_colortable(dib, r, g, b);
> +
> +
> +    /* update the global cache if needed */
> +    AcquireSRWLockExclusive(&global_cache_lock);
> +
> +    /* if the cache was updated underneath us, check it again */
> +    if (sequence != global_cache_sequence)
> +    {
> +        for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> +        {
> +            if ((LONG)(newest - global_cache_ticks[i]) <= 0 &&
> +                color_table_size == global_cache_color_table_size[i] &&
> +                !memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
> +            {
> +                global_cache_ticks[i] = GetTickCount();
> +                ReleaseSRWLockExclusive(&global_cache_lock);
> +                return;
> +            }
> +        }
> +    }
> +    global_cache_sequence++;
> +
> +    oldest_entry = 0;
> +    for (i = 1; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> +        if ((LONG)(global_cache_ticks[oldest_entry] - global_cache_ticks[i]) > 0)
> +            oldest_entry = i;
> +
> +    memcpy(global_cache_color_table[oldest_entry], color_table, color_table_size * sizeof(color_table[0]));
> +    memcpy(global_cache_map[oldest_entry], ctx->map, sizeof(ctx->map));
> +
> +    global_cache_color_table_size[oldest_entry] = color_table_size;
> +    global_cache_ticks[oldest_entry] = GetTickCount();
> +
> +    ReleaseSRWLockExclusive(&global_cache_lock);
>  }

Yeah, I'm not convinced, not even close.  You're going to have to
provide some pretty good evidence, with real world apps, that this is
necessary over a local, lazy-init solution.

Huw.



More information about the wine-devel mailing list