[PATCH v2 3/3] gdi32: Use a global cache for frequently used color tables for RGB value lookups.
Huw Davies
huw at codeweavers.com
Wed Apr 7 04:03:33 CDT 2021
On Fri, Apr 02, 2021 at 03:54:02PM +0300, Gabriel Ivăncescu wrote:
> Usually, when there's performance issues with RGB into color table
> conversions, the same color table is used. So instead of generating it over
> and over, this caches it if it's frequently accessed (as should be the case
> when performance is an issue in the first place).
>
> Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=50898
> Signed-off-by: Gabriel Ivăncescu <gabrielopcode at gmail.com>
> ---
> dlls/gdi32/dibdrv/primitives.c | 89 +++++++++++++++++++++++++++++++++-
> 1 file changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c
> index 8947c60..6071280 100644
> --- a/dlls/gdi32/dibdrv/primitives.c
> +++ b/dlls/gdi32/dibdrv/primitives.c
> @@ -3511,12 +3511,99 @@ struct rgb_lookup_colortable_ctx
>
> static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx)
> {
> - unsigned r, g, b;
> + /*
> + Globally cache frequently generated maps, as most apps usually only use a few color tables often.
> + We keep the variables separated for better cache locality when looking them up (tables are large).
> + */
> + static SRWLOCK global_cache_lock = SRWLOCK_INIT;
> + static volatile DWORD global_cache_sequence;
> +
> + /* ticks when the cached map was last accessed or generated; we discard the oldest first */
> + static DWORD global_cache_ticks[4];
> +
> + /* number of colors in the given cached color table */
> + static unsigned global_cache_color_table_size[ARRAY_SIZE(global_cache_ticks)];
> +
> + /* the actual cached color tables to lookup and see if they're cached */
> + static RGBQUAD global_cache_color_table[ARRAY_SIZE(global_cache_ticks)][256];
> +
> + /* the associated generated maps for the color tables above */
> + static BYTE global_cache_map[ARRAY_SIZE(global_cache_ticks)][ARRAY_SIZE(ctx->map)];
> +
> +
> + unsigned color_table_size = dib->color_table ? dib->color_table_size : 1 << dib->bit_count;
> + const RGBQUAD *color_table = get_dib_color_table(dib);
> + DWORD newest, oldest_entry, sequence;
> + unsigned i, r, g, b;
> +
> + /* check the global cache first */
> + if (TryAcquireSRWLockShared(&global_cache_lock))
> + {
> + newest = global_cache_ticks[0];
> + for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> + {
> + if (color_table_size == global_cache_color_table_size[i] &&
> + !memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
> + {
> + memcpy(ctx->map, global_cache_map[i], sizeof(ctx->map));
> +
> + /* it's a shared lock, but the exact timestamp is unimportant, so it's OK */
> + global_cache_ticks[i] = GetTickCount();
> +
> + ReleaseSRWLockShared(&global_cache_lock);
> + return;
> + }
> + if ((LONG)(newest - global_cache_ticks[i]) < 0)
> + newest = global_cache_ticks[i];
> + }
> + sequence = global_cache_sequence;
> + ReleaseSRWLockShared(&global_cache_lock);
> + }
> + else
> + {
> + newest = global_cache_ticks[0];
> + sequence = global_cache_sequence - 1; /* make sure we re-check the cache */
> + }
> +
>
> for (b = 4; b < 256; b += 1 << 3)
> for (g = 4; g < 256; g += 1 << 3)
> for (r = 4; r < 256; r += 1 << 3)
> ctx->map[r >> 3 | (g & ~7) << 2 | (b & ~7) << 7] = rgb_to_pixel_colortable(dib, r, g, b);
> +
> +
> + /* update the global cache if needed */
> + AcquireSRWLockExclusive(&global_cache_lock);
> +
> + /* if the cache was updated underneath us, check it again */
> + if (sequence != global_cache_sequence)
> + {
> + for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> + {
> + if ((LONG)(newest - global_cache_ticks[i]) <= 0 &&
> + color_table_size == global_cache_color_table_size[i] &&
> + !memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
> + {
> + global_cache_ticks[i] = GetTickCount();
> + ReleaseSRWLockExclusive(&global_cache_lock);
> + return;
> + }
> + }
> + }
> + global_cache_sequence++;
> +
> + oldest_entry = 0;
> + for (i = 1; i < ARRAY_SIZE(global_cache_color_table_size); i++)
> + if ((LONG)(global_cache_ticks[oldest_entry] - global_cache_ticks[i]) > 0)
> + oldest_entry = i;
> +
> + memcpy(global_cache_color_table[oldest_entry], color_table, color_table_size * sizeof(color_table[0]));
> + memcpy(global_cache_map[oldest_entry], ctx->map, sizeof(ctx->map));
> +
> + global_cache_color_table_size[oldest_entry] = color_table_size;
> + global_cache_ticks[oldest_entry] = GetTickCount();
> +
> + ReleaseSRWLockExclusive(&global_cache_lock);
> }
Yeah, I'm not convinced, not even close. You're going to have to
provide some pretty good evidence, with real world apps, that this is
necessary over a local, lazy-init solution.
Huw.
More information about the wine-devel
mailing list