[PATCH 3/3] gdi32: Unroll the start- and end-of-line loops.
Huw Davies
huw at codeweavers.com
Wed Aug 24 02:55:34 CDT 2016
Signed-off-by: Huw Davies <huw at codeweavers.com>
---
dlls/gdi32/dibdrv/primitives.c | 349 +++++++++++++++++++++++++++++++++++++----
1 file changed, 319 insertions(+), 30 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c
index b547aa3..c3cb0d4 100644
--- a/dlls/gdi32/dibdrv/primitives.c
+++ b/dlls/gdi32/dibdrv/primitives.c
@@ -5304,17 +5304,46 @@ static void mask_rect_32( const dib_info *dst, const RECT *rc,
return;
}
+ full = ((rc->right - rc->left) - ((8 - (origin->x & 7)) & 7)) / 8;
+
for (y = rc->top; y < rc->bottom; y++)
{
pos = origin->x & 7;
+ src_val = src_start[pos / 8];
+ x = 0;
- for (x = 0; x < rc->right - rc->left && pos < 8; x++, pos++)
+ switch (pos & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_32( dst_start + x, dst_colors[bit_val], &codes );
+ case 1:
+ bit_val = (src_val >> 6) & 1; /* keep only this pixel's bit so the colour index is 0 or 1, as in the loop being replaced */
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 1) & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 7:
+ bit_val = src_val & 1;
+ do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
+ pos = (pos + 7) & ~7;
}
- full = ((rc->right - rc->left) - x) / 8;
for (i = 0; i < full; i++, pos += 8)
{
src_val = src_start[pos / 8];
@@ -5337,10 +5366,41 @@ static void mask_rect_32( const dib_info *dst, const RECT *rc,
do_rop_codes_32( dst_start + x++, dst_colors[bit_val], &codes );
}
- for ( ; x < rc->right - rc->left; x++, pos++)
+ if (origin_end & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_32( dst_start + x, dst_colors[bit_val], &codes );
+ src_val = src_start[pos / 8];
+ x += (origin_end & 7) - 1;
+
+ switch (origin_end & 7)
+ {
+ case 7:
+ bit_val = (src_val >> 1) & 1; /* keep only this pixel's bit so the colour index is 0 or 1, as in the loop being replaced */
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 6) & 1;
+ do_rop_codes_32( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 1:
+ bit_val = (src_val >> 7) & 1;
+ do_rop_codes_32( dst_start + x, dst_colors[bit_val], &codes );
+ }
}
dst_start += dst->stride / 4;
@@ -5381,20 +5441,74 @@ static void mask_rect_24( const dib_info *dst, const RECT *rc,
return;
}
+ full = ((rc->right - rc->left) - ((8 - (origin->x & 7)) & 7)) / 8;
+
for (y = rc->top; y < rc->bottom; y++)
{
pos = origin->x & 7;
+ src_val = src_start[pos / 8];
+ x = 0;
- for (x = 0; x < rc->right - rc->left && pos < 8; x++, pos++)
+ switch (pos & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
+ case 1:
+ bit_val = (src_val >> 6) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 5) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 4) & 1;
rgb = color_table[bit_val];
do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 3) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 2) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 1) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ /* fall through */
+ case 7:
+ bit_val = src_val & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x++;
+ pos = (pos + 7) & ~7;
}
- full = ((rc->right - rc->left) - x) / 8;
for (i = 0; i < full; i++, pos += 8)
{
src_val = src_start[pos / 8];
@@ -5456,13 +5570,68 @@ static void mask_rect_24( const dib_info *dst, const RECT *rc,
x++;
}
- for ( ; x < rc->right - rc->left; x++, pos++)
+ if (origin_end & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- rgb = color_table[bit_val];
- do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
- do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
- do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ src_val = src_start[pos / 8];
+ x += (origin_end & 7) - 1;
+
+ switch (origin_end & 7)
+ {
+ case 7:
+ bit_val = (src_val >> 1) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 2) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 3) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 4) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 5) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 6) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ x--;
+ /* fall through */
+ case 1:
+ bit_val = (src_val >> 7) & 1;
+ rgb = color_table[bit_val];
+ do_rop_codes_8( dst_start + x * 3, rgb.rgbBlue, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 1, rgb.rgbGreen, &codes );
+ do_rop_codes_8( dst_start + x * 3 + 2, rgb.rgbRed, &codes );
+ }
}
dst_start += dst->stride;
@@ -5510,17 +5679,46 @@ static void mask_rect_16( const dib_info *dst, const RECT *rc,
return;
}
+ full = ((rc->right - rc->left) - ((8 - (origin->x & 7)) & 7)) / 8;
+
for (y = rc->top; y < rc->bottom; y++)
{
pos = origin->x & 7;
+ src_val = src_start[pos / 8];
+ x = 0;
- for (x = 0; x < rc->right - rc->left && pos < 8; x++, pos++)
+ switch (pos & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_16( dst_start + x, dst_colors[bit_val], &codes );
+ case 1:
+ bit_val = (src_val >> 6) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 1) & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 7:
+ bit_val = src_val & 1;
+ do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
+ pos = (pos + 7) & ~7;
}
- full = ((rc->right - rc->left) - x) / 8;
for (i = 0; i < full; i++, pos += 8)
{
src_val = src_start[pos / 8];
@@ -5543,10 +5741,41 @@ static void mask_rect_16( const dib_info *dst, const RECT *rc,
do_rop_codes_16( dst_start + x++, dst_colors[bit_val], &codes );
}
- for ( ; x < rc->right - rc->left; x++, pos++)
+ if (origin_end & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_16( dst_start + x, dst_colors[bit_val], &codes );
+ src_val = src_start[pos / 8];
+ x += (origin_end & 7) - 1;
+
+ switch (origin_end & 7)
+ {
+ case 7:
+ bit_val = (src_val >> 1) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 6) & 1;
+ do_rop_codes_16( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 1:
+ bit_val = (src_val >> 7) & 1;
+ do_rop_codes_16( dst_start + x, dst_colors[bit_val], &codes );
+ }
}
dst_start += dst->stride / 2;
@@ -5587,17 +5816,46 @@ static void mask_rect_8( const dib_info *dst, const RECT *rc,
return;
}
+ full = ((rc->right - rc->left) - ((8 - (origin->x & 7)) & 7)) / 8;
+
for (y = rc->top; y < rc->bottom; y++)
{
pos = origin->x & 7;
+ src_val = src_start[pos / 8];
+ x = 0;
- for (x = 0; x < rc->right - rc->left && pos < 8; x++, pos++)
+ switch (pos & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_8( dst_start + x, dst_colors[bit_val], &codes );
+ case 1:
+ bit_val = (src_val >> 6) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 1) & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 7:
+ bit_val = src_val & 1;
+ do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
+ pos = (pos + 7) & ~7;
}
- full = ((rc->right - rc->left) - x) / 8;
for (i = 0; i < full; i++, pos += 8)
{
src_val = src_start[pos / 8];
@@ -5620,10 +5878,41 @@ static void mask_rect_8( const dib_info *dst, const RECT *rc,
do_rop_codes_8( dst_start + x++, dst_colors[bit_val], &codes );
}
- for ( ; x < rc->right - rc->left; x++, pos++)
+ if (origin_end & 7)
{
- bit_val = (src_start[pos / 8] & pixel_masks_1[pos % 8]) ? 1 : 0;
- do_rop_codes_8( dst_start + x, dst_colors[bit_val], &codes );
+ src_val = src_start[pos / 8];
+ x += (origin_end & 7) - 1;
+
+ switch (origin_end & 7)
+ {
+ case 7:
+ bit_val = (src_val >> 1) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 6:
+ bit_val = (src_val >> 2) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 5:
+ bit_val = (src_val >> 3) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 4:
+ bit_val = (src_val >> 4) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 3:
+ bit_val = (src_val >> 5) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 2:
+ bit_val = (src_val >> 6) & 1;
+ do_rop_codes_8( dst_start + x--, dst_colors[bit_val], &codes );
+ /* fall through */
+ case 1:
+ bit_val = (src_val >> 7) & 1;
+ do_rop_codes_8( dst_start + x, dst_colors[bit_val], &codes );
+ }
}
dst_start += dst->stride;
--
2.7.4
More information about the wine-patches
mailing list