[PATCH] WineD3D: Implement YUV emulation with =

Stefan Doesinger stefan at codeweavers.com
Tue Aug 19 11:09:45 CDT 2008


GL_ARB_fragment_program=0A=
=0A=
---=0A=
 dlls/wined3d/arb_program_shader.c |  293 =
+++++++++++++++++++++++++++++++++++++=0A=
 dlls/wined3d/directx.c            |    7 +-=0A=
 dlls/wined3d/wined3d_private.h    |    1 +=0A=
 3 files changed, 299 insertions(+), 2 deletions(-)=0A=
=0A=
diff --git a/dlls/wined3d/arb_program_shader.c =
b/dlls/wined3d/arb_program_shader.c=0A=
index 32373b4..21eb815 100644=0A=
--- a/dlls/wined3d/arb_program_shader.c=0A=
+++ b/dlls/wined3d/arb_program_shader.c=0A=
@@ -2982,3 +2982,296 @@ const struct fragment_pipeline =
arbfp_fragment_pipeline =3D {=0A=
     shader_arb_conv_supported,=0A=
     arbfp_fragmentstate_template=0A=
 };=0A=
+=0A=
+#define GLINFO_LOCATION device->adapter->gl_info=0A=
+=0A=
+struct arbfp_blit_priv {=0A=
+    GLenum yuy2_rect_shader, yuy2_2d_shader;=0A=
+    GLenum uyvy_rect_shader, uyvy_2d_shader;=0A=
+};=0A=
+=0A=
+static HRESULT arbfp_blit_alloc(IWineD3DDevice *iface) {=0A=
+    IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl *) iface;=0A=
+    device->blit_priv =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
sizeof(struct arbfp_blit_priv));=0A=
+    if(!device->blit_priv) {=0A=
+        ERR("Out of memory\n");=0A=
+        return E_OUTOFMEMORY;=0A=
+    }=0A=
+    return WINED3D_OK;=0A=
+}=0A=
+static void arbfp_blit_free(IWineD3DDevice *iface) {=0A=
+    IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl *) iface;=0A=
+    struct arbfp_blit_priv *priv =3D (struct arbfp_blit_priv *) =
device->blit_priv;=0A=
+=0A=
+    ENTER_GL();=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader));=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader));=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader));=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));=0A=
+    checkGLcall("Delete yuv programs\n");=0A=
+    LEAVE_GL();=0A=
+}=0A=
+=0A=
+GLenum gen_yuv_shader(IWineD3DDeviceImpl *device, WINED3DFORMAT fmt, =
GLenum textype) {=0A=
+    GLenum shader;=0A=
+    SHADER_BUFFER buffer;=0A=
+    const char *tex, *texinstr;=0A=
+    char chroma, luminance;=0A=
+    struct arbfp_blit_priv *priv =3D (struct arbfp_blit_priv *) =
device->blit_priv;=0A=
+=0A=
+    /* Shader header */=0A=
+    buffer.bsize =3D 0;=0A=
+    buffer.lineNo =3D 0;=0A=
+    buffer.newline =3D TRUE;=0A=
+    buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE);=0A=
+=0A=
+    switch(textype) {=0A=
+        case GL_TEXTURE_2D:             tex =3D "2D";     texinstr =3D =
"TXP"; break;=0A=
+        case GL_TEXTURE_RECTANGLE_ARB:  tex =3D "RECT";   texinstr =3D =
"TEX"; break;=0A=
+        default:=0A=
+            /* This is more tricky than just replacing the texture type =
- we have to navigate=0A=
+             * properly in the texture to find the correct chroma values=0A=
+             */=0A=
+            FIXME("Implement yuv correction for non-2d, non-rect =
textures\n");=0A=
+            return 0;=0A=
+    }=0A=
+=0A=
+    if(fmt =3D=3D WINED3DFMT_UYVY) {=0A=
+        chroma =3D 'r';=0A=
+        luminance =3D 'a';=0A=
+    } else {=0A=
+        chroma =3D 'a';=0A=
+        luminance =3D 'r';=0A=
+    }=0A=
+=0A=
+    GL_EXTCALL(glGenProgramsARB(1, &shader));=0A=
+    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");=0A=
+    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));=0A=
+    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");=0A=
+    if(!shader) return 0;=0A=
+=0A=
+    /* The YUY2 and UYVY formats contain two pixels packed into a 32 =
bit macropixel,=0A=
+     * giving effectively 16 bit per pixel. The color consists of a =
luminance(Y) and=0A=
+     * two chroma(U and V) values. Each macropixel has two luminance =
values, one for=0A=
+     * each single pixel it contains, and one U and one V value shared =
between both=0A=
+     * pixels.=0A=
+     *=0A=
+     * The data is loaded into an A8L8 texture. With YUY2, the =
luminance component=0A=
+     * contains the luminance and alpha the chroma. With UYVY it is =
vice versa. Thus=0A=
+     * take the format into account when generating the read swizzles=0A=
+     *=0A=
+     * Reading the Y value is straightforward - just sample the =
texture. The hardware=0A=
+     * takes care of filtering in the horizontal and vertical direction.=0A=
+     *=0A=
+     * Reading the U and V values is harder. We have to avoid filtering =
horizontally,=0A=
+     * because that would mix the U and V values of one pixel or two =
adjacent pixels.=0A=
+     * Thus floor the texture coordinate and add 0.5 to get an =
unfiltered read,=0A=
+     * regardless of the filtering setting. Vertical filtering works =
automatically=0A=
+     * though - the U and V values of two rows are mixed nicely.=0A=
+     *=0A=
+     * Apart from avoiding filtering issues, the code has to know which =
value it just=0A=
+     * read, and where it can find the other one. To determine this, it =
checks if=0A=
+     * it sampled an even or odd pixel, and shifts the 2nd read =
accordingly.=0A=
+     *=0A=
+     * Handling horizontal filtering of U and V values requires reading =
a 2nd pair=0A=
+     * of pixels, extracting U and V and mixing them. This is not =
implemented yet.=0A=
+     *=0A=
+     * An alternative implementation idea is to load the texture as =
A8R8G8B8 texture,=0A=
+     * with width / 2. This way one read gives all 3 values, finding U =
and V is easy=0A=
+     * in an unfiltered situation. Finding the luminance on the other =
hand requires=0A=
+     * finding out if it is an odd or even pixel. The real drawback of =
this approach=0A=
+     * is filtering. This would have to be emulated completely in the =
shader, reading=0A=
+     * up to 2 packed pixels in up to 2 rows and interpolating both =
horizontally and=0A=
+     * vertically. Beyond that it would require adjustments to the =
texture handling=0A=
+     * code to deal with the width scaling=0A=
+     */=0A=
+    shader_addline(&buffer, "!!ARBfp1.0\n");=0A=
+    shader_addline(&buffer, "TEMP luminance;\n");=0A=
+    shader_addline(&buffer, "TEMP temp;\n");=0A=
+    shader_addline(&buffer, "TEMP chroma;\n");=0A=
+    shader_addline(&buffer, "TEMP texcrd;\n");=0A=
+    shader_addline(&buffer, "TEMP texcrd2;\n");=0A=
+    shader_addline(&buffer, "PARAM coef =3D {1.0, 0.5, 2.0, 0.0};\n");=0A=
+    shader_addline(&buffer, "PARAM yuv_coef =3D {1.403, 0.344, 0.714, =
1.770};\n");=0A=
+    shader_addline(&buffer, "PARAM size =3D program.local[0];\n");=0A=
+=0A=
+    /* First we have to read the chroma values. This means we need at =
least two pixels(no filtering),=0A=
+     * or 4 pixels(with filtering). To get the unmodified chromas, we =
have to rid ourselves of the=0A=
+     * filtering when we sample the texture.=0A=
+     *=0A=
+     * These are the rules for reading the chroma:=0A=
+     *=0A=
+     * Even pixel: U=0A=
+     * Odd pixel: V=0A=
+     * (U is kept in chroma.g, V in chroma.r)=0A=
+     *=0A=
+     * So we have to get the sampling x position in non-normalized =
coordinates in integers=0A=
+     */=0A=
+    if(textype !=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+        shader_addline(&buffer, "MUL texcrd.rg, fragment.texcoord[0], =
size.x;\n");=0A=
+        shader_addline(&buffer, "MOV texcrd.a, size.x;\n");=0A=
+    } else {=0A=
+        shader_addline(&buffer, "MOV texcrd, fragment.texcoord[0];\n");=0A=
+    }=0A=
+    /* We must not allow filtering between pixel x and x+1, this would =
mix U and V=0A=
+     * Vertical filtering is ok. However, bear in mind that the pixel =
center is at=0A=
+     * 0.5, so add 0.5.=0A=
+     */=0A=
+    shader_addline(&buffer, "FLR texcrd.x, texcrd.x;\n");=0A=
+    shader_addline(&buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");=0A=
+=0A=
+    /* Multiply the x coordinate by 0.5 and get the fraction. This gives =
0.25 and 0.75 for the=0A=
+     * even and odd pixels respectively=0A=
+     */=0A=
+    shader_addline(&buffer, "MUL texcrd2, texcrd, coef.y;\n");=0A=
+    shader_addline(&buffer, "FRC texcrd2, texcrd2;\n");=0A=
+=0A=
+    /* Sample Pixel 1 */=0A=
+    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
+=0A=
+    /* Put the value into either of the chroma values */=0A=
+    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(&buffer, "MUL chroma.r, luminance.%c, temp.x;\n", =
chroma);=0A=
+    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(&buffer, "MUL chroma.g, luminance.%c, temp.x;\n", =
chroma);=0A=
+=0A=
+    /* Sample pixel 2. If we read an even pixel(SLT above returned 1), =
sample=0A=
+     * the pixel right to the current one. Otherwise, sample the left =
pixel.=0A=
+     * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.=0A=
+     */=0A=
+    shader_addline(&buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");=0A=
+    shader_addline(&buffer, "ADD texcrd.x, texcrd, temp.x;\n");=0A=
+    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
+=0A=
+    /* Put the value into the other chroma */=0A=
+    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(&buffer, "MAD chroma.g, luminance.%c, temp.x, =
chroma.g;\n", chroma);=0A=
+    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(&buffer, "MAD chroma.r, luminance.%c, temp.x, =
chroma.r;\n", chroma);=0A=
+=0A=
+    /* TODO: If filtering is enabled, sample a 2nd pair of pixels left =
or right of=0A=
+     * the current one and lerp the two U and V values=0A=
+     */=0A=
+=0A=
+    /* This gives the correctly filtered luminance value */=0A=
+    shader_addline(&buffer, "TEX luminance, fragment.texcoord[0], =
texture[0], %s;\n", tex);=0A=
+=0A=
+    /* Calculate the final result. Formula is taken from=0A=
+     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma=0A=
+     * ranges from -0.5 to 0.5=0A=
+     */=0A=
+    shader_addline(&buffer, "SUB chroma.rg, chroma, coef.y;\n");=0A=
+=0A=
+    shader_addline(&buffer, "MAD result.color.r, chroma.r, yuv_coef.x, =
luminance.%c;\n", luminance);=0A=
+    shader_addline(&buffer, "MAD temp.r, -chroma.g, yuv_coef.y, =
luminance.%c;\n", luminance);=0A=
+    shader_addline(&buffer, "MAD result.color.g, -chroma.r, yuv_coef.z, =
temp.r;\n");=0A=
+    shader_addline(&buffer, "MAD result.color.b, chroma.g, yuv_coef.w, =
luminance.%c;\n", luminance);=0A=
+    shader_addline(&buffer, "END\n");=0A=
+=0A=
+    GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, =
GL_PROGRAM_FORMAT_ASCII_ARB, strlen(buffer.buffer), buffer.buffer));=0A=
+=0A=
+    if (glGetError() =3D=3D GL_INVALID_OPERATION) {=0A=
+        GLint pos;=0A=
+        glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);=0A=
+        FIXME("Fragment program error at position %d: %s\n", pos,=0A=
+              debugstr_a((const char =
*)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));=0A=
+    }=0A=
+=0A=
+    if(fmt =3D=3D WINED3DFMT_YUY2) {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            priv->yuy2_rect_shader =3D shader;=0A=
+        } else {=0A=
+            priv->yuy2_2d_shader =3D shader;=0A=
+        }=0A=
+    } else {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            priv->uyvy_rect_shader =3D shader;=0A=
+        } else {=0A=
+            priv->uyvy_2d_shader =3D shader;=0A=
+        }=0A=
+    }=0A=
+    return shader;=0A=
+}=0A=
+=0A=
+static HRESULT arbfp_blit_set(IWineD3DDevice *iface, WINED3DFORMAT fmt, =
GLenum textype, UINT width, UINT height) {=0A=
+    GLenum shader;=0A=
+    IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl *) iface;=0A=
+    float size[4] =3D {width, height, 1, 1};=0A=
+    struct arbfp_blit_priv *priv =3D (struct arbfp_blit_priv *) =
device->blit_priv;=0A=
+    const GlPixelFormatDesc *glDesc;=0A=
+=0A=
+    getFormatDescEntry(fmt, &GLINFO_LOCATION, &glDesc);=0A=
+=0A=
+    if(glDesc->conversion_group !=3D WINED3DFMT_YUY2 && =
glDesc->conversion_group !=3D WINED3DFMT_UYVY) {=0A=
+        /* Don't bother setting up a shader for unconverted formats */=0A=
+        glEnable(textype);=0A=
+        checkGLcall("glEnable(textype)");=0A=
+        return WINED3D_OK;=0A=
+    }=0A=
+=0A=
+    if(glDesc->conversion_group =3D=3D WINED3DFMT_YUY2) {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            shader =3D priv->yuy2_rect_shader;=0A=
+        } else {=0A=
+            shader =3D priv->yuy2_2d_shader;=0A=
+        }=0A=
+    } else {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            shader =3D priv->uyvy_rect_shader;=0A=
+        } else {=0A=
+            shader =3D priv->uyvy_2d_shader;=0A=
+        }=0A=
+    }=0A=
+=0A=
+    if(!shader) {=0A=
+        shader =3D gen_yuv_shader(device, glDesc->conversion_group, =
textype);=0A=
+    }=0A=
+=0A=
+    glEnable(GL_FRAGMENT_PROGRAM_ARB);=0A=
+    checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");=0A=
+    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));=0A=
+    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");=0A=
+    GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, =
0, size));=0A=
+    checkGLcall("glProgramLocalParameter4fvARB");=0A=
+=0A=
+    return WINED3D_OK;=0A=
+}=0A=
+=0A=
+static void arbfp_blit_unset(IWineD3DDevice *iface) {=0A=
+    IWineD3DDeviceImpl *device =3D (IWineD3DDeviceImpl *) iface;=0A=
+    glDisable(GL_FRAGMENT_PROGRAM_ARB);=0A=
+    checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");=0A=
+    glDisable(GL_TEXTURE_2D);=0A=
+    checkGLcall("glDisable(GL_TEXTURE_2D)");=0A=
+    if(GL_SUPPORT(ARB_TEXTURE_CUBE_MAP)) {=0A=
+        glDisable(GL_TEXTURE_CUBE_MAP_ARB);=0A=
+        checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");=0A=
+    }=0A=
+    if(GL_SUPPORT(ARB_TEXTURE_RECTANGLE)) {=0A=
+        glDisable(GL_TEXTURE_RECTANGLE_ARB);=0A=
+        checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");=0A=
+    }=0A=
+}=0A=
+=0A=
+static BOOL arbfp_blit_conv_supported(WINED3DFORMAT fmt) {=0A=
+    TRACE("Checking blit format support for format %s:", =
debug_d3dformat(fmt));=0A=
+    switch(fmt) {=0A=
+        case WINED3DFMT_YUY2:=0A=
+        case WINED3DFMT_UYVY:=0A=
+            TRACE("[OK]\n");=0A=
+            return TRUE;=0A=
+        default:=0A=
+            TRACE("[FAILED]\n");=0A=
+            return FALSE;=0A=
+    }=0A=
+}=0A=
+=0A=
+const struct blit_shader arbfp_blit =3D {=0A=
+    arbfp_blit_alloc,=0A=
+    arbfp_blit_free,=0A=
+    arbfp_blit_set,=0A=
+    arbfp_blit_unset,=0A=
+    arbfp_blit_conv_supported=0A=
+};=0A=
+=0A=
+#undef GLINFO_LOCATION=0A=
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c=0A=
index f0cca69..bd7f68b 100644=0A=
--- a/dlls/wined3d/directx.c=0A=
+++ b/dlls/wined3d/directx.c=0A=
@@ -2953,8 +2953,11 @@ static const struct blit_shader =
*select_blit_implementation(UINT Adapter, WINED3=0A=
     int ps_selected_mode;=0A=
 =0A=
     select_shader_mode(&GLINFO_LOCATION, DeviceType, &ps_selected_mode, =
&vs_selected_mode);=0A=
-    return &ffp_blit;=0A=
-=0A=
+    if((ps_selected_mode =3D=3D SHADER_ARB || ps_selected_mode =3D=3D =
SHADER_GLSL) && GL_SUPPORT(ARB_FRAGMENT_PROGRAM)) {=0A=
+        return &arbfp_blit;=0A=
+    } else {=0A=
+        return &ffp_blit;=0A=
+    }=0A=
 }=0A=
 =0A=
 /* Note: d3d8 passes in a pointer to a D3DCAPS8 structure, which is a =
true=0A=
diff --git a/dlls/wined3d/wined3d_private.h =
b/dlls/wined3d/wined3d_private.h=0A=
index 0b8cca6..e4c6a41 100644=0A=
--- a/dlls/wined3d/wined3d_private.h=0A=
+++ b/dlls/wined3d/wined3d_private.h=0A=
@@ -587,6 +587,7 @@ struct blit_shader {=0A=
 };=0A=
 =0A=
 extern const struct blit_shader ffp_blit;=0A=
+extern const struct blit_shader arbfp_blit;=0A=
 =0A=
 /* The new context manager that should deal with onscreen and offscreen =
rendering */=0A=
 struct WineD3DContext {=0A=
-- =0A=
1.5.4.5=0A=
=0A=

------=_NextPart_000_0015_01C901F1.989FDED0--




More information about the wine-patches mailing list