[PATCH] WineD3D: Implement YV12 support for emulated overlays=0A=

Tue Aug 26 14:36:30 CDT 2008

=0A=
This is the preferred format of many codecs, and for some codecs=0A=
this is the only supported output format. As usual I try to=0A=
handle all the conversion in the GPU and keep the CPU involvement=0A=
minimal to gain the full performance of PBO transfers.=0A=
=0A=
Tests on Windows show that the pitch is equal to the width, so we=0A=
have to treat the surface as 8 bits per pixel. However, the=0A=
surface has 1.5 times the height due to the added U and V planes,=0A=
so we need a height (and memory size) scale factor.=0A=
---=0A=
 dlls/ddraw/utils.c                |    5 +=0A=
 dlls/wined3d/arb_program_shader.c |  342 =
++++++++++++++++++++++++++++---------=0A=
 dlls/wined3d/device.c             |    8 +-=0A=
 dlls/wined3d/surface.c            |    5 +=0A=
 dlls/wined3d/utils.c              |   14 ++-=0A=
 dlls/wined3d/wined3d_private.h    |    2 +-=0A=
 include/wine/wined3d_gl.h         |    1 +=0A=
 7 files changed, 291 insertions(+), 86 deletions(-)=0A=
=0A=

diff --git a/dlls/ddraw/utils.c b/dlls/ddraw/utils.c=0A=
index 5d96b36..937caa4 100644=0A=
--- a/dlls/ddraw/utils.c=0A=
+++ b/dlls/ddraw/utils.c=0A=
@@ -255,6 +255,11 @@ PixelFormat_WineD3DtoDD(DDPIXELFORMAT =
*DDPixelFormat,=0A=
             break;=0A=
 =0A=
         case WINED3DFMT_YV12:=0A=
+            DDPixelFormat->u1.dwYUVBitCount =3D 12;=0A=
+            DDPixelFormat->dwFlags =3D DDPF_FOURCC;=0A=
+            DDPixelFormat->dwFourCC =3D WineD3DFormat;=0A=
+            break;=0A=
+=0A=
         case WINED3DFMT_DXT1:=0A=
         case WINED3DFMT_DXT2:=0A=
         case WINED3DFMT_DXT3:=0A=
diff --git a/dlls/wined3d/arb_program_shader.c =
b/dlls/wined3d/arb_program_shader.c=0A=
index 2f18ddc..54c2a83 100644=0A=
--- a/dlls/wined3d/arb_program_shader.c=0A=
+++ b/dlls/wined3d/arb_program_shader.c=0A=
@@ -3142,6 +3142,7 @@ const struct fragment_pipeline =
arbfp_fragment_pipeline =3D {=0A=
 struct arbfp_blit_priv {=0A=
     GLenum yuy2_rect_shader, yuy2_2d_shader;=0A=
     GLenum uyvy_rect_shader, uyvy_2d_shader;=0A=
+    GLenum yv12_rect_shader, yv12_2d_shader;=0A=
 };=0A=
 =0A=
 static HRESULT arbfp_blit_alloc(IWineD3DDevice *iface) {=0A=
@@ -3162,23 +3163,23 @@ static void arbfp_blit_free(IWineD3DDevice =
*iface) {=0A=
     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader));=0A=
     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader));=0A=
     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader));=0A=
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader));=0A=
     checkGLcall("Delete yuv programs\n");=0A=
     LEAVE_GL();=0A=
 }=0A=
 =0A=
-GLenum gen_yuv_shader(IWineD3DDeviceImpl *device, WINED3DFORMAT fmt, =
GLenum textype) {=0A=
-    GLenum shader;=0A=
-    SHADER_BUFFER buffer;=0A=
+BOOL gen_planar_yuv_read(SHADER_BUFFER *buffer, WINED3DFORMAT fmt, =
GLenum textype, char *luminance) {=0A=
+    char chroma;=0A=
     const char *tex, *texinstr;=0A=
-    char chroma, luminance;=0A=
-    struct arbfp_blit_priv *priv =3D (struct arbfp_blit_priv *) =
device->blit_priv;=0A=
-=0A=
-    /* Shader header */=0A=
-    buffer.bsize =3D 0;=0A=
-    buffer.lineNo =3D 0;=0A=
-    buffer.newline =3D TRUE;=0A=
-    buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE);=0A=
 =0A=
+    if(fmt =3D=3D WINED3DFMT_UYVY) {=0A=
+        chroma =3D 'r';=0A=
+        *luminance =3D 'a';=0A=
+    } else {=0A=
+        chroma =3D 'a';=0A=
+        *luminance =3D 'r';=0A=
+    }=0A=
     switch(textype) {=0A=
         case GL_TEXTURE_2D:             tex =3D "2D";     texinstr =3D =
"TXP"; break;=0A=
         case GL_TEXTURE_RECTANGLE_ARB:  tex =3D "RECT";   texinstr =3D =
"TEX"; break;=0A=
@@ -3187,16 +3188,229 @@ GLenum gen_yuv_shader(IWineD3DDeviceImpl =
*device, WINED3DFORMAT fmt, GLenum text=0A=
              * properly in the texture to find the correct chroma values=0A=
              */=0A=
             FIXME("Implement yuv correction for non-2d, non-rect =
textures\n");=0A=
-            return 0;=0A=
+            return FALSE;=0A=
     }=0A=
 =0A=
-    if(fmt =3D=3D WINED3DFMT_UYVY) {=0A=
-        chroma =3D 'r';=0A=
-        luminance =3D 'a';=0A=
+    /* First we have to read the chroma values. This means we need at =
least two pixels(no filtering),=0A=
+     * or 4 pixels(with filtering). To get the unmodified chromas, we =
have to rid ourselves of the=0A=
+     * filtering when we sample the texture.=0A=
+     *=0A=
+     * These are the rules for reading the chroma:=0A=
+     *=0A=
+     * Even pixel: Cr=0A=
+     * Even pixel: U=0A=
+     * Odd pixel: V=0A=
+     *=0A=
+     * So we have to get the sampling x position in non-normalized =
coordinates in integers=0A=
+     */=0A=
+    if(textype !=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+        shader_addline(buffer, "MUL texcrd.rg, fragment.texcoord[0], =
size.x;\n");=0A=
+        shader_addline(buffer, "MOV texcrd.a, size.x;\n");=0A=
     } else {=0A=
-        chroma =3D 'a';=0A=
-        luminance =3D 'r';=0A=
+        shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");=0A=
+    }=0A=
+    /* We must not allow filtering between pixel x and x+1, this would =
mix U and V=0A=
+     * Vertical filtering is ok. However, bear in mind that the pixel =
center is at=0A=
+     * 0.5, so add 0.5.=0A=
+     */=0A=
+    shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");=0A=
+    shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");=0A=
+=0A=
+    /* Multiply the x coordinate by 0.5 and get the fraction. This gives =
0.25 and 0.75 for the=0A=
+     * even and odd pixels respectively=0A=
+     */=0A=
+    shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");=0A=
+    shader_addline(buffer, "FRC texcrd2, texcrd2;\n");=0A=
+=0A=
+    /* Sample Pixel 1 */=0A=
+    shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
+=0A=
+    /* Put the value into either of the chroma values */=0A=
+    shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(buffer, "MUL chroma.r, luminance.%c, temp.x;\n", =
chroma);=0A=
+    shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(buffer, "MUL chroma.g, luminance.%c, temp.x;\n", =
chroma);=0A=
+=0A=
+    /* Sample pixel 2. If we read an even pixel(SLT above returned 1), =
sample=0A=
+     * the pixel right to the current one. Otherwise, sample the left =
pixel.=0A=
+     * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.=0A=
+     */=0A=
+    shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");=0A=
+    shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n");=0A=
+    shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
+=0A=
+    /* Put the value into the other chroma */=0A=
+    shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(buffer, "MAD chroma.g, luminance.%c, temp.x, =
chroma.g;\n", chroma);=0A=
+    shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
+    shader_addline(buffer, "MAD chroma.r, luminance.%c, temp.x, =
chroma.r;\n", chroma);=0A=
+=0A=
+    /* TODO: If filtering is enabled, sample a 2nd pair of pixels left =
or right of=0A=
+     * the current one and lerp the two U and V values=0A=
+     */=0A=
+=0A=
+    /* This gives the correctly filtered luminance value */=0A=
+    shader_addline(buffer, "TEX luminance, fragment.texcoord[0], =
texture[0], %s;\n", tex);=0A=
+=0A=
+    return TRUE;=0A=
+}=0A=
+=0A=
+BOOL gen_yv12_read(SHADER_BUFFER *buffer, WINED3DFORMAT fmt, GLenum =
textype, char *luminance) {=0A=
+    const char *tex;=0A=
+=0A=
+    switch(textype) {=0A=
+        case GL_TEXTURE_2D:             tex =3D "2D";     break;=0A=
+        case GL_TEXTURE_RECTANGLE_ARB:  tex =3D "RECT";   break;=0A=
+        default:=0A=
+            FIXME("Implement yv12 correction for non-2d, non-rect =
textures\n");=0A=
+            return FALSE;=0A=
+    }=0A=
+=0A=
+    /* Emits the code that reads the luminance into "luminance" and =
both chroma values into=0A=
+     * chroma.r and chroma.g. YV12 surfaces contain a WxH sized =
luminance plane, followed by a=0A=
+     * (W/2)x(H/2) V and a (W/2)x(H/2) U plane, each with 8 bit per =
pixel. So the effective=0A=
+     * bitdepth is 12 bits per pixel. Since the U and V planes have =
only half the pitch of the=0A=
+     * luminance plane, the packing into the gl texture is a bit =
unfortunate. If the whole=0A=
+     * texture is interpreted as luminance data it looks approximately =
like this:=0A=
+     *=0A=
+     *        +----------------------------------+----=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        |                                  |   2=0A=
+     *        |            LUMINANCE             |   -=0A=
+     *        |                                  |   3=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        |                                  |=0A=
+     *        +----------------+-----------------+----=0A=
+     *        |                |                 |=0A=
+     *        |  V even rows   |  V odd rows     |=0A=
+     *        |                |                 |   1=0A=
+     *        +----------------+------------------   -=0A=
+     *        |                |                 |   3=0A=
+     *        |  U even rows   |  U odd rows     |=0A=
+     *        |                |                 |=0A=
+     *        +----------------+-----------------+----=0A=
+     *        |                |                 |=0A=
+     *        |     0.5        |       0.5       |=0A=
+     *=0A=
+     * So it appears as if there are 4 chroma images, but in fact the =
odd rows=0A=
+     * in the chroma images are in the same row as the even ones. So =
it is=0A=
+     * kinda tricky to read=0A=
+     *=0A=
+     * When reading from rectangle textures, keep in mind that the =
input y coordinates=0A=
+     * go from 0 to d3d_height, whereas the opengl texture height is =
1.5 * d3d_height=0A=
+     */=0A=
+    shader_addline(buffer, "PARAM yv12_coef =3D {%f, %f, %f, %f};\n",=0A=
+                   2.0 / 3.0, 1.0 / 6.0, (2.0 / 3.0) + (1.0 / 6.0), 1.0 =
/ 3.0);=0A=
+=0A=
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");=0A=
+    /* the chroma planes have only half the width */=0A=
+    shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");=0A=
+=0A=
+    /* The first value is between 2/3 and 5/6th of the texture's =
height, so scale+bias=0A=
+     * the coordinate. Also read the right side of the image when =
reading odd lines=0A=
+     *=0A=
+     * Don't forget to clamp the y values into the range, otherwise =
we'll get filtering=0A=
+     * bleeding=0A=
+     */=0A=
+    if(textype =3D=3D GL_TEXTURE_2D) {=0A=
+        /* chroma.w is 1 / size.y, the clamps below are offset by half =
of it */=0A=
+        shader_addline(buffer, "RCP chroma.w, size.y;\n");=0A=
+=0A=
+        shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");=0A=
+=0A=
+        shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");=0A=
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, =
yv12_coef.x;\n");=0A=
+=0A=
+        /* Read odd lines from the right side (add size * 0.5 to the x =
coordinate) */=0A=
+        shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, =
yv12_coef.y;\n"); /* To avoid 0.5 =3D=3D 0.5 comparisons */=0A=
+        shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");=0A=
+        shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");=0A=
+        shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, =
texcrd.x;\n");=0A=
+=0A=
+        /* clamp, keep the half pixel origin in mind */=0A=
+        shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, =
yv12_coef.x;\n");=0A=
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");=0A=
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, =
yv12_coef.z;\n");=0A=
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");=0A=
+    } else {=0A=
+        /* Read from [size - size+size/4] */=0A=
+        shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");=0A=
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, =
size.y;\n");=0A=
+=0A=
+        /* Read odd lines from the right side (add size * 0.5 to the x =
coordinate) */=0A=
+        shader_addline(buffer, "ADD texcrd2.x, texcrd.y, =
yv12_coef.y;\n"); /* To avoid 0.5 =3D=3D 0.5 comparisons */=0A=
+        shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");=0A=
+        shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");=0A=
+        shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n");=0A=
+        shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, =
texcrd.x;\n");=0A=
+=0A=
+        /* Make sure to read exactly from the pixel center */=0A=
+        shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");=0A=
+        shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n");=0A=
+=0A=
+        /* Clamp */=0A=
+        shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n");=0A=
+        shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");=0A=
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");=0A=
+        shader_addline(buffer, "ADD temp.y, size.y, -coef.y;\n");=0A=
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");=0A=
+    }=0A=
+    /* Read the texture, put the result into the output register */=0A=
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);=0A=
+    shader_addline(buffer, "MOV chroma.r, temp.a;\n");=0A=
+=0A=
+    /* The other chroma value is 1/6th of the texture lower, from 5/6th =
to 6/6th=0A=
+     * No need to clamp because we're just reusing the already clamped =
value from above=0A=
+     */=0A=
+    if(textype =3D=3D GL_TEXTURE_2D) {=0A=
+        shader_addline(buffer, "ADD texcrd.y, texcrd.y, =
yv12_coef.y;\n");=0A=
+    } else {=0A=
+        shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, =
texcrd.y;\n");=0A=
+    }=0A=
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);=0A=
+    shader_addline(buffer, "MOV chroma.g, temp.a;\n");=0A=
+=0A=
+    /* Sample the luminance value. It is in the top 2/3rd of the =
texture, so scale the y coordinate.=0A=
+     * Clamp the y coordinate to prevent the chroma values from =
bleeding into the sampled luminance=0A=
+     * values due to filtering=0A=
+     */=0A=
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");=0A=
+    if(textype =3D=3D GL_TEXTURE_2D) {=0A=
+        /* Multiply the y coordinate by 2/3 and clamp it */=0A=
+        shader_addline(buffer, "MUL texcrd.y, texcrd.y, =
yv12_coef.x;\n");=0A=
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, =
yv12_coef.x;\n");=0A=
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");=0A=
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], =
%s;\n", tex);=0A=
+    } else {=0A=
+        /* Reading from texture_rectangles is pretty straightforward, =
just use the unmodified=0A=
+         * texture coordinate. It is still a good idea to clamp it =
though, since the opengl texture=0A=
+         * is bigger. Clamp to size.y - 0.5 (in temp.x) so no chroma =
rows get filtered in=0A=
+         */=0A=
+        shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");=0A=
+        shader_addline(buffer, "MIN texcrd.y, texcrd.y, temp.x;\n");=0A=
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], =
%s;\n", tex);=0A=
     }=0A=
+    *luminance =3D 'a';=0A=
+=0A=
+    return TRUE;=0A=
+}=0A=
+=0A=
+GLuint gen_yuv_shader(IWineD3DDeviceImpl *device, WINED3DFORMAT fmt, =
GLenum textype) {=0A=
+    GLenum shader;=0A=
+    SHADER_BUFFER buffer;=0A=
+    char luminance_component;=0A=
+    struct arbfp_blit_priv *priv =3D (struct arbfp_blit_priv *) =
device->blit_priv;=0A=
+=0A=
+    /* Shader header */=0A=
+    buffer.bsize =3D 0;=0A=
+    buffer.lineNo =3D 0;=0A=
+    buffer.newline =3D TRUE;=0A=
+    buffer.buffer =3D HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, =
SHADER_PGMSIZE);=0A=
 =0A=
     GL_EXTCALL(glGenProgramsARB(1, &shader));=0A=
     checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");=0A=
@@ -3245,70 +3459,19 @@ GLenum gen_yuv_shader(IWineD3DDeviceImpl =
*device, WINED3DFORMAT fmt, GLenum text=0A=
     shader_addline(&buffer, "TEMP chroma;\n");=0A=
     shader_addline(&buffer, "TEMP texcrd;\n");=0A=
     shader_addline(&buffer, "TEMP texcrd2;\n");=0A=
-    shader_addline(&buffer, "PARAM coef =3D {1.0, 0.5, 2.0, 0.0};\n");=0A=
+    shader_addline(&buffer, "PARAM coef =3D {1.0, 0.5, 2.0, 0.25};\n");=0A=
     shader_addline(&buffer, "PARAM yuv_coef =3D {1.403, 0.344, 0.714, =
1.770};\n");=0A=
     shader_addline(&buffer, "PARAM size =3D program.local[0];\n");=0A=
 =0A=
-    /* First we have to read the chroma values. This means we need at =
least two pixels(no filtering),=0A=
-     * or 4 pixels(with filtering). To get the unmodified chromas, we =
have to rid ourselves of the=0A=
-     * filtering when we sample the texture.=0A=
-     *=0A=
-     * These are the rules for reading the chroma:=0A=
-     *=0A=
-     * Even pixel: Cr=0A=
-     * Even pixel: U=0A=
-     * Odd pixel: V=0A=
-     *=0A=
-     * So we have to get the sampling x position in non-normalized =
coordinates in integers=0A=
-     */=0A=
-    if(textype !=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
-        shader_addline(&buffer, "MUL texcrd.rg, fragment.texcoord[0], =
size.x;\n");=0A=
-        shader_addline(&buffer, "MOV texcrd.a, size.x;\n");=0A=
+    if(fmt =3D=3D WINED3DFMT_UYVY || fmt =3D=3DWINED3DFMT_YUY2) {=0A=
+        if(gen_planar_yuv_read(&buffer, fmt, textype, =
&luminance_component) =3D=3D FALSE) {=0A=
+            return 0;=0A=
+        }=0A=
     } else {=0A=
-        shader_addline(&buffer, "MOV texcrd, fragment.texcoord[0];\n");=0A=
+        if(gen_yv12_read(&buffer, fmt, textype, &luminance_component) =
=3D=3D FALSE) {=0A=
+            return 0;=0A=
+        }=0A=
     }=0A=
-    /* We must not allow filtering between pixel x and x+1, this would =
mix U and V=0A=
-     * Vertical filtering is ok. However, bear in mind that the pixel =
center is at=0A=
-     * 0.5, so add 0.5.=0A=
-     */=0A=
-    shader_addline(&buffer, "FLR texcrd.x, texcrd.x;\n");=0A=
-    shader_addline(&buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");=0A=
-=0A=
-    /* Divide the x coordinate by 0.5 and get the fraction. This gives =
0.25 and 0.75 for the=0A=
-     * even and odd pixels respectively=0A=
-     */=0A=
-    shader_addline(&buffer, "MUL texcrd2, texcrd, coef.y;\n");=0A=
-    shader_addline(&buffer, "FRC texcrd2, texcrd2;\n");=0A=
-=0A=
-    /* Sample Pixel 1 */=0A=
-    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
-=0A=
-    /* Put the value into either of the chroma values */=0A=
-    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
-    shader_addline(&buffer, "MUL chroma.r, luminance.%c, temp.x;\n", =
chroma);=0A=
-    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
-    shader_addline(&buffer, "MUL chroma.g, luminance.%c, temp.x;\n", =
chroma);=0A=
-=0A=
-    /* Sample pixel 2. If we read an even pixel(SLT above returned 1), =
sample=0A=
-     * the pixel right to the current one. Otherwise, sample the left =
pixel.=0A=
-     * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.=0A=
-     */=0A=
-    shader_addline(&buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");=0A=
-    shader_addline(&buffer, "ADD texcrd.x, texcrd, temp.x;\n");=0A=
-    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", =
texinstr, tex);=0A=
-=0A=
-    /* Put the value into the other chroma */=0A=
-    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");=0A=
-    shader_addline(&buffer, "MAD chroma.g, luminance.%c, temp.x, =
chroma.g;\n", chroma);=0A=
-    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");=0A=
-    shader_addline(&buffer, "MAD chroma.r, luminance.%c, temp.x, =
chroma.r;\n", chroma);=0A=
-=0A=
-    /* TODO: If filtering is enabled, sample a 2nd pair of pixels left =
or right of=0A=
-     * the current one and lerp the two U and V values=0A=
-     */=0A=
-=0A=
-    /* This gives the correctly filtered luminance value */=0A=
-    shader_addline(&buffer, "TEX luminance, fragment.texcoord[0], =
texture[0], %s;\n", tex);=0A=
 =0A=
     /* Calculate the final result. Formula is taken from=0A=
      * http://www.fourcc.org/fccyvrgb.php. Note that the chroma=0A=
@@ -3316,10 +3479,10 @@ GLenum gen_yuv_shader(IWineD3DDeviceImpl =
*device, WINED3DFORMAT fmt, GLenum text=0A=
      */=0A=
     shader_addline(&buffer, "SUB chroma.rg, chroma, coef.y;\n");=0A=
 =0A=
-    shader_addline(&buffer, "MAD result.color.r, chroma.r, yuv_coef.x, =
luminance.%c;\n", luminance);=0A=
-    shader_addline(&buffer, "MAD temp.r, -chroma.g, yuv_coef.y, =
luminance.%c;\n", luminance);=0A=
+    shader_addline(&buffer, "MAD result.color.r, chroma.r, yuv_coef.x, =
luminance.%c;\n", luminance_component);=0A=
+    shader_addline(&buffer, "MAD temp.r, -chroma.g, yuv_coef.y, =
luminance.%c;\n", luminance_component);=0A=
     shader_addline(&buffer, "MAD result.color.g, -chroma.r, yuv_coef.z, =
temp.r;\n");=0A=
-    shader_addline(&buffer, "MAD result.color.b, chroma.g, yuv_coef.w, =
luminance.%c;\n", luminance);=0A=
+    shader_addline(&buffer, "MAD result.color.b, chroma.g, yuv_coef.w, =
luminance.%c;\n", luminance_component);=0A=
     shader_addline(&buffer, "END\n");=0A=
 =0A=
     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, =
GL_PROGRAM_FORMAT_ASCII_ARB, strlen(buffer.buffer), buffer.buffer));=0A=
@@ -3337,12 +3500,18 @@ GLenum gen_yuv_shader(IWineD3DDeviceImpl =
*device, WINED3DFORMAT fmt, GLenum text=0A=
         } else {=0A=
             priv->yuy2_2d_shader =3D shader;=0A=
         }=0A=
-    } else {=0A=
+    } else if(fmt =3D=3D WINED3DFMT_UYVY) {=0A=
         if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
             priv->uyvy_rect_shader =3D shader;=0A=
         } else {=0A=
             priv->uyvy_2d_shader =3D shader;=0A=
         }=0A=
+    } else {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            priv->yv12_rect_shader =3D shader;=0A=
+        } else {=0A=
+            priv->yv12_2d_shader =3D shader;=0A=
+        }=0A=
     }=0A=
     return shader;=0A=
 }=0A=
@@ -3356,7 +3525,9 @@ static HRESULT arbfp_blit_set(IWineD3DDevice =
*iface, WINED3DFORMAT fmt, GLenum t=0A=
 =0A=
     getFormatDescEntry(fmt, &GLINFO_LOCATION, &glDesc);=0A=
 =0A=
-    if(glDesc->conversion_group !=3D WINED3DFMT_YUY2 && =
glDesc->conversion_group !=3D WINED3DFMT_UYVY) {=0A=
+    if(glDesc->conversion_group !=3D WINED3DFMT_YUY2 && =
glDesc->conversion_group !=3D WINED3DFMT_UYVY &&=0A=
+       glDesc->conversion_group !=3D WINED3DFMT_YV12) {=0A=
+        ERR("Format: %s\n", debug_d3dformat(glDesc->conversion_group));=0A=
         /* Don't bother setting up a shader for unconverted formats */=0A=
         glEnable(textype);=0A=
         checkGLcall("glEnable(textype)");=0A=
@@ -3369,12 +3540,18 @@ static HRESULT arbfp_blit_set(IWineD3DDevice =
*iface, WINED3DFORMAT fmt, GLenum t=0A=
         } else {=0A=
             shader =3D priv->yuy2_2d_shader;=0A=
         }=0A=
-    } else {=0A=
+    } else if(glDesc->conversion_group =3D=3D WINED3DFMT_UYVY) {=0A=
         if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
             shader =3D priv->uyvy_rect_shader;=0A=
         } else {=0A=
             shader =3D priv->uyvy_2d_shader;=0A=
         }=0A=
+    } else {=0A=
+        if(textype =3D=3D GL_TEXTURE_RECTANGLE_ARB) {=0A=
+            shader =3D priv->yv12_rect_shader;=0A=
+        } else {=0A=
+            shader =3D priv->yv12_2d_shader;=0A=
+        }=0A=
     }=0A=
 =0A=
     if(!shader) {=0A=
@@ -3412,6 +3589,7 @@ static BOOL =
arbfp_blit_conv_supported(WINED3DFORMAT fmt) {=0A=
     switch(fmt) {=0A=
         case WINED3DFMT_YUY2:=0A=
         case WINED3DFMT_UYVY:=0A=
+        case WINED3DFMT_YV12:=0A=
             TRACE("[OK]\n");=0A=
             return TRUE;=0A=
         default:=0A=
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c=0A=
index e199ad1..73e4242 100644=0A=
--- a/dlls/wined3d/device.c=0A=
+++ b/dlls/wined3d/device.c=0A=
@@ -586,7 +586,8 @@ static HRESULT  WINAPI =
IWineD3DDeviceImpl_CreateSurface(IWineD3DDevice *iface, U=0A=
     IWineD3DDeviceImpl  *This =3D (IWineD3DDeviceImpl *)iface;    =0A=
     IWineD3DSurfaceImpl *object; /*NOTE: impl ref allowed since this is =
a create function */=0A=
     unsigned int Size       =3D 1;=0A=
-    const StaticPixelFormatDesc *tableEntry =3D =
getFormatDescEntry(Format, NULL, NULL);=0A=
+    const GlPixelFormatDesc *glDesc;=0A=
+    const StaticPixelFormatDesc *tableEntry =3D =
getFormatDescEntry(Format, &GLINFO_LOCATION, &glDesc);=0A=
     TRACE("(%p) Create surface\n",This);=0A=
     =0A=
     /** FIXME: Check ranges on the inputs are valid =0A=
@@ -637,9 +638,11 @@ static HRESULT  WINAPI =
IWineD3DDeviceImpl_CreateSurface(IWineD3DDevice *iface, U=0A=
     } else {=0A=
        /* The pitch is a multiple of 4 bytes */=0A=
         Size =3D ((Width * tableEntry->bpp) + This->surface_alignment - =
1) & ~(This->surface_alignment - 1);=0A=
-       Size *=3D Height;=0A=
+        Size *=3D Height;=0A=
     }=0A=
 =0A=
+    if(glDesc->heightscale !=3D 0.0) Size *=3D glDesc->heightscale;=0A=
+=0A=
     /** Create and initialise the surface resource **/=0A=
     =
D3DCREATERESOURCEOBJECTINSTANCE(object,Surface,WINED3DRTYPE_SURFACE, =
Size)=0A=
     /* "Standalone" surface */=0A=
@@ -650,6 +653,7 @@ static HRESULT  WINAPI =
IWineD3DDeviceImpl_CreateSurface(IWineD3DDevice *iface, U=0A=
     object->currentDesc.MultiSampleType    =3D MultiSample;=0A=
     object->currentDesc.MultiSampleQuality =3D MultisampleQuality;=0A=
     object->glDescription.level            =3D Level;=0A=
+    object->heightscale                    =3D glDesc->heightscale !=3D =
0.0 ? glDesc->heightscale : 1.0;=0A=
     list_init(&object->overlays);=0A=
 =0A=
     /* Flags */=0A=
diff --git a/dlls/wined3d/surface.c b/dlls/wined3d/surface.c=0A=
index a988121..7870d4b 100644=0A=
--- a/dlls/wined3d/surface.c=0A=
+++ b/dlls/wined3d/surface.c=0A=
@@ -231,6 +231,9 @@ static void =
surface_download_data(IWineD3DSurfaceImpl *This) {=0A=
 /* This call just uploads data, the caller is responsible for =
activating the=0A=
  * right context and binding the correct texture. */=0A=
 static void surface_upload_data(IWineD3DSurfaceImpl *This, GLenum =
internal, GLsizei width, GLsizei height, GLenum format, GLenum type, =
const GLvoid *data) {=0A=
+=0A=
+    if(This->heightscale !=3D 1.0 && This->heightscale !=3D 0.0) height =
*=3D This->heightscale;=0A=
+=0A=
     if (This->resource.format =3D=3D WINED3DFMT_DXT1 ||=0A=
             This->resource.format =3D=3D WINED3DFMT_DXT2 || =
This->resource.format =3D=3D WINED3DFMT_DXT3 ||=0A=
             This->resource.format =3D=3D WINED3DFMT_DXT4 || =
This->resource.format =3D=3D WINED3DFMT_DXT5 ||=0A=
@@ -293,6 +296,8 @@ static void =
surface_allocate_surface(IWineD3DSurfaceImpl *This, GLenum internal,=0A=
     BOOL enable_client_storage =3D FALSE;=0A=
     BYTE *mem =3D NULL;=0A=
 =0A=
+    if(This->heightscale !=3D 1.0 && This->heightscale !=3D 0.0) height =
*=3D This->heightscale;=0A=
+=0A=
     TRACE("(%p) : Creating surface (target %#x)  level %d, d3d format =
%s, internal format %#x, width %d, height %d, gl format %#x, gl =
type=3D%#x\n", This,=0A=
             This->glDescription.target, This->glDescription.level, =
debug_d3dformat(This->resource.format), internal, width, height, format, =
type);=0A=
 =0A=
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c=0A=
index 31a1944..57e2b37 100644=0A=
--- a/dlls/wined3d/utils.c=0A=
+++ b/dlls/wined3d/utils.c=0A=
@@ -44,7 +44,7 @@ static const StaticPixelFormatDesc formats[] =3D {=0A=
     /* FourCC formats, kept here to have WINED3DFMT_R8G8B8(=3D20) at =
position 20 */=0A=
     {WINED3DFMT_UYVY        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,2      ,0      ,0          ,TRUE  },=0A=
     {WINED3DFMT_YUY2        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,2      ,0      ,0          ,TRUE  },=0A=
-    {WINED3DFMT_YV12        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,2      ,0      ,0          ,TRUE  },=0A=
+    {WINED3DFMT_YV12        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,1      ,0      ,0          ,TRUE  },=0A=
     {WINED3DFMT_DXT1        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,1      ,0      ,0          ,TRUE  },=0A=
     {WINED3DFMT_DXT2        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,1      ,0      ,0          ,TRUE  },=0A=
     {WINED3DFMT_DXT3        ,0x0        ,0x0        ,0x0        ,0x0    =
    ,1      ,0      ,0          ,TRUE  },=0A=
@@ -146,6 +146,8 @@ static const GlPixelFormatDescTemplate =
gl_formats_template[] =3D {=0A=
         ,WINED3DFMT_FLAG_FILTERING },=0A=
     {WINED3DFMT_YUY2           ,GL_RGB                           =
,GL_RGB                                 , 0,           =
GL_YCBCR_422_APPLE        ,UNSIGNED_SHORT_8_8_REV_APPLE=0A=
         ,WINED3DFMT_FLAG_FILTERING },=0A=
+    {WINED3DFMT_YV12           ,GL_ALPHA                         =
,GL_ALPHA                               , 0,           GL_ALPHA          =
        ,GL_UNSIGNED_BYTE=0A=
+        ,WINED3DFMT_FLAG_FILTERING },=0A=
     {WINED3DFMT_DXT1           ,GL_COMPRESSED_RGBA_S3TC_DXT1_EXT =
,GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT , 0,           GL_RGBA           =
        ,GL_UNSIGNED_BYTE=0A=
         ,WINED3DFMT_FLAG_FILTERING },=0A=
     {WINED3DFMT_DXT2           ,GL_COMPRESSED_RGBA_S3TC_DXT3_EXT =
,GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT , 0,           GL_RGBA           =
        ,GL_UNSIGNED_BYTE=0A=
@@ -273,6 +275,10 @@ static const GlPixelFormatDescTemplate =
gl_formats_template[] =3D {=0A=
         ,0 },=0A=
     /* Vendor-specific formats */=0A=
     {WINED3DFMT_ATI2N          ,0                                ,0     =
                                 , 0,           GL_LUMINANCE_ALPHA       =
 ,GL_UNSIGNED_BYTE=0A=
+        ,0 },=0A=
+    {WINED3DFMT_NVHU           ,0                                ,0     =
                                 , 0,           GL_LUMINANCE_ALPHA       =
 ,GL_UNSIGNED_BYTE=0A=
+        ,0 },=0A=
+    {WINED3DFMT_NVHS           ,0                                ,0     =
                                 , 0,           GL_LUMINANCE_ALPHA       =
 ,GL_UNSIGNED_BYTE=0A=
         ,0 }=0A=
 };=0A=
 =0A=
@@ -314,6 +320,7 @@ BOOL initPixelFormats(WineD3D_GL_Info *gl_info)=0A=
         gl_info->gl_formats[dst].glType          =3D =
gl_formats_template[src].glType;=0A=
         gl_info->gl_formats[dst].conversion_group=3D WINED3DFMT_UNKNOWN;=0A=
         gl_info->gl_formats[dst].Flags           =3D =
gl_formats_template[src].Flags;=0A=
+        gl_info->gl_formats[dst].heightscale     =3D 1.0;=0A=
 =0A=
         if(wined3d_settings.offscreen_rendering_mode =3D=3D ORM_FBO &&=0A=
            gl_formats_template[src].rtInternal !=3D 0) {=0A=
@@ -430,6 +437,11 @@ BOOL initPixelFormats(WineD3D_GL_Info *gl_info)=0A=
         gl_info->gl_formats[dst].glType =3D GL_UNSIGNED_BYTE;=0A=
         gl_info->gl_formats[dst].conversion_group =3D WINED3DFMT_UYVY;=0A=
     }=0A=
+=0A=
+    dst =3D getFmtIdx(WINED3DFMT_YV12);=0A=
+    gl_info->gl_formats[dst].heightscale =3D 1.5;=0A=
+    gl_info->gl_formats[dst].conversion_group =3D WINED3DFMT_YV12;=0A=
+=0A=
     return TRUE;=0A=
 }=0A=
 =0A=
diff --git a/dlls/wined3d/wined3d_private.h =
b/dlls/wined3d/wined3d_private.h=0A=
index f4b81f7..ebe0f7f 100644=0A=
--- a/dlls/wined3d/wined3d_private.h=0A=
+++ b/dlls/wined3d/wined3d_private.h=0A=
@@ -703,7 +703,6 @@ typedef struct WineD3D_PixelFormat=0A=
 } WineD3D_PixelFormat;=0A=
 =0A=
 /* The adapter structure */=0A=
-typedef struct GLPixelFormatDesc GLPixelFormatDesc;=0A=
 struct WineD3DAdapter=0A=
 {=0A=
     UINT                    num;=0A=
@@ -1312,6 +1311,7 @@ struct IWineD3DSurfaceImpl=0A=
 =0A=
     UINT                      pow2Width;=0A=
     UINT                      pow2Height;=0A=
+    float                     heightscale;=0A=
 =0A=
     /* A method to retrieve the drawable size. Not in the Vtable to =
make it changeable */=0A=
     void (*get_drawable_size)(IWineD3DSurfaceImpl *This, UINT *width, =
UINT *height);=0A=
diff --git a/include/wine/wined3d_gl.h b/include/wine/wined3d_gl.h=0A=
index 4c26c9b..3101c17 100644=0A=
--- a/include/wine/wined3d_gl.h=0A=
+++ b/include/wine/wined3d_gl.h=0A=
@@ -3813,6 +3813,7 @@ typedef struct {=0A=
     GLint                   glInternal, glGammaInternal, rtInternal, =
glFormat, glType;=0A=
     WINED3DFORMAT           conversion_group;=0A=
     unsigned int            Flags;=0A=
+    float                   heightscale;=0A=
 } GlPixelFormatDesc;=0A=
 =0A=
 typedef struct _WINED3DGLTYPE {=0A=
-- =0A=
1.5.6.4=0A=
=0A=

------=_NextPart_000_0001_01C909E0.B2FBA3D0--