[PATCHv4 1/3] wined3d: Add support for NV12 textures
Martin Storsjo
martin at martin.st
Thu Feb 13 06:05:57 CST 2014
This fixes video playback with the sample app from OpenH264.
---
No changes since yesterday on this one.
---
dlls/wined3d/arb_program_shader.c | 170 ++++++++++++++++++++++++++++++++++++++
dlls/wined3d/utils.c | 13 +++
dlls/wined3d/wined3d_private.h | 1 +
include/wine/wined3d.h | 1 +
4 files changed, 185 insertions(+)
diff --git a/dlls/wined3d/arb_program_shader.c b/dlls/wined3d/arb_program_shader.c
index 797e349..93a6e31 100644
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@@ -6772,6 +6772,7 @@ struct arbfp_blit_priv {
GLenum yuy2_rect_shader, yuy2_2d_shader;
GLenum uyvy_rect_shader, uyvy_2d_shader;
GLenum yv12_rect_shader, yv12_2d_shader;
+ GLenum nv12_rect_shader, nv12_2d_shader;
GLenum p8_rect_shader, p8_2d_shader;
GLuint palette_texture;
};
@@ -6798,6 +6799,8 @@ static void arbfp_blit_free(struct wined3d_device *device)
GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));
GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader));
GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader));
+ GL_EXTCALL(glDeleteProgramsARB(1, &priv->nv12_rect_shader));
+ GL_EXTCALL(glDeleteProgramsARB(1, &priv->nv12_2d_shader));
GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_rect_shader));
GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_2d_shader));
checkGLcall("Delete yuv and p8 programs");
@@ -7046,6 +7049,153 @@ static BOOL gen_yv12_read(struct wined3d_shader_buffer *buffer, GLenum textype,
return TRUE;
}
+/* Emit ARB fragment program code that samples an NV12 texture bound to texture[0].
+ * The luminance sample is left in the "luminance" register; the first chroma byte of
+ * the addressed UV pair lands in chroma.y and the second one in chroma.x. The registers
+ * texcrd, temp, chroma and luminance and the parameters coef and size are not declared
+ * here. Sets *luminance to 'a' and returns TRUE, or FALSE for unsupported texture targets. */
+static BOOL gen_nv12_read(struct wined3d_shader_buffer *buffer, GLenum textype, char *luminance)
+{
+    const char *tex;
+    static const float nv12_coef[] = {2.0f / 3.0f, 1.0f / 3.0f, 1.0f, 1.0f};
+
+    switch (textype)
+    {
+        case GL_TEXTURE_2D:
+            tex = "2D";
+            break;
+        case GL_TEXTURE_RECTANGLE_ARB:
+            tex = "RECT";
+            break;
+        default:
+            FIXME("Implement nv12 correction for non-2d, non-rect textures\n");
+            return FALSE;
+    }
+
+    /* NV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
+     * sized plane where each component is an UV pair. So the effective
+     * bitdepth is 12 bits per pixel. If the whole texture is interpreted as luminance
+     * data it looks approximately like this:
+     *
+     *        +----------------------------------+----
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |   2
+     *        |            LUMINANCE             |   -
+     *        |                                  |   3
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        |                                  |
+     *        +----------------------------------+----
+     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+     *        |UVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUVUV|
+     *        |                                  |   1
+     *        |                                  |   -
+     *        |                                  |   3
+     *        |                                  |
+     *        |                                  |
+     *        +----------------------------------+----
+     *
+     * When reading from rectangle textures, keep in mind that the input y coordinates
+     * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height. */
+
+    shader_addline(buffer, "PARAM nv12_coef = ");
+    shader_arb_append_imm_vec4(buffer, nv12_coef);
+    shader_addline(buffer, ";\n");
+
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
+    /* We only have half the number of chroma pixels. */
+    shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
+
+    if (textype == GL_TEXTURE_2D)
+    {
+        shader_addline(buffer, "RCP chroma.w, size.x;\n");
+        shader_addline(buffer, "RCP chroma.z, size.y;\n");
+
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, nv12_coef.y, nv12_coef.x;\n");
+
+        /* We must not allow filtering horizontally, this would mix U and V. Vertical
+         * filtering is ok. Convert to non-normalized coordinates so we can find the
+         * individual pixel; bear in mind that the pixel center is at 0.5, so add 0.5. */
+        shader_addline(buffer, "MUL texcrd.x, texcrd.x, size.x;\n");
+        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
+        /* Multiply by 2 since chroma is stored in UV pixel pairs, add 0.5 to hit the pixel center. */
+        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
+
+        /* Convert back to normalized coordinates. */
+        shader_addline(buffer, "MUL texcrd.x, texcrd.x, chroma.w;\n");
+
+        /* Clamp, keep the half pixel origin in mind. */
+        shader_addline(buffer, "MAD temp.y, coef.y, chroma.z, nv12_coef.x;\n");
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.z;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+    }
+    else
+    {
+        /* Read from [size - size+size/2] */
+        shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.y, size.y;\n");
+
+        shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
+        /* Multiply by 2 since chroma is stored in UV pixel pairs, add 0.5 to hit the pixel center. */
+        shader_addline(buffer, "MAD texcrd.x, texcrd.x, coef.z, coef.y;\n");
+
+        /* Clamp */
+        shader_addline(buffer, "MAD temp.y, size.y, coef.y, size.y;\n");
+        shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "ADD temp.y, size.y, coef.y;\n");
+        shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
+    }
+    /* Read the texture, put the result into the output register. */
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
+    shader_addline(buffer, "MOV chroma.y, temp.w;\n");
+
+    if (textype == GL_TEXTURE_2D)
+    {
+        /* Add 1/size.x */
+        shader_addline(buffer, "ADD texcrd.x, texcrd.x, chroma.w;\n");
+    }
+    else
+    {
+        /* Add 1 */
+        shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.x;\n");
+    }
+    shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
+    shader_addline(buffer, "MOV chroma.x, temp.w;\n");
+
+    /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
+     * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
+     * values due to filtering. */
+    shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
+    if (textype == GL_TEXTURE_2D)
+    {
+        /* Multiply the y coordinate by 2/3 and clamp it just short of the end of the
+         * luminance plane. This is a vertical clamp, so it has to use chroma.z
+         * (1 / size.y) rather than chroma.w (1 / size.x). */
+        shader_addline(buffer, "MUL texcrd.y, texcrd.y, nv12_coef.x;\n");
+        shader_addline(buffer, "MAD temp.y, -coef.y, chroma.z, nv12_coef.x;\n");
+        shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
+    }
+    else
+    {
+        /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
+         * texture coordinate. It is still a good idea to clamp it though, since the opengl
+         * texture is bigger. The limit is size.y - 0.5, computed into temp.x; clamping
+         * against size.x (the width) would be wrong whenever width and height differ. */
+        shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
+        shader_addline(buffer, "MIN texcrd.y, texcrd.y, temp.x;\n");
+        shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
+    }
+    *luminance = 'a';
+
+    return TRUE;
+}
+
static GLuint gen_p8_shader(struct arbfp_blit_priv *priv,
const struct wined3d_gl_info *gl_info, GLenum textype)
{
@@ -7230,6 +7380,14 @@ static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_
}
break;
+ case COMPLEX_FIXUP_NV12:
+ if (!gen_nv12_read(&buffer, textype, &luminance_component))
+ {
+ shader_buffer_free(&buffer);
+ return 0;
+ }
+ break;
+
default:
FIXME("Unsupported YUV fixup %#x\n", yuv_fixup);
shader_buffer_free(&buffer);
@@ -7286,6 +7444,13 @@ static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_
if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader;
else priv->yv12_2d_shader = shader;
break;
+
+ case COMPLEX_FIXUP_NV12:
+ if (textype == GL_TEXTURE_RECTANGLE_ARB)
+ priv->nv12_rect_shader = shader;
+ else
+ priv->nv12_2d_shader = shader;
+ break;
default:
ERR("Unsupported complex fixup: %d\n", yuv_fixup);
}
@@ -7336,6 +7501,10 @@ static HRESULT arbfp_blit_set(void *blit_priv, struct wined3d_context *context,
shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader;
break;
+ case COMPLEX_FIXUP_NV12:
+ shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->nv12_rect_shader : priv->nv12_2d_shader;
+ break;
+
case COMPLEX_FIXUP_P8:
shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->p8_rect_shader : priv->p8_2d_shader;
if (!shader) shader = gen_p8_shader(priv, gl_info, textype);
@@ -7430,6 +7599,7 @@ static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum win
case COMPLEX_FIXUP_YUY2:
case COMPLEX_FIXUP_UYVY:
case COMPLEX_FIXUP_YV12:
+ case COMPLEX_FIXUP_NV12:
case COMPLEX_FIXUP_P8:
TRACE("[OK]\n");
return TRUE;
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c
index 8625bc0..32426d9 100644
--- a/dlls/wined3d/utils.c
+++ b/dlls/wined3d/utils.c
@@ -51,6 +51,7 @@ static const struct wined3d_format_channels formats[] =
{WINED3DFMT_UYVY, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0},
{WINED3DFMT_YUY2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0},
{WINED3DFMT_YV12, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
+ {WINED3DFMT_NV12, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{WINED3DFMT_DXT1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{WINED3DFMT_DXT2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
{WINED3DFMT_DXT3, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0},
@@ -666,6 +667,10 @@ static const struct wined3d_format_texture_info format_texture_info[] =
GL_ALPHA, GL_UNSIGNED_BYTE, 0,
WINED3DFMT_FLAG_FILTERING,
WINED3D_GL_EXT_NONE, NULL},
+ {WINED3DFMT_NV12, GL_ALPHA, GL_ALPHA, 0,
+ GL_ALPHA, GL_UNSIGNED_BYTE, 0,
+ WINED3DFMT_FLAG_FILTERING,
+ WINED3D_GL_EXT_NONE, NULL},
{WINED3DFMT_DXT1, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, 0,
GL_RGBA, GL_UNSIGNED_BYTE, 0,
WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
@@ -1833,6 +1838,12 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_
gl_info->formats[idx].height_scale.denominator = 2;
gl_info->formats[idx].color_fixup = create_complex_fixup_desc(COMPLEX_FIXUP_YV12);
+ idx = getFmtIdx(WINED3DFMT_NV12);
+ gl_info->formats[idx].flags |= WINED3DFMT_FLAG_HEIGHT_SCALE;
+ gl_info->formats[idx].height_scale.numerator = 3;
+ gl_info->formats[idx].height_scale.denominator = 2;
+ gl_info->formats[idx].color_fixup = create_complex_fixup_desc(COMPLEX_FIXUP_NV12);
+
if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
{
idx = getFmtIdx(WINED3DFMT_P8_UINT);
@@ -2069,6 +2080,7 @@ const char *debug_d3dformat(enum wined3d_format_id format_id)
FMT_TO_STR(WINED3DFMT_UYVY);
FMT_TO_STR(WINED3DFMT_YUY2);
FMT_TO_STR(WINED3DFMT_YV12);
+ FMT_TO_STR(WINED3DFMT_NV12);
FMT_TO_STR(WINED3DFMT_DXT1);
FMT_TO_STR(WINED3DFMT_DXT2);
FMT_TO_STR(WINED3DFMT_DXT3);
@@ -2772,6 +2784,7 @@ static const char *debug_complex_fixup(enum complex_fixup fixup)
WINED3D_TO_STR(COMPLEX_FIXUP_YUY2);
WINED3D_TO_STR(COMPLEX_FIXUP_UYVY);
WINED3D_TO_STR(COMPLEX_FIXUP_YV12);
+ WINED3D_TO_STR(COMPLEX_FIXUP_NV12);
WINED3D_TO_STR(COMPLEX_FIXUP_P8);
#undef WINED3D_TO_STR
default:
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 991db87..9446bb8 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -85,6 +85,7 @@ enum complex_fixup
COMPLEX_FIXUP_UYVY = 2,
COMPLEX_FIXUP_YV12 = 3,
COMPLEX_FIXUP_P8 = 4,
+ COMPLEX_FIXUP_NV12 = 5,
};
#include <pshpack2.h>
diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h
index de1884f..3496c5d 100644
--- a/include/wine/wined3d.h
+++ b/include/wine/wined3d.h
@@ -249,6 +249,7 @@ enum wined3d_format_id
WINED3DFMT_NULL = WINEMAKEFOURCC('N','U','L','L'),
WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'),
WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'),
+ WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'),
WINED3DFMT_FORCE_DWORD = 0xffffffff
};
--
1.8.3.4 (Apple Git-47)
More information about the wine-patches
mailing list