[8/10] WineD3D: Instancing emulation

Stefan Dösinger stefan at codeweavers.com
Wed Feb 14 10:56:29 CST 2007


-------------- next part --------------
From c4cb6b43b674a8d849686cf6c64ba34fe5b117d5 Mon Sep 17 00:00:00 2001
From: Stefan Doesinger <stefan at codeweavers.com>
Date: Sat, 3 Feb 2007 12:33:57 +0100
Subject: [PATCH] WineD3D: Instancing emulation

Implements instancing according to the pseudo instancing technique
described by Nvidia. NumInstances glDrawElements calls are performed,
and the per-instance data is specified using immediate mode
glVertexAttrib* calls before calling glDrawElements.

A check for the ATI Radeon 9500-X800 instancing support hack is implemented too.
---
 dlls/wined3d/device.c          |    7 ++-
 dlls/wined3d/directx.c         |   20 ++++++
 dlls/wined3d/drawprim.c        |  148 +++++++++++++++++++++++++++++++++++++++-
 dlls/wined3d/state.c           |   18 ++++-
 dlls/wined3d/wined3d_private.h |    1 +
 5 files changed, 188 insertions(+), 6 deletions(-)

diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c
index 757b147..3376bf9 100644
--- a/dlls/wined3d/device.c
+++ b/dlls/wined3d/device.c
@@ -2067,6 +2067,8 @@ VertexOffset = StartVertex / Divider * StreamStride +
 */
 static HRESULT WINAPI IWineD3DDeviceImpl_SetStreamSourceFreq(IWineD3DDevice *iface,  UINT StreamNumber, UINT Divider) {
     IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
+    UINT oldFlags = This->updateStateBlock->streamFlags[StreamNumber];
+    UINT oldFreq = This->updateStateBlock->streamFreq[StreamNumber];
 
     TRACE("(%p) StreamNumber(%d), Divider(%d)\n", This, StreamNumber, Divider);
     This->updateStateBlock->streamFlags[StreamNumber] = Divider & (WINED3DSTREAMSOURCE_INSTANCEDATA  | WINED3DSTREAMSOURCE_INDEXEDDATA );
@@ -2075,8 +2077,9 @@ static HRESULT WINAPI IWineD3DDeviceImpl_SetStreamSourceFreq(IWineD3DDevice *ifa
     This->updateStateBlock->set.streamFreq[StreamNumber]      = TRUE;
     This->updateStateBlock->streamFreq[StreamNumber]          = Divider & 0x7FFFFF;
 
-    if (This->updateStateBlock->streamFlags[StreamNumber] || This->updateStateBlock->streamFreq[StreamNumber] != 1) {
-        FIXME("Stream indexing not fully supported\n");
+    if(This->updateStateBlock->streamFreq[StreamNumber] != oldFreq ||
+       This->updateStateBlock->streamFlags[StreamNumber] != oldFlags) {
+        IWineD3DDeviceImpl_MarkStateDirty(This, STATE_STREAMSRC);
     }
 
     return WINED3D_OK;
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
index d7f8d6d..7e2fc05 100644
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@@ -1770,6 +1770,26 @@ static HRESULT WINAPI IWineD3DImpl_CheckDeviceFormat(IWineD3D *iface, UINT Adapt
             TRACE_(d3d_caps)("[FAILED]\n"); /* Enable when implemented */
             return WINED3DERR_NOTAVAILABLE;
 
+        /* ATI instancing hack: Although ATI cards do not support Shader Model 3.0, they support
+         * instancing. To query if the card supports instancing CheckDeviceFormat with the special format
+         * MAKEFOURCC('I','N','S','T') is used. Should a (broken) app check for this, provide a proper return value.
+         * We can do instancing with all shader versions, but we need vertex shaders.
+         *
+         * Additionally applications have to set the D3DRS_POINTSIZE render state to MAKEFOURCC('I','N','S','T') once
+         * to enable instancing. WineD3D doesn't need that and just ignores it.
+         *
+         * With Shader Model 3.0 capable cards Instancing 'just works' in Windows.
+         */
+        case MAKEFOURCC('I','N','S','T'):
+            TRACE("ATI Instancing check hack\n");
+            if(GL_SUPPORT(ARB_VERTEX_PROGRAM) || GL_SUPPORT(ARB_VERTEX_SHADER)) {
+                TRACE_(d3d_caps)("[OK]\n");
+                return WINED3D_OK;
+            } else {
+                TRACE_(d3d_caps)("[FAILED]\n");
+                return WINED3DERR_NOTAVAILABLE;
+            }
+
         default:
             break;
     }
diff --git a/dlls/wined3d/drawprim.c b/dlls/wined3d/drawprim.c
index 329b5ee..1d2a66e 100644
--- a/dlls/wined3d/drawprim.c
+++ b/dlls/wined3d/drawprim.c
@@ -6,6 +6,7 @@
  * Copyright 2004 Christian Costa
  * Copyright 2005 Oliver Stieber
  * Copyright 2006 Henri Verbeet
+ * Copyright 2007 Stefan Dösinger for CodeWeavers
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -1155,6 +1156,142 @@ static void depth_copy(IWineD3DDevice *iface) {
     }
 }
 
+/* Instancing emulation: draws each instance with its own glDrawElements call. Before every call the
+ * attributes that read from instance data streams are set with immediate mode glVertexAttrib*ARB calls.
+ * idxData must not be NULL (non-indexed draws are not supported); (void *)-1 is passed to GL as a NULL
+ * base pointer. minIndex is only traced and startVertex is not used. */
+inline void drawStridedInstanced(IWineD3DDevice *iface, WineDirect3DVertexStridedData *sd, UINT numberOfVertices,
+                                 GLenum glPrimitiveType, const void *idxData, short idxSize, ULONG minIndex,
+                                 ULONG startIdx, ULONG startVertex) {
+    UINT numInstances = 0, inst;
+    int numInstancedAttribs = 0, i, j;
+    UINT instancedData[sizeof(sd->u.input) / sizeof(sd->u.input[0]) /* 16 */];
+    IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *) iface;
+    IWineD3DStateBlockImpl *stateblock = This->stateBlock;
+
+    if (idxData == NULL) {
+        /* This is a nasty thing. MSDN says no hardware supports that and apps have to use software vertex processing.
+         * We don't support this for now
+         *
+         * Shouldn't be too hard to support with opengl, in theory just call glDrawArrays instead of drawElements.
+         * But the StreamSourceFreq value has a different meaning in that situation.
+         */
+        FIXME("Non-indexed instanced drawing is not supported\n");
+        return;
+    }
+
+    TRACE("(%p) : glElements(%x, %d, %d, ...)\n", This, glPrimitiveType, numberOfVertices, minIndex);
+    idxData = idxData == (void *)-1 ? NULL : idxData;
+
+    /* First, figure out how many instances we have to draw */
+    for(i = 0; i < MAX_STREAMS; i++) {
+        /* Look at all non-instanced streams */
+        if(!(stateblock->streamFlags[i] & D3DSTREAMSOURCE_INSTANCEDATA) && stateblock->streamSource[i]) {
+            UINT freq = stateblock->streamFreq[i];
+            if(numInstances && freq != numInstances) {
+                ERR("Two streams specify a different number of instances. Got %d, new is %d\n", numInstances, freq);
+            }
+            numInstances = freq;
+        }
+    }
+
+    for(i = 0; i < sizeof(sd->u.input) / sizeof(sd->u.input[0]); i++) {
+        /* Skip unused attributes like loadNumberedArrays does, they have no data to read from */
+        if(!sd->u.input[i].lpData && !sd->u.input[i].VBO) continue;
+        if(stateblock->streamFlags[sd->u.input[i].streamNo] & D3DSTREAMSOURCE_INSTANCEDATA) {
+            instancedData[numInstancedAttribs] = i;
+            numInstancedAttribs++;
+        }
+    }
+
+    /* now draw numInstances instances :-) */
+    for(inst = 0; inst < numInstances; inst++) {
+        /* Specify the instanced attributes using immediate mode calls */
+        for(j = 0; j < numInstancedAttribs; j++) {
+            BYTE *ptr = sd->u.input[instancedData[j]].lpData +
+                        sd->u.input[instancedData[j]].dwStride * inst +
+                        stateblock->streamOffset[sd->u.input[instancedData[j]].streamNo];
+            if(sd->u.input[instancedData[j]].VBO) {
+                IWineD3DVertexBufferImpl *vb = (IWineD3DVertexBufferImpl *) stateblock->streamSource[sd->u.input[instancedData[j]].streamNo];
+                ptr += (long) vb->resource.allocatedMemory;
+            }
+            switch(sd->u.input[instancedData[j]].dwType) {
+                case WINED3DDECLTYPE_FLOAT1:
+                    GL_EXTCALL(glVertexAttrib1fvARB(instancedData[j], (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT2:
+                    GL_EXTCALL(glVertexAttrib2fvARB(instancedData[j], (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT3:
+                    GL_EXTCALL(glVertexAttrib3fvARB(instancedData[j], (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_FLOAT4:
+                    GL_EXTCALL(glVertexAttrib4fvARB(instancedData[j], (float *) ptr));
+                    break;
+                case WINED3DDECLTYPE_UBYTE4:
+                {
+                    /* UBYTE4 is not normalized, so pass the raw 0-255 values instead of using the N variant */
+                    float f[4] = {ptr[0], ptr[1], ptr[2], ptr[3]};
+                    GL_EXTCALL(glVertexAttrib4fvARB(instancedData[j], f));
+                    break;
+                }
+                case WINED3DDECLTYPE_UBYTE4N:
+                case WINED3DDECLTYPE_D3DCOLOR:
+                    GL_EXTCALL(glVertexAttrib4NubvARB(instancedData[j], ptr));
+                    break;
+                case WINED3DDECLTYPE_SHORT2:
+                {
+                    /* Only x and y are stored in the stream, z and w default to 0 and 1 */
+                    GLshort s[4] = {((short *) ptr)[0], ((short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4svARB(instancedData[j], s));
+                    break;
+                }
+                case WINED3DDECLTYPE_SHORT4:
+                    GL_EXTCALL(glVertexAttrib4svARB(instancedData[j], (GLshort *) ptr));
+                    break;
+                case WINED3DDECLTYPE_SHORT2N:
+                {
+                    GLshort s[4] = {((short *) ptr)[0], ((short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4NsvARB(instancedData[j], s));
+                    break;
+                }
+                case WINED3DDECLTYPE_USHORT2N:
+                {
+                    GLushort s[4] = {((unsigned short *) ptr)[0], ((unsigned short *) ptr)[1], 0, 1};
+                    GL_EXTCALL(glVertexAttrib4NusvARB(instancedData[j], s));
+                    break;
+                }
+                case WINED3DDECLTYPE_SHORT4N:
+                    GL_EXTCALL(glVertexAttrib4NsvARB(instancedData[j], (GLshort *) ptr));
+                    break;
+                case WINED3DDECLTYPE_USHORT4N:
+                    GL_EXTCALL(glVertexAttrib4NusvARB(instancedData[j], (GLushort *) ptr));
+                    break;
+                case WINED3DDECLTYPE_UDEC3:
+                    FIXME("Unsure about WINED3DDECLTYPE_UDEC3\n"); /* glVertexAttrib3usvARB does not exist */
+                    break;
+                case WINED3DDECLTYPE_DEC3N:
+                    FIXME("Unsure about WINED3DDECLTYPE_DEC3N\n"); /* glVertexAttrib3NusvARB does not exist */
+                    break;
+                case WINED3DDECLTYPE_FLOAT16_2:
+                    /* 16 bit floats? C has no such type, the bits would have to be converted to a 4 byte float by hand (IEEE) */
+                    FIXME("Unsupported WINED3DDECLTYPE_FLOAT16_2\n");
+                    break;
+                case WINED3DDECLTYPE_FLOAT16_4:
+                    FIXME("Unsupported WINED3DDECLTYPE_FLOAT16_4\n");
+                    break;
+                case WINED3DDECLTYPE_UNUSED:
+                default:
+                    ERR("Unexpected declaration in instanced attributes\n");
+                    break;
+            }
+        }
+        glDrawElements(glPrimitiveType, numberOfVertices, idxSize == 2 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_INT,
+                    (const char *)idxData+(idxSize * startIdx));
+        checkGLcall("glDrawElements");
+    }
+}
+
 /* Routine common to the draw primitive and draw indexed primitive routines */
 void drawPrimitive(IWineD3DDevice *iface,
                    int PrimitiveType,
@@ -1202,12 +1339,19 @@ void drawPrimitive(IWineD3DDevice *iface,
         if (numberOfVertices == 0 )
             numberOfVertices = calculatedNumberOfindices;
 
-        if (This->useDrawStridedSlow)
+        if (This->useDrawStridedSlow) {
+            /* Immediate mode drawing */
             drawStridedSlow(iface, &This->strided_streams, calculatedNumberOfindices,
                             glPrimType, idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
-        else
+        } else if(This->instancedDraw) {
+            /* Instancing emulation with mixing immediate mode and arrays */
+            drawStridedInstanced(iface, &This->strided_streams, calculatedNumberOfindices, glPrimType,
+                            idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
+        } else {
+            /* Simple array draw call */
             drawStridedFast(iface, calculatedNumberOfindices, glPrimType,
                             idxData, idxSize, minIndex, StartIdx, StartVertexIndex);
+        }
     }
 
     /* Finshed updating the screen, restore lock */
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index d083997..808d478 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -2146,11 +2146,21 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi
     int i;
     UINT *offset = stateblock->streamOffset;
 
+    /* Default to no instancing */
+    stateblock->wineD3DDevice->instancedDraw = FALSE;
+
     for (i = 0; i < MAX_ATTRIBS; i++) {
 
         if (!strided->u.input[i].lpData && !strided->u.input[i].VBO)
             continue;
 
+        /* Do not load instance data. It will be specified using glVertexAttrib*ARB calls by drawprim */
+        if(stateblock->streamFlags[strided->u.input[i].streamNo] & D3DSTREAMSOURCE_INSTANCEDATA) {
+            GL_EXTCALL(glDisableVertexAttribArrayARB(i));
+            stateblock->wineD3DDevice->instancedDraw = TRUE;
+            continue;
+        }
+
         TRACE_(d3d_shader)("Loading array %u [VBO=%u]\n", i, strided->u.input[i].VBO);
 
         if(strided->u.input[i].dwStride) {
@@ -2227,11 +2237,11 @@ static inline void loadNumberedArrays(IWineD3DStateBlockImpl *stateblock, WineDi
 
                 case WINED3DDECLTYPE_UDEC3:
                     FIXME("Unsure about WINED3DDECLTYPE_UDEC3\n");
-                    /*glVertexAttrib3usvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+                    /*glVertexAttrib3usvARB(i, (GLushort *) ptr); Does not exist */
                     break;
                 case WINED3DDECLTYPE_DEC3N:
                     FIXME("Unsure about WINED3DDECLTYPE_DEC3N\n");
-                    /*glVertexAttrib3NusvARB(instancedData[j], (GLushort *) ptr); Does not exist */
+                    /*glVertexAttrib3NusvARB(i, (GLushort *) ptr); Does not exist */
                     break;
 
                 case WINED3DDECLTYPE_FLOAT16_2:
@@ -2262,6 +2272,10 @@ static void loadVertexData(IWineD3DStateBlockImpl *stateblock, WineDirect3DVerte
     GLint curVBO = GL_SUPPORT(ARB_VERTEX_BUFFER_OBJECT) ? -1 : 0;
 
     TRACE("Using fast vertex array code\n");
+
+    /* This is fixed function pipeline only, and the fixed function pipeline doesn't do instancing */
+    stateblock->wineD3DDevice->instancedDraw = FALSE;
+
     /* Blend Data ---------------------------------------------- */
     if( (sd->u.s.blendWeights.lpData) || (sd->u.s.blendWeights.VBO) ||
         (sd->u.s.blendMatrixIndices.lpData) || (sd->u.s.blendMatrixIndices.VBO) ) {
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 4dc914b..6111f53 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -681,6 +681,7 @@ struct IWineD3DDeviceImpl
     WineDirect3DVertexStridedData strided_streams;
     WineDirect3DVertexStridedData *up_strided;
     BOOL                      useDrawStridedSlow;
+    BOOL                      instancedDraw;
 
     /* Context management */
     WineD3DContext          **contexts;                  /* Dynamic array containing pointers to context structures */
-- 
1.4.4.3



More information about the wine-patches mailing list