Implement pinned memory for fglrx, where glMapBufferRange is atrociously slow

Stanislaw Halik sthalik at misaki.pl
Mon Feb 18 06:07:14 CST 2013


Got rid of leaks, both memory and BO.

The APPLESYNC bit might be redundant. Sorry for the low quality of the 
previous patch(es). This one is properly indented.

I preferred to leave the APPLESYNC bit in rather than risk breakage in some 
app that didn't get tested.

Other than using a not-so-good extension, it works fast on fglrx, unlike 
glMapBufferRange which is atrociously slow for apps that do it frequently.

The one in question, Falcon BMS, does MapBufferRange every frame to 
upload cockpit data, such as bitmap screens.

The GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD target is always given the same base 
address, so there shouldn't be any VRAM leaks. If there are, yet another flag 
will be needed. The flags aren't strictly necessary, but a sync is.
-------------- next part --------------
diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
index e9a07a5..0e415c0 100644
--- a/dlls/wined3d/buffer.c
+++ b/dlls/wined3d/buffer.c
@@ -548,6 +548,12 @@ static void buffer_unload(struct wined3d_resource *resource)
         buffer->stride = 0;
         buffer->conversion_stride = 0;
         buffer->flags &= ~WINED3D_BUFFER_HASDESC;
+
+        if (buffer->flags & WINED3D_BUFFER_LIFETIME) {
+            buffer->flags &= ~WINED3D_BUFFER_LIFETIME;
+            HeapFree(GetProcessHeap(), 0, buffer->resource.heapMemory);
+            buffer->resource.heapMemory = buffer->resource.allocatedMemory = NULL;
+        }
     }
 
     resource_unload(resource);
@@ -589,6 +595,29 @@ DWORD CDECL wined3d_buffer_get_priority(const struct wined3d_buffer *buffer)
     return resource_get_priority(&buffer->resource);
 }
 
+/* Block the calling thread until the GPU has completed all GL commands issued
+ * so far on the current context.
+ *
+ * Presumably needed because pinned-memory buffers are backed by
+ * resource.allocatedMemory (see the GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
+ * glBufferDataARB() call in buffer_direct_upload()), so CPU writes have to be
+ * ordered against pending GPU work. "This" is currently unused. */
+static void buffer_sync_and_wait_ati(struct wined3d_buffer *This, const struct wined3d_gl_info *gl_info)
+{
+    GLsync fence = GL_EXTCALL(glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0));
+    checkGLcall("glFenceSync");
+
+    /* glWaitSync() only stalls the GL server, and it rejects any timeout other
+     * than GL_TIMEOUT_IGNORED. Waiting on the client side is what actually
+     * holds back the CPU; the flush bit makes sure the fence gets submitted. */
+    GL_EXTCALL(glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED));
+    checkGLcall("glClientWaitSync");
+
+    /* Sync objects are not released implicitly; delete it to avoid a leak. */
+    GL_EXTCALL(glDeleteSync(fence));
+    checkGLcall("glDeleteSync");
+}
+
 /* The caller provides a context and binds the buffer */
 static void buffer_sync_apple(struct wined3d_buffer *This, DWORD flags, const struct wined3d_gl_info *gl_info)
 {
@@ -605,6 +617,12 @@ static void buffer_sync_apple(struct wined3d_buffer *This, DWORD flags, const st
         checkGLcall("glBufferDataARB\n");
         return;
     }
+    
+    if (gl_info->supported[ATI_PINNED_MEMORY])
+    {
+        buffer_sync_and_wait_ati(This, gl_info);
+        return;
+    }
 
     if(!This->query)
     {
@@ -668,7 +686,13 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
      * caller always takes care of this. */
     GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object));
     checkGLcall("glBindBufferARB");
-    if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+    
+    if (gl_info->supported[ATI_PINNED_MEMORY])
+    {
+        map = This->resource.allocatedMemory;
+        buffer_sync_and_wait_ati(This, gl_info);
+    }
+    else if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
     {
         GLbitfield mapflags;
         mapflags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
@@ -677,7 +701,7 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
         if (flags & WINED3D_BUFFER_NOSYNC)
             mapflags |= GL_MAP_UNSYNCHRONIZED_BIT;
         map = GL_EXTCALL(glMapBufferRange(This->buffer_type_hint, 0,
-                    This->resource.size, mapflags));
+                This->resource.size, mapflags));
         checkGLcall("glMapBufferRange");
     }
     else
@@ -705,22 +729,36 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
         This->modified_areas--;
         start = This->maps[This->modified_areas].offset;
         len = This->maps[This->modified_areas].size;
-
+    
         memcpy(map + start, This->resource.allocatedMemory + start, len);
 
-        if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+        if (!gl_info->supported[ATI_PINNED_MEMORY])
         {
-            GL_EXTCALL(glFlushMappedBufferRange(This->buffer_type_hint, start, len));
-            checkGLcall("glFlushMappedBufferRange");
-        }
-        else if (This->flags & WINED3D_BUFFER_FLUSH)
-        {
-            GL_EXTCALL(glFlushMappedBufferRangeAPPLE(This->buffer_type_hint, start, len));
-            checkGLcall("glFlushMappedBufferRangeAPPLE");
+            if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+            {
+                GL_EXTCALL(glFlushMappedBufferRange(This->buffer_type_hint, start, len));
+                checkGLcall("glFlushMappedBufferRange");
+            }
+            else if (This->flags & WINED3D_BUFFER_FLUSH)
+            {
+                GL_EXTCALL(glFlushMappedBufferRangeAPPLE(This->buffer_type_hint, start, len));
+                checkGLcall("glFlushMappedBufferRangeAPPLE");
+            }
         }
     }
-    GL_EXTCALL(glUnmapBufferARB(This->buffer_type_hint));
-    checkGLcall("glUnmapBufferARB");
+    if (gl_info->supported[ATI_PINNED_MEMORY])
+    {
+        GL_EXTCALL(glBindBufferARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, This->buffer_object));
+        GL_EXTCALL(glBufferDataARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, This->resource.size, This->resource.allocatedMemory, GL_DYNAMIC_COPY));
+        GL_EXTCALL(glBindBufferARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0));
+        This->flags |= WINED3D_BUFFER_LIFETIME;
+        buffer_sync_and_wait_ati(This, gl_info);
+    }
+    else
+    {
+        GL_EXTCALL(glUnmapBufferARB(This->buffer_type_hint));
+        checkGLcall("glUnmapBufferARB");
+    }
 }
 
 /* Do not call while under the GL lock. */
@@ -1019,6 +1057,7 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
         {
             if (count == 1)
             {
+                BOOL needs_sync_badly = FALSE;
                 struct wined3d_device *device = buffer->resource.device;
                 struct wined3d_context *context;
                 const struct wined3d_gl_info *gl_info;
@@ -1030,7 +1069,12 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
                     context_invalidate_state(context, STATE_INDEXBUFFER);
                 GL_EXTCALL(glBindBufferARB(buffer->buffer_type_hint, buffer->buffer_object));
 
-                if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+                if (gl_info->supported[ATI_PINNED_MEMORY])
+                {
+                    buffer->flags |= WINED3D_BUFFER_APPLESYNC;
+                    needs_sync_badly = TRUE;
+                }
+                else if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
                 {
                     GLbitfield mapflags = buffer_gl_map_flags(flags);
                     buffer->resource.allocatedMemory = GL_EXTCALL(glMapBufferRange(buffer->buffer_type_hint,
@@ -1046,12 +1090,15 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
                     checkGLcall("glMapBufferARB");
                 }
 
-                if (((DWORD_PTR)buffer->resource.allocatedMemory) & (RESOURCE_ALIGNMENT - 1))
+                if (needs_sync_badly ||((DWORD_PTR)buffer->resource.allocatedMemory) & (RESOURCE_ALIGNMENT - 1))
                 {
                     WARN("Pointer %p is not %u byte aligned.\n", buffer->resource.allocatedMemory, RESOURCE_ALIGNMENT);
 
-                    GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
-                    checkGLcall("glUnmapBufferARB");
+                    if (!gl_info->supported[ATI_PINNED_MEMORY])
+                    {
+                        GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
+                        checkGLcall("glUnmapBufferARB");
+                    }
                     buffer->resource.allocatedMemory = NULL;
 
                     if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
@@ -1141,31 +1188,41 @@ void CDECL wined3d_buffer_unmap(struct wined3d_buffer *buffer)
             context_invalidate_state(context, STATE_INDEXBUFFER);
         GL_EXTCALL(glBindBufferARB(buffer->buffer_type_hint, buffer->buffer_object));
 
-        if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+        if (gl_info->supported[ATI_PINNED_MEMORY])
         {
-            for (i = 0; i < buffer->modified_areas; ++i)
+        }
+        else
+        {
+            if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
             {
-                GL_EXTCALL(glFlushMappedBufferRange(buffer->buffer_type_hint,
+                for (i = 0; i < buffer->modified_areas; ++i)
+                {
+                    GL_EXTCALL(glFlushMappedBufferRange(buffer->buffer_type_hint,
                         buffer->maps[i].offset, buffer->maps[i].size));
-                checkGLcall("glFlushMappedBufferRange");
+                    checkGLcall("glFlushMappedBufferRange");
+                }
             }
-        }
-        else if (buffer->flags & WINED3D_BUFFER_FLUSH)
-        {
-            for (i = 0; i < buffer->modified_areas; ++i)
+            else if (buffer->flags & WINED3D_BUFFER_FLUSH)
             {
-                GL_EXTCALL(glFlushMappedBufferRangeAPPLE(buffer->buffer_type_hint,
+                for (i = 0; i < buffer->modified_areas; ++i)
+                {
+                    GL_EXTCALL(glFlushMappedBufferRangeAPPLE(buffer->buffer_type_hint,
                         buffer->maps[i].offset, buffer->maps[i].size));
-                checkGLcall("glFlushMappedBufferRangeAPPLE");
+                    checkGLcall("glFlushMappedBufferRangeAPPLE");
+                }
             }
+            GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
         }
-
-        GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
         if (wined3d_settings.strict_draw_ordering)
             gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
         context_release(context);
 
-        buffer->resource.allocatedMemory = NULL;
+        if (gl_info->supported[ATI_PINNED_MEMORY])
+        {
+            buffer_sync_apple(buffer, 0, gl_info);
+        }
+        else
+            buffer->resource.allocatedMemory = NULL;
         buffer_clear_dirty_areas(buffer);
     }
     else if (buffer->flags & WINED3D_BUFFER_HASDESC)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
index acdcc57..21a7aa8 100644
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@@ -145,6 +145,7 @@ static const struct wined3d_extension_map gl_extension_map[] =
     {"GL_ATI_texture_compression_3dc",      ATI_TEXTURE_COMPRESSION_3DC   },
     {"GL_ATI_texture_env_combine3",         ATI_TEXTURE_ENV_COMBINE3      },
     {"GL_ATI_texture_mirror_once",          ATI_TEXTURE_MIRROR_ONCE       },
+    {"GL_AMD_pinned_memory",                ATI_PINNED_MEMORY             },
 
     /* EXT */
     {"GL_EXT_blend_color",                  EXT_BLEND_COLOR               },
diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
index 06b5274..bfedb1e 100644
--- a/dlls/wined3d/wined3d_gl.h
+++ b/dlls/wined3d/wined3d_gl.h
@@ -27,6 +27,7 @@
 #include "wine/wgl.h"
 
 #define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837  /* not in the gl spec */
+#define GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD 0x9160  /* GL_AMD_pinned_memory buffer target */
 
 void (WINE_GLAPI *glDisableWINE)(GLenum cap) DECLSPEC_HIDDEN;
 void (WINE_GLAPI *glEnableWINE)(GLenum cap) DECLSPEC_HIDDEN;
@@ -96,6 +97,7 @@ enum wined3d_gl_extension
     ATI_TEXTURE_COMPRESSION_3DC,
     ATI_TEXTURE_ENV_COMBINE3,
     ATI_TEXTURE_MIRROR_ONCE,
+    ATI_PINNED_MEMORY,
     /* EXT */
     EXT_BLEND_COLOR,
     EXT_BLEND_EQUATION_SEPARATE,
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 45f6b29..c516b2f 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2386,6 +2386,7 @@ struct wined3d_map_range
 #define WINED3D_BUFFER_DISCARD      0x20    /* A DISCARD lock has occurred since the last PreLoad */
 #define WINED3D_BUFFER_NOSYNC       0x40    /* All locks since the last PreLoad had NOOVERWRITE set */
 #define WINED3D_BUFFER_APPLESYNC    0x80    /* Using sync as in GL_APPLE_flush_buffer_range */
+#define WINED3D_BUFFER_LIFETIME     0x100   /* Buffer uses pinned memory and has to persist */
 
 struct wined3d_buffer
 {


More information about the wine-patches mailing list