Implement pinned memory for fglrx, where glMapBufferRange is atrociously slow
Stanislaw Halik
sthalik at misaki.pl
Mon Feb 18 06:07:14 CST 2013
Got rid of leaks, both memory and BO.
The APPLESYNC bit might be redundant. Sorry for the low quality of the
previous patch(es). This one is properly indented.
I preferred to leave the APPLESYNC bit in rather than risk breakage in some
app that didn't get tested.
Other than using a not-so-good extension, it works fast on fglrx, unlike
glMapBufferRange which is atrociously slow for apps that do it frequently.
The one in question, Falcon BMS, does MapBufferRange every frame to
upload cockpit data, such as bitmap screens.
glBufferData on GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD always gets called with
the same base address, so there shouldn't be any VRAM leaks. If there are,
there's a need for yet another flag. Such flags aren't strictly necessary, but a sync is.
-------------- next part --------------
diff --git a/dlls/wined3d/buffer.c b/dlls/wined3d/buffer.c
index e9a07a5..0e415c0 100644
--- a/dlls/wined3d/buffer.c
+++ b/dlls/wined3d/buffer.c
@@ -548,6 +548,12 @@ static void buffer_unload(struct wined3d_resource *resource)
buffer->stride = 0;
buffer->conversion_stride = 0;
buffer->flags &= ~WINED3D_BUFFER_HASDESC;
+
+ if (buffer->flags & WINED3D_BUFFER_LIFETIME) {
+ buffer->flags &= ~WINED3D_BUFFER_LIFETIME;
+ HeapFree(GetProcessHeap(), 0, buffer->resource.heapMemory);
+ buffer->resource.heapMemory = buffer->resource.allocatedMemory = NULL;
+ }
}
resource_unload(resource);
@@ -589,6 +595,12 @@ DWORD CDECL wined3d_buffer_get_priority(const struct wined3d_buffer *buffer)
return resource_get_priority(&buffer->resource);
}
+/* Block the calling thread until every GL command issued so far has
+ * completed, so that pinned client memory can be touched by the CPU without
+ * racing the GPU.
+ *
+ * glWaitSync() is not suitable here: it only stalls the GL server, and it
+ * requires GL_TIMEOUT_IGNORED as its timeout (anything else raises
+ * GL_INVALID_VALUE). glClientWaitSync() is used instead, and the fence is
+ * deleted afterwards so that no sync object is leaked per call.
+ *
+ * "This" is currently unused; it is kept so existing callers stay unchanged. */
+static void buffer_sync_and_wait_ati(struct wined3d_buffer *This, const struct wined3d_gl_info *gl_info)
+{
+    GLsync fence = GL_EXTCALL(glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0));
+
+    if (!fence)
+    {
+        /* glFenceSync() returns 0 on failure; drain the pipeline the slow way. */
+        gl_info->gl_ops.gl.p_glFinish();
+        return;
+    }
+
+    /* ~0 == GL_TIMEOUT_IGNORED: wait for as long as it takes. */
+    GL_EXTCALL(glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, ~(GLuint64)0));
+    checkGLcall("glClientWaitSync");
+    GL_EXTCALL(glDeleteSync(fence));
+    checkGLcall("glDeleteSync");
+}
+
/* The caller provides a context and binds the buffer */
static void buffer_sync_apple(struct wined3d_buffer *This, DWORD flags, const struct wined3d_gl_info *gl_info)
{
@@ -605,6 +617,12 @@ static void buffer_sync_apple(struct wined3d_buffer *This, DWORD flags, const st
checkGLcall("glBufferDataARB\n");
return;
}
+
+ if (gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ buffer_sync_and_wait_ati(This, gl_info);
+ return;
+ }
if(!This->query)
{
@@ -668,7 +686,13 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
* caller always takes care of this. */
GL_EXTCALL(glBindBufferARB(This->buffer_type_hint, This->buffer_object));
checkGLcall("glBindBufferARB");
- if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+
+ if (gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ map = This->resource.allocatedMemory;
+ buffer_sync_and_wait_ati(This, gl_info);
+ }
+ else if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
{
GLbitfield mapflags;
mapflags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT;
@@ -677,7 +701,7 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
if (flags & WINED3D_BUFFER_NOSYNC)
mapflags |= GL_MAP_UNSYNCHRONIZED_BIT;
map = GL_EXTCALL(glMapBufferRange(This->buffer_type_hint, 0,
- This->resource.size, mapflags));
+ This->resource.size, mapflags));
checkGLcall("glMapBufferRange");
}
else
@@ -705,22 +729,36 @@ static void buffer_direct_upload(struct wined3d_buffer *This, const struct wined
This->modified_areas--;
start = This->maps[This->modified_areas].offset;
len = This->maps[This->modified_areas].size;
-
+
memcpy(map + start, This->resource.allocatedMemory + start, len);
- if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+ if (!gl_info->supported[ATI_PINNED_MEMORY])
{
- GL_EXTCALL(glFlushMappedBufferRange(This->buffer_type_hint, start, len));
- checkGLcall("glFlushMappedBufferRange");
- }
- else if (This->flags & WINED3D_BUFFER_FLUSH)
- {
- GL_EXTCALL(glFlushMappedBufferRangeAPPLE(This->buffer_type_hint, start, len));
- checkGLcall("glFlushMappedBufferRangeAPPLE");
+ if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+ {
+ GL_EXTCALL(glFlushMappedBufferRange(This->buffer_type_hint, start, len));
+ checkGLcall("glFlushMappedBufferRange");
+ }
+ else if (This->flags & WINED3D_BUFFER_FLUSH)
+ {
+ GL_EXTCALL(glFlushMappedBufferRangeAPPLE(This->buffer_type_hint, start, len));
+ checkGLcall("glFlushMappedBufferRangeAPPLE");
+ }
}
}
- GL_EXTCALL(glUnmapBufferARB(This->buffer_type_hint));
- checkGLcall("glUnmapBufferARB");
+ if (gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ GL_EXTCALL(glBindBufferARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, This->buffer_object));
+ GL_EXTCALL(glBufferDataARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, This->resource.size, This->resource.allocatedMemory, GL_DYNAMIC_COPY));
+ GL_EXTCALL(glBindBufferARB(GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD, 0));
+ This->flags |= WINED3D_BUFFER_LIFETIME;
+ buffer_sync_and_wait_ati(This, gl_info);
+ }
+ else
+ {
+ GL_EXTCALL(glUnmapBufferARB(This->buffer_type_hint));
+ checkGLcall("glUnmapBufferARB");
+ }
}
/* Do not call while under the GL lock. */
@@ -1019,6 +1057,7 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
{
if (count == 1)
{
+ BOOL needs_sync_badly = FALSE;
struct wined3d_device *device = buffer->resource.device;
struct wined3d_context *context;
const struct wined3d_gl_info *gl_info;
@@ -1030,7 +1069,12 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
context_invalidate_state(context, STATE_INDEXBUFFER);
GL_EXTCALL(glBindBufferARB(buffer->buffer_type_hint, buffer->buffer_object));
- if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+ if (gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ buffer->flags |= WINED3D_BUFFER_APPLESYNC;
+ needs_sync_badly = TRUE;
+ }
+ else if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
{
GLbitfield mapflags = buffer_gl_map_flags(flags);
buffer->resource.allocatedMemory = GL_EXTCALL(glMapBufferRange(buffer->buffer_type_hint,
@@ -1046,12 +1090,15 @@ HRESULT CDECL wined3d_buffer_map(struct wined3d_buffer *buffer, UINT offset, UIN
checkGLcall("glMapBufferARB");
}
- if (((DWORD_PTR)buffer->resource.allocatedMemory) & (RESOURCE_ALIGNMENT - 1))
+ if (needs_sync_badly ||((DWORD_PTR)buffer->resource.allocatedMemory) & (RESOURCE_ALIGNMENT - 1))
{
WARN("Pointer %p is not %u byte aligned.\n", buffer->resource.allocatedMemory, RESOURCE_ALIGNMENT);
- GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
- checkGLcall("glUnmapBufferARB");
+ if (!gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
+ checkGLcall("glUnmapBufferARB");
+ }
buffer->resource.allocatedMemory = NULL;
if (buffer->resource.usage & WINED3DUSAGE_DYNAMIC)
@@ -1141,31 +1188,41 @@ void CDECL wined3d_buffer_unmap(struct wined3d_buffer *buffer)
context_invalidate_state(context, STATE_INDEXBUFFER);
GL_EXTCALL(glBindBufferARB(buffer->buffer_type_hint, buffer->buffer_object));
- if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
+ if (gl_info->supported[ATI_PINNED_MEMORY])
{
- for (i = 0; i < buffer->modified_areas; ++i)
+ }
+ else
+ {
+ if (gl_info->supported[ARB_MAP_BUFFER_RANGE])
{
- GL_EXTCALL(glFlushMappedBufferRange(buffer->buffer_type_hint,
+ for (i = 0; i < buffer->modified_areas; ++i)
+ {
+ GL_EXTCALL(glFlushMappedBufferRange(buffer->buffer_type_hint,
buffer->maps[i].offset, buffer->maps[i].size));
- checkGLcall("glFlushMappedBufferRange");
+ checkGLcall("glFlushMappedBufferRange");
+ }
}
- }
- else if (buffer->flags & WINED3D_BUFFER_FLUSH)
- {
- for (i = 0; i < buffer->modified_areas; ++i)
+ else if (buffer->flags & WINED3D_BUFFER_FLUSH)
{
- GL_EXTCALL(glFlushMappedBufferRangeAPPLE(buffer->buffer_type_hint,
+ for (i = 0; i < buffer->modified_areas; ++i)
+ {
+ GL_EXTCALL(glFlushMappedBufferRangeAPPLE(buffer->buffer_type_hint,
buffer->maps[i].offset, buffer->maps[i].size));
- checkGLcall("glFlushMappedBufferRangeAPPLE");
+ checkGLcall("glFlushMappedBufferRangeAPPLE");
+ }
}
+ GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
}
-
- GL_EXTCALL(glUnmapBufferARB(buffer->buffer_type_hint));
if (wined3d_settings.strict_draw_ordering)
gl_info->gl_ops.gl.p_glFlush(); /* Flush to ensure ordering across contexts. */
context_release(context);
- buffer->resource.allocatedMemory = NULL;
+ if (gl_info->supported[ATI_PINNED_MEMORY])
+ {
+ buffer_sync_apple(buffer, 0, gl_info);
+ }
+ else
+ buffer->resource.allocatedMemory = NULL;
buffer_clear_dirty_areas(buffer);
}
else if (buffer->flags & WINED3D_BUFFER_HASDESC)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c
index acdcc57..21a7aa8 100644
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@@ -145,6 +145,7 @@ static const struct wined3d_extension_map gl_extension_map[] =
{"GL_ATI_texture_compression_3dc", ATI_TEXTURE_COMPRESSION_3DC },
{"GL_ATI_texture_env_combine3", ATI_TEXTURE_ENV_COMBINE3 },
{"GL_ATI_texture_mirror_once", ATI_TEXTURE_MIRROR_ONCE },
+ {"GL_AMD_pinned_memory", ATI_PINNED_MEMORY },
/* EXT */
{"GL_EXT_blend_color", EXT_BLEND_COLOR },
diff --git a/dlls/wined3d/wined3d_gl.h b/dlls/wined3d/wined3d_gl.h
index 06b5274..bfedb1e 100644
--- a/dlls/wined3d/wined3d_gl.h
+++ b/dlls/wined3d/wined3d_gl.h
@@ -27,6 +27,7 @@
#include "wine/wgl.h"
#define GL_COMPRESSED_LUMINANCE_ALPHA_3DC_ATI 0x8837 /* not in the gl spec */
+#define GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD 0x9160 /* GL_AMD_pinned_memory buffer target; spelled as used in buffer.c */
void (WINE_GLAPI *glDisableWINE)(GLenum cap) DECLSPEC_HIDDEN;
void (WINE_GLAPI *glEnableWINE)(GLenum cap) DECLSPEC_HIDDEN;
@@ -96,6 +97,7 @@ enum wined3d_gl_extension
ATI_TEXTURE_COMPRESSION_3DC,
ATI_TEXTURE_ENV_COMBINE3,
ATI_TEXTURE_MIRROR_ONCE,
+ ATI_PINNED_MEMORY,
/* EXT */
EXT_BLEND_COLOR,
EXT_BLEND_EQUATION_SEPARATE,
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 45f6b29..c516b2f 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2386,6 +2386,7 @@ struct wined3d_map_range
#define WINED3D_BUFFER_DISCARD 0x20 /* A DISCARD lock has occurred since the last PreLoad */
#define WINED3D_BUFFER_NOSYNC 0x40 /* All locks since the last PreLoad had NOOVERWRITE set */
#define WINED3D_BUFFER_APPLESYNC 0x80 /* Using sync as in GL_APPLE_flush_buffer_range */
+#define WINED3D_BUFFER_LIFETIME 0x100 /* Buffer uses pinned memory and has to persist */
struct wined3d_buffer
{
More information about the wine-patches
mailing list