[PATCH 3/5] wined3d: Merge the IWineD3DSurface::Blt() implementations.

Henri Verbeet hverbeet at codeweavers.com
Fri Apr 29 06:03:40 CDT 2011


---
 dlls/wined3d/surface.c         | 4187 ++++++++++++++++++++--------------------
 dlls/wined3d/wined3d_private.h |    3 +
 2 files changed, 2084 insertions(+), 2106 deletions(-)

diff --git a/dlls/wined3d/surface.c b/dlls/wined3d/surface.c
index cbc5dfe..bb8d44a 100644
--- a/dlls/wined3d/surface.c
+++ b/dlls/wined3d/surface.c
@@ -35,6 +35,9 @@
 WINE_DEFAULT_DEBUG_CHANNEL(d3d_surface);
 WINE_DECLARE_DEBUG_CHANNEL(d3d);
 
+static HRESULT surface_cpu_blt(IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect,
+        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD flags,
+        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
 static HRESULT surface_cpu_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
         IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD trans);
 static HRESULT IWineD3DSurfaceImpl_BltOverride(IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect,
@@ -1068,160 +1071,478 @@ static HRESULT surface_flip(IWineD3DSurfaceImpl *surface, IWineD3DSurfaceImpl *o
     return WINED3D_OK;
 }
 
-/* Do not call while under the GL lock. */
-static HRESULT surface_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
-        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect_in, DWORD trans)
+static BOOL surface_is_full_rect(IWineD3DSurfaceImpl *surface, const RECT *r)
 {
-    IWineD3DDeviceImpl *device = dst_surface->resource.device;
+    if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
+        return FALSE;
+    if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
+        return FALSE;
+    return TRUE;
+}
 
-    TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
-            dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
+static void wined3d_surface_depth_blt_fbo(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *src_surface,
+        const RECT *src_rect, IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect)
+{
+    const struct wined3d_gl_info *gl_info;
+    struct wined3d_context *context;
+    DWORD src_mask, dst_mask;
+    GLbitfield gl_mask;
 
-    if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
+    TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
+            device, src_surface, wine_dbgstr_rect(src_rect),
+            dst_surface, wine_dbgstr_rect(dst_rect));
+
+    src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
+    dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
+
+    if (src_mask != dst_mask)
     {
-        WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
-        return WINEDDERR_SURFACEBUSY;
+        ERR("Incompatible formats %s and %s.\n",
+                debug_d3dformat(src_surface->resource.format->id),
+                debug_d3dformat(dst_surface->resource.format->id));
+        return;
     }
 
-    if (device->inScene && (dst_surface == device->depth_stencil || src_surface == device->depth_stencil))
+    if (!src_mask)
     {
-        WARN("Attempt to access the depth / stencil surface while in a scene.\n");
-        return WINED3DERR_INVALIDCALL;
+        ERR("Not a depth / stencil format: %s.\n",
+                debug_d3dformat(src_surface->resource.format->id));
+        return;
     }
 
-    /* Special cases for RenderTargets */
-    if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
-            || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
+    gl_mask = 0;
+    if (src_mask & WINED3DFMT_FLAG_DEPTH)
+        gl_mask |= GL_DEPTH_BUFFER_BIT;
+    if (src_mask & WINED3DFMT_FLAG_STENCIL)
+        gl_mask |= GL_STENCIL_BUFFER_BIT;
+
+    /* Make sure the locations are up-to-date. Loading the destination
+     * surface isn't required if the entire surface is overwritten. */
+    surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
+    if (!surface_is_full_rect(dst_surface, dst_rect))
+        surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
+
+    context = context_acquire(device, NULL);
+    if (!context->valid)
     {
+        context_release(context);
+        WARN("Invalid context, skipping blit.\n");
+        return;
+    }
 
-        RECT src_rect, dst_rect;
-        DWORD flags = 0;
+    gl_info = context->gl_info;
 
-        surface_get_rect(src_surface, src_rect_in, &src_rect);
+    ENTER_GL();
 
-        dst_rect.left = dst_x;
-        dst_rect.top = dst_y;
-        dst_rect.right = dst_x + src_rect.right - src_rect.left;
-        dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
+    context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
+    glReadBuffer(GL_NONE);
+    checkGLcall("glReadBuffer()");
+    context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
 
-        /* Convert BltFast flags into Blt ones because BltOverride is called
-         * from Blt as well. */
-        if (trans & WINEDDBLTFAST_SRCCOLORKEY)
-            flags |= WINEDDBLT_KEYSRC;
-        if (trans & WINEDDBLTFAST_DESTCOLORKEY)
-            flags |= WINEDDBLT_KEYDEST;
-        if (trans & WINEDDBLTFAST_WAIT)
-            flags |= WINEDDBLT_WAIT;
-        if (trans & WINEDDBLTFAST_DONOTWAIT)
-            flags |= WINEDDBLT_DONOTWAIT;
+    context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
+    context_set_draw_buffer(context, GL_NONE);
+    context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
 
-        if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
-                &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
-            return WINED3D_OK;
+    if (gl_mask & GL_DEPTH_BUFFER_BIT)
+    {
+        glDepthMask(GL_TRUE);
+        IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
+    }
+    if (gl_mask & GL_STENCIL_BUFFER_BIT)
+    {
+        if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
+        {
+            glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
+            IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
+        }
+        glStencilMask(~0U);
+        IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
     }
 
-    return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
-}
+    glDisable(GL_SCISSOR_TEST);
+    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
 
-static HRESULT surface_set_mem(IWineD3DSurfaceImpl *surface, void *mem)
-{
-    TRACE("surface %p, mem %p.\n", surface, mem);
+    gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
+            dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
+    checkGLcall("glBlitFramebuffer()");
 
-    if (mem && mem != surface->resource.allocatedMemory)
-    {
-        void *release = NULL;
+    LEAVE_GL();
 
-        /* Do I have to copy the old surface content? */
-        if (surface->flags & SFLAG_DIBSECTION)
-        {
-            SelectObject(surface->hDC, surface->dib.holdbitmap);
-            DeleteDC(surface->hDC);
-            /* Release the DIB section. */
-            DeleteObject(surface->dib.DIBsection);
-            surface->dib.bitmap_data = NULL;
-            surface->resource.allocatedMemory = NULL;
-            surface->hDC = NULL;
-            surface->flags &= ~SFLAG_DIBSECTION;
-        }
-        else if (!(surface->flags & SFLAG_USERPTR))
-        {
-            release = surface->resource.heapMemory;
-            surface->resource.heapMemory = NULL;
-        }
-        surface->resource.allocatedMemory = mem;
-        surface->flags |= SFLAG_USERPTR;
+    if (wined3d_settings.strict_draw_ordering)
+        wglFlush(); /* Flush to ensure ordering across contexts. */
 
-        /* Now the surface memory is most up do date. Invalidate drawable and texture. */
-        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
+    context_release(context);
+}
 
-        /* For client textures OpenGL has to be notified. */
-        if (surface->flags & SFLAG_CLIENT)
-            surface_release_client_storage(surface);
+static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
+        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
+        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
+{
+    if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
+        return FALSE;
 
-        /* Now free the old memory if any. */
-        HeapFree(GetProcessHeap(), 0, release);
-    }
-    else if (surface->flags & SFLAG_USERPTR)
+    /* Source and/or destination need to be on the GL side */
+    if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
+        return FALSE;
+
+    switch (blit_op)
     {
-        /* Map and GetDC will re-create the dib section and allocated memory. */
-        surface->resource.allocatedMemory = NULL;
-        /* HeapMemory should be NULL already. */
-        if (surface->resource.heapMemory)
-            ERR("User pointer surface has heap memory allocated.\n");
-        surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
+        case WINED3D_BLIT_OP_COLOR_BLIT:
+            if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
+                return FALSE;
+            if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
+                return FALSE;
+            break;
 
-        if (surface->flags & SFLAG_CLIENT)
-            surface_release_client_storage(surface);
+        case WINED3D_BLIT_OP_DEPTH_BLIT:
+            if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
+                return FALSE;
+            if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
+                return FALSE;
+            break;
 
-        surface_prepare_system_memory(surface);
-        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
+        default:
+            return FALSE;
     }
 
-    return WINED3D_OK;
+    if (!(src_format->id == dst_format->id
+            || (is_identity_fixup(src_format->color_fixup)
+            && is_identity_fixup(dst_format->color_fixup))))
+        return FALSE;
+
+    return TRUE;
 }
 
-/* Context activation is done by the caller. */
-static void surface_remove_pbo(IWineD3DSurfaceImpl *surface, const struct wined3d_gl_info *gl_info)
+static BOOL surface_convert_depth_to_float(IWineD3DSurfaceImpl *surface, DWORD depth, float *float_depth)
 {
-    if (!surface->resource.heapMemory)
+    const struct wined3d_format *format = surface->resource.format;
+
+    switch (format->id)
     {
-        surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
-        surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
-                + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
-    }
+        case WINED3DFMT_S1_UINT_D15_UNORM:
+            *float_depth = depth / (float)0x00007fff;
+            break;
 
-    ENTER_GL();
-    GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
-    checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
-    GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
-            surface->resource.size, surface->resource.allocatedMemory));
-    checkGLcall("glGetBufferSubDataARB");
-    GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
-    checkGLcall("glDeleteBuffersARB");
-    LEAVE_GL();
+        case WINED3DFMT_D16_UNORM:
+            *float_depth = depth / (float)0x0000ffff;
+            break;
 
-    surface->pbo = 0;
-    surface->flags &= ~SFLAG_PBO;
+        case WINED3DFMT_D24_UNORM_S8_UINT:
+        case WINED3DFMT_X8D24_UNORM:
+            *float_depth = depth / (float)0x00ffffff;
+            break;
+
+        case WINED3DFMT_D32_UNORM:
+            *float_depth = depth / (float)0xffffffff;
+            break;
+
+        default:
+            ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
+            return FALSE;
+    }
+
+    return TRUE;
 }
 
 /* Do not call while under the GL lock. */
-static void surface_unload(struct wined3d_resource *resource)
+static HRESULT wined3d_surface_depth_fill(IWineD3DSurfaceImpl *surface, const RECT *rect, float depth)
 {
-    IWineD3DSurfaceImpl *surface = surface_from_resource(resource);
+    const struct wined3d_resource *resource = &surface->resource;
     IWineD3DDeviceImpl *device = resource->device;
-    const struct wined3d_gl_info *gl_info;
-    renderbuffer_entry_t *entry, *entry2;
-    struct wined3d_context *context;
-
-    TRACE("surface %p.\n", surface);
+    const struct blit_shader *blitter;
 
-    if (resource->pool == WINED3DPOOL_DEFAULT)
+    blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
+            NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
+    if (!blitter)
     {
-        /* Default pool resources are supposed to be destroyed before Reset is called.
-         * Implicit resources stay however. So this means we have an implicit render target
-         * or depth stencil. The content may be destroyed, but we still have to tear down
-         * opengl resources, so we cannot leave early.
-         *
+        FIXME("No blitter is capable of performing the requested depth fill operation.\n");
+        return WINED3DERR_INVALIDCALL;
+    }
+
+    return blitter->depth_fill(device, surface, rect, depth);
+}
+
+static HRESULT wined3d_surface_depth_blt(IWineD3DSurfaceImpl *src_surface, const RECT *src_rect,
+        IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect)
+{
+    IWineD3DDeviceImpl *device = src_surface->resource.device;
+    /* Only the FBO based depth blit is available here; bail out if it can't handle these surfaces. */
+    if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
+            src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
+            dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
+        return WINED3DERR_INVALIDCALL;
+
+    wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
+    /* NOTE(review): the helper above returns void and may bail out early; locations are updated regardless. */
+    surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
+            dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
+    surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
+
+    return WINED3D_OK;
+}
+
+/* Blit from src_surface (which may be NULL) to dst_surface. Depth / stencil and
+ * render target blits are first tried on their dedicated paths; unless rejected
+ * outright, the rest goes to surface_cpu_blt(). Do not call while under the GL lock. */
+static HRESULT surface_blt(IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect_in,
+        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect_in, DWORD flags,
+        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
+{
+    IWineD3DDeviceImpl *device = dst_surface->resource.device;
+    DWORD src_ds_flags, dst_ds_flags;
+
+    TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
+            dst_surface, wine_dbgstr_rect(dst_rect_in), src_surface, wine_dbgstr_rect(src_rect_in),
+            flags, fx, debug_d3dtexturefiltertype(filter));
+    TRACE("Usage is %s.\n", debug_d3dusage(dst_surface->resource.usage));
+
+    if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
+    {
+        WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
+        return WINEDDERR_SURFACEBUSY;
+    }
+
+    dst_ds_flags = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
+    src_ds_flags = src_surface
+            ? src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL) : 0;
+
+    if (src_ds_flags || dst_ds_flags)
+    {
+        if (flags & WINEDDBLT_DEPTHFILL)
+        {
+            float depth;
+            RECT rect;
+
+            TRACE("Depth fill.\n");
+
+            surface_get_rect(dst_surface, dst_rect_in, &rect);
+
+            if (!surface_convert_depth_to_float(dst_surface, fx->u5.dwFillDepth, &depth))
+                return WINED3DERR_INVALIDCALL;
+
+            if (SUCCEEDED(wined3d_surface_depth_fill(dst_surface, &rect, depth)))
+                return WINED3D_OK;
+        }
+        else
+        {
+            RECT src_rect, dst_rect;
+
+            /* Accessing depth / stencil surfaces is supposed to fail while in
+             * a scene, except for fills, which seem to work. */
+            if (device->inScene)
+            {
+                WARN("Rejecting depth / stencil access while in scene.\n");
+                return WINED3DERR_INVALIDCALL;
+            }
+
+            if (src_ds_flags != dst_ds_flags)
+            {
+                WARN("Rejecting depth / stencil blit between incompatible formats.\n");
+                return WINED3DERR_INVALIDCALL;
+            }
+
+            if (src_rect_in && (src_rect_in->top || src_rect_in->left
+                    || src_rect_in->bottom != src_surface->resource.height
+                    || src_rect_in->right != src_surface->resource.width))
+            {
+                WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
+                        wine_dbgstr_rect(src_rect_in));
+                return WINED3DERR_INVALIDCALL;
+            }
+
+            if (dst_rect_in && (dst_rect_in->top || dst_rect_in->left
+                    || dst_rect_in->bottom != dst_surface->resource.height
+                    || dst_rect_in->right != dst_surface->resource.width))
+            {
+                WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
+                        wine_dbgstr_rect(dst_rect_in));
+                return WINED3DERR_INVALIDCALL;
+            }
+
+            if (src_surface->resource.height != dst_surface->resource.height
+                    || src_surface->resource.width != dst_surface->resource.width)
+            {
+                WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
+                return WINED3DERR_INVALIDCALL;
+            }
+
+            surface_get_rect(src_surface, src_rect_in, &src_rect);
+            surface_get_rect(dst_surface, dst_rect_in, &dst_rect);
+
+            if (SUCCEEDED(wined3d_surface_depth_blt(src_surface, &src_rect, dst_surface, &dst_rect)))
+                return WINED3D_OK;
+        }
+    }
+
+    /* Special cases for render targets. */
+    if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
+            || (src_surface && (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)))
+    {
+        if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface, dst_rect_in,
+                src_surface, src_rect_in, flags, fx, filter)))
+            return WINED3D_OK;
+    }
+
+    /* Everything else is done on the CPU. For render targets this should be
+     * implemented OpenGL accelerated in BltOverride, other blits are rather
+     * rare. */
+    return surface_cpu_blt(dst_surface, dst_rect_in, src_surface, src_rect_in, flags, fx, filter);
+}
+
+/* Blit src_rect_in of src_surface to (dst_x, dst_y) on dst_surface. Do not call while under the GL lock. */
+static HRESULT surface_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
+        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect_in, DWORD trans)
+{
+    IWineD3DDeviceImpl *device = dst_surface->resource.device;
+
+    TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
+            dst_surface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect_in), trans);
+
+    if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface->flags & SFLAG_LOCKED))
+    {
+        WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY.\n");
+        return WINEDDERR_SURFACEBUSY;
+    }
+
+    if (device->inScene && (dst_surface == device->depth_stencil || src_surface == device->depth_stencil))
+    {
+        WARN("Attempt to access the depth / stencil surface while in a scene.\n");
+        return WINED3DERR_INVALIDCALL;
+    }
+
+    /* Render targets: give BltOverride a chance before falling back to the CPU. */
+    if ((dst_surface->resource.usage & WINED3DUSAGE_RENDERTARGET)
+            || (src_surface->resource.usage & WINED3DUSAGE_RENDERTARGET))
+    {
+        /* BltOverride takes rectangles, so build the destination rect from dst_x / dst_y. */
+        RECT src_rect, dst_rect;
+        DWORD flags = 0;
+
+        surface_get_rect(src_surface, src_rect_in, &src_rect);
+
+        dst_rect.left = dst_x;
+        dst_rect.top = dst_y;
+        dst_rect.right = dst_x + src_rect.right - src_rect.left;
+        dst_rect.bottom = dst_y + src_rect.bottom - src_rect.top;
+
+        /* Convert BltFast flags into Blt ones because BltOverride is called
+         * from Blt as well. */
+        if (trans & WINEDDBLTFAST_SRCCOLORKEY)
+            flags |= WINEDDBLT_KEYSRC;
+        if (trans & WINEDDBLTFAST_DESTCOLORKEY)
+            flags |= WINEDDBLT_KEYDEST;
+        if (trans & WINEDDBLTFAST_WAIT)
+            flags |= WINEDDBLT_WAIT;
+        if (trans & WINEDDBLTFAST_DONOTWAIT)
+            flags |= WINEDDBLT_DONOTWAIT;
+
+        if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(dst_surface,
+                &dst_rect, src_surface, &src_rect, flags, NULL, WINED3DTEXF_POINT)))
+            return WINED3D_OK;
+    }
+    /* Not a render target blit, or BltOverride failed: use the CPU implementation. */
+    return surface_cpu_bltfast(dst_surface, dst_x, dst_y, src_surface, src_rect_in, trans);
+}
+
+static HRESULT surface_set_mem(IWineD3DSurfaceImpl *surface, void *mem)
+{
+    TRACE("surface %p, mem %p.\n", surface, mem);
+
+    if (mem && mem != surface->resource.allocatedMemory)
+    {
+        void *release = NULL;
+
+        /* Do I have to copy the old surface content? */
+        if (surface->flags & SFLAG_DIBSECTION)
+        {
+            SelectObject(surface->hDC, surface->dib.holdbitmap);
+            DeleteDC(surface->hDC);
+            /* Release the DIB section. */
+            DeleteObject(surface->dib.DIBsection);
+            surface->dib.bitmap_data = NULL;
+            surface->resource.allocatedMemory = NULL;
+            surface->hDC = NULL;
+            surface->flags &= ~SFLAG_DIBSECTION;
+        }
+        else if (!(surface->flags & SFLAG_USERPTR))
+        {
+            release = surface->resource.heapMemory;
+            surface->resource.heapMemory = NULL;
+        }
+        surface->resource.allocatedMemory = mem;
+        surface->flags |= SFLAG_USERPTR;
+
+        /* Now the surface memory is most up to date. Invalidate drawable and texture. */
+        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
+
+        /* For client textures OpenGL has to be notified. */
+        if (surface->flags & SFLAG_CLIENT)
+            surface_release_client_storage(surface);
+
+        /* Now free the old heap memory, if any; HeapFree() is only reached with our own allocation or NULL. */
+        HeapFree(GetProcessHeap(), 0, release);
+    }
+    else if (surface->flags & SFLAG_USERPTR)
+    {
+        /* Map and GetDC will re-create the dib section and allocated memory. */
+        surface->resource.allocatedMemory = NULL;
+        /* HeapMemory should be NULL already. */
+        if (surface->resource.heapMemory)
+            ERR("User pointer surface has heap memory allocated.\n");
+        surface->flags &= ~(SFLAG_USERPTR | SFLAG_INSYSMEM);
+
+        if (surface->flags & SFLAG_CLIENT)
+            surface_release_client_storage(surface);
+
+        surface_prepare_system_memory(surface);
+        surface_modify_location(surface, SFLAG_INSYSMEM, TRUE);
+    }
+
+    return WINED3D_OK;
+}
+
+/* Context activation is done by the caller. NOTE(review): the HeapAlloc() result below is not checked. */
+static void surface_remove_pbo(IWineD3DSurfaceImpl *surface, const struct wined3d_gl_info *gl_info)
+{
+    if (!surface->resource.heapMemory)
+    {
+        surface->resource.heapMemory = HeapAlloc(GetProcessHeap(), 0, surface->resource.size + RESOURCE_ALIGNMENT);
+        surface->resource.allocatedMemory = (BYTE *)(((ULONG_PTR)surface->resource.heapMemory
+                + (RESOURCE_ALIGNMENT - 1)) & ~(RESOURCE_ALIGNMENT - 1));
+    }
+    /* Read the buffer contents back into allocatedMemory, then delete the buffer object. */
+    ENTER_GL();
+    GL_EXTCALL(glBindBufferARB(GL_PIXEL_UNPACK_BUFFER_ARB, surface->pbo));
+    checkGLcall("glBindBufferARB(GL_PIXEL_UNPACK_BUFFER, surface->pbo)");
+    GL_EXTCALL(glGetBufferSubDataARB(GL_PIXEL_UNPACK_BUFFER_ARB, 0,
+            surface->resource.size, surface->resource.allocatedMemory));
+    checkGLcall("glGetBufferSubDataARB");
+    GL_EXTCALL(glDeleteBuffersARB(1, &surface->pbo));
+    checkGLcall("glDeleteBuffersARB");
+    LEAVE_GL();
+
+    surface->pbo = 0;
+    surface->flags &= ~SFLAG_PBO;
+}
+
+/* Do not call while under the GL lock. */
+static void surface_unload(struct wined3d_resource *resource)
+{
+    IWineD3DSurfaceImpl *surface = surface_from_resource(resource);
+    IWineD3DDeviceImpl *device = resource->device;
+    const struct wined3d_gl_info *gl_info;
+    renderbuffer_entry_t *entry, *entry2;
+    struct wined3d_context *context;
+
+    TRACE("surface %p.\n", surface);
+
+    if (resource->pool == WINED3DPOOL_DEFAULT)
+    {
+        /* Default pool resources are supposed to be destroyed before Reset is called.
+         * Implicit resources stay however. So this means we have an implicit render target
+         * or depth stencil. The content may be destroyed, but we still have to tear down
+         * opengl resources, so we cannot leave early.
+         *
          * Put the surfaces into sysmem, and reset the content. The D3D content is undefined,
          * but we can't set the sysmem INDRAWABLE because when we're rendering the swapchain
          * or the depth stencil into an FBO the texture or render buffer will be removed
@@ -1294,6 +1615,7 @@ static const struct wined3d_surface_ops surface_ops =
     surface_unmap,
     surface_getdc,
     surface_flip,
+    surface_blt,
     surface_bltfast,
     surface_set_mem,
 };
@@ -1478,7 +1800,18 @@ static HRESULT gdi_surface_flip(IWineD3DSurfaceImpl *surface, IWineD3DSurfaceImp
     return WINED3D_OK;
 }
 
-static HRESULT gdi_surface_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
+static HRESULT gdi_surface_blt(IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect,
+        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD flags,
+        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
+{
+    TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
+            dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
+            flags, fx, debug_d3dtexturefiltertype(filter));
+    /* GDI surfaces go straight to the CPU blitter, with all arguments passed through unchanged. */
+    return surface_cpu_blt(dst_surface, dst_rect, src_surface, src_rect, flags, fx, filter);
+}
+
+static HRESULT gdi_surface_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
         IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD trans)
 {
     TRACE("dst_surface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, flags %#x.\n",
@@ -1545,6 +1878,7 @@ static const struct wined3d_surface_ops gdi_surface_ops =
     gdi_surface_unmap,
     gdi_surface_getdc,
     gdi_surface_flip,
+    gdi_surface_blt,
     gdi_surface_bltfast,
     gdi_surface_set_mem,
 };
@@ -2163,37 +2497,6 @@ static BOOL surface_convert_color_to_float(IWineD3DSurfaceImpl *surface, DWORD c
     return TRUE;
 }
 
-static BOOL surface_convert_depth_to_float(IWineD3DSurfaceImpl *surface, DWORD depth, float *float_depth)
-{
-    const struct wined3d_format *format = surface->resource.format;
-
-    switch (format->id)
-    {
-        case WINED3DFMT_S1_UINT_D15_UNORM:
-            *float_depth = depth / (float)0x00007fff;
-            break;
-
-        case WINED3DFMT_D16_UNORM:
-            *float_depth = depth / (float)0x0000ffff;
-            break;
-
-        case WINED3DFMT_D24_UNORM_S8_UINT:
-        case WINED3DFMT_X8D24_UNORM:
-            *float_depth = depth / (float)0x00ffffff;
-            break;
-
-        case WINED3DFMT_D32_UNORM:
-            *float_depth = depth / (float)0xffffffff;
-            break;
-
-        default:
-            ERR("Unhandled conversion from %s to floating point.\n", debug_d3dformat(format->id));
-            return FALSE;
-    }
-
-    return TRUE;
-}
-
 HRESULT surface_load(IWineD3DSurfaceImpl *surface, BOOL srgb)
 {
     DWORD flag = srgb ? SFLAG_INSRGBTEX : SFLAG_INTEXTURE;
@@ -3146,924 +3449,298 @@ do { \
     return WINED3D_OK;
 }
 
-/*****************************************************************************
- * IWineD3DSurface::Blt, SW emulation version
- *
- * Performs a blit to a surface, with or without a source surface.
- * This is the main functionality of DirectDraw
- *****************************************************************************/
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Blt(IWineD3DSurface *iface,
-        const RECT *dst_rect, IWineD3DSurface *src_surface, const RECT *src_rect,
-        DWORD flags, const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
+/* Do not call while under the GL lock. */
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Blt(IWineD3DSurface *iface, const RECT *dst_rect,
+        IWineD3DSurface *src_surface, const RECT *src_rect, DWORD flags,
+        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
 {
     IWineD3DSurfaceImpl *dst_surface = (IWineD3DSurfaceImpl *)iface;
-    IWineD3DSurfaceImpl *src = (IWineD3DSurfaceImpl *)src_surface;
-    int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
-    const struct wined3d_format *src_format, *dst_format;
-    WINED3DLOCKED_RECT dlock, slock;
-    HRESULT ret = WINED3D_OK;
-    const BYTE *sbuf;
-    RECT xdst,xsrc;
-    BYTE *dbuf;
-    int x, y;
 
     TRACE("iface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
             iface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
             flags, fx, debug_d3dtexturefiltertype(filter));
 
-    if ((dst_surface->flags & SFLAG_LOCKED) || (src && (src->flags & SFLAG_LOCKED)))
-    {
-        WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
-        return WINEDDERR_SURFACEBUSY;
-    }
-
-    /* First check for the validity of source / destination rectangles.
-     * This was verified using a test application and by MSDN. */
-    if (src_rect)
-    {
-        if (src)
-        {
-            if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
-                    || src_rect->left > src->resource.width || src_rect->left < 0
-                    || src_rect->top > src->resource.height || src_rect->top < 0
-                    || src_rect->right > src->resource.width || src_rect->right < 0
-                    || src_rect->bottom > src->resource.height || src_rect->bottom < 0)
-            {
-                WARN("Application gave us bad source rectangle for Blt.\n");
-                return WINEDDERR_INVALIDRECT;
-            }
+    return dst_surface->surface_ops->surface_blt(dst_surface, dst_rect,
+            (IWineD3DSurfaceImpl *)src_surface, src_rect, flags, fx, filter);
+}
 
-            if (!src_rect->right || !src_rect->bottom
-                    || src_rect->left == (int)src->resource.width
-                    || src_rect->top == (int)src->resource.height)
-            {
-                TRACE("Nothing to be done.\n");
-                return WINED3D_OK;
-            }
-        }
+/* Do not call while under the GL lock. */
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_BltFast(IWineD3DSurface *iface,
+        DWORD dst_x, DWORD dst_y, IWineD3DSurface *src_surface, const RECT *src_rect, DWORD trans)
+{
+    IWineD3DSurfaceImpl *dst_surface = (IWineD3DSurfaceImpl *)iface;
 
-        xsrc = *src_rect;
-    }
-    else if (src)
-    {
-        xsrc.left = 0;
-        xsrc.top = 0;
-        xsrc.right = src->resource.width;
-        xsrc.bottom = src->resource.height;
-    }
-    else
-    {
-        memset(&xsrc, 0, sizeof(xsrc));
-    }
+    TRACE("iface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
+            iface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
 
-    if (dst_rect)
-    {
-        /* For the Destination rect, it can be out of bounds on the condition
-         * that a clipper is set for the given surface. */
-        if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
-                || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
-                || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
-                || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
-                || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
-        {
-            WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
-            return WINEDDERR_INVALIDRECT;
-        }
+    return dst_surface->surface_ops->surface_bltfast(dst_surface, dst_x, dst_y,
+            (IWineD3DSurfaceImpl *)src_surface, src_rect, trans);
+}
 
-        if (dst_rect->right <= 0 || dst_rect->bottom <= 0
-                || dst_rect->left >= (int)dst_surface->resource.width
-                || dst_rect->top >= (int)dst_surface->resource.height)
-        {
-            TRACE("Nothing to be done.\n");
-            return WINED3D_OK;
-        }
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Unmap(IWineD3DSurface *iface)
+{
+    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
 
-        if (!src)
-        {
-            RECT full_rect;
+    TRACE("iface %p.\n", iface);
 
-            full_rect.left = 0;
-            full_rect.top = 0;
-            full_rect.right = dst_surface->resource.width;
-            full_rect.bottom = dst_surface->resource.height;
-            IntersectRect(&xdst, &full_rect, dst_rect);
-        }
-        else
-        {
-            BOOL clip_horiz, clip_vert;
+    if (!(surface->flags & SFLAG_LOCKED))
+    {
+        WARN("Trying to unmap unmapped surface.\n");
+        return WINEDDERR_NOTLOCKED;
+    }
+    surface->flags &= ~SFLAG_LOCKED;
 
-            xdst = *dst_rect;
-            clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
-            clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
+    surface->surface_ops->surface_unmap(surface);
 
-            if (clip_vert || clip_horiz)
-            {
-                /* Now check if this is a special case or not... */
-                if ((flags & WINEDDBLT_DDFX)
-                        || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
-                        || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
-                {
-                    WARN("Out of screen rectangle in special case. Not handled right now.\n");
-                    return WINED3D_OK;
-                }
+    return WINED3D_OK;
+}
 
-                if (clip_horiz)
-                {
-                    if (xdst.left < 0)
-                    {
-                        xsrc.left -= xdst.left;
-                        xdst.left = 0;
-                    }
-                    if (xdst.right > dst_surface->resource.width)
-                    {
-                        xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
-                        xdst.right = (int)dst_surface->resource.width;
-                    }
-                }
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Map(IWineD3DSurface *iface,
+        WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
+{
+    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
 
-                if (clip_vert)
-                {
-                    if (xdst.top < 0)
-                    {
-                        xsrc.top -= xdst.top;
-                        xdst.top = 0;
-                    }
-                    if (xdst.bottom > dst_surface->resource.height)
-                    {
-                        xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
-                        xdst.bottom = (int)dst_surface->resource.height;
-                    }
-                }
+    TRACE("iface %p, locked_rect %p, rect %s, flags %#x.\n",
+            iface, locked_rect, wine_dbgstr_rect(rect), flags);
 
-                /* And check if after clipping something is still to be done... */
-                if ((xdst.right <= 0) || (xdst.bottom <= 0)
-                        || (xdst.left >= (int)dst_surface->resource.width)
-                        || (xdst.top >= (int)dst_surface->resource.height)
-                        || (xsrc.right <= 0) || (xsrc.bottom <= 0)
-                        || (xsrc.left >= (int)src->resource.width)
-                        || (xsrc.top >= (int)src->resource.height))
-                {
-                    TRACE("Nothing to be done after clipping.\n");
-                    return WINED3D_OK;
-                }
-            }
-        }
-    }
-    else
+    if (surface->flags & SFLAG_LOCKED)
     {
-        xdst.left = 0;
-        xdst.top = 0;
-        xdst.right = dst_surface->resource.width;
-        xdst.bottom = dst_surface->resource.height;
+        WARN("Surface is already mapped.\n");
+        return WINED3DERR_INVALIDCALL;
     }
+    surface->flags |= SFLAG_LOCKED;
 
-    if (src == dst_surface)
+    if (!(surface->flags & SFLAG_LOCKABLE))
+        WARN("Trying to lock unlockable surface.\n");
+
+    surface->surface_ops->surface_map(surface, rect, flags);
+
+    locked_rect->Pitch = IWineD3DSurface_GetPitch(iface);
+
+    if (!rect)
     {
-        IWineD3DSurface_Map(iface, &dlock, NULL, 0);
-        slock = dlock;
-        src_format = dst_surface->resource.format;
-        dst_format = src_format;
+        locked_rect->pBits = surface->resource.allocatedMemory;
+        surface->lockedRect.left = 0;
+        surface->lockedRect.top = 0;
+        surface->lockedRect.right = surface->resource.width;
+        surface->lockedRect.bottom = surface->resource.height;
     }
     else
     {
-        dst_format = dst_surface->resource.format;
-        if (src)
+        const struct wined3d_format *format = surface->resource.format;
+
+        if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
         {
-            if (dst_surface->resource.format->id != src->resource.format->id)
-            {
-                src = surface_convert_format(src, dst_format->id);
-                if (!src)
-                {
-                    /* The conv function writes a FIXME */
-                    WARN("Cannot convert source surface format to dest format\n");
-                    goto release;
-                }
-            }
-            IWineD3DSurface_Map((IWineD3DSurface *)src, &slock, NULL, WINED3DLOCK_READONLY);
-            src_format = src->resource.format;
+            /* Compressed textures are block based, so calculate the offset of
+             * the block that contains the top-left pixel of the locked rectangle. */
+            locked_rect->pBits = surface->resource.allocatedMemory
+                    + ((rect->top / format->block_height) * locked_rect->Pitch)
+                    + ((rect->left / format->block_width) * format->block_byte_count);
         }
         else
         {
-            src_format = dst_format;
+            locked_rect->pBits = surface->resource.allocatedMemory
+                    + (locked_rect->Pitch * rect->top)
+                    + (rect->left * format->byte_count);
         }
-        if (dst_rect)
-            IWineD3DSurface_Map(iface, &dlock, &xdst, 0);
-        else
-            IWineD3DSurface_Map(iface, &dlock, NULL, 0);
+        surface->lockedRect.left = rect->left;
+        surface->lockedRect.top = rect->top;
+        surface->lockedRect.right = rect->right;
+        surface->lockedRect.bottom = rect->bottom;
     }
 
-    if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
+    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
+    TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
 
-    if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
-    {
-        if (!dst_rect || src == dst_surface)
-        {
-            memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
-            goto release;
-        }
-    }
+    return WINED3D_OK;
+}
 
-    bpp = dst_surface->resource.format->byte_count;
-    srcheight = xsrc.bottom - xsrc.top;
-    srcwidth = xsrc.right - xsrc.left;
-    dstheight = xdst.bottom - xdst.top;
-    dstwidth = xdst.right - xdst.left;
-    width = (xdst.right - xdst.left) * bpp;
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_GetDC(IWineD3DSurface *iface, HDC *dc)
+{
+    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
+    HRESULT hr;
 
-    if (dst_rect && src != dst_surface)
-        dbuf = dlock.pBits;
-    else
-        dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
+    TRACE("iface %p, dc %p.\n", iface, dc);
 
-    if (flags & WINEDDBLT_WAIT)
-    {
-        flags &= ~WINEDDBLT_WAIT;
-    }
-    if (flags & WINEDDBLT_ASYNC)
+    if (surface->flags & SFLAG_USERPTR)
     {
-        static BOOL displayed = FALSE;
-        if (!displayed)
-            FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
-        displayed = TRUE;
-        flags &= ~WINEDDBLT_ASYNC;
+        ERR("Not supported on surfaces with application-provided memory.\n");
+        return WINEDDERR_NODC;
     }
-    if (flags & WINEDDBLT_DONOTWAIT)
+
+    /* Give more detailed info for ddraw. */
+    if (surface->flags & SFLAG_DCINUSE)
+        return WINEDDERR_DCALREADYCREATED;
+
+    /* Can't GetDC if the surface is locked. */
+    if (surface->flags & SFLAG_LOCKED)
+        return WINED3DERR_INVALIDCALL;
+
+    hr = surface->surface_ops->surface_getdc(surface);
+    if (FAILED(hr))
+        return hr;
+
+    if (surface->resource.format->id == WINED3DFMT_P8_UINT
+            || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
     {
-        /* WINEDDBLT_DONOTWAIT appeared in DX7 */
-        static BOOL displayed = FALSE;
-        if (!displayed)
-            FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
-        displayed = TRUE;
-        flags &= ~WINEDDBLT_DONOTWAIT;
+        /* GetDC on palettized formats is unsupported in D3D9, and the method
+         * is missing in D3D8, so this should only be used for DX <=7
+         * surfaces (with non-device palettes). */
+        const PALETTEENTRY *pal = NULL;
+
+        if (surface->palette)
+        {
+            pal = surface->palette->palents;
+        }
+        else
+        {
+            struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
+            IWineD3DSurfaceImpl *dds_primary = swapchain->front_buffer;
+
+            if (dds_primary && dds_primary->palette)
+                pal = dds_primary->palette->palents;
+        }
+
+        if (pal)
+        {
+            RGBQUAD col[256];
+            unsigned int i;
+
+            for (i = 0; i < 256; ++i)
+            {
+                col[i].rgbRed = pal[i].peRed;
+                col[i].rgbGreen = pal[i].peGreen;
+                col[i].rgbBlue = pal[i].peBlue;
+                col[i].rgbReserved = 0;
+            }
+            SetDIBColorTable(surface->hDC, 0, 256, col);
+        }
     }
 
-    /* First, all the 'source-less' blits */
-    if (flags & WINEDDBLT_COLORFILL)
+    surface->flags |= SFLAG_DCINUSE;
+
+    *dc = surface->hDC;
+    TRACE("Returning dc %p.\n", *dc);
+
+    return WINED3D_OK;
+}
+
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_ReleaseDC(IWineD3DSurface *iface, HDC dc)
+{
+    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
+
+    TRACE("iface %p, dc %p.\n", iface, dc);
+
+    if (!(surface->flags & SFLAG_DCINUSE))
+        return WINEDDERR_NODC;
+
+    if (surface->hDC != dc)
     {
-        ret = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
-        flags &= ~WINEDDBLT_COLORFILL;
+        WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
+                dc, surface->hDC);
+        return WINEDDERR_NODC;
     }
 
-    if (flags & WINEDDBLT_DEPTHFILL)
+    if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
     {
-        FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
+        /* Copy the contents of the DIB over to the PBO. */
+        memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
     }
-    if (flags & WINEDDBLT_ROP)
+
+    /* We locked first, so unlock now. */
+    IWineD3DSurface_Unmap(iface);
+
+    surface->flags &= ~SFLAG_DCINUSE;
+
+    return WINED3D_OK;
+}
+
+static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Flip(IWineD3DSurface *iface, IWineD3DSurface *override, DWORD flags)
+{
+    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
+    struct wined3d_swapchain *swapchain;
+    HRESULT hr;
+
+    TRACE("iface %p, override %p, flags %#x.\n", iface, override, flags);
+
+    if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
     {
-        /* Catch some degenerate cases here. */
-        switch (fx->dwROP)
-        {
-            case BLACKNESS:
-                ret = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
-                break;
-            case 0xAA0029: /* No-op */
-                break;
-            case WHITENESS:
-                ret = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
-                break;
-            case SRCCOPY: /* Well, we do that below? */
-                break;
-            default:
-                FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
-                goto error;
-        }
-        flags &= ~WINEDDBLT_ROP;
+        ERR("Flipped surface is not on a swapchain.\n");
+        return WINEDDERR_NOTFLIPPABLE;
     }
-    if (flags & WINEDDBLT_DDROPS)
+    swapchain = surface->container.u.swapchain;
+
+    hr = surface->surface_ops->surface_flip(surface, (IWineD3DSurfaceImpl *)override);
+    if (FAILED(hr))
+        return hr;
+
+    /* Just overwrite the swapchain presentation interval. This is ok because
+     * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
+     * specify the presentation interval. */
+    if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
+        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
+    else if (flags & WINEDDFLIP_NOVSYNC)
+        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
+    else if (flags & WINEDDFLIP_INTERVAL2)
+        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
+    else if (flags & WINEDDFLIP_INTERVAL3)
+        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
+    else
+        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
+
+    return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
+}
+
+/* ****************************************************
+   IWineD3DSurface IWineD3DResource parts follow
+   **************************************************** */
+
+/* Do not call while under the GL lock. */
+void surface_internal_preload(IWineD3DSurfaceImpl *surface, enum WINED3DSRGB srgb)
+{
+    IWineD3DDeviceImpl *device = surface->resource.device;
+
+    TRACE("iface %p, srgb %#x.\n", surface, srgb);
+
+    if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
     {
-        FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
+        struct wined3d_texture *texture = surface->container.u.texture;
+
+        TRACE("Passing to container (%p).\n", texture);
+        texture->texture_ops->texture_preload(texture, srgb);
     }
-    /* Now the 'with source' blits. */
-    if (src)
+    else
     {
-        const BYTE *sbase;
-        int sx, xinc, sy, yinc;
+        struct wined3d_context *context = NULL;
 
-        if (!dstwidth || !dstheight) /* Hmm... stupid program? */
-            goto release;
+        TRACE("(%p) : About to load surface\n", surface);
 
-        if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
-                && (srcwidth != dstwidth || srcheight != dstheight))
+        if (!device->isInDraw) context = context_acquire(device, NULL);
+
+        if (surface->resource.format->id == WINED3DFMT_P8_UINT
+                || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
         {
-            /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
-            FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
+            if (palette9_changed(surface))
+            {
+                TRACE("Reloading surface because the d3d8/9 palette was changed\n");
+                /* TODO: This is not necessarily needed with hw palettized texture support */
+                surface_load_location(surface, SFLAG_INSYSMEM, NULL);
+                /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
+                surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
+            }
         }
 
-        sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
-        xinc = (srcwidth << 16) / dstwidth;
-        yinc = (srcheight << 16) / dstheight;
+        surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
 
-        if (!flags)
+        if (surface->resource.pool == WINED3DPOOL_DEFAULT)
         {
-            /* No effects, we can cheat here. */
-            if (dstwidth == srcwidth)
-            {
-                if (dstheight == srcheight)
-                {
-                    /* No stretching in either direction. This needs to be as
-                     * fast as possible. */
-                    sbuf = sbase;
-
-                    /* Check for overlapping surfaces. */
-                    if (src != dst_surface || xdst.top < xsrc.top
-                            || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
-                    {
-                        /* No overlap, or dst above src, so copy from top downwards. */
-                        for (y = 0; y < dstheight; ++y)
-                        {
-                            memcpy(dbuf, sbuf, width);
-                            sbuf += slock.Pitch;
-                            dbuf += dlock.Pitch;
-                        }
-                    }
-                    else if (xdst.top > xsrc.top)
-                    {
-                        /* Copy from bottom upwards. */
-                        sbuf += (slock.Pitch*dstheight);
-                        dbuf += (dlock.Pitch*dstheight);
-                        for (y = 0; y < dstheight; ++y)
-                        {
-                            sbuf -= slock.Pitch;
-                            dbuf -= dlock.Pitch;
-                            memcpy(dbuf, sbuf, width);
-                        }
-                    }
-                    else
-                    {
-                        /* Src and dst overlapping on the same line, use memmove. */
-                        for (y = 0; y < dstheight; ++y)
-                        {
-                            memmove(dbuf, sbuf, width);
-                            sbuf += slock.Pitch;
-                            dbuf += dlock.Pitch;
-                        }
-                    }
-                }
-                else
-                {
-                    /* Stretching in y direction only. */
-                    for (y = sy = 0; y < dstheight; ++y, sy += yinc)
-                    {
-                        sbuf = sbase + (sy >> 16) * slock.Pitch;
-                        memcpy(dbuf, sbuf, width);
-                        dbuf += dlock.Pitch;
-                    }
-                }
-            }
-            else
-            {
-                /* Stretching in X direction. */
-                int last_sy = -1;
-                for (y = sy = 0; y < dstheight; ++y, sy += yinc)
-                {
-                    sbuf = sbase + (sy >> 16) * slock.Pitch;
-
-                    if ((sy >> 16) == (last_sy >> 16))
-                    {
-                        /* This source row is the same as last source row -
-                         * Copy the already stretched row. */
-                        memcpy(dbuf, dbuf - dlock.Pitch, width);
-                    }
-                    else
-                    {
-#define STRETCH_ROW(type) \
-do { \
-    const type *s = (const type *)sbuf; \
-    type *d = (type *)dbuf; \
-    for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
-        d[x] = s[sx >> 16]; \
-} while(0)
-
-                        switch(bpp)
-                        {
-                            case 1:
-                                STRETCH_ROW(BYTE);
-                                break;
-                            case 2:
-                                STRETCH_ROW(WORD);
-                                break;
-                            case 4:
-                                STRETCH_ROW(DWORD);
-                                break;
-                            case 3:
-                            {
-                                const BYTE *s;
-                                BYTE *d = dbuf;
-                                for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
-                                {
-                                    DWORD pixel;
-
-                                    s = sbuf + 3 * (sx >> 16);
-                                    pixel = s[0] | (s[1] << 8) | (s[2] << 16);
-                                    d[0] = (pixel      ) & 0xff;
-                                    d[1] = (pixel >>  8) & 0xff;
-                                    d[2] = (pixel >> 16) & 0xff;
-                                    d += 3;
-                                }
-                                break;
-                            }
-                            default:
-                                FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
-                                ret = WINED3DERR_NOTAVAILABLE;
-                                goto error;
-                        }
-#undef STRETCH_ROW
-                    }
-                    dbuf += dlock.Pitch;
-                    last_sy = sy;
-                }
-            }
-        }
-        else
-        {
-            LONG dstyinc = dlock.Pitch, dstxinc = bpp;
-            DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
-            DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
-            if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
-            {
-                /* The color keying flags are checked for correctness in ddraw */
-                if (flags & WINEDDBLT_KEYSRC)
-                {
-                    keylow  = src->SrcBltCKey.dwColorSpaceLowValue;
-                    keyhigh = src->SrcBltCKey.dwColorSpaceHighValue;
-                }
-                else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
-                {
-                    keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
-                    keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
-                }
-
-                if (flags & WINEDDBLT_KEYDEST)
-                {
-                    /* Destination color keys are taken from the source surface! */
-                    destkeylow = src->DestBltCKey.dwColorSpaceLowValue;
-                    destkeyhigh = src->DestBltCKey.dwColorSpaceHighValue;
-                }
-                else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
-                {
-                    destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
-                    destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
-                }
-
-                if (bpp == 1)
-                {
-                    keymask = 0xff;
-                }
-                else
-                {
-                    keymask = src_format->red_mask
-                            | src_format->green_mask
-                            | src_format->blue_mask;
-                }
-                flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
-            }
-
-            if (flags & WINEDDBLT_DDFX)
-            {
-                BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
-                LONG tmpxy;
-                dTopLeft     = dbuf;
-                dTopRight    = dbuf + ((dstwidth - 1) * bpp);
-                dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
-                dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
-
-                if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
-                {
-                    /* I don't think we need to do anything about this flag */
-                    WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
-                {
-                    tmp          = dTopRight;
-                    dTopRight    = dTopLeft;
-                    dTopLeft     = tmp;
-                    tmp          = dBottomRight;
-                    dBottomRight = dBottomLeft;
-                    dBottomLeft  = tmp;
-                    dstxinc = dstxinc * -1;
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
-                {
-                    tmp          = dTopLeft;
-                    dTopLeft     = dBottomLeft;
-                    dBottomLeft  = tmp;
-                    tmp          = dTopRight;
-                    dTopRight    = dBottomRight;
-                    dBottomRight = tmp;
-                    dstyinc = dstyinc * -1;
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
-                {
-                    /* I don't think we need to do anything about this flag */
-                    WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
-                {
-                    tmp          = dBottomRight;
-                    dBottomRight = dTopLeft;
-                    dTopLeft     = tmp;
-                    tmp          = dBottomLeft;
-                    dBottomLeft  = dTopRight;
-                    dTopRight    = tmp;
-                    dstxinc = dstxinc * -1;
-                    dstyinc = dstyinc * -1;
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
-                {
-                    tmp          = dTopLeft;
-                    dTopLeft     = dBottomLeft;
-                    dBottomLeft  = dBottomRight;
-                    dBottomRight = dTopRight;
-                    dTopRight    = tmp;
-                    tmpxy   = dstxinc;
-                    dstxinc = dstyinc;
-                    dstyinc = tmpxy;
-                    dstxinc = dstxinc * -1;
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
-                {
-                    tmp          = dTopLeft;
-                    dTopLeft     = dTopRight;
-                    dTopRight    = dBottomRight;
-                    dBottomRight = dBottomLeft;
-                    dBottomLeft  = tmp;
-                    tmpxy   = dstxinc;
-                    dstxinc = dstyinc;
-                    dstyinc = tmpxy;
-                    dstyinc = dstyinc * -1;
-                }
-                if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
-                {
-                    /* I don't think we need to do anything about this flag */
-                    WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
-                }
-                dbuf = dTopLeft;
-                flags &= ~(WINEDDBLT_DDFX);
-            }
-
-#define COPY_COLORKEY_FX(type) \
-do { \
-    const type *s; \
-    type *d = (type *)dbuf, *dx, tmp; \
-    for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
-    { \
-        s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
-        dx = d; \
-        for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
-        { \
-            tmp = s[sx >> 16]; \
-            if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
-                    && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
-            { \
-                dx[0] = tmp; \
-            } \
-            dx = (type *)(((BYTE *)dx) + dstxinc); \
-        } \
-        d = (type *)(((BYTE *)d) + dstyinc); \
-    } \
-} while(0)
-
-            switch (bpp)
-            {
-                case 1:
-                    COPY_COLORKEY_FX(BYTE);
-                    break;
-                case 2:
-                    COPY_COLORKEY_FX(WORD);
-                    break;
-                case 4:
-                    COPY_COLORKEY_FX(DWORD);
-                    break;
-                case 3:
-                {
-                    const BYTE *s;
-                    BYTE *d = dbuf, *dx;
-                    for (y = sy = 0; y < dstheight; ++y, sy += yinc)
-                    {
-                        sbuf = sbase + (sy >> 16) * slock.Pitch;
-                        dx = d;
-                        for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
-                        {
-                            DWORD pixel, dpixel = 0;
-                            s = sbuf + 3 * (sx>>16);
-                            pixel = s[0] | (s[1] << 8) | (s[2] << 16);
-                            dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
-                            if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
-                                    && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
-                            {
-                                dx[0] = (pixel      ) & 0xff;
-                                dx[1] = (pixel >>  8) & 0xff;
-                                dx[2] = (pixel >> 16) & 0xff;
-                            }
-                            dx += dstxinc;
-                        }
-                        d += dstyinc;
-                    }
-                    break;
-                }
-                default:
-                    FIXME("%s color-keyed blit not implemented for bpp %u!\n",
-                          (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
-                    ret = WINED3DERR_NOTAVAILABLE;
-                    goto error;
-#undef COPY_COLORKEY_FX
-            }
-        }
-    }
-
-error:
-    if (flags && FIXME_ON(d3d_surface))
-    {
-        FIXME("\tUnsupported flags: %#x.\n", flags);
-    }
-
-release:
-    IWineD3DSurface_Unmap(iface);
-    if (src && src != dst_surface)
-        IWineD3DSurface_Unmap((IWineD3DSurface *)src);
-    /* Release the converted surface, if any. */
-    if (src && src_surface != (IWineD3DSurface *)src)
-        IWineD3DSurface_Release((IWineD3DSurface *)src);
-    return ret;
-}
-
-/* Do not call while under the GL lock. */
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_BltFast(IWineD3DSurface *iface,
-        DWORD dst_x, DWORD dst_y, IWineD3DSurface *src_surface, const RECT *src_rect, DWORD trans)
-{
-    IWineD3DSurfaceImpl *dst_surface = (IWineD3DSurfaceImpl *)iface;
-
-    TRACE("iface %p, dst_x %u, dst_y %u, src_surface %p, src_rect %s, trans %#x.\n",
-            iface, dst_x, dst_y, src_surface, wine_dbgstr_rect(src_rect), trans);
-
-    return dst_surface->surface_ops->surface_bltfast(dst_surface, dst_x, dst_y,
-            (IWineD3DSurfaceImpl *)src_surface, src_rect, trans);
-}
-
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Unmap(IWineD3DSurface *iface)
-{
-    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
-
-    TRACE("iface %p.\n", iface);
-
-    if (!(surface->flags & SFLAG_LOCKED))
-    {
-        WARN("Trying to unmap unmapped surface.\n");
-        return WINEDDERR_NOTLOCKED;
-    }
-    surface->flags &= ~SFLAG_LOCKED;
-
-    surface->surface_ops->surface_unmap(surface);
-
-    return WINED3D_OK;
-}
-
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Map(IWineD3DSurface *iface,
-        WINED3DLOCKED_RECT *locked_rect, const RECT *rect, DWORD flags)
-{
-    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
-
-    TRACE("iface %p, locked_rect %p, rect %s, flags %#x.\n",
-            iface, locked_rect, wine_dbgstr_rect(rect), flags);
-
-    if (surface->flags & SFLAG_LOCKED)
-    {
-        WARN("Surface is already mapped.\n");
-        return WINED3DERR_INVALIDCALL;
-    }
-    surface->flags |= SFLAG_LOCKED;
-
-    if (!(surface->flags & SFLAG_LOCKABLE))
-        WARN("Trying to lock unlockable surface.\n");
-
-    surface->surface_ops->surface_map(surface, rect, flags);
-
-    locked_rect->Pitch = IWineD3DSurface_GetPitch(iface);
-
-    if (!rect)
-    {
-        locked_rect->pBits = surface->resource.allocatedMemory;
-        surface->lockedRect.left = 0;
-        surface->lockedRect.top = 0;
-        surface->lockedRect.right = surface->resource.width;
-        surface->lockedRect.bottom = surface->resource.height;
-    }
-    else
-    {
-        const struct wined3d_format *format = surface->resource.format;
-
-        if ((format->flags & (WINED3DFMT_FLAG_COMPRESSED | WINED3DFMT_FLAG_BROKEN_PITCH)) == WINED3DFMT_FLAG_COMPRESSED)
-        {
-            /* Compressed textures are block based, so calculate the offset of
-             * the block that contains the top-left pixel of the locked rectangle. */
-            locked_rect->pBits = surface->resource.allocatedMemory
-                    + ((rect->top / format->block_height) * locked_rect->Pitch)
-                    + ((rect->left / format->block_width) * format->block_byte_count);
-        }
-        else
-        {
-            locked_rect->pBits = surface->resource.allocatedMemory
-                    + (locked_rect->Pitch * rect->top)
-                    + (rect->left * format->byte_count);
-        }
-        surface->lockedRect.left = rect->left;
-        surface->lockedRect.top = rect->top;
-        surface->lockedRect.right = rect->right;
-        surface->lockedRect.bottom = rect->bottom;
-    }
-
-    TRACE("Locked rect %s.\n", wine_dbgstr_rect(&surface->lockedRect));
-    TRACE("Returning memory %p, pitch %u.\n", locked_rect->pBits, locked_rect->Pitch);
-
-    return WINED3D_OK;
-}
-
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_GetDC(IWineD3DSurface *iface, HDC *dc)
-{
-    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
-    HRESULT hr;
-
-    TRACE("iface %p, dc %p.\n", iface, dc);
-
-    if (surface->flags & SFLAG_USERPTR)
-    {
-        ERR("Not supported on surfaces with application-provided memory.\n");
-        return WINEDDERR_NODC;
-    }
-
-    /* Give more detailed info for ddraw. */
-    if (surface->flags & SFLAG_DCINUSE)
-        return WINEDDERR_DCALREADYCREATED;
-
-    /* Can't GetDC if the surface is locked. */
-    if (surface->flags & SFLAG_LOCKED)
-        return WINED3DERR_INVALIDCALL;
-
-    hr = surface->surface_ops->surface_getdc(surface);
-    if (FAILED(hr))
-        return hr;
-
-    if (surface->resource.format->id == WINED3DFMT_P8_UINT
-            || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
-    {
-        /* GetDC on palettized formats is unsupported in D3D9, and the method
-         * is missing in D3D8, so this should only be used for DX <=7
-         * surfaces (with non-device palettes). */
-        const PALETTEENTRY *pal = NULL;
-
-        if (surface->palette)
-        {
-            pal = surface->palette->palents;
-        }
-        else
-        {
-            struct wined3d_swapchain *swapchain = surface->resource.device->swapchains[0];
-            IWineD3DSurfaceImpl *dds_primary = swapchain->front_buffer;
-
-            if (dds_primary && dds_primary->palette)
-                pal = dds_primary->palette->palents;
-        }
-
-        if (pal)
-        {
-            RGBQUAD col[256];
-            unsigned int i;
-
-            for (i = 0; i < 256; ++i)
-            {
-                col[i].rgbRed = pal[i].peRed;
-                col[i].rgbGreen = pal[i].peGreen;
-                col[i].rgbBlue = pal[i].peBlue;
-                col[i].rgbReserved = 0;
-            }
-            SetDIBColorTable(surface->hDC, 0, 256, col);
-        }
-    }
-
-    surface->flags |= SFLAG_DCINUSE;
-
-    *dc = surface->hDC;
-    TRACE("Returning dc %p.\n", *dc);
-
-    return WINED3D_OK;
-}
-
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_ReleaseDC(IWineD3DSurface *iface, HDC dc)
-{
-    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
-
-    TRACE("iface %p, dc %p.\n", iface, dc);
-
-    if (!(surface->flags & SFLAG_DCINUSE))
-        return WINEDDERR_NODC;
-
-    if (surface->hDC != dc)
-    {
-        WARN("Application tries to release invalid DC %p, surface DC is %p.\n",
-                dc, surface->hDC);
-        return WINEDDERR_NODC;
-    }
-
-    if ((surface->flags & SFLAG_PBO) && surface->resource.allocatedMemory)
-    {
-        /* Copy the contents of the DIB over to the PBO. */
-        memcpy(surface->resource.allocatedMemory, surface->dib.bitmap_data, surface->dib.bitmap_size);
-    }
-
-    /* We locked first, so unlock now. */
-    IWineD3DSurface_Unmap(iface);
-
-    surface->flags &= ~SFLAG_DCINUSE;
-
-    return WINED3D_OK;
-}
-
-static HRESULT WINAPI IWineD3DBaseSurfaceImpl_Flip(IWineD3DSurface *iface, IWineD3DSurface *override, DWORD flags)
-{
-    IWineD3DSurfaceImpl *surface = (IWineD3DSurfaceImpl *)iface;
-    struct wined3d_swapchain *swapchain;
-    HRESULT hr;
-
-    TRACE("iface %p, override %p, flags %#x.\n", iface, override, flags);
-
-    if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN)
-    {
-        ERR("Flipped surface is not on a swapchain.\n");
-        return WINEDDERR_NOTFLIPPABLE;
-    }
-    swapchain = surface->container.u.swapchain;
-
-    hr = surface->surface_ops->surface_flip(surface, (IWineD3DSurfaceImpl *)override);
-    if (FAILED(hr))
-        return hr;
-
-    /* Just overwrite the swapchain presentation interval. This is ok because
-     * only ddraw apps can call Flip, and only d3d8 and d3d9 applications
-     * specify the presentation interval. */
-    if (!(flags & (WINEDDFLIP_NOVSYNC | WINEDDFLIP_INTERVAL2 | WINEDDFLIP_INTERVAL3 | WINEDDFLIP_INTERVAL4)))
-        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_ONE;
-    else if (flags & WINEDDFLIP_NOVSYNC)
-        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_IMMEDIATE;
-    else if (flags & WINEDDFLIP_INTERVAL2)
-        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_TWO;
-    else if (flags & WINEDDFLIP_INTERVAL3)
-        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_THREE;
-    else
-        swapchain->presentParms.PresentationInterval = WINED3DPRESENT_INTERVAL_FOUR;
-
-    return wined3d_swapchain_present(swapchain, NULL, NULL, swapchain->win_handle, NULL, 0);
-}
-
-/* ****************************************************
-   IWineD3DSurface IWineD3DResource parts follow
-   **************************************************** */
-
-/* Do not call while under the GL lock. */
-void surface_internal_preload(IWineD3DSurfaceImpl *surface, enum WINED3DSRGB srgb)
-{
-    IWineD3DDeviceImpl *device = surface->resource.device;
-
-    TRACE("iface %p, srgb %#x.\n", surface, srgb);
-
-    if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
-    {
-        struct wined3d_texture *texture = surface->container.u.texture;
-
-        TRACE("Passing to container (%p).\n", texture);
-        texture->texture_ops->texture_preload(texture, srgb);
-    }
-    else
-    {
-        struct wined3d_context *context = NULL;
-
-        TRACE("(%p) : About to load surface\n", surface);
-
-        if (!device->isInDraw) context = context_acquire(device, NULL);
-
-        if (surface->resource.format->id == WINED3DFMT_P8_UINT
-                || surface->resource.format->id == WINED3DFMT_P8_UINT_A8_UNORM)
-        {
-            if (palette9_changed(surface))
-            {
-                TRACE("Reloading surface because the d3d8/9 palette was changed\n");
-                /* TODO: This is not necessarily needed with hw palettized texture support */
-                surface_load_location(surface, SFLAG_INSYSMEM, NULL);
-                /* Make sure the texture is reloaded because of the palette change, this kills performance though :( */
-                surface_modify_location(surface, SFLAG_INTEXTURE, FALSE);
-            }
-        }
-
-        surface_load(surface, srgb == SRGB_SRGB ? TRUE : FALSE);
-
-        if (surface->resource.pool == WINED3DPOOL_DEFAULT)
-        {
-            /* Tell opengl to try and keep this texture in video ram (well mostly) */
-            GLclampf tmp;
-            tmp = 0.9f;
-            ENTER_GL();
-            glPrioritizeTextures(1, &surface->texture_name, &tmp);
-            LEAVE_GL();
-        }
+            /* Tell opengl to try and keep this texture in video ram (well mostly) */
+            GLclampf tmp;
+            tmp = 0.9f;
+            ENTER_GL();
+            glPrioritizeTextures(1, &surface->texture_name, &tmp);
+            LEAVE_GL();
+        }
 
         if (context) context_release(context);
     }
@@ -5353,11 +5030,6 @@ static void fb_copy_to_texture_hwstretch(IWineD3DSurfaceImpl *dst_surface, IWine
     surface_modify_location(dst_surface, SFLAG_INTEXTURE, TRUE);
 }
 
-/* Until the blit_shader is ready, define some prototypes here. */
-static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
-        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
-        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format);
-
 /* Front buffer coordinates are always full screen coordinates, but our GL
  * drawable is limited to the window's client area. The sysmem and texture
  * copies do have the full screen size. Note that GL has a bottom-left
@@ -5387,15 +5059,6 @@ void surface_translate_drawable_coords(IWineD3DSurfaceImpl *surface, HWND window
     rect->bottom = drawable_height - rect->bottom;
 }
 
-static BOOL surface_is_full_rect(IWineD3DSurfaceImpl *surface, const RECT *r)
-{
-    if ((r->left && r->right) || abs(r->right - r->left) != surface->resource.width)
-        return FALSE;
-    if ((r->top && r->bottom) || abs(r->bottom - r->top) != surface->resource.height)
-        return FALSE;
-    return TRUE;
-}
-
 /* blit between surface locations. onscreen on different swapchains is not supported.
  * depth / stencil is not supported. */
 static void surface_blt_fbo(IWineD3DDeviceImpl *device, const WINED3DTEXTUREFILTERTYPE filter,
@@ -5482,139 +5145,45 @@ static void surface_blt_fbo(IWineD3DDeviceImpl *device, const WINED3DTEXTUREFILT
 
     if (dst_location == SFLAG_INDRAWABLE)
     {
-        GLenum buffer = surface_get_gl_buffer(dst_surface);
-
-        TRACE("Destination surface %p is onscreen.\n", dst_surface);
-
-        surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
-
-        ENTER_GL();
-        context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
-        context_set_draw_buffer(context, buffer);
-    }
-    else
-    {
-        TRACE("Destination surface %p is offscreen.\n", dst_surface);
-
-        ENTER_GL();
-        context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
-        context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
-    }
-    context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
-
-    glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
-    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
-    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
-    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
-    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
-
-    glDisable(GL_SCISSOR_TEST);
-    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
-
-    gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
-            dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
-    checkGLcall("glBlitFramebuffer()");
-
-    LEAVE_GL();
-
-    if (wined3d_settings.strict_draw_ordering
-            || (dst_location == SFLAG_INDRAWABLE
-            && dst_surface->container.u.swapchain->front_buffer == dst_surface))
-        wglFlush();
-
-    context_release(context);
-}
-
-static void wined3d_surface_depth_blt_fbo(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *src_surface,
-        const RECT *src_rect, IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect)
-{
-    const struct wined3d_gl_info *gl_info;
-    struct wined3d_context *context;
-    DWORD src_mask, dst_mask;
-    GLbitfield gl_mask;
-
-    TRACE("device %p, src_surface %p, src_rect %s, dst_surface %p, dst_rect %s.\n",
-            device, src_surface, wine_dbgstr_rect(src_rect),
-            dst_surface, wine_dbgstr_rect(dst_rect));
-
-    src_mask = src_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
-    dst_mask = dst_surface->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
-
-    if (src_mask != dst_mask)
-    {
-        ERR("Incompatible formats %s and %s.\n",
-                debug_d3dformat(src_surface->resource.format->id),
-                debug_d3dformat(dst_surface->resource.format->id));
-        return;
-    }
-
-    if (!src_mask)
-    {
-        ERR("Not a depth / stencil format: %s.\n",
-                debug_d3dformat(src_surface->resource.format->id));
-        return;
-    }
-
-    gl_mask = 0;
-    if (src_mask & WINED3DFMT_FLAG_DEPTH)
-        gl_mask |= GL_DEPTH_BUFFER_BIT;
-    if (src_mask & WINED3DFMT_FLAG_STENCIL)
-        gl_mask |= GL_STENCIL_BUFFER_BIT;
-
-    /* Make sure the locations are up-to-date. Loading the destination
-     * surface isn't required if the entire surface is overwritten. */
-    surface_load_location(src_surface, SFLAG_INTEXTURE, NULL);
-    if (!surface_is_full_rect(dst_surface, dst_rect))
-        surface_load_location(dst_surface, SFLAG_INTEXTURE, NULL);
-
-    context = context_acquire(device, NULL);
-    if (!context->valid)
-    {
-        context_release(context);
-        WARN("Invalid context, skipping blit.\n");
-        return;
-    }
-
-    gl_info = context->gl_info;
-
-    ENTER_GL();
+        GLenum buffer = surface_get_gl_buffer(dst_surface);
 
-    context_apply_fbo_state_blit(context, GL_READ_FRAMEBUFFER, NULL, src_surface, SFLAG_INTEXTURE);
-    glReadBuffer(GL_NONE);
-    checkGLcall("glReadBuffer()");
-    context_check_fbo_status(context, GL_READ_FRAMEBUFFER);
+        TRACE("Destination surface %p is onscreen.\n", dst_surface);
 
-    context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, NULL, dst_surface, SFLAG_INTEXTURE);
-    context_set_draw_buffer(context, GL_NONE);
-    context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
+        surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
 
-    if (gl_mask & GL_DEPTH_BUFFER_BIT)
-    {
-        glDepthMask(GL_TRUE);
-        IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_ZWRITEENABLE));
+        ENTER_GL();
+        context_bind_fbo(context, GL_DRAW_FRAMEBUFFER, NULL);
+        context_set_draw_buffer(context, buffer);
     }
-    if (gl_mask & GL_STENCIL_BUFFER_BIT)
+    else
     {
-        if (context->gl_info->supported[EXT_STENCIL_TWO_SIDE])
-        {
-            glDisable(GL_STENCIL_TEST_TWO_SIDE_EXT);
-            IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_TWOSIDEDSTENCILMODE));
-        }
-        glStencilMask(~0U);
-        IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_STENCILWRITEMASK));
+        TRACE("Destination surface %p is offscreen.\n", dst_surface);
+
+        ENTER_GL();
+        context_apply_fbo_state_blit(context, GL_DRAW_FRAMEBUFFER, dst_surface, NULL, dst_location);
+        context_set_draw_buffer(context, GL_COLOR_ATTACHMENT0);
     }
+    context_check_fbo_status(context, GL_DRAW_FRAMEBUFFER);
+
+    glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE));
+    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE1));
+    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE2));
+    IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_COLORWRITEENABLE3));
 
     glDisable(GL_SCISSOR_TEST);
     IWineD3DDeviceImpl_MarkStateDirty(device, STATE_RENDER(WINED3DRS_SCISSORTESTENABLE));
 
-    gl_info->fbo_ops.glBlitFramebuffer(src_rect->left, src_rect->top, src_rect->right, src_rect->bottom,
-            dst_rect->left, dst_rect->top, dst_rect->right, dst_rect->bottom, gl_mask, GL_NEAREST);
+    gl_info->fbo_ops.glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom,
+            dst_rect.left, dst_rect.top, dst_rect.right, dst_rect.bottom, GL_COLOR_BUFFER_BIT, gl_filter);
     checkGLcall("glBlitFramebuffer()");
 
     LEAVE_GL();
 
-    if (wined3d_settings.strict_draw_ordering)
-        wglFlush(); /* Flush to ensure ordering across contexts. */
+    if (wined3d_settings.strict_draw_ordering
+            || (dst_location == SFLAG_INDRAWABLE
+            && dst_surface->container.u.swapchain->front_buffer == dst_surface))
+        wglFlush();
 
     context_release(context);
 }
@@ -5947,1104 +5516,1593 @@ static HRESULT IWineD3DSurfaceImpl_BltOverride(IWineD3DSurfaceImpl *dst_surface,
             dst_surface->flags &= ~SFLAG_INSYSMEM;
         }
 
-        return WINED3D_OK;
+        return WINED3D_OK;
+    }
+    else if (src_surface)
+    {
+        /* Blit from offscreen surface to render target */
+        DWORD oldCKeyFlags = src_surface->CKeyFlags;
+        WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
+
+        TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
+
+        if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
+                && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
+                        &src_rect, src_surface->resource.usage, src_surface->resource.pool,
+                        src_surface->resource.format,
+                        &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
+                        dst_surface->resource.format))
+        {
+            TRACE("Using surface_blt_fbo.\n");
+            /* The source is always a texture, but never the currently active render target, and the texture
+             * contents are never upside down. */
+            surface_blt_fbo(device, Filter,
+                    src_surface, SFLAG_INDRAWABLE, &src_rect,
+                    dst_surface, SFLAG_INDRAWABLE, &dst_rect);
+            surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
+            return WINED3D_OK;
+        }
+
+        if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
+                && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
+                        &src_rect, src_surface->resource.usage, src_surface->resource.pool,
+                        src_surface->resource.format,
+                        &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
+                        dst_surface->resource.format))
+        {
+            return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
+                    WINED3D_BLIT_OP_COLOR_BLIT, Filter);
+        }
+
+        if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
+                &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
+                &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
+        {
+            FIXME("Unsupported blit operation falling back to software\n");
+            return WINED3DERR_INVALIDCALL;
+        }
+
+        /* Color keying: Check if we have to do a color keyed blt,
+         * and if not check if a color key is activated.
+         *
+         * Just modify the color keying parameters in the surface and restore them afterwards
+         * The surface keeps track of the color key last used to load the opengl surface.
+         * PreLoad will catch the change to the flags and color key and reload if necessary.
+         */
+        if (flags & WINEDDBLT_KEYSRC)
+        {
+            /* Use color key from surface */
+        }
+        else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
+        {
+            /* Use color key from DDBltFx */
+            src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
+            src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
+        }
+        else
+        {
+            /* Do not use color key */
+            src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
+        }
+
+        surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
+                src_surface, &src_rect, dst_surface, &dst_rect);
+
+        /* Restore the color key parameters */
+        src_surface->CKeyFlags = oldCKeyFlags;
+        src_surface->SrcBltCKey = oldBltCKey;
+
+        surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
+
+        return WINED3D_OK;
+    }
+    else
+    {
+        /* Source-Less Blit to render target */
+        if (flags & WINEDDBLT_COLORFILL)
+        {
+            WINED3DCOLORVALUE color;
+
+            TRACE("Colorfill\n");
+
+            /* The color as given in the Blt function is in the surface format. */
+            if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
+                return WINED3DERR_INVALIDCALL;
+
+            return surface_color_fill(dst_surface, &dst_rect, &color);
+        }
+    }
+
+    /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
+    TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
+    return WINED3DERR_INVALIDCALL;
+}
+
+/* Draws a quad sampling "texture" with depth writes enabled and color writes disabled. GL locking is done by the caller. */
+static void surface_depth_blt(IWineD3DSurfaceImpl *This, const struct wined3d_gl_info *gl_info,
+        GLuint texture, GLsizei w, GLsizei h, GLenum target)
+{
+    IWineD3DDeviceImpl *device = This->resource.device;
+    GLint compare_mode = GL_NONE;
+    struct blt_info info;
+    GLint old_binding = 0;
+    RECT rect;
+
+    glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
+
+    glDisable(GL_CULL_FACE);
+    glDisable(GL_BLEND);
+    glDisable(GL_ALPHA_TEST);
+    glDisable(GL_SCISSOR_TEST);
+    glDisable(GL_STENCIL_TEST);
+    glEnable(GL_DEPTH_TEST);
+    glDepthFunc(GL_ALWAYS); /* Every fragment passes the depth test. */
+    glDepthMask(GL_TRUE);
+    glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
+    glViewport(0, This->pow2Height - h, w, h);
+
+    SetRect(&rect, 0, h, w, 0); /* left 0, top h, right w, bottom 0 */
+    surface_get_blt_info(target, &rect, This->pow2Width, This->pow2Height, &info);
+    GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
+    glGetIntegerv(info.binding, &old_binding);
+    glBindTexture(info.bind_target, texture);
+    if (gl_info->supported[ARB_SHADOW]) /* Turn off depth comparison while sampling; restored after drawing. */
+    {
+        glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
+        if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
+    }
+
+    device->shader_backend->shader_select_depth_blt(device->shader_priv,
+            gl_info, info.tex_type, &This->ds_current_size);
+
+    glBegin(GL_TRIANGLE_STRIP);
+    glTexCoord3fv(info.coords[0]);
+    glVertex2f(-1.0f, -1.0f);
+    glTexCoord3fv(info.coords[1]);
+    glVertex2f(1.0f, -1.0f);
+    glTexCoord3fv(info.coords[2]);
+    glVertex2f(-1.0f, 1.0f);
+    glTexCoord3fv(info.coords[3]);
+    glVertex2f(1.0f, 1.0f);
+    glEnd();
+
+    if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
+    glBindTexture(info.bind_target, old_binding); /* Restore the previous texture binding. */
+
+    glPopAttrib(); /* Undo the state changes made after glPushAttrib() above. */
+
+    device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
+}
+
+void surface_modify_ds_location(IWineD3DSurfaceImpl *surface,
+        DWORD location, UINT w, UINT h)
+{
+    TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
+
+    if (location & ~SFLAG_DS_LOCATIONS)
+        FIXME("Invalid location (%#x) specified.\n", location); /* Only reported; the bits are still stored below. */
+
+    surface->ds_current_size.cx = w;
+    surface->ds_current_size.cy = h;
+    surface->flags &= ~SFLAG_DS_LOCATIONS; /* Drop every previously recorded depth/stencil location. */
+    surface->flags |= location; /* Record the new location. */
+}
+
+/* Context activation is done by the caller. */
+void surface_load_ds_location(IWineD3DSurfaceImpl *surface, struct wined3d_context *context, DWORD location)
+{
+    IWineD3DDeviceImpl *device = surface->resource.device;
+    const struct wined3d_gl_info *gl_info = context->gl_info;
+    GLsizei w, h;
+
+    TRACE("surface %p, new location %#x.\n", surface, location);
+
+    /* TODO: Make this work for modes other than FBO */
+    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
+
+    if (!(surface->flags & location))
+    {
+        w = surface->ds_current_size.cx;
+        h = surface->ds_current_size.cy;
+        surface->ds_current_size.cx = 0;
+        surface->ds_current_size.cy = 0;
+    }
+    else
+    {
+        w = surface->resource.width;
+        h = surface->resource.height;
+    }
+
+    if (surface->ds_current_size.cx == surface->resource.width
+            && surface->ds_current_size.cy == surface->resource.height)
+    {
+        TRACE("Location (%#x) is already up to date.\n", location);
+        return;
+    }
+
+    if (surface->current_renderbuffer)
+    {
+        FIXME("Not supported with fixed up depth stencil.\n");
+        return;
+    }
+
+    if (!(surface->flags & SFLAG_DS_LOCATIONS))
+    {
+        /* This mostly happens when a depth / stencil is used without being
+         * cleared first. In principle we could upload from sysmem, or
+         * explicitly clear before first usage. For the moment there don't
+         * appear to be a lot of applications depending on this, so a FIXME
+         * should do. */
+        FIXME("No up to date depth stencil location.\n");
+        surface->flags |= location;
+        surface->ds_current_size.cx = surface->resource.width;
+        surface->ds_current_size.cy = surface->resource.height;
+        return;
     }
-    else if (src_surface)
+
+    if (location == SFLAG_DS_OFFSCREEN)
     {
-        /* Blit from offscreen surface to render target */
-        DWORD oldCKeyFlags = src_surface->CKeyFlags;
-        WINEDDCOLORKEY oldBltCKey = src_surface->SrcBltCKey;
+        GLint old_binding = 0;
+        GLenum bind_target;
 
-        TRACE("Blt from surface %p to rendertarget %p\n", src_surface, dst_surface);
+        /* The render target is allowed to be smaller than the depth/stencil
+         * buffer, so the onscreen depth/stencil buffer is potentially smaller
+         * than the offscreen surface. Don't overwrite the offscreen surface
+         * with undefined data. */
+        w = min(w, context->swapchain->presentParms.BackBufferWidth);
+        h = min(h, context->swapchain->presentParms.BackBufferHeight);
 
-        if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
-                && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
-                        &src_rect, src_surface->resource.usage, src_surface->resource.pool,
-                        src_surface->resource.format,
-                        &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
-                        dst_surface->resource.format))
-        {
-            TRACE("Using surface_blt_fbo.\n");
-            /* The source is always a texture, but never the currently active render target, and the texture
-             * contents are never upside down. */
-            surface_blt_fbo(device, Filter,
-                    src_surface, SFLAG_INDRAWABLE, &src_rect,
-                    dst_surface, SFLAG_INDRAWABLE, &dst_rect);
-            surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
-            return WINED3D_OK;
-        }
+        TRACE("Copying onscreen depth buffer to depth texture.\n");
 
-        if (!(flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE))
-                && arbfp_blit.blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
-                        &src_rect, src_surface->resource.usage, src_surface->resource.pool,
-                        src_surface->resource.format,
-                        &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool,
-                        dst_surface->resource.format))
+        ENTER_GL();
+
+        if (!device->depth_blt_texture)
         {
-            return arbfp_blit_surface(device, src_surface, &src_rect, dst_surface, &dst_rect,
-                    WINED3D_BLIT_OP_COLOR_BLIT, Filter);
+            glGenTextures(1, &device->depth_blt_texture);
         }
 
-        if (!device->blitter->blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
-                &src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
-                &dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
+        /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
+         * directly on the FBO texture. That's because we need to flip. */
+        context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
+        if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
         {
-            FIXME("Unsupported blit operation falling back to software\n");
-            return WINED3DERR_INVALIDCALL;
+            glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
+            bind_target = GL_TEXTURE_RECTANGLE_ARB;
         }
-
-        /* Color keying: Check if we have to do a color keyed blt,
-         * and if not check if a color key is activated.
-         *
-         * Just modify the color keying parameters in the surface and restore them afterwards
-         * The surface keeps track of the color key last used to load the opengl surface.
-         * PreLoad will catch the change to the flags and color key and reload if necessary.
-         */
-        if (flags & WINEDDBLT_KEYSRC)
+        else
         {
-            /* Use color key from surface */
+            glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
+            bind_target = GL_TEXTURE_2D;
         }
-        else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
+        glBindTexture(bind_target, device->depth_blt_texture);
+        glCopyTexImage2D(bind_target, surface->texture_level, surface->resource.format->glInternal, 0, 0, w, h, 0);
+        glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+        glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+        glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
+        glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
+        glBindTexture(bind_target, old_binding);
+
+        /* Set up the destination */
+        if (!device->depth_blt_rb)
         {
-            /* Use color key from DDBltFx */
-            src_surface->CKeyFlags |= WINEDDSD_CKSRCBLT;
-            src_surface->SrcBltCKey = DDBltFx->ddckSrcColorkey;
+            gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
+            checkGLcall("glGenRenderbuffersEXT");
         }
-        else
+        if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
         {
-            /* Do not use color key */
-            src_surface->CKeyFlags &= ~WINEDDSD_CKSRCBLT;
+            gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
+            checkGLcall("glBindRenderbufferEXT");
+            gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
+            checkGLcall("glRenderbufferStorageEXT");
+            device->depth_blt_rb_w = w;
+            device->depth_blt_rb_h = h;
         }
 
-        surface_blt_to_drawable(device, Filter, flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYSRCOVERRIDE),
-                src_surface, &src_rect, dst_surface, &dst_rect);
+        context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
+        gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
+                GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
+        checkGLcall("glFramebufferRenderbufferEXT");
+        context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
 
-        /* Restore the color key parameters */
-        src_surface->CKeyFlags = oldCKeyFlags;
-        src_surface->SrcBltCKey = oldBltCKey;
+        /* Do the actual blit */
+        surface_depth_blt(surface, gl_info, device->depth_blt_texture, w, h, bind_target);
+        checkGLcall("depth_blt");
 
-        surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
+        if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
+        else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
 
-        return WINED3D_OK;
+        LEAVE_GL();
+
+        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
     }
-    else
+    else if (location == SFLAG_DS_ONSCREEN)
     {
-        /* Source-Less Blit to render target */
-        if (flags & WINEDDBLT_COLORFILL)
-        {
-            WINED3DCOLORVALUE color;
+        TRACE("Copying depth texture to onscreen depth buffer.\n");
 
-            TRACE("Colorfill\n");
+        ENTER_GL();
 
-            /* The color as given in the Blt function is in the surface format. */
-            if (!surface_convert_color_to_float(dst_surface, DDBltFx->u5.dwFillColor, &color))
-                return WINED3DERR_INVALIDCALL;
+        context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
+        surface_depth_blt(surface, gl_info, surface->texture_name,
+                w, h, surface->texture_target);
+        checkGLcall("depth_blt");
 
-            return surface_color_fill(dst_surface, &dst_rect, &color);
-        }
+        if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
+
+        LEAVE_GL();
+
+        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
+    }
+    else
+    {
+        ERR("Invalid location (%#x) specified.\n", location);
     }
 
-    /* Default: Fall back to the generic blt. Not an error, a TRACE is enough */
-    TRACE("Didn't find any usable render target setup for hw blit, falling back to software\n");
-    return WINED3DERR_INVALIDCALL;
+    surface->flags |= location;
+    surface->ds_current_size.cx = surface->resource.width;
+    surface->ds_current_size.cy = surface->resource.height;
 }
 
-/* Do not call while under the GL lock. */
-static HRESULT wined3d_surface_depth_fill(IWineD3DSurfaceImpl *surface, const RECT *rect, float depth)
+void surface_modify_location(IWineD3DSurfaceImpl *surface, DWORD flag, BOOL persistent)
 {
-    const struct wined3d_resource *resource = &surface->resource;
-    IWineD3DDeviceImpl *device = resource->device;
-    const struct blit_shader *blitter;
+    const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
+    IWineD3DSurfaceImpl *overlay;
 
-    blitter = wined3d_select_blitter(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_FILL,
-            NULL, 0, 0, NULL, rect, resource->usage, resource->pool, resource->format);
-    if (!blitter)
+    TRACE("surface %p, location %s, persistent %#x.\n",
+            surface, debug_surflocation(flag), persistent);
+
+    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
     {
-        FIXME("No blitter is capable of performing the requested depth fill operation.\n");
-        return WINED3DERR_INVALIDCALL;
+        if (surface_is_offscreen(surface))
+        {
+            /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
+            if (flag & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) flag |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
+        }
+        else
+        {
+            TRACE("Surface %p is an onscreen surface.\n", surface);
+        }
     }
 
-    return blitter->depth_fill(device, surface, rect, depth);
-}
+    if (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
+            && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
+    {
+        flag |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
+    }
 
-static HRESULT wined3d_surface_depth_blt(IWineD3DSurfaceImpl *src_surface, const RECT *src_rect,
-        IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect)
-{
-    IWineD3DDeviceImpl *device = src_surface->resource.device;
+    if (persistent)
+    {
+        if (((surface->flags & SFLAG_INTEXTURE) && !(flag & SFLAG_INTEXTURE))
+                || ((surface->flags & SFLAG_INSRGBTEX) && !(flag & SFLAG_INSRGBTEX)))
+        {
+            if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
+            {
+                TRACE("Passing to container.\n");
+                wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
+            }
+        }
+        surface->flags &= ~SFLAG_LOCATIONS;
+        surface->flags |= flag;
 
-    if (!fbo_blit_supported(&device->adapter->gl_info, WINED3D_BLIT_OP_DEPTH_BLIT,
-            src_rect, src_surface->resource.usage, src_surface->resource.pool, src_surface->resource.format,
-            dst_rect, dst_surface->resource.usage, dst_surface->resource.pool, dst_surface->resource.format))
-        return WINED3DERR_INVALIDCALL;
+        /* Redraw emulated overlays, if any */
+        if (flag & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
+        {
+            LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, IWineD3DSurfaceImpl, overlay_entry)
+            {
+                overlay->surface_ops->surface_draw_overlay(overlay);
+            }
+        }
+    }
+    else
+    {
+        if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
+        {
+            if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
+            {
+                TRACE("Passing to container\n");
+                wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
+            }
+        }
+        surface->flags &= ~flag;
+    }
 
-    wined3d_surface_depth_blt_fbo(device, src_surface, src_rect, dst_surface, dst_rect);
+    if (!(surface->flags & SFLAG_LOCATIONS))
+    {
+        ERR("Surface %p does not have any up to date location.\n", surface);
+    }
+}
+
+HRESULT surface_load_location(IWineD3DSurfaceImpl *surface, DWORD flag, const RECT *rect)
+{
+    IWineD3DDeviceImpl *device = surface->resource.device;
+    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
+    BOOL drawable_read_ok = surface_is_offscreen(surface);
+    struct wined3d_format format;
+    CONVERT_TYPES convert;
+    int width, pitch, outpitch;
+    BYTE *mem;
+    BOOL in_fbo = FALSE;
 
-    surface_modify_ds_location(dst_surface, SFLAG_DS_OFFSCREEN,
-            dst_surface->ds_current_size.cx, dst_surface->ds_current_size.cy);
-    surface_modify_location(dst_surface, SFLAG_INDRAWABLE, TRUE);
+    TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(flag), wine_dbgstr_rect(rect));
 
-    return WINED3D_OK;
-}
+    if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
+    {
+        if (flag == SFLAG_INTEXTURE)
+        {
+            struct wined3d_context *context = context_acquire(device, NULL);
+            surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
+            context_release(context);
+            return WINED3D_OK;
+        }
+        else
+        {
+            FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(flag));
+            return WINED3DERR_INVALIDCALL;
+        }
+    }
 
-/* Do not call while under the GL lock. */
-static HRESULT WINAPI IWineD3DSurfaceImpl_Blt(IWineD3DSurface *iface, const RECT *DestRect,
-        IWineD3DSurface *src_surface, const RECT *SrcRect, DWORD flags,
-        const WINEDDBLTFX *DDBltFx, WINED3DTEXTUREFILTERTYPE Filter)
-{
-    IWineD3DSurfaceImpl *This = (IWineD3DSurfaceImpl *)iface;
-    IWineD3DSurfaceImpl *src = (IWineD3DSurfaceImpl *)src_surface;
-    IWineD3DDeviceImpl *device = This->resource.device;
-    DWORD src_ds_flags, dst_ds_flags;
+    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
+    {
+        if (surface_is_offscreen(surface))
+        {
+            /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets.
+             * Prefer SFLAG_INTEXTURE. */
+            if (flag == SFLAG_INDRAWABLE) flag = SFLAG_INTEXTURE;
+            drawable_read_ok = FALSE;
+            in_fbo = TRUE;
+        }
+        else
+        {
+            TRACE("Surface %p is an onscreen surface.\n", surface);
+        }
+    }
 
-    TRACE("iface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
-            iface, wine_dbgstr_rect(DestRect), src_surface, wine_dbgstr_rect(SrcRect),
-            flags, DDBltFx, debug_d3dtexturefiltertype(Filter));
-    TRACE("Usage is %s.\n", debug_d3dusage(This->resource.usage));
+    if (flag == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
+    {
+        flag = SFLAG_INTEXTURE;
+    }
 
-    if ((This->flags & SFLAG_LOCKED) || (src && (src->flags & SFLAG_LOCKED)))
+    if (surface->flags & flag)
     {
-        WARN(" Surface is busy, returning DDERR_SURFACEBUSY\n");
-        return WINEDDERR_SURFACEBUSY;
+        TRACE("Location already up to date\n");
+        return WINED3D_OK;
     }
 
-    dst_ds_flags = This->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
-    if (src)
-        src_ds_flags = src->resource.format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL);
-    else
-        src_ds_flags = 0;
+    if (!(surface->flags & SFLAG_LOCATIONS))
+    {
+        ERR("Surface %p does not have any up to date location.\n", surface);
+        surface->flags |= SFLAG_LOST;
+        return WINED3DERR_DEVICELOST;
+    }
 
-    if (src_ds_flags || dst_ds_flags)
+    if (flag == SFLAG_INSYSMEM)
     {
-        if (flags & WINEDDBLT_DEPTHFILL)
-        {
-            float depth;
-            RECT rect;
+        surface_prepare_system_memory(surface);
 
-            TRACE("Depth fill.\n");
+        /* Download the surface to system memory */
+        if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
+        {
+            struct wined3d_context *context = NULL;
 
-            surface_get_rect(This, DestRect, &rect);
+            if (!device->isInDraw) context = context_acquire(device, NULL);
 
-            if (!surface_convert_depth_to_float(This, DDBltFx->u5.dwFillDepth, &depth))
-                return WINED3DERR_INVALIDCALL;
+            surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
+            surface_download_data(surface, gl_info);
 
-            if (SUCCEEDED(wined3d_surface_depth_fill(This, &rect, depth)))
-                return WINED3D_OK;
+            if (context) context_release(context);
         }
         else
         {
-            RECT src_rect, dst_rect;
+            /* Note: It might be faster to download into a texture first. */
+            read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
+                    IWineD3DSurface_GetPitch((IWineD3DSurface *)surface));
+        }
+    }
+    else if (flag == SFLAG_INDRAWABLE)
+    {
+        if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
+            surface_load_location(surface, SFLAG_INTEXTURE, NULL);
 
-            /* Accessing depth / stencil surfaces is supposed to fail while in
-             * a scene, except for fills, which seem to work. */
-            if (device->inScene)
-            {
-                WARN("Rejecting depth / stencil access while in scene.\n");
-                return WINED3DERR_INVALIDCALL;
-            }
+        if (surface->flags & SFLAG_INTEXTURE)
+        {
+            RECT r;
 
-            if (src_ds_flags != dst_ds_flags)
+            surface_get_rect(surface, rect, &r);
+            surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
+        }
+        else
+        {
+            int byte_count;
+            if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
             {
-                WARN("Rejecting depth / stencil blit between incompatible formats.\n");
-                return WINED3DERR_INVALIDCALL;
+                /* This needs a shader to convert the srgb data sampled from the GL texture into RGB
+                 * values, otherwise we get incorrect values in the target. For now go the slow way
+                 * via a system memory copy
+                 */
+                surface_load_location(surface, SFLAG_INSYSMEM, rect);
             }
 
-            if (SrcRect && (SrcRect->top || SrcRect->left
-                    || SrcRect->bottom != src->resource.height
-                    || SrcRect->right != src->resource.width))
+            d3dfmt_get_conv(surface, FALSE /* We need color keying */,
+                    FALSE /* We won't use textures */, &format, &convert);
+
+            /* The width is in 'length' not in bytes */
+            width = surface->resource.width;
+            pitch = IWineD3DSurface_GetPitch((IWineD3DSurface *)surface);
+
+            /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
+             * but it isn't set (yet) in all cases where it gets called. */
+            if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
             {
-                WARN("Rejecting depth / stencil blit with invalid source rect %s.\n",
-                        wine_dbgstr_rect(SrcRect));
-                return WINED3DERR_INVALIDCALL;
+                struct wined3d_context *context = NULL;
+
+                TRACE("Removing the pbo attached to surface %p.\n", surface);
+
+                if (!device->isInDraw) context = context_acquire(device, NULL);
+                surface_remove_pbo(surface, gl_info);
+                if (context) context_release(context);
             }
 
-            if (DestRect && (DestRect->top || DestRect->left
-                    || DestRect->bottom != This->resource.height
-                    || DestRect->right != This->resource.width))
+            if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
             {
-                WARN("Rejecting depth / stencil blit with invalid destination rect %s.\n",
-                        wine_dbgstr_rect(SrcRect));
-                return WINED3DERR_INVALIDCALL;
-            }
+                int height = surface->resource.height;
+                byte_count = format.conv_byte_count;
+
+                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
+                outpitch = width * byte_count;
+                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
+
+                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
+                if(!mem) {
+                    ERR("Out of memory %d, %d!\n", outpitch, height);
+                    return WINED3DERR_OUTOFVIDEOMEMORY;
+                }
+                d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
+                        width, height, outpitch, convert, surface);
 
-            if (src->resource.height != This->resource.height
-                    || src->resource.width != This->resource.width)
+                surface->flags |= SFLAG_CONVERTED;
+            }
+            else
             {
-                WARN("Rejecting depth / stencil blit with mismatched surface sizes.\n");
-                return WINED3DERR_INVALIDCALL;
+                surface->flags &= ~SFLAG_CONVERTED;
+                mem = surface->resource.allocatedMemory;
+                byte_count = format.byte_count;
             }
 
-            surface_get_rect(src, SrcRect, &src_rect);
-            surface_get_rect(This, DestRect, &dst_rect);
+            flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
 
-            if (SUCCEEDED(wined3d_surface_depth_blt(src, &src_rect, This, &dst_rect)))
-                return WINED3D_OK;
+            /* Don't delete PBO memory */
+            if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
+                HeapFree(GetProcessHeap(), 0, mem);
         }
     }
-
-    /* Special cases for RenderTargets */
-    if ((This->resource.usage & WINED3DUSAGE_RENDERTARGET)
-            || (src && (src->resource.usage & WINED3DUSAGE_RENDERTARGET)))
+    else /* if(flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) */
     {
-        if (SUCCEEDED(IWineD3DSurfaceImpl_BltOverride(This, DestRect, src, SrcRect, flags, DDBltFx, Filter)))
-            return WINED3D_OK;
-    }
+        const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
 
-    /* For the rest call the X11 surface implementation.
-     * For RenderTargets this should be implemented OpenGL accelerated in BltOverride,
-     * other Blts are rather rare. */
-    return IWineD3DBaseSurfaceImpl_Blt(iface, DestRect, src_surface, SrcRect, flags, DDBltFx, Filter);
-}
+        if (drawable_read_ok && (surface->flags & SFLAG_INDRAWABLE))
+        {
+            read_from_framebuffer_texture(surface, flag == SFLAG_INSRGBTEX);
+        }
+        else if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
+                && (surface->resource.format->flags & attach_flags) == attach_flags
+                && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
+                        NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
+                        NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
+        {
+            DWORD src_location = flag == SFLAG_INSRGBTEX ? SFLAG_INTEXTURE : SFLAG_INSRGBTEX;
+            RECT rect = {0, 0, surface->resource.width, surface->resource.height};
 
-/* GL locking is done by the caller */
-static void surface_depth_blt(IWineD3DSurfaceImpl *This, const struct wined3d_gl_info *gl_info,
-        GLuint texture, GLsizei w, GLsizei h, GLenum target)
-{
-    IWineD3DDeviceImpl *device = This->resource.device;
-    GLint compare_mode = GL_NONE;
-    struct blt_info info;
-    GLint old_binding = 0;
-    RECT rect;
+            surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
+                    surface, src_location, &rect, surface, flag, &rect);
+        }
+        else
+        {
+            /* Upload from system memory */
+            BOOL srgb = flag == SFLAG_INSRGBTEX;
+            struct wined3d_context *context = NULL;
 
-    glPushAttrib(GL_ENABLE_BIT | GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT | GL_VIEWPORT_BIT);
+            d3dfmt_get_conv(surface, TRUE /* We need color keying */,
+                    TRUE /* We will use textures */, &format, &convert);
 
-    glDisable(GL_CULL_FACE);
-    glDisable(GL_BLEND);
-    glDisable(GL_ALPHA_TEST);
-    glDisable(GL_SCISSOR_TEST);
-    glDisable(GL_STENCIL_TEST);
-    glEnable(GL_DEPTH_TEST);
-    glDepthFunc(GL_ALWAYS);
-    glDepthMask(GL_TRUE);
-    glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
-    glViewport(0, This->pow2Height - h, w, h);
+            if (srgb)
+            {
+                if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
+                {
+                    /* Performance warning... */
+                    FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
+                    surface_load_location(surface, SFLAG_INSYSMEM, rect);
+                }
+            }
+            else
+            {
+                if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
+                {
+                    /* Performance warning... */
+                    FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
+                    surface_load_location(surface, SFLAG_INSYSMEM, rect);
+                }
+            }
+            if (!(surface->flags & SFLAG_INSYSMEM))
+            {
+                WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
+                /* Let's hope we get it from somewhere... */
+                surface_load_location(surface, SFLAG_INSYSMEM, rect);
+            }
 
-    SetRect(&rect, 0, h, w, 0);
-    surface_get_blt_info(target, &rect, This->pow2Width, This->pow2Height, &info);
-    GL_EXTCALL(glActiveTextureARB(GL_TEXTURE0_ARB));
-    glGetIntegerv(info.binding, &old_binding);
-    glBindTexture(info.bind_target, texture);
-    if (gl_info->supported[ARB_SHADOW])
-    {
-        glGetTexParameteriv(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, &compare_mode);
-        if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, GL_NONE);
-    }
+            if (!device->isInDraw) context = context_acquire(device, NULL);
+
+            surface_prepare_texture(surface, gl_info, srgb);
+            surface_bind_and_dirtify(surface, gl_info, srgb);
+
+            if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
+            {
+                surface->flags |= SFLAG_GLCKEY;
+                surface->glCKey = surface->SrcBltCKey;
+            }
+            else surface->flags &= ~SFLAG_GLCKEY;
 
-    device->shader_backend->shader_select_depth_blt(device->shader_priv,
-            gl_info, info.tex_type, &This->ds_current_size);
+            /* The width is in 'length' not in bytes */
+            width = surface->resource.width;
+            pitch = IWineD3DSurface_GetPitch((IWineD3DSurface *)surface);
 
-    glBegin(GL_TRIANGLE_STRIP);
-    glTexCoord3fv(info.coords[0]);
-    glVertex2f(-1.0f, -1.0f);
-    glTexCoord3fv(info.coords[1]);
-    glVertex2f(1.0f, -1.0f);
-    glTexCoord3fv(info.coords[2]);
-    glVertex2f(-1.0f, 1.0f);
-    glTexCoord3fv(info.coords[3]);
-    glVertex2f(1.0f, 1.0f);
-    glEnd();
+            /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
+             * but it isn't set (yet) in all cases where it gets called. */
+            if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
+            {
+                TRACE("Removing the pbo attached to surface %p.\n", surface);
+                surface_remove_pbo(surface, gl_info);
+            }
 
-    if (compare_mode != GL_NONE) glTexParameteri(info.bind_target, GL_TEXTURE_COMPARE_MODE_ARB, compare_mode);
-    glBindTexture(info.bind_target, old_binding);
+            if (format.convert)
+            {
+                /* This code is entered for texture formats which need a fixup. */
+                UINT height = surface->resource.height;
 
-    glPopAttrib();
+                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
+                outpitch = width * format.conv_byte_count;
+                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
 
-    device->shader_backend->shader_deselect_depth_blt(device->shader_priv, gl_info);
-}
+                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
+                if(!mem) {
+                    ERR("Out of memory %d, %d!\n", outpitch, height);
+                    if (context) context_release(context);
+                    return WINED3DERR_OUTOFVIDEOMEMORY;
+                }
+                format.convert(surface->resource.allocatedMemory, mem, pitch, width, height);
+            }
+            else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
+            {
+                /* This code is only entered for color keying fixups */
+                UINT height = surface->resource.height;
 
-void surface_modify_ds_location(IWineD3DSurfaceImpl *surface,
-        DWORD location, UINT w, UINT h)
-{
-    TRACE("surface %p, new location %#x, w %u, h %u.\n", surface, location, w, h);
+                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
+                outpitch = width * format.conv_byte_count;
+                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
 
-    if (location & ~SFLAG_DS_LOCATIONS)
-        FIXME("Invalid location (%#x) specified.\n", location);
+                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
+                if(!mem) {
+                    ERR("Out of memory %d, %d!\n", outpitch, height);
+                    if (context) context_release(context);
+                    return WINED3DERR_OUTOFVIDEOMEMORY;
+                }
+                d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
+                        width, height, outpitch, convert, surface);
+            }
+            else
+            {
+                mem = surface->resource.allocatedMemory;
+            }
 
-    surface->ds_current_size.cx = w;
-    surface->ds_current_size.cy = h;
-    surface->flags &= ~SFLAG_DS_LOCATIONS;
-    surface->flags |= location;
-}
+            /* Make sure the correct pitch is used */
+            ENTER_GL();
+            glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
+            LEAVE_GL();
 
-/* Context activation is done by the caller. */
-void surface_load_ds_location(IWineD3DSurfaceImpl *surface, struct wined3d_context *context, DWORD location)
-{
-    IWineD3DDeviceImpl *device = surface->resource.device;
-    const struct wined3d_gl_info *gl_info = context->gl_info;
-    GLsizei w, h;
+            if (mem || (surface->flags & SFLAG_PBO))
+                surface_upload_data(surface, gl_info, &format, srgb, mem);
 
-    TRACE("surface %p, new location %#x.\n", surface, location);
+            /* Restore the default pitch */
+            ENTER_GL();
+            glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+            LEAVE_GL();
 
-    /* TODO: Make this work for modes other than FBO */
-    if (wined3d_settings.offscreen_rendering_mode != ORM_FBO) return;
+            if (context) context_release(context);
 
-    if (!(surface->flags & location))
-    {
-        w = surface->ds_current_size.cx;
-        h = surface->ds_current_size.cy;
-        surface->ds_current_size.cx = 0;
-        surface->ds_current_size.cy = 0;
-    }
-    else
-    {
-        w = surface->resource.width;
-        h = surface->resource.height;
+            /* Don't delete PBO memory */
+            if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
+                HeapFree(GetProcessHeap(), 0, mem);
+        }
     }
 
-    if (surface->ds_current_size.cx == surface->resource.width
-            && surface->ds_current_size.cy == surface->resource.height)
+    if (!rect)
     {
-        TRACE("Location (%#x) is already up to date.\n", location);
-        return;
-    }
+        surface->flags |= flag;
 
-    if (surface->current_renderbuffer)
-    {
-        FIXME("Not supported with fixed up depth stencil.\n");
-        return;
+        if (flag != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
+            surface_evict_sysmem(surface);
     }
 
-    if (!(surface->flags & SFLAG_DS_LOCATIONS))
+    if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
     {
-        /* This mostly happens when a depth / stencil is used without being
-         * cleared first. In principle we could upload from sysmem, or
-         * explicitly clear before first usage. For the moment there don't
-         * appear to be a lot of applications depending on this, so a FIXME
-         * should do. */
-        FIXME("No up to date depth stencil location.\n");
-        surface->flags |= location;
-        surface->ds_current_size.cx = surface->resource.width;
-        surface->ds_current_size.cy = surface->resource.height;
-        return;
+        /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
+        surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
     }
 
-    if (location == SFLAG_DS_OFFSCREEN)
+    if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
+            && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
     {
-        GLint old_binding = 0;
-        GLenum bind_target;
-
-        /* The render target is allowed to be smaller than the depth/stencil
-         * buffer, so the onscreen depth/stencil buffer is potentially smaller
-         * than the offscreen surface. Don't overwrite the offscreen surface
-         * with undefined data. */
-        w = min(w, context->swapchain->presentParms.BackBufferWidth);
-        h = min(h, context->swapchain->presentParms.BackBufferHeight);
-
-        TRACE("Copying onscreen depth buffer to depth texture.\n");
-
-        ENTER_GL();
-
-        if (!device->depth_blt_texture)
-        {
-            glGenTextures(1, &device->depth_blt_texture);
-        }
+        surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
+    }
 
-        /* Note that we use depth_blt here as well, rather than glCopyTexImage2D
-         * directly on the FBO texture. That's because we need to flip. */
-        context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
-        if (surface->texture_target == GL_TEXTURE_RECTANGLE_ARB)
-        {
-            glGetIntegerv(GL_TEXTURE_BINDING_RECTANGLE_ARB, &old_binding);
-            bind_target = GL_TEXTURE_RECTANGLE_ARB;
-        }
-        else
-        {
-            glGetIntegerv(GL_TEXTURE_BINDING_2D, &old_binding);
-            bind_target = GL_TEXTURE_2D;
-        }
-        glBindTexture(bind_target, device->depth_blt_texture);
-        glCopyTexImage2D(bind_target, surface->texture_level, surface->resource.format->glInternal, 0, 0, w, h, 0);
-        glTexParameteri(bind_target, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-        glTexParameteri(bind_target, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
-        glTexParameteri(bind_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-        glTexParameteri(bind_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-        glTexParameteri(bind_target, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE);
-        glTexParameteri(bind_target, GL_DEPTH_TEXTURE_MODE_ARB, GL_LUMINANCE);
-        glBindTexture(bind_target, old_binding);
+    return WINED3D_OK;
+}
 
-        /* Setup the destination */
-        if (!device->depth_blt_rb)
-        {
-            gl_info->fbo_ops.glGenRenderbuffers(1, &device->depth_blt_rb);
-            checkGLcall("glGenRenderbuffersEXT");
-        }
-        if (device->depth_blt_rb_w != w || device->depth_blt_rb_h != h)
-        {
-            gl_info->fbo_ops.glBindRenderbuffer(GL_RENDERBUFFER, device->depth_blt_rb);
-            checkGLcall("glBindRenderbufferEXT");
-            gl_info->fbo_ops.glRenderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, w, h);
-            checkGLcall("glRenderbufferStorageEXT");
-            device->depth_blt_rb_w = w;
-            device->depth_blt_rb_h = h;
-        }
+BOOL surface_is_offscreen(IWineD3DSurfaceImpl *surface)
+{
+    struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
 
-        context_bind_fbo(context, GL_FRAMEBUFFER, &context->dst_fbo);
-        gl_info->fbo_ops.glFramebufferRenderbuffer(GL_FRAMEBUFFER,
-                GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, device->depth_blt_rb);
-        checkGLcall("glFramebufferRenderbufferEXT");
-        context_attach_depth_stencil_fbo(context, GL_FRAMEBUFFER, surface, FALSE);
+    /* Not on a swapchain - must be offscreen */
+    if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
 
-        /* Do the actual blit */
-        surface_depth_blt(surface, gl_info, device->depth_blt_texture, w, h, bind_target);
-        checkGLcall("depth_blt");
+    /* The front buffer is always onscreen */
+    if (surface == swapchain->front_buffer) return FALSE;
 
-        if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
-        else context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
+    /* If the swapchain is rendered to an FBO, the backbuffer is
+     * offscreen, otherwise onscreen */
+    return swapchain->render_to_fbo;
+}
 
-        LEAVE_GL();
+const IWineD3DSurfaceVtbl IWineD3DSurface_Vtbl =
+{
+    /* IUnknown */
+    IWineD3DBaseSurfaceImpl_QueryInterface,
+    IWineD3DBaseSurfaceImpl_AddRef,
+    IWineD3DBaseSurfaceImpl_Release,
+    /* IWineD3DResource */
+    IWineD3DBaseSurfaceImpl_GetParent,
+    IWineD3DBaseSurfaceImpl_SetPrivateData,
+    IWineD3DBaseSurfaceImpl_GetPrivateData,
+    IWineD3DBaseSurfaceImpl_FreePrivateData,
+    IWineD3DBaseSurfaceImpl_SetPriority,
+    IWineD3DBaseSurfaceImpl_GetPriority,
+    IWineD3DBaseSurfaceImpl_PreLoad,
+    /* IWineD3DSurface */
+    IWineD3DBaseSurfaceImpl_GetResource,
+    IWineD3DBaseSurfaceImpl_Map,
+    IWineD3DBaseSurfaceImpl_Unmap,
+    IWineD3DBaseSurfaceImpl_GetDC,
+    IWineD3DBaseSurfaceImpl_ReleaseDC,
+    IWineD3DBaseSurfaceImpl_Flip,
+    IWineD3DBaseSurfaceImpl_Blt,
+    IWineD3DBaseSurfaceImpl_GetBltStatus,
+    IWineD3DBaseSurfaceImpl_GetFlipStatus,
+    IWineD3DBaseSurfaceImpl_IsLost,
+    IWineD3DBaseSurfaceImpl_Restore,
+    IWineD3DBaseSurfaceImpl_BltFast,
+    IWineD3DBaseSurfaceImpl_GetPalette,
+    IWineD3DBaseSurfaceImpl_SetPalette,
+    IWineD3DBaseSurfaceImpl_SetColorKey,
+    IWineD3DBaseSurfaceImpl_GetPitch,
+    IWineD3DBaseSurfaceImpl_SetMem,
+    IWineD3DBaseSurfaceImpl_SetOverlayPosition,
+    IWineD3DBaseSurfaceImpl_GetOverlayPosition,
+    IWineD3DBaseSurfaceImpl_UpdateOverlayZOrder,
+    IWineD3DBaseSurfaceImpl_UpdateOverlay,
+    IWineD3DBaseSurfaceImpl_SetClipper,
+    IWineD3DBaseSurfaceImpl_GetClipper,
+    /* Internal use: */
+    IWineD3DBaseSurfaceImpl_SetFormat,
+};
 
-        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
-    }
-    else if (location == SFLAG_DS_ONSCREEN)
-    {
-        TRACE("Copying depth texture to onscreen depth buffer.\n");
+static HRESULT ffp_blit_alloc(IWineD3DDeviceImpl *device) { return WINED3D_OK; }
+/* Context activation is done by the caller. */
+static void ffp_blit_free(IWineD3DDeviceImpl *device) { }
 
-        ENTER_GL();
+/* Uploads the palette for 8-bit paletted textures when GL_EXT_paletted_texture is used. */
+/* Context activation is done by the caller. */
+static void ffp_blit_p8_upload_palette(IWineD3DSurfaceImpl *surface, const struct wined3d_gl_info *gl_info)
+{
+    BYTE table[256][4];
+    BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
 
-        context_bind_fbo(context, GL_FRAMEBUFFER, NULL);
-        surface_depth_blt(surface, gl_info, surface->texture_name,
-                w, h, surface->texture_target);
-        checkGLcall("depth_blt");
+    d3dfmt_p8_init_palette(surface, table, colorkey_active);
 
-        if (context->current_fbo) context_bind_fbo(context, GL_FRAMEBUFFER, &context->current_fbo->id);
+    TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
+    ENTER_GL();
+    GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
+    LEAVE_GL();
+}
 
-        LEAVE_GL();
+/* Context activation is done by the caller. */
+static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, IWineD3DSurfaceImpl *surface)
+{
+    enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
 
-        if (wined3d_settings.strict_draw_ordering) wglFlush(); /* Flush to ensure ordering across contexts. */
-    }
-    else
-    {
-        ERR("Invalid location (%#x) specified.\n", location);
-    }
+    /* When EXT_PALETTED_TEXTURE is supported, palette conversion is done by
+     * the GPU; otherwise the surface is converted in software at upload time
+     * in LoadLocation. */
+    if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
+        ffp_blit_p8_upload_palette(surface, gl_info);
 
-    surface->flags |= location;
-    surface->ds_current_size.cx = surface->resource.width;
-    surface->ds_current_size.cy = surface->resource.height;
+    ENTER_GL();
+    glEnable(surface->texture_target);
+    checkGLcall("glEnable(surface->texture_target)");
+    LEAVE_GL();
+    return WINED3D_OK;
 }
 
-void surface_modify_location(IWineD3DSurfaceImpl *surface, DWORD flag, BOOL persistent)
+/* Context activation is done by the caller. */
+static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
 {
-    const struct wined3d_gl_info *gl_info = &surface->resource.device->adapter->gl_info;
-    IWineD3DSurfaceImpl *overlay;
-
-    TRACE("surface %p, location %s, persistent %#x.\n",
-            surface, debug_surflocation(flag), persistent);
-
-    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
+    ENTER_GL();
+    glDisable(GL_TEXTURE_2D);
+    checkGLcall("glDisable(GL_TEXTURE_2D)");
+    if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
     {
-        if (surface_is_offscreen(surface))
-        {
-            /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
-            if (flag & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) flag |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
-        }
-        else
-        {
-            TRACE("Surface %p is an onscreen surface.\n", surface);
-        }
+        glDisable(GL_TEXTURE_CUBE_MAP_ARB);
+        checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
     }
-
-    if (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
-            && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
+    if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
     {
-        flag |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
+        glDisable(GL_TEXTURE_RECTANGLE_ARB);
+        checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
     }
+    LEAVE_GL();
+}
 
-    if (persistent)
+static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
+        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
+        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
+{
+    enum complex_fixup src_fixup;
+
+    switch (blit_op)
     {
-        if (((surface->flags & SFLAG_INTEXTURE) && !(flag & SFLAG_INTEXTURE))
-                || ((surface->flags & SFLAG_INSRGBTEX) && !(flag & SFLAG_INSRGBTEX)))
-        {
-            if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
+        case WINED3D_BLIT_OP_COLOR_BLIT:
+            src_fixup = get_complex_fixup(src_format->color_fixup);
+            if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
             {
-                TRACE("Passing to container.\n");
-                wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
+                TRACE("Checking support for fixup:\n");
+                dump_color_fixup_desc(src_format->color_fixup);
             }
-        }
-        surface->flags &= ~SFLAG_LOCATIONS;
-        surface->flags |= flag;
 
-        /* Redraw emulated overlays, if any */
-        if (flag & SFLAG_INDRAWABLE && !list_empty(&surface->overlays))
-        {
-            LIST_FOR_EACH_ENTRY(overlay, &surface->overlays, IWineD3DSurfaceImpl, overlay_entry)
+            if (!is_identity_fixup(dst_format->color_fixup))
             {
-                overlay->surface_ops->surface_draw_overlay(overlay);
+                TRACE("Destination fixups are not supported\n");
+                return FALSE;
             }
-        }
-    }
-    else
-    {
-        if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) && (flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)))
-        {
-            if (surface->container.type == WINED3D_CONTAINER_TEXTURE)
+
+            if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
             {
-                TRACE("Passing to container\n");
-                wined3d_texture_set_dirty(surface->container.u.texture, TRUE);
+                TRACE("P8 fixup supported\n");
+                return TRUE;
             }
-        }
-        surface->flags &= ~flag;
-    }
-
-    if (!(surface->flags & SFLAG_LOCATIONS))
-    {
-        ERR("Surface %p does not have any up to date location.\n", surface);
-    }
-}
-
-HRESULT surface_load_location(IWineD3DSurfaceImpl *surface, DWORD flag, const RECT *rect)
-{
-    IWineD3DDeviceImpl *device = surface->resource.device;
-    const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
-    BOOL drawable_read_ok = surface_is_offscreen(surface);
-    struct wined3d_format format;
-    CONVERT_TYPES convert;
-    int width, pitch, outpitch;
-    BYTE *mem;
-    BOOL in_fbo = FALSE;
 
-    TRACE("surface %p, location %s, rect %s.\n", surface, debug_surflocation(flag), wine_dbgstr_rect(rect));
+            /* We only support identity conversions. */
+            if (is_identity_fixup(src_format->color_fixup))
+            {
+                TRACE("[OK]\n");
+                return TRUE;
+            }
 
-    if (surface->resource.usage & WINED3DUSAGE_DEPTHSTENCIL)
-    {
-        if (flag == SFLAG_INTEXTURE)
-        {
-            struct wined3d_context *context = context_acquire(device, NULL);
-            surface_load_ds_location(surface, context, SFLAG_DS_OFFSCREEN);
-            context_release(context);
-            return WINED3D_OK;
-        }
-        else
-        {
-            FIXME("Unimplemented location %s for depth/stencil buffers.\n", debug_surflocation(flag));
-            return WINED3DERR_INVALIDCALL;
-        }
-    }
+            TRACE("[FAILED]\n");
+            return FALSE;
 
-    if (wined3d_settings.offscreen_rendering_mode == ORM_FBO)
-    {
-        if (surface_is_offscreen(surface))
-        {
-            /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets.
-             * Prefer SFLAG_INTEXTURE. */
-            if (flag == SFLAG_INDRAWABLE) flag = SFLAG_INTEXTURE;
-            drawable_read_ok = FALSE;
-            in_fbo = TRUE;
-        }
-        else
-        {
-            TRACE("Surface %p is an onscreen surface.\n", surface);
-        }
-    }
+        case WINED3D_BLIT_OP_COLOR_FILL:
+            if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
+            {
+                TRACE("Color fill not supported\n");
+                return FALSE;
+            }
 
-    if (flag == SFLAG_INSRGBTEX && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
-    {
-        flag = SFLAG_INTEXTURE;
-    }
+            return TRUE;
 
-    if (surface->flags & flag)
-    {
-        TRACE("Location already up to date\n");
-        return WINED3D_OK;
-    }
+        case WINED3D_BLIT_OP_DEPTH_FILL:
+            return TRUE;
 
-    if (!(surface->flags & SFLAG_LOCATIONS))
-    {
-        ERR("Surface %p does not have any up to date location.\n", surface);
-        surface->flags |= SFLAG_LOST;
-        return WINED3DERR_DEVICELOST;
+        default:
+            TRACE("Unsupported blit_op=%d\n", blit_op);
+            return FALSE;
     }
+}
 
-    if (flag == SFLAG_INSYSMEM)
-    {
-        surface_prepare_system_memory(surface);
-
-        /* Download the surface to system memory */
-        if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX))
-        {
-            struct wined3d_context *context = NULL;
+/* Do not call while under the GL lock. */
+static HRESULT ffp_blit_color_fill(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *dst_surface,
+        const RECT *dst_rect, const WINED3DCOLORVALUE *color)
+{
+    const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
 
-            if (!device->isInDraw) context = context_acquire(device, NULL);
+    return device_clear_render_targets(device, 1, &dst_surface, NULL,
+            1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
+}
 
-            surface_bind_and_dirtify(surface, gl_info, !(surface->flags & SFLAG_INTEXTURE));
-            surface_download_data(surface, gl_info);
+/* Do not call while under the GL lock. */
+static HRESULT ffp_blit_depth_fill(IWineD3DDeviceImpl *device,
+        IWineD3DSurfaceImpl *surface, const RECT *rect, float depth)
+{
+    const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
 
-            if (context) context_release(context);
-        }
-        else
-        {
-            /* Note: It might be faster to download into a texture first. */
-            read_from_framebuffer(surface, rect, surface->resource.allocatedMemory,
-                    IWineD3DSurface_GetPitch((IWineD3DSurface *)surface));
-        }
-    }
-    else if (flag == SFLAG_INDRAWABLE)
-    {
-        if (wined3d_settings.rendertargetlock_mode == RTL_READTEX)
-            surface_load_location(surface, SFLAG_INTEXTURE, NULL);
+    return device_clear_render_targets(device, 0, NULL, surface,
+            1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
+}
 
-        if (surface->flags & SFLAG_INTEXTURE)
-        {
-            RECT r;
+const struct blit_shader ffp_blit =  {
+    ffp_blit_alloc,
+    ffp_blit_free,
+    ffp_blit_set,
+    ffp_blit_unset,
+    ffp_blit_supported,
+    ffp_blit_color_fill,
+    ffp_blit_depth_fill,
+};
 
-            surface_get_rect(surface, rect, &r);
-            surface_blt_to_drawable(device, WINED3DTEXF_POINT, FALSE, surface, &r, surface, &r);
-        }
-        else
-        {
-            int byte_count;
-            if ((surface->flags & SFLAG_LOCATIONS) == SFLAG_INSRGBTEX)
-            {
-                /* This needs a shader to convert the srgb data sampled from the GL texture into RGB
-                 * values, otherwise we get incorrect values in the target. For now go the slow way
-                 * via a system memory copy
-                 */
-                surface_load_location(surface, SFLAG_INSYSMEM, rect);
-            }
+static HRESULT cpu_blit_alloc(IWineD3DDeviceImpl *device)
+{
+    return WINED3D_OK;
+}
 
-            d3dfmt_get_conv(surface, FALSE /* We need color keying */,
-                    FALSE /* We won't use textures */, &format, &convert);
+/* Context activation is done by the caller. */
+static void cpu_blit_free(IWineD3DDeviceImpl *device)
+{
+}
 
-            /* The width is in 'length' not in bytes */
-            width = surface->resource.width;
-            pitch = IWineD3DSurface_GetPitch((IWineD3DSurface *)surface);
+/* Context activation is done by the caller. */
+static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, IWineD3DSurfaceImpl *surface)
+{
+    return WINED3D_OK;
+}
 
-            /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
-             * but it isn't set (yet) in all cases it is getting called. */
-            if ((convert != NO_CONVERSION) && (surface->flags & SFLAG_PBO))
-            {
-                struct wined3d_context *context = NULL;
+/* Context activation is done by the caller. */
+static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
+{
+}
 
-                TRACE("Removing the pbo attached to surface %p.\n", surface);
+static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
+        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
+        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
+{
+    if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
+    {
+        return TRUE;
+    }
 
-                if (!device->isInDraw) context = context_acquire(device, NULL);
-                surface_remove_pbo(surface, gl_info);
-                if (context) context_release(context);
-            }
+    return FALSE;
+}
 
-            if ((convert != NO_CONVERSION) && surface->resource.allocatedMemory)
-            {
-                int height = surface->resource.height;
-                byte_count = format.conv_byte_count;
+static HRESULT surface_cpu_blt(IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect,
+        IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD flags,
+        const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter)
+{
+    int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
+    const struct wined3d_format *src_format, *dst_format;
+    IWineD3DSurfaceImpl *orig_src = src_surface;
+    WINED3DLOCKED_RECT dlock, slock;
+    HRESULT hr = WINED3D_OK;
+    const BYTE *sbuf;
+    RECT xdst,xsrc;
+    BYTE *dbuf;
+    int x, y;
 
-                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
-                outpitch = width * byte_count;
-                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
+    TRACE("dst_surface %p, dst_rect %s, src_surface %p, src_rect %s, flags %#x, fx %p, filter %s.\n",
+            dst_surface, wine_dbgstr_rect(dst_rect), src_surface, wine_dbgstr_rect(src_rect),
+            flags, fx, debug_d3dtexturefiltertype(filter));
 
-                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
-                if(!mem) {
-                    ERR("Out of memory %d, %d!\n", outpitch, height);
-                    return WINED3DERR_OUTOFVIDEOMEMORY;
-                }
-                d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
-                        width, height, outpitch, convert, surface);
+    if ((dst_surface->flags & SFLAG_LOCKED) || (src_surface && (src_surface->flags & SFLAG_LOCKED)))
+    {
+        WARN("Surface is busy, returning WINEDDERR_SURFACEBUSY\n");
+        return WINEDDERR_SURFACEBUSY;
+    }
 
-                surface->flags |= SFLAG_CONVERTED;
-            }
-            else
+    /* First check for the validity of source / destination rectangles.
+     * This was verified using a test application and by MSDN. */
+    if (src_rect)
+    {
+        if (src_surface)
+        {
+            if (src_rect->right < src_rect->left || src_rect->bottom < src_rect->top
+                    || src_rect->left > src_surface->resource.width || src_rect->left < 0
+                    || src_rect->top > src_surface->resource.height || src_rect->top < 0
+                    || src_rect->right > src_surface->resource.width || src_rect->right < 0
+                    || src_rect->bottom > src_surface->resource.height || src_rect->bottom < 0)
             {
-                surface->flags &= ~SFLAG_CONVERTED;
-                mem = surface->resource.allocatedMemory;
-                byte_count = format.byte_count;
+                WARN("Application gave us bad source rectangle for Blt.\n");
+                return WINEDDERR_INVALIDRECT;
             }
 
-            flush_to_framebuffer_drawpixels(surface, rect, format.glFormat, format.glType, byte_count, mem);
-
-            /* Don't delete PBO memory */
-            if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
-                HeapFree(GetProcessHeap(), 0, mem);
+            if (!src_rect->right || !src_rect->bottom
+                    || src_rect->left == (int)src_surface->resource.width
+                    || src_rect->top == (int)src_surface->resource.height)
+            {
+                TRACE("Nothing to be done.\n");
+                return WINED3D_OK;
+            }
         }
+
+        xsrc = *src_rect;
     }
-    else /* if(flag & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)) */
+    else if (src_surface)
     {
-        const DWORD attach_flags = WINED3DFMT_FLAG_FBO_ATTACHABLE | WINED3DFMT_FLAG_FBO_ATTACHABLE_SRGB;
+        xsrc.left = 0;
+        xsrc.top = 0;
+        xsrc.right = src_surface->resource.width;
+        xsrc.bottom = src_surface->resource.height;
+    }
+    else
+    {
+        memset(&xsrc, 0, sizeof(xsrc));
+    }
 
-        if (drawable_read_ok && (surface->flags & SFLAG_INDRAWABLE))
+    if (dst_rect)
+    {
+        /* For the Destination rect, it can be out of bounds on the condition
+         * that a clipper is set for the given surface. */
+        if (!dst_surface->clipper && (dst_rect->right < dst_rect->left || dst_rect->bottom < dst_rect->top
+                || dst_rect->left > dst_surface->resource.width || dst_rect->left < 0
+                || dst_rect->top > dst_surface->resource.height || dst_rect->top < 0
+                || dst_rect->right > dst_surface->resource.width || dst_rect->right < 0
+                || dst_rect->bottom > dst_surface->resource.height || dst_rect->bottom < 0))
         {
-            read_from_framebuffer_texture(surface, flag == SFLAG_INSRGBTEX);
+            WARN("Application gave us bad destination rectangle for Blt without a clipper set.\n");
+            return WINEDDERR_INVALIDRECT;
         }
-        else if (surface->flags & (SFLAG_INSRGBTEX | SFLAG_INTEXTURE)
-                && (surface->resource.format->flags & attach_flags) == attach_flags
-                && fbo_blit_supported(gl_info, WINED3D_BLIT_OP_COLOR_BLIT,
-                        NULL, surface->resource.usage, surface->resource.pool, surface->resource.format,
-                        NULL, surface->resource.usage, surface->resource.pool, surface->resource.format))
+
+        if (dst_rect->right <= 0 || dst_rect->bottom <= 0
+                || dst_rect->left >= (int)dst_surface->resource.width
+                || dst_rect->top >= (int)dst_surface->resource.height)
         {
-            DWORD src_location = flag == SFLAG_INSRGBTEX ? SFLAG_INTEXTURE : SFLAG_INSRGBTEX;
-            RECT rect = {0, 0, surface->resource.width, surface->resource.height};
+            TRACE("Nothing to be done.\n");
+            return WINED3D_OK;
+        }
 
-            surface_blt_fbo(surface->resource.device, WINED3DTEXF_POINT,
-                    surface, src_location, &rect, surface, flag, &rect);
+        if (!src_surface)
+        {
+            RECT full_rect;
+
+            full_rect.left = 0;
+            full_rect.top = 0;
+            full_rect.right = dst_surface->resource.width;
+            full_rect.bottom = dst_surface->resource.height;
+            IntersectRect(&xdst, &full_rect, dst_rect);
         }
         else
         {
-            /* Upload from system memory */
-            BOOL srgb = flag == SFLAG_INSRGBTEX;
-            struct wined3d_context *context = NULL;
+            BOOL clip_horiz, clip_vert;
 
-            d3dfmt_get_conv(surface, TRUE /* We need color keying */,
-                    TRUE /* We will use textures */, &format, &convert);
+            xdst = *dst_rect;
+            clip_horiz = xdst.left < 0 || xdst.right > (int)dst_surface->resource.width;
+            clip_vert = xdst.top < 0 || xdst.bottom > (int)dst_surface->resource.height;
 
-            if (srgb)
-            {
-                if ((surface->flags & (SFLAG_INTEXTURE | SFLAG_INSYSMEM)) == SFLAG_INTEXTURE)
-                {
-                    /* Performance warning... */
-                    FIXME("Downloading RGB surface %p to reload it as sRGB.\n", surface);
-                    surface_load_location(surface, SFLAG_INSYSMEM, rect);
-                }
-            }
-            else
+            if (clip_vert || clip_horiz)
             {
-                if ((surface->flags & (SFLAG_INSRGBTEX | SFLAG_INSYSMEM)) == SFLAG_INSRGBTEX)
+                /* Now check if this is a special case or not... */
+                if ((flags & WINEDDBLT_DDFX)
+                        || (clip_horiz && xdst.right - xdst.left != xsrc.right - xsrc.left)
+                        || (clip_vert && xdst.bottom - xdst.top != xsrc.bottom - xsrc.top))
                 {
-                    /* Performance warning... */
-                    FIXME("Downloading sRGB surface %p to reload it as RGB.\n", surface);
-                    surface_load_location(surface, SFLAG_INSYSMEM, rect);
+                    WARN("Out of screen rectangle in special case. Not handled right now.\n");
+                    return WINED3D_OK;
                 }
-            }
-            if (!(surface->flags & SFLAG_INSYSMEM))
-            {
-                WARN("Trying to load a texture from sysmem, but SFLAG_INSYSMEM is not set.\n");
-                /* Lets hope we get it from somewhere... */
-                surface_load_location(surface, SFLAG_INSYSMEM, rect);
-            }
-
-            if (!device->isInDraw) context = context_acquire(device, NULL);
-
-            surface_prepare_texture(surface, gl_info, srgb);
-            surface_bind_and_dirtify(surface, gl_info, srgb);
-
-            if (surface->CKeyFlags & WINEDDSD_CKSRCBLT)
-            {
-                surface->flags |= SFLAG_GLCKEY;
-                surface->glCKey = surface->SrcBltCKey;
-            }
-            else surface->flags &= ~SFLAG_GLCKEY;
-
-            /* The width is in 'length' not in bytes */
-            width = surface->resource.width;
-            pitch = IWineD3DSurface_GetPitch((IWineD3DSurface *)surface);
-
-            /* Don't use PBOs for converted surfaces. During PBO conversion we look at SFLAG_CONVERTED
-             * but it isn't set (yet) in all cases it is getting called. */
-            if ((convert != NO_CONVERSION || format.convert) && (surface->flags & SFLAG_PBO))
-            {
-                TRACE("Removing the pbo attached to surface %p.\n", surface);
-                surface_remove_pbo(surface, gl_info);
-            }
-
-            if (format.convert)
-            {
-                /* This code is entered for texture formats which need a fixup. */
-                UINT height = surface->resource.height;
-
-                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
-                outpitch = width * format.conv_byte_count;
-                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
 
-                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
-                if(!mem) {
-                    ERR("Out of memory %d, %d!\n", outpitch, height);
-                    if (context) context_release(context);
-                    return WINED3DERR_OUTOFVIDEOMEMORY;
+                if (clip_horiz)
+                {
+                    if (xdst.left < 0)
+                    {
+                        xsrc.left -= xdst.left;
+                        xdst.left = 0;
+                    }
+                    if (xdst.right > dst_surface->resource.width)
+                    {
+                        xsrc.right -= (xdst.right - (int)dst_surface->resource.width);
+                        xdst.right = (int)dst_surface->resource.width;
+                    }
                 }
-                format.convert(surface->resource.allocatedMemory, mem, pitch, width, height);
-            }
-            else if (convert != NO_CONVERSION && surface->resource.allocatedMemory)
-            {
-                /* This code is only entered for color keying fixups */
-                UINT height = surface->resource.height;
 
-                /* Stick to the alignment for the converted surface too, makes it easier to load the surface */
-                outpitch = width * format.conv_byte_count;
-                outpitch = (outpitch + device->surface_alignment - 1) & ~(device->surface_alignment - 1);
+                if (clip_vert)
+                {
+                    if (xdst.top < 0)
+                    {
+                        xsrc.top -= xdst.top;
+                        xdst.top = 0;
+                    }
+                    if (xdst.bottom > dst_surface->resource.height)
+                    {
+                        xsrc.bottom -= (xdst.bottom - (int)dst_surface->resource.height);
+                        xdst.bottom = (int)dst_surface->resource.height;
+                    }
+                }
 
-                mem = HeapAlloc(GetProcessHeap(), 0, outpitch * height);
-                if(!mem) {
-                    ERR("Out of memory %d, %d!\n", outpitch, height);
-                    if (context) context_release(context);
-                    return WINED3DERR_OUTOFVIDEOMEMORY;
+                /* And check if after clipping something is still to be done... */
+                if ((xdst.right <= 0) || (xdst.bottom <= 0)
+                        || (xdst.left >= (int)dst_surface->resource.width)
+                        || (xdst.top >= (int)dst_surface->resource.height)
+                        || (xsrc.right <= 0) || (xsrc.bottom <= 0)
+                        || (xsrc.left >= (int)src_surface->resource.width)
+                        || (xsrc.top >= (int)src_surface->resource.height))
+                {
+                    TRACE("Nothing to be done after clipping.\n");
+                    return WINED3D_OK;
                 }
-                d3dfmt_convert_surface(surface->resource.allocatedMemory, mem, pitch,
-                        width, height, outpitch, convert, surface);
-            }
-            else
-            {
-                mem = surface->resource.allocatedMemory;
             }
-
-            /* Make sure the correct pitch is used */
-            ENTER_GL();
-            glPixelStorei(GL_UNPACK_ROW_LENGTH, width);
-            LEAVE_GL();
-
-            if (mem || (surface->flags & SFLAG_PBO))
-                surface_upload_data(surface, gl_info, &format, srgb, mem);
-
-            /* Restore the default pitch */
-            ENTER_GL();
-            glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-            LEAVE_GL();
-
-            if (context) context_release(context);
-
-            /* Don't delete PBO memory */
-            if ((mem != surface->resource.allocatedMemory) && !(surface->flags & SFLAG_PBO))
-                HeapFree(GetProcessHeap(), 0, mem);
         }
     }
-
-    if (!rect)
+    else
     {
-        surface->flags |= flag;
-
-        if (flag != SFLAG_INSYSMEM && (surface->flags & SFLAG_INSYSMEM))
-            surface_evict_sysmem(surface);
+        xdst.left = 0;
+        xdst.top = 0;
+        xdst.right = dst_surface->resource.width;
+        xdst.bottom = dst_surface->resource.height;
     }
 
-    if (in_fbo && (surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)))
+    if (src_surface == dst_surface)
     {
-        /* With ORM_FBO, SFLAG_INTEXTURE and SFLAG_INDRAWABLE are the same for offscreen targets. */
-        surface->flags |= (SFLAG_INTEXTURE | SFLAG_INDRAWABLE);
+        IWineD3DSurface_Map((IWineD3DSurface *)dst_surface, &dlock, NULL, 0);
+        slock = dlock;
+        src_format = dst_surface->resource.format;
+        dst_format = src_format;
     }
-
-    if (surface->flags & (SFLAG_INTEXTURE | SFLAG_INSRGBTEX)
-            && gl_info->supported[EXT_TEXTURE_SRGB_DECODE])
+    else
     {
-        surface->flags |= (SFLAG_INTEXTURE | SFLAG_INSRGBTEX);
+        dst_format = dst_surface->resource.format;
+        if (src_surface)
+        {
+            if (dst_surface->resource.format->id != src_surface->resource.format->id)
+            {
+                src_surface = surface_convert_format(src_surface, dst_format->id);
+                if (!src_surface)
+                {
+                    /* The conv function writes a FIXME */
+                    WARN("Cannot convert source surface format to dest format.\n");
+                    goto release;
+                }
+            }
+            IWineD3DSurface_Map((IWineD3DSurface *)src_surface, &slock, NULL, WINED3DLOCK_READONLY);
+            src_format = src_surface->resource.format;
+        }
+        else
+        {
+            src_format = dst_format;
+        }
+        if (dst_rect)
+            IWineD3DSurface_Map((IWineD3DSurface *)dst_surface, &dlock, &xdst, 0);
+        else
+            IWineD3DSurface_Map((IWineD3DSurface *)dst_surface, &dlock, NULL, 0);
     }
 
-    return WINED3D_OK;
-}
-
-BOOL surface_is_offscreen(IWineD3DSurfaceImpl *surface)
-{
-    struct wined3d_swapchain *swapchain = surface->container.u.swapchain;
-
-    /* Not on a swapchain - must be offscreen */
-    if (surface->container.type != WINED3D_CONTAINER_SWAPCHAIN) return TRUE;
-
-    /* The front buffer is always onscreen */
-    if (surface == swapchain->front_buffer) return FALSE;
-
-    /* If the swapchain is rendered to an FBO, the backbuffer is
-     * offscreen, otherwise onscreen */
-    return swapchain->render_to_fbo;
-}
-
-const IWineD3DSurfaceVtbl IWineD3DSurface_Vtbl =
-{
-    /* IUnknown */
-    IWineD3DBaseSurfaceImpl_QueryInterface,
-    IWineD3DBaseSurfaceImpl_AddRef,
-    IWineD3DBaseSurfaceImpl_Release,
-    /* IWineD3DResource */
-    IWineD3DBaseSurfaceImpl_GetParent,
-    IWineD3DBaseSurfaceImpl_SetPrivateData,
-    IWineD3DBaseSurfaceImpl_GetPrivateData,
-    IWineD3DBaseSurfaceImpl_FreePrivateData,
-    IWineD3DBaseSurfaceImpl_SetPriority,
-    IWineD3DBaseSurfaceImpl_GetPriority,
-    IWineD3DBaseSurfaceImpl_PreLoad,
-    /* IWineD3DSurface */
-    IWineD3DBaseSurfaceImpl_GetResource,
-    IWineD3DBaseSurfaceImpl_Map,
-    IWineD3DBaseSurfaceImpl_Unmap,
-    IWineD3DBaseSurfaceImpl_GetDC,
-    IWineD3DBaseSurfaceImpl_ReleaseDC,
-    IWineD3DBaseSurfaceImpl_Flip,
-    IWineD3DSurfaceImpl_Blt,
-    IWineD3DBaseSurfaceImpl_GetBltStatus,
-    IWineD3DBaseSurfaceImpl_GetFlipStatus,
-    IWineD3DBaseSurfaceImpl_IsLost,
-    IWineD3DBaseSurfaceImpl_Restore,
-    IWineD3DBaseSurfaceImpl_BltFast,
-    IWineD3DBaseSurfaceImpl_GetPalette,
-    IWineD3DBaseSurfaceImpl_SetPalette,
-    IWineD3DBaseSurfaceImpl_SetColorKey,
-    IWineD3DBaseSurfaceImpl_GetPitch,
-    IWineD3DBaseSurfaceImpl_SetMem,
-    IWineD3DBaseSurfaceImpl_SetOverlayPosition,
-    IWineD3DBaseSurfaceImpl_GetOverlayPosition,
-    IWineD3DBaseSurfaceImpl_UpdateOverlayZOrder,
-    IWineD3DBaseSurfaceImpl_UpdateOverlay,
-    IWineD3DBaseSurfaceImpl_SetClipper,
-    IWineD3DBaseSurfaceImpl_GetClipper,
-    /* Internal use: */
-    IWineD3DBaseSurfaceImpl_SetFormat,
-};
-
-static HRESULT ffp_blit_alloc(IWineD3DDeviceImpl *device) { return WINED3D_OK; }
-/* Context activation is done by the caller. */
-static void ffp_blit_free(IWineD3DDeviceImpl *device) { }
-
-/* This function is used in case of 8bit paletted textures using GL_EXT_paletted_texture */
-/* Context activation is done by the caller. */
-static void ffp_blit_p8_upload_palette(IWineD3DSurfaceImpl *surface, const struct wined3d_gl_info *gl_info)
-{
-    BYTE table[256][4];
-    BOOL colorkey_active = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
+    if (!fx || !(fx->dwDDFX)) flags &= ~WINEDDBLT_DDFX;
 
-    d3dfmt_p8_init_palette(surface, table, colorkey_active);
+    if (src_format->flags & dst_format->flags & WINED3DFMT_FLAG_FOURCC)
+    {
+        if (!dst_rect || src_surface == dst_surface)
+        {
+            memcpy(dlock.pBits, slock.pBits, dst_surface->resource.size);
+            goto release;
+        }
+    }
 
-    TRACE("Using GL_EXT_PALETTED_TEXTURE for 8-bit paletted texture support\n");
-    ENTER_GL();
-    GL_EXTCALL(glColorTableEXT(surface->texture_target, GL_RGBA, 256, GL_RGBA, GL_UNSIGNED_BYTE, table));
-    LEAVE_GL();
-}
+    bpp = dst_surface->resource.format->byte_count;
+    srcheight = xsrc.bottom - xsrc.top;
+    srcwidth = xsrc.right - xsrc.left;
+    dstheight = xdst.bottom - xdst.top;
+    dstwidth = xdst.right - xdst.left;
+    width = (xdst.right - xdst.left) * bpp;
 
-/* Context activation is done by the caller. */
-static HRESULT ffp_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, IWineD3DSurfaceImpl *surface)
-{
-    enum complex_fixup fixup = get_complex_fixup(surface->resource.format->color_fixup);
+    if (dst_rect && src_surface != dst_surface)
+        dbuf = dlock.pBits;
+    else
+        dbuf = (BYTE*)dlock.pBits+(xdst.top*dlock.Pitch)+(xdst.left*bpp);
 
-    /* When EXT_PALETTED_TEXTURE is around, palette conversion is done by the GPU
-     * else the surface is converted in software at upload time in LoadLocation.
-     */
-    if(fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
-        ffp_blit_p8_upload_palette(surface, gl_info);
+    if (flags & WINEDDBLT_WAIT)
+    {
+        flags &= ~WINEDDBLT_WAIT;
+    }
+    if (flags & WINEDDBLT_ASYNC)
+    {
+        static BOOL displayed = FALSE;
+        if (!displayed)
+            FIXME("Can't handle WINEDDBLT_ASYNC flag right now.\n");
+        displayed = TRUE;
+        flags &= ~WINEDDBLT_ASYNC;
+    }
+    if (flags & WINEDDBLT_DONOTWAIT)
+    {
+        /* WINEDDBLT_DONOTWAIT appeared in DX7 */
+        static BOOL displayed = FALSE;
+        if (!displayed)
+            FIXME("Can't handle WINEDDBLT_DONOTWAIT flag right now.\n");
+        displayed = TRUE;
+        flags &= ~WINEDDBLT_DONOTWAIT;
+    }
 
-    ENTER_GL();
-    glEnable(surface->texture_target);
-    checkGLcall("glEnable(surface->texture_target)");
-    LEAVE_GL();
-    return WINED3D_OK;
-}
+    /* First, all the 'source-less' blits */
+    if (flags & WINEDDBLT_COLORFILL)
+    {
+        hr = _Blt_ColorFill(dbuf, dstwidth, dstheight, bpp, dlock.Pitch, fx->u5.dwFillColor);
+        flags &= ~WINEDDBLT_COLORFILL;
+    }
 
-/* Context activation is done by the caller. */
-static void ffp_blit_unset(const struct wined3d_gl_info *gl_info)
-{
-    ENTER_GL();
-    glDisable(GL_TEXTURE_2D);
-    checkGLcall("glDisable(GL_TEXTURE_2D)");
-    if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
+    if (flags & WINEDDBLT_DEPTHFILL)
     {
-        glDisable(GL_TEXTURE_CUBE_MAP_ARB);
-        checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
+        FIXME("DDBLT_DEPTHFILL needs to be implemented!\n");
+    }
+    if (flags & WINEDDBLT_ROP)
+    {
+        /* Catch some degenerate cases here. */
+        switch (fx->dwROP)
+        {
+            case BLACKNESS:
+                hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,0);
+                break;
+            case 0xAA0029: /* No-op */
+                break;
+            case WHITENESS:
+                hr = _Blt_ColorFill(dbuf,dstwidth,dstheight,bpp,dlock.Pitch,~0);
+                break;
+            case SRCCOPY: /* Well, we do that below? */
+                break;
+            default:
+                FIXME("Unsupported raster op: %08x Pattern: %p\n", fx->dwROP, fx->u5.lpDDSPattern);
+                goto error;
+        }
+        flags &= ~WINEDDBLT_ROP;
     }
-    if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
+    if (flags & WINEDDBLT_DDROPS)
     {
-        glDisable(GL_TEXTURE_RECTANGLE_ARB);
-        checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
+        FIXME("\tDdraw Raster Ops: %08x Pattern: %p\n", fx->dwDDROP, fx->u5.lpDDSPattern);
     }
-    LEAVE_GL();
-}
+    /* Now the 'with source' blits. */
+    if (src_surface)
+    {
+        const BYTE *sbase;
+        int sx, xinc, sy, yinc;
 
-static BOOL ffp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
-        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
-        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
-{
-    enum complex_fixup src_fixup;
+        if (!dstwidth || !dstheight) /* Hmm... stupid program? */
+            goto release;
 
-    switch (blit_op)
-    {
-        case WINED3D_BLIT_OP_COLOR_BLIT:
-            src_fixup = get_complex_fixup(src_format->color_fixup);
-            if (TRACE_ON(d3d_surface) && TRACE_ON(d3d))
-            {
-                TRACE("Checking support for fixup:\n");
-                dump_color_fixup_desc(src_format->color_fixup);
-            }
+        if (filter != WINED3DTEXF_NONE && filter != WINED3DTEXF_POINT
+                && (srcwidth != dstwidth || srcheight != dstheight))
+        {
+            /* Can happen when d3d9 apps do a StretchRect() call which isn't handled in GL. */
+            FIXME("Filter %s not supported in software blit.\n", debug_d3dtexturefiltertype(filter));
+        }
 
-            if (!is_identity_fixup(dst_format->color_fixup))
+        sbase = (BYTE*)slock.pBits+(xsrc.top*slock.Pitch)+xsrc.left*bpp;
+        xinc = (srcwidth << 16) / dstwidth;
+        yinc = (srcheight << 16) / dstheight;
+
+        if (!flags)
+        {
+            /* No effects, we can cheat here. */
+            if (dstwidth == srcwidth)
             {
-                TRACE("Destination fixups are not supported\n");
-                return FALSE;
+                if (dstheight == srcheight)
+                {
+                    /* No stretching in either direction. This needs to be as
+                     * fast as possible. */
+                    sbuf = sbase;
+
+                    /* Check for overlapping surfaces. */
+                    if (src_surface != dst_surface || xdst.top < xsrc.top
+                            || xdst.right <= xsrc.left || xsrc.right <= xdst.left)
+                    {
+                        /* No overlap, or dst above src, so copy from top downwards. */
+                        for (y = 0; y < dstheight; ++y)
+                        {
+                            memcpy(dbuf, sbuf, width);
+                            sbuf += slock.Pitch;
+                            dbuf += dlock.Pitch;
+                        }
+                    }
+                    else if (xdst.top > xsrc.top)
+                    {
+                        /* Copy from bottom upwards. */
+                        sbuf += (slock.Pitch*dstheight);
+                        dbuf += (dlock.Pitch*dstheight);
+                        for (y = 0; y < dstheight; ++y)
+                        {
+                            sbuf -= slock.Pitch;
+                            dbuf -= dlock.Pitch;
+                            memcpy(dbuf, sbuf, width);
+                        }
+                    }
+                    else
+                    {
+                        /* Src and dst overlapping on the same line, use memmove. */
+                        for (y = 0; y < dstheight; ++y)
+                        {
+                            memmove(dbuf, sbuf, width);
+                            sbuf += slock.Pitch;
+                            dbuf += dlock.Pitch;
+                        }
+                    }
+                }
+                else
+                {
+                    /* Stretching in y direction only. */
+                    for (y = sy = 0; y < dstheight; ++y, sy += yinc)
+                    {
+                        sbuf = sbase + (sy >> 16) * slock.Pitch;
+                        memcpy(dbuf, sbuf, width);
+                        dbuf += dlock.Pitch;
+                    }
+                }
             }
+            else
+            {
+                /* Stretching in X direction. */
+                int last_sy = -1;
+                for (y = sy = 0; y < dstheight; ++y, sy += yinc)
+                {
+                    sbuf = sbase + (sy >> 16) * slock.Pitch;
 
-            if (src_fixup == COMPLEX_FIXUP_P8 && gl_info->supported[EXT_PALETTED_TEXTURE])
+                    if ((sy >> 16) == (last_sy >> 16))
+                    {
+                        /* This source row is the same as last source row -
+                         * Copy the already stretched row. */
+                        memcpy(dbuf, dbuf - dlock.Pitch, width);
+                    }
+                    else
+                    {
+#define STRETCH_ROW(type) \
+do { \
+    const type *s = (const type *)sbuf; \
+    type *d = (type *)dbuf; \
+    for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
+        d[x] = s[sx >> 16]; \
+} while(0)
+
+                        switch(bpp)
+                        {
+                            case 1:
+                                STRETCH_ROW(BYTE);
+                                break;
+                            case 2:
+                                STRETCH_ROW(WORD);
+                                break;
+                            case 4:
+                                STRETCH_ROW(DWORD);
+                                break;
+                            case 3:
+                            {
+                                const BYTE *s;
+                                BYTE *d = dbuf;
+                                for (x = sx = 0; x < dstwidth; x++, sx+= xinc)
+                                {
+                                    DWORD pixel;
+
+                                    s = sbuf + 3 * (sx >> 16);
+                                    pixel = s[0] | (s[1] << 8) | (s[2] << 16);
+                                    d[0] = (pixel      ) & 0xff;
+                                    d[1] = (pixel >>  8) & 0xff;
+                                    d[2] = (pixel >> 16) & 0xff;
+                                    d += 3;
+                                }
+                                break;
+                            }
+                            default:
+                                FIXME("Stretched blit not implemented for bpp %u!\n", bpp * 8);
+                                hr = WINED3DERR_NOTAVAILABLE;
+                                goto error;
+                        }
+#undef STRETCH_ROW
+                    }
+                    dbuf += dlock.Pitch;
+                    last_sy = sy;
+                }
+            }
+        }
+        else
+        {
+            LONG dstyinc = dlock.Pitch, dstxinc = bpp;
+            DWORD keylow = 0xFFFFFFFF, keyhigh = 0, keymask = 0xFFFFFFFF;
+            DWORD destkeylow = 0x0, destkeyhigh = 0xFFFFFFFF, destkeymask = 0xFFFFFFFF;
+            if (flags & (WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE))
             {
-                TRACE("P8 fixup supported\n");
-                return TRUE;
+                /* The color keying flags are checked for correctness in ddraw */
+                if (flags & WINEDDBLT_KEYSRC)
+                {
+                    keylow  = src_surface->SrcBltCKey.dwColorSpaceLowValue;
+                    keyhigh = src_surface->SrcBltCKey.dwColorSpaceHighValue;
+                }
+                else if (flags & WINEDDBLT_KEYSRCOVERRIDE)
+                {
+                    keylow = fx->ddckSrcColorkey.dwColorSpaceLowValue;
+                    keyhigh = fx->ddckSrcColorkey.dwColorSpaceHighValue;
+                }
+
+                if (flags & WINEDDBLT_KEYDEST)
+                {
+                    /* Destination color keys are taken from the source surface! */
+                    destkeylow = src_surface->DestBltCKey.dwColorSpaceLowValue;
+                    destkeyhigh = src_surface->DestBltCKey.dwColorSpaceHighValue;
+                }
+                else if (flags & WINEDDBLT_KEYDESTOVERRIDE)
+                {
+                    destkeylow = fx->ddckDestColorkey.dwColorSpaceLowValue;
+                    destkeyhigh = fx->ddckDestColorkey.dwColorSpaceHighValue;
+                }
+
+                if (bpp == 1)
+                {
+                    keymask = 0xff;
+                }
+                else
+                {
+                    keymask = src_format->red_mask
+                            | src_format->green_mask
+                            | src_format->blue_mask;
+                }
+                flags &= ~(WINEDDBLT_KEYSRC | WINEDDBLT_KEYDEST | WINEDDBLT_KEYSRCOVERRIDE | WINEDDBLT_KEYDESTOVERRIDE);
             }
 
-            /* We only support identity conversions. */
-            if (is_identity_fixup(src_format->color_fixup))
+            if (flags & WINEDDBLT_DDFX)
             {
-                TRACE("[OK]\n");
-                return TRUE;
+                BYTE *dTopLeft, *dTopRight, *dBottomLeft, *dBottomRight, *tmp;
+                LONG tmpxy;
+                dTopLeft     = dbuf;
+                dTopRight    = dbuf + ((dstwidth - 1) * bpp);
+                dBottomLeft  = dTopLeft + ((dstheight - 1) * dlock.Pitch);
+                dBottomRight = dBottomLeft + ((dstwidth - 1) * bpp);
+
+                if (fx->dwDDFX & WINEDDBLTFX_ARITHSTRETCHY)
+                {
+                    /* I don't think we need to do anything about this flag */
+                    WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_ARITHSTRETCHY\n");
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_MIRRORLEFTRIGHT)
+                {
+                    tmp          = dTopRight;
+                    dTopRight    = dTopLeft;
+                    dTopLeft     = tmp;
+                    tmp          = dBottomRight;
+                    dBottomRight = dBottomLeft;
+                    dBottomLeft  = tmp;
+                    dstxinc = dstxinc * -1;
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_MIRRORUPDOWN)
+                {
+                    tmp          = dTopLeft;
+                    dTopLeft     = dBottomLeft;
+                    dBottomLeft  = tmp;
+                    tmp          = dTopRight;
+                    dTopRight    = dBottomRight;
+                    dBottomRight = tmp;
+                    dstyinc = dstyinc * -1;
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_NOTEARING)
+                {
+                    /* I don't think we need to do anything about this flag */
+                    WARN("flags=DDBLT_DDFX nothing done for WINEDDBLTFX_NOTEARING\n");
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_ROTATE180)
+                {
+                    tmp          = dBottomRight;
+                    dBottomRight = dTopLeft;
+                    dTopLeft     = tmp;
+                    tmp          = dBottomLeft;
+                    dBottomLeft  = dTopRight;
+                    dTopRight    = tmp;
+                    dstxinc = dstxinc * -1;
+                    dstyinc = dstyinc * -1;
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_ROTATE270)
+                {
+                    tmp          = dTopLeft;
+                    dTopLeft     = dBottomLeft;
+                    dBottomLeft  = dBottomRight;
+                    dBottomRight = dTopRight;
+                    dTopRight    = tmp;
+                    tmpxy   = dstxinc;
+                    dstxinc = dstyinc;
+                    dstyinc = tmpxy;
+                    dstxinc = dstxinc * -1;
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_ROTATE90)
+                {
+                    tmp          = dTopLeft;
+                    dTopLeft     = dTopRight;
+                    dTopRight    = dBottomRight;
+                    dBottomRight = dBottomLeft;
+                    dBottomLeft  = tmp;
+                    tmpxy   = dstxinc;
+                    dstxinc = dstyinc;
+                    dstyinc = tmpxy;
+                    dstyinc = dstyinc * -1;
+                }
+                if (fx->dwDDFX & WINEDDBLTFX_ZBUFFERBASEDEST)
+                {
+                    /* I don't think we need to do anything about this flag */
+                    WARN("flags=WINEDDBLT_DDFX nothing done for WINEDDBLTFX_ZBUFFERBASEDEST\n");
+                }
+                dbuf = dTopLeft;
+                flags &= ~(WINEDDBLT_DDFX);
             }
 
-            TRACE("[FAILED]\n");
-            return FALSE;
+#define COPY_COLORKEY_FX(type) \
+do { \
+    const type *s; \
+    type *d = (type *)dbuf, *dx, tmp; \
+    for (y = sy = 0; y < dstheight; ++y, sy += yinc) \
+    { \
+        s = (const type *)(sbase + (sy >> 16) * slock.Pitch); \
+        dx = d; \
+        for (x = sx = 0; x < dstwidth; ++x, sx += xinc) \
+        { \
+            tmp = s[sx >> 16]; \
+            if (((tmp & keymask) < keylow || (tmp & keymask) > keyhigh) \
+                    && ((dx[0] & destkeymask) >= destkeylow && (dx[0] & destkeymask) <= destkeyhigh)) \
+            { \
+                dx[0] = tmp; \
+            } \
+            dx = (type *)(((BYTE *)dx) + dstxinc); \
+        } \
+        d = (type *)(((BYTE *)d) + dstyinc); \
+    } \
+} while(0)
 
-        case WINED3D_BLIT_OP_COLOR_FILL:
-            if (!(dst_usage & WINED3DUSAGE_RENDERTARGET))
+            switch (bpp)
             {
-                TRACE("Color fill not supported\n");
-                return FALSE;
+                case 1:
+                    COPY_COLORKEY_FX(BYTE);
+                    break;
+                case 2:
+                    COPY_COLORKEY_FX(WORD);
+                    break;
+                case 4:
+                    COPY_COLORKEY_FX(DWORD);
+                    break;
+                case 3:
+                {
+                    const BYTE *s;
+                    BYTE *d = dbuf, *dx;
+                    for (y = sy = 0; y < dstheight; ++y, sy += yinc)
+                    {
+                        sbuf = sbase + (sy >> 16) * slock.Pitch;
+                        dx = d;
+                        for (x = sx = 0; x < dstwidth; ++x, sx+= xinc)
+                        {
+                            DWORD pixel, dpixel = 0;
+                            s = sbuf + 3 * (sx>>16);
+                            pixel = s[0] | (s[1] << 8) | (s[2] << 16);
+                            dpixel = dx[0] | (dx[1] << 8 ) | (dx[2] << 16);
+                            if (((pixel & keymask) < keylow || (pixel & keymask) > keyhigh)
+                                    && ((dpixel & keymask) >= destkeylow || (dpixel & keymask) <= keyhigh))
+                            {
+                                dx[0] = (pixel      ) & 0xff;
+                                dx[1] = (pixel >>  8) & 0xff;
+                                dx[2] = (pixel >> 16) & 0xff;
+                            }
+                            dx += dstxinc;
+                        }
+                        d += dstyinc;
+                    }
+                    break;
+                }
+                default:
+                    FIXME("%s color-keyed blit not implemented for bpp %u!\n",
+                          (flags & WINEDDBLT_KEYSRC) ? "Source" : "Destination", bpp * 8);
+                    hr = WINED3DERR_NOTAVAILABLE;
+                    goto error;
+#undef COPY_COLORKEY_FX
             }
-
-            return TRUE;
-
-        case WINED3D_BLIT_OP_DEPTH_FILL:
-            return TRUE;
-
-        default:
-            TRACE("Unsupported blit_op=%d\n", blit_op);
-            return FALSE;
+        }
     }
-}
-
-/* Do not call while under the GL lock. */
-static HRESULT ffp_blit_color_fill(IWineD3DDeviceImpl *device, IWineD3DSurfaceImpl *dst_surface,
-        const RECT *dst_rect, const WINED3DCOLORVALUE *color)
-{
-    const RECT draw_rect = {0, 0, dst_surface->resource.width, dst_surface->resource.height};
-
-    return device_clear_render_targets(device, 1, &dst_surface, NULL,
-            1, dst_rect, &draw_rect, WINED3DCLEAR_TARGET, color, 0.0f, 0);
-}
-
-/* Do not call while under the GL lock. */
-static HRESULT ffp_blit_depth_fill(IWineD3DDeviceImpl *device,
-        IWineD3DSurfaceImpl *surface, const RECT *rect, float depth)
-{
-    const RECT draw_rect = {0, 0, surface->resource.width, surface->resource.height};
-
-    return device_clear_render_targets(device, 0, NULL, surface,
-            1, rect, &draw_rect, WINED3DCLEAR_ZBUFFER, 0, depth, 0);
-}
-
-const struct blit_shader ffp_blit =  {
-    ffp_blit_alloc,
-    ffp_blit_free,
-    ffp_blit_set,
-    ffp_blit_unset,
-    ffp_blit_supported,
-    ffp_blit_color_fill,
-    ffp_blit_depth_fill,
-};
-
-static HRESULT cpu_blit_alloc(IWineD3DDeviceImpl *device)
-{
-    return WINED3D_OK;
-}
-
-/* Context activation is done by the caller. */
-static void cpu_blit_free(IWineD3DDeviceImpl *device)
-{
-}
-
-/* Context activation is done by the caller. */
-static HRESULT cpu_blit_set(void *blit_priv, const struct wined3d_gl_info *gl_info, IWineD3DSurfaceImpl *surface)
-{
-    return WINED3D_OK;
-}
-
-/* Context activation is done by the caller. */
-static void cpu_blit_unset(const struct wined3d_gl_info *gl_info)
-{
-}
 
-static BOOL cpu_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
-        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
-        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
-{
-    if (blit_op == WINED3D_BLIT_OP_COLOR_FILL)
+error:
+    if (flags && FIXME_ON(d3d_surface))
     {
-        return TRUE;
+        FIXME("\tUnsupported flags: %#x.\n", flags);
     }
 
-    return FALSE;
+release:
+    IWineD3DSurface_Unmap((IWineD3DSurface *)dst_surface);
+    if (src_surface && src_surface != dst_surface)
+        IWineD3DSurface_Unmap((IWineD3DSurface *)src_surface);
+    /* Release the converted surface, if any. */
+    if (src_surface && src_surface != orig_src)
+        IWineD3DSurface_Release((IWineD3DSurface *)src_surface);
+
+    return hr;
 }
 
 static HRESULT surface_cpu_bltfast(IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
@@ -7350,87 +7408,6 @@ const struct blit_shader cpu_blit =  {
     cpu_blit_depth_fill,
 };
 
-static BOOL fbo_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
-        const RECT *src_rect, DWORD src_usage, WINED3DPOOL src_pool, const struct wined3d_format *src_format,
-        const RECT *dst_rect, DWORD dst_usage, WINED3DPOOL dst_pool, const struct wined3d_format *dst_format)
-{
-    if ((wined3d_settings.offscreen_rendering_mode != ORM_FBO) || !gl_info->fbo_ops.glBlitFramebuffer)
-        return FALSE;
-
-    /* Source and/or destination need to be on the GL side */
-    if (src_pool == WINED3DPOOL_SYSTEMMEM || dst_pool == WINED3DPOOL_SYSTEMMEM)
-        return FALSE;
-
-    switch (blit_op)
-    {
-        case WINED3D_BLIT_OP_COLOR_BLIT:
-            if (!((src_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (src_usage & WINED3DUSAGE_RENDERTARGET)))
-                return FALSE;
-            if (!((dst_format->flags & WINED3DFMT_FLAG_FBO_ATTACHABLE) || (dst_usage & WINED3DUSAGE_RENDERTARGET)))
-                return FALSE;
-            break;
-
-        case WINED3D_BLIT_OP_DEPTH_BLIT:
-            if (!(src_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
-                return FALSE;
-            if (!(dst_format->flags & (WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL)))
-                return FALSE;
-            break;
-
-        default:
-            return FALSE;
-    }
-
-    if (!(src_format->id == dst_format->id
-            || (is_identity_fixup(src_format->color_fixup)
-            && is_identity_fixup(dst_format->color_fixup))))
-        return FALSE;
-
-    return TRUE;
-}
-
-static const IWineD3DSurfaceVtbl IWineGDISurface_Vtbl =
-{
-    /* IUnknown */
-    IWineD3DBaseSurfaceImpl_QueryInterface,
-    IWineD3DBaseSurfaceImpl_AddRef,
-    IWineD3DBaseSurfaceImpl_Release,
-    /* IWineD3DResource */
-    IWineD3DBaseSurfaceImpl_GetParent,
-    IWineD3DBaseSurfaceImpl_SetPrivateData,
-    IWineD3DBaseSurfaceImpl_GetPrivateData,
-    IWineD3DBaseSurfaceImpl_FreePrivateData,
-    IWineD3DBaseSurfaceImpl_SetPriority,
-    IWineD3DBaseSurfaceImpl_GetPriority,
-    IWineD3DBaseSurfaceImpl_PreLoad,
-    /* IWineD3DSurface */
-    IWineD3DBaseSurfaceImpl_GetResource,
-    IWineD3DBaseSurfaceImpl_Map,
-    IWineD3DBaseSurfaceImpl_Unmap,
-    IWineD3DBaseSurfaceImpl_GetDC,
-    IWineD3DBaseSurfaceImpl_ReleaseDC,
-    IWineD3DBaseSurfaceImpl_Flip,
-    IWineD3DBaseSurfaceImpl_Blt,
-    IWineD3DBaseSurfaceImpl_GetBltStatus,
-    IWineD3DBaseSurfaceImpl_GetFlipStatus,
-    IWineD3DBaseSurfaceImpl_IsLost,
-    IWineD3DBaseSurfaceImpl_Restore,
-    IWineD3DBaseSurfaceImpl_BltFast,
-    IWineD3DBaseSurfaceImpl_GetPalette,
-    IWineD3DBaseSurfaceImpl_SetPalette,
-    IWineD3DBaseSurfaceImpl_SetColorKey,
-    IWineD3DBaseSurfaceImpl_GetPitch,
-    IWineD3DBaseSurfaceImpl_SetMem,
-    IWineD3DBaseSurfaceImpl_SetOverlayPosition,
-    IWineD3DBaseSurfaceImpl_GetOverlayPosition,
-    IWineD3DBaseSurfaceImpl_UpdateOverlayZOrder,
-    IWineD3DBaseSurfaceImpl_UpdateOverlay,
-    IWineD3DBaseSurfaceImpl_SetClipper,
-    IWineD3DBaseSurfaceImpl_GetClipper,
-    /* Internal use: */
-    IWineD3DBaseSurfaceImpl_SetFormat,
-};
-
 HRESULT surface_init(IWineD3DSurfaceImpl *surface, WINED3DSURFTYPE surface_type, UINT alignment,
         UINT width, UINT height, UINT level, BOOL lockable, BOOL discard, WINED3DMULTISAMPLE_TYPE multisample_type,
         UINT multisample_quality, IWineD3DDeviceImpl *device, DWORD usage, enum wined3d_format_id format_id,
@@ -7492,17 +7469,15 @@ HRESULT surface_init(IWineD3DSurfaceImpl *surface, WINED3DSURFTYPE surface_type,
         return WINED3DERR_INVALIDCALL;
 
     surface->surface_type = surface_type;
+    surface->lpVtbl = &IWineD3DSurface_Vtbl;
 
-    /* Look at the implementation and set the correct Vtable. */
     switch (surface_type)
     {
         case SURFACE_OPENGL:
-            surface->lpVtbl = &IWineD3DSurface_Vtbl;
             surface->surface_ops = &surface_ops;
             break;
 
         case SURFACE_GDI:
-            surface->lpVtbl = &IWineGDISurface_Vtbl;
             surface->surface_ops = &gdi_surface_ops;
             break;
 
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h
index 1a702b5..e4127a3 100644
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@@ -2027,6 +2027,9 @@ struct wined3d_surface_ops
     void (*surface_unmap)(struct IWineD3DSurfaceImpl *surface);
     HRESULT (*surface_getdc)(struct IWineD3DSurfaceImpl *surface);
     HRESULT (*surface_flip)(struct IWineD3DSurfaceImpl *surface, struct IWineD3DSurfaceImpl *override);
+    HRESULT (*surface_blt)(struct IWineD3DSurfaceImpl *dst_surface, const RECT *dst_rect,
+            IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD flags,
+            const WINEDDBLTFX *fx, WINED3DTEXTUREFILTERTYPE filter);
     HRESULT (*surface_bltfast)(struct IWineD3DSurfaceImpl *dst_surface, DWORD dst_x, DWORD dst_y,
             IWineD3DSurfaceImpl *src_surface, const RECT *src_rect, DWORD trans);
     HRESULT (*surface_set_mem)(struct IWineD3DSurfaceImpl *surface, void *mem);
-- 
1.7.3.4




More information about the wine-patches mailing list