[PATCH v3 1/2] vkd3d: Allocate one large buffer for a heap and offset into it.

Philip Rebohle philip.rebohle at tu-dortmund.de
Mon Nov 4 07:23:38 CST 2019


It looks like Deus Ex Mankind Divided actually relies on the exact GPU
virtual address (VA) behaviour; without this patch it crashes reliably.
There will be further issues to sort out, however.

- Philip

Am 23.10.19 um 12:09 schrieb Hans-Kristian Arntzen:
> Greatly reduces the number of VA allocations we have to make, makes the
> returned VAs more sensible, and better matches the VAs we see on native drivers.
> 
> D3D12 usage flags for buffers seem generic enough that there
> is no obvious benefit to placing smaller VkBuffers on top of
> VkDeviceMemory.
> 
> Ideally, physical_buffer_address would be used here, but this works as a
> good fallback if that path is added later.
> 
> With this patch and the previous VA optimization, I'm observing a 2.0-2.5%
> FPS uplift on SOTTR when CPU-bound.
> 
> Signed-off-by: Hans-Kristian Arntzen <post at arntzen-software.no>
> ---
>   libs/vkd3d/command.c       |  15 +++--
>   libs/vkd3d/resource.c      | 135 ++++++++++++++++++++++++++++++++-----
>   libs/vkd3d/vkd3d_private.h |   2 +
>   3 files changed, 130 insertions(+), 22 deletions(-)
> 
> diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
> index d420863..4f74fa4 100644
> --- a/libs/vkd3d/command.c
> +++ b/libs/vkd3d/command.c
> @@ -3031,8 +3031,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics
>   
>       d3d12_command_list_end_current_render_pass(list);
>   
> -    buffer_copy.srcOffset = src_offset;
> -    buffer_copy.dstOffset = dst_offset;
> +    buffer_copy.srcOffset = src_offset + src_resource->heap_offset;
> +    buffer_copy.dstOffset = dst_offset + dst_resource->heap_offset;
>       buffer_copy.size = byte_count;
>   
>       VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
> @@ -3341,6 +3341,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic
>   
>           vk_image_buffer_copy_from_d3d12(&buffer_image_copy, &dst->u.PlacedFootprint,
>                   src->u.SubresourceIndex, &src_resource->desc, dst_format, src_box, dst_x, dst_y, dst_z);
> +        buffer_image_copy.bufferOffset += dst_resource->heap_offset;
>           VK_CALL(vkCmdCopyImageToBuffer(list->vk_command_buffer,
>                   src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
>                   dst_resource->u.vk_buffer, 1, &buffer_image_copy));
> @@ -3370,6 +3371,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic
>   
>           vk_buffer_image_copy_from_d3d12(&buffer_image_copy, &src->u.PlacedFootprint,
>                   dst->u.SubresourceIndex, &dst_resource->desc, src_format, src_box, dst_x, dst_y, dst_z);
> +        buffer_image_copy.bufferOffset += src_resource->heap_offset;
>           VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer,
>                   src_resource->u.vk_buffer, dst_resource->u.vk_image,
>                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy));
> @@ -3450,8 +3452,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm
>           assert(d3d12_resource_is_buffer(src_resource));
>           assert(src_resource->desc.Width == dst_resource->desc.Width);
>   
> -        vk_buffer_copy.srcOffset = 0;
> -        vk_buffer_copy.dstOffset = 0;
> +        vk_buffer_copy.srcOffset = src_resource->heap_offset;
> +        vk_buffer_copy.dstOffset = dst_resource->heap_offset;
>           vk_buffer_copy.size = dst_resource->desc.Width;
>           VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
>                   src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy));
> @@ -3842,8 +3844,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC
>               vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
>               vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
>               vk_barrier.buffer = resource->u.vk_buffer;
> -            vk_barrier.offset = 0;
> -            vk_barrier.size = VK_WHOLE_SIZE;
> +            vk_barrier.offset = resource->heap_offset;
> +            vk_barrier.size = resource->desc.Width;
>   
>               VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0,
>                       0, NULL, 1, &vk_barrier, 0, NULL));
> @@ -4728,6 +4730,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID
>               return;
>           }
>   
> +        /* Offset from heap with placed buffers is already applied in CPU descriptor. */
>           VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, resource_impl->u.vk_buffer,
>                   cpu_descriptor->uav.buffer.offset, cpu_descriptor->uav.buffer.size, values[0]));
>   
> diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c
> index 6c9564b..c289fa8 100644
> --- a/libs/vkd3d/resource.c
> +++ b/libs/vkd3d/resource.c
> @@ -292,6 +292,8 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface)
>       return refcount;
>   }
>   
> +static ULONG d3d12_resource_decref(struct d3d12_resource *resource);
> +
>   static void d3d12_heap_destroy(struct d3d12_heap *heap)
>   {
>       struct d3d12_device *device = heap->device;
> @@ -299,6 +301,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
>   
>       TRACE("Destroying heap %p.\n", heap);
>   
> +    if (heap->buffer_resource)
> +        d3d12_resource_decref(heap->buffer_resource);
> +
>       vkd3d_private_store_destroy(&heap->private_store);
>   
>       VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL));
> @@ -539,6 +544,12 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1
>       return S_OK;
>   }
>   
> +static HRESULT d3d12_resource_create(struct d3d12_device *device,
> +                                     const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
> +                                     const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> +                                     const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed,
> +                                     struct d3d12_resource **resource);
> +
>   static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>           struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource)
>   {
> @@ -546,6 +557,9 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>       VkDeviceSize vk_memory_size;
>       HRESULT hr;
>       int rc;
> +    bool buffers_allowed;
> +    D3D12_RESOURCE_DESC resource_desc;
> +    D3D12_RESOURCE_STATES initial_resource_state;
>   
>       heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl;
>       heap->refcount = 1;
> @@ -556,6 +570,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>   
>       heap->map_ptr = NULL;
>       heap->map_count = 0;
> +    heap->buffer_resource = NULL;
>   
>       if (!heap->desc.Properties.CreationNodeMask)
>           heap->desc.Properties.CreationNodeMask = 1;
> @@ -583,6 +598,53 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>           return hr;
>       }
>   
> +    buffers_allowed = !(heap->desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS);
> +    if (buffers_allowed && !resource)
> +    {
> +        /* Create a single omnipotent buffer which fills the entire heap.
> +         * Whenever we place buffer resources on this heap, we'll just offset this VkBuffer.
> +         * This allows us to keep VA space somewhat sane, and keeps number of (limited) VA allocations down.
> +         * One possible downside is that the buffer might be slightly slower to access,
> +         * but D3D12 has very lenient usage flags for buffers. */
> +
> +        memset(&resource_desc, 0, sizeof(resource_desc));
> +        resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
> +        resource_desc.Width = desc->SizeInBytes;
> +        resource_desc.Height = 1;
> +        resource_desc.DepthOrArraySize = 1;
> +        resource_desc.MipLevels = 1;
> +        resource_desc.SampleDesc.Count = 1;
> +        resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
> +
> +        switch (desc->Properties.Type)
> +        {
> +        case D3D12_HEAP_TYPE_UPLOAD:
> +            initial_resource_state = D3D12_RESOURCE_STATE_GENERIC_READ;
> +            break;
> +
> +        case D3D12_HEAP_TYPE_READBACK:
> +            initial_resource_state = D3D12_RESOURCE_STATE_COPY_DEST;
> +            break;
> +
> +        default:
> +            /* Upload and readback heaps do not allow UAV access, only enable this flag for other heaps. */
> +            resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
> +            initial_resource_state = D3D12_RESOURCE_STATE_COMMON;
> +            break;
> +        }
> +
> +        if (FAILED(hr = d3d12_resource_create(device, &desc->Properties, desc->Flags,
> +                                              &resource_desc, initial_resource_state,
> +                                              NULL, false, &heap->buffer_resource)))
> +        {
> +            heap->buffer_resource = NULL;
> +            return hr;
> +        }
> +        /* This internal resource should not own a reference on the device.
> +         * d3d12_resource_create takes a reference on the device. */
> +        d3d12_device_release(device);
> +    }
> +
>       if (resource)
>       {
>           if (d3d12_resource_is_buffer(resource))
> @@ -600,8 +662,16 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>   
>           heap->desc.SizeInBytes = vk_memory_size;
>       }
> +    else if (heap->buffer_resource)
> +    {
> +        hr = vkd3d_allocate_buffer_memory(device, heap->buffer_resource->u.vk_buffer,
> +                                          &heap->desc.Properties, heap->desc.Flags,
> +                                          &heap->vk_memory, &heap->vk_memory_type, &vk_memory_size);
> +    }
>       else
>       {
> +        /* Allocate generic memory which should hopefully match up with whatever resources
> +         * we want to place here. */
>           memory_requirements.size = heap->desc.SizeInBytes;
>           memory_requirements.alignment = heap->desc.Alignment;
>           memory_requirements.memoryTypeBits = ~(uint32_t)0;
> @@ -614,6 +684,11 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>       {
>           vkd3d_private_store_destroy(&heap->private_store);
>           pthread_mutex_destroy(&heap->mutex);
> +        if (heap->buffer_resource)
> +        {
> +            d3d12_resource_decref(heap->buffer_resource);
> +            heap->buffer_resource = NULL;
> +        }
>           return hr;
>       }
>   
> @@ -1003,13 +1078,16 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12
>       if (resource->flags & VKD3D_RESOURCE_EXTERNAL)
>           return;
>   
> -    if (resource->gpu_address)
> -        vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
> +    if (!(resource->flags & VKD3D_RESOURCE_PLACED_BUFFER))
> +    {
> +        if (resource->gpu_address)
> +            vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
>   
> -    if (d3d12_resource_is_buffer(resource))
> -        VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
> -    else
> -        VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
> +        if (d3d12_resource_is_buffer(resource))
> +            VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
> +        else
> +            VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
> +    }
>   
>       if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP)
>           d3d12_heap_destroy(resource->heap);
> @@ -1669,7 +1747,7 @@ static bool d3d12_resource_validate_heap_properties(const struct d3d12_resource
>   static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12_device *device,
>           const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
>           const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> -        const D3D12_CLEAR_VALUE *optimized_clear_value)
> +        const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed)
>   {
>       HRESULT hr;
>   
> @@ -1699,6 +1777,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
>   
>       resource->gpu_address = 0;
>       resource->flags = 0;
> +    if (placed && d3d12_resource_is_buffer(resource))
> +        resource->flags |= VKD3D_RESOURCE_PLACED_BUFFER;
>   
>       if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc)))
>           return hr;
> @@ -1706,6 +1786,13 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
>       switch (desc->Dimension)
>       {
>           case D3D12_RESOURCE_DIMENSION_BUFFER:
> +            /* We'll inherit a VkBuffer reference from the heap with an implied offset. */
> +            if (placed)
> +            {
> +                resource->u.vk_buffer = VK_NULL_HANDLE;
> +                break;
> +            }
> +
>               if (FAILED(hr = vkd3d_create_buffer(device, heap_properties, heap_flags,
>                       &resource->desc, &resource->u.vk_buffer)))
>                   return hr;
> @@ -1755,7 +1842,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
>   static HRESULT d3d12_resource_create(struct d3d12_device *device,
>           const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
>           const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> -        const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource)
> +        const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed, struct d3d12_resource **resource)
>   {
>       struct d3d12_resource *object;
>       HRESULT hr;
> @@ -1764,7 +1851,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device,
>           return E_OUTOFMEMORY;
>   
>       if (FAILED(hr = d3d12_resource_init(object, device, heap_properties, heap_flags,
> -            desc, initial_state, optimized_clear_value)))
> +            desc, initial_state, optimized_clear_value, placed)))
>       {
>           vkd3d_free(object);
>           return hr;
> @@ -1806,7 +1893,7 @@ HRESULT d3d12_committed_resource_create(struct d3d12_device *device,
>       }
>   
>       if (FAILED(hr = d3d12_resource_create(device, heap_properties, heap_flags,
> -            desc, initial_state, optimized_clear_value, &object)))
> +            desc, initial_state, optimized_clear_value, false, &object)))
>           return hr;
>   
>       if (FAILED(hr = vkd3d_allocate_resource_memory(device, object, heap_properties, heap_flags)))
> @@ -1830,6 +1917,16 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device,
>       VkMemoryRequirements requirements;
>       VkResult vr;
>   
> +    if (resource->flags & VKD3D_RESOURCE_PLACED_BUFFER)
> +    {
> +        /* Just inherit the buffer from the heap. */
> +        resource->u.vk_buffer = heap->buffer_resource->u.vk_buffer;
> +        resource->heap = heap;
> +        resource->heap_offset = heap_offset;
> +        resource->gpu_address = heap->buffer_resource->gpu_address + heap_offset;
> +        return S_OK;
> +    }
> +
>       if (d3d12_resource_is_buffer(resource))
>           VK_CALL(vkGetBufferMemoryRequirements(vk_device, resource->u.vk_buffer, &requirements));
>       else
> @@ -1879,7 +1976,7 @@ HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_h
>       HRESULT hr;
>   
>       if (FAILED(hr = d3d12_resource_create(device, &heap->desc.Properties, heap->desc.Flags,
> -            desc, initial_state, optimized_clear_value, &object)))
> +            desc, initial_state, optimized_clear_value, true, &object)))
>           return hr;
>   
>       if (FAILED(hr = vkd3d_bind_heap_memory(device, object, heap, heap_offset)))
> @@ -1903,7 +2000,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device,
>       HRESULT hr;
>   
>       if (FAILED(hr = d3d12_resource_create(device, NULL, 0,
> -            desc, initial_state, optimized_clear_value, &object)))
> +            desc, initial_state, optimized_clear_value, false, &object)))
>           return hr;
>   
>       TRACE("Created reserved resource %p.\n", object);
> @@ -2205,7 +2302,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
>       assert(d3d12_resource_is_buffer(resource));
>   
>       return vkd3d_create_buffer_view(device, resource->u.vk_buffer,
> -            format, offset * element_size, size * element_size, view);
> +            format, resource->heap_offset + offset * element_size, size * element_size, view);
>   }
>   
>   static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components,
> @@ -2807,7 +2904,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
>   
>           format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
>           if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format,
> -                desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view))
> +                desc->u.Buffer.CounterOffsetInBytes + resource->heap_offset, sizeof(uint32_t), &view->vk_counter_view))
>           {
>               WARN("Failed to create counter buffer view.\n");
>               view->vk_counter_view = VK_NULL_HANDLE;
> @@ -2821,7 +2918,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
>       {
>           const struct vkd3d_format *format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
>   
> -        descriptor->uav.buffer.offset = desc->u.Buffer.FirstElement * format->byte_count;
> +        descriptor->uav.buffer.offset = desc->u.Buffer.FirstElement * format->byte_count + resource->heap_offset;
>           descriptor->uav.buffer.size = desc->u.Buffer.NumElements * format->byte_count;
>       }
>   }
> @@ -2913,12 +3010,18 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
>   {
>       const struct vkd3d_format *format;
>       struct d3d12_resource *resource;
> +    uint64_t range;
> +    uint64_t offset;
>   
>       format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
>       resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address);
>       assert(d3d12_resource_is_buffer(resource));
> +
> +    offset = gpu_address - resource->gpu_address;
> +    range = min(resource->desc.Width - offset, device->vk_info.device_limits.maxStorageBufferRange);
> +
>       return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format,
> -            gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view);
> +            offset, range, vk_buffer_view);
>   }
>   
>   /* samplers */
> diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
> index a5f7c81..2b1ae30 100644
> --- a/libs/vkd3d/vkd3d_private.h
> +++ b/libs/vkd3d/vkd3d_private.h
> @@ -380,6 +380,7 @@ struct d3d12_heap
>       unsigned int map_count;
>       uint32_t vk_memory_type;
>   
> +    struct d3d12_resource *buffer_resource;
>       struct d3d12_device *device;
>   
>       struct vkd3d_private_store private_store;
> @@ -394,6 +395,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) DECLSPEC_HIDDE
>   #define VKD3D_RESOURCE_EXTERNAL       0x00000004
>   #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008
>   #define VKD3D_RESOURCE_LINEAR_TILING  0x00000010
> +#define VKD3D_RESOURCE_PLACED_BUFFER  0x00000020
>   
>   /* ID3D12Resource */
>   struct d3d12_resource
> 



More information about the wine-devel mailing list