[PATCH v3 1/2] vkd3d: Allocate one large buffer for a heap and offset into it.
Philip Rebohle
philip.rebohle at tu-dortmund.de
Mon Nov 4 07:23:38 CST 2019
It looks like Deus Ex: Mankind Divided actually relies on the exact VA
behaviour; without this patch it crashes reliably. There will be further
issues to sort out, however.
- Philip
Am 23.10.19 um 12:09 schrieb Hans-Kristian Arntzen:
> Greatly reduces the number of VA allocations we have to make, makes the
> returned VAs more sensible, and better matches the VAs returned by native
> drivers.
>
> D3D12 usage flags for buffers seem generic enough that there
> is no obvious benefit to placing smaller VkBuffers on top of
> VkDeviceMemory.
>
> Ideally, physical_buffer_address is used here, but this works as a
> good fallback if that path is added later.
>
> With this patch and previous VA optimization, I'm observing a 2.0-2.5%
> FPS uplift on SOTTR when CPU bound.
>
> Signed-off-by: Hans-Kristian Arntzen <post at arntzen-software.no>
> ---
> libs/vkd3d/command.c | 15 +++--
> libs/vkd3d/resource.c | 135 ++++++++++++++++++++++++++++++++-----
> libs/vkd3d/vkd3d_private.h | 2 +
> 3 files changed, 130 insertions(+), 22 deletions(-)
>
> diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
> index d420863..4f74fa4 100644
> --- a/libs/vkd3d/command.c
> +++ b/libs/vkd3d/command.c
> @@ -3031,8 +3031,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics
>
> d3d12_command_list_end_current_render_pass(list);
>
> - buffer_copy.srcOffset = src_offset;
> - buffer_copy.dstOffset = dst_offset;
> + buffer_copy.srcOffset = src_offset + src_resource->heap_offset;
> + buffer_copy.dstOffset = dst_offset + dst_resource->heap_offset;
> buffer_copy.size = byte_count;
>
> VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
> @@ -3341,6 +3341,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic
>
> vk_image_buffer_copy_from_d3d12(&buffer_image_copy, &dst->u.PlacedFootprint,
> src->u.SubresourceIndex, &src_resource->desc, dst_format, src_box, dst_x, dst_y, dst_z);
> + buffer_image_copy.bufferOffset += dst_resource->heap_offset;
> VK_CALL(vkCmdCopyImageToBuffer(list->vk_command_buffer,
> src_resource->u.vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
> dst_resource->u.vk_buffer, 1, &buffer_image_copy));
> @@ -3370,6 +3371,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic
>
> vk_buffer_image_copy_from_d3d12(&buffer_image_copy, &src->u.PlacedFootprint,
> dst->u.SubresourceIndex, &dst_resource->desc, src_format, src_box, dst_x, dst_y, dst_z);
> + buffer_image_copy.bufferOffset += src_resource->heap_offset;
> VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer,
> src_resource->u.vk_buffer, dst_resource->u.vk_image,
> VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &buffer_image_copy));
> @@ -3450,8 +3452,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm
> assert(d3d12_resource_is_buffer(src_resource));
> assert(src_resource->desc.Width == dst_resource->desc.Width);
>
> - vk_buffer_copy.srcOffset = 0;
> - vk_buffer_copy.dstOffset = 0;
> + vk_buffer_copy.srcOffset = src_resource->heap_offset;
> + vk_buffer_copy.dstOffset = dst_resource->heap_offset;
> vk_buffer_copy.size = dst_resource->desc.Width;
> VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
> src_resource->u.vk_buffer, dst_resource->u.vk_buffer, 1, &vk_buffer_copy));
> @@ -3842,8 +3844,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC
> vk_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
> vk_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
> vk_barrier.buffer = resource->u.vk_buffer;
> - vk_barrier.offset = 0;
> - vk_barrier.size = VK_WHOLE_SIZE;
> + vk_barrier.offset = resource->heap_offset;
> + vk_barrier.size = resource->desc.Width;
>
> VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer, src_stage_mask, dst_stage_mask, 0,
> 0, NULL, 1, &vk_barrier, 0, NULL));
> @@ -4728,6 +4730,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID
> return;
> }
>
> + /* Offset from heap with placed buffers is already applied in CPU descriptor. */
> VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, resource_impl->u.vk_buffer,
> cpu_descriptor->uav.buffer.offset, cpu_descriptor->uav.buffer.size, values[0]));
>
> diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c
> index 6c9564b..c289fa8 100644
> --- a/libs/vkd3d/resource.c
> +++ b/libs/vkd3d/resource.c
> @@ -292,6 +292,8 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface)
> return refcount;
> }
>
> +static ULONG d3d12_resource_decref(struct d3d12_resource *resource);
> +
> static void d3d12_heap_destroy(struct d3d12_heap *heap)
> {
> struct d3d12_device *device = heap->device;
> @@ -299,6 +301,9 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
>
> TRACE("Destroying heap %p.\n", heap);
>
> + if (heap->buffer_resource)
> + d3d12_resource_decref(heap->buffer_resource);
> +
> vkd3d_private_store_destroy(&heap->private_store);
>
> VK_CALL(vkFreeMemory(device->vk_device, heap->vk_memory, NULL));
> @@ -539,6 +544,12 @@ static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc, const struct d3d1
> return S_OK;
> }
>
> +static HRESULT d3d12_resource_create(struct d3d12_device *device,
> + const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
> + const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed,
> + struct d3d12_resource **resource);
> +
> static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
> struct d3d12_device *device, const D3D12_HEAP_DESC *desc, const struct d3d12_resource *resource)
> {
> @@ -546,6 +557,9 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
> VkDeviceSize vk_memory_size;
> HRESULT hr;
> int rc;
> + bool buffers_allowed;
> + D3D12_RESOURCE_DESC resource_desc;
> + D3D12_RESOURCE_STATES initial_resource_state;
>
> heap->ID3D12Heap_iface.lpVtbl = &d3d12_heap_vtbl;
> heap->refcount = 1;
> @@ -556,6 +570,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>
> heap->map_ptr = NULL;
> heap->map_count = 0;
> + heap->buffer_resource = NULL;
>
> if (!heap->desc.Properties.CreationNodeMask)
> heap->desc.Properties.CreationNodeMask = 1;
> @@ -583,6 +598,53 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
> return hr;
> }
>
> + buffers_allowed = !(heap->desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS);
> + if (buffers_allowed && !resource)
> + {
> + /* Create a single omnipotent buffer which fills the entire heap.
> + * Whenever we place buffer resources on this heap, we'll just offset this VkBuffer.
> + * This allows us to keep VA space somewhat sane, and keeps number of (limited) VA allocations down.
> + * One possible downside is that the buffer might be slightly slower to access,
> + * but D3D12 has very lenient usage flags for buffers. */
> +
> + memset(&resource_desc, 0, sizeof(resource_desc));
> + resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
> + resource_desc.Width = desc->SizeInBytes;
> + resource_desc.Height = 1;
> + resource_desc.DepthOrArraySize = 1;
> + resource_desc.MipLevels = 1;
> + resource_desc.SampleDesc.Count = 1;
> + resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
> +
> + switch (desc->Properties.Type)
> + {
> + case D3D12_HEAP_TYPE_UPLOAD:
> + initial_resource_state = D3D12_RESOURCE_STATE_GENERIC_READ;
> + break;
> +
> + case D3D12_HEAP_TYPE_READBACK:
> + initial_resource_state = D3D12_RESOURCE_STATE_COPY_DEST;
> + break;
> +
> + default:
> + /* Upload and readback heaps do not allow UAV access, only enable this flag for other heaps. */
> + resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
> + initial_resource_state = D3D12_RESOURCE_STATE_COMMON;
> + break;
> + }
> +
> + if (FAILED(hr = d3d12_resource_create(device, &desc->Properties, desc->Flags,
> + &resource_desc, initial_resource_state,
> + NULL, false, &heap->buffer_resource)))
> + {
> + heap->buffer_resource = NULL;
> + return hr;
> + }
> + /* This internal resource should not own a reference on the device.
> + * d3d12_resource_create takes a reference on the device. */
> + d3d12_device_release(device);
> + }
> +
> if (resource)
> {
> if (d3d12_resource_is_buffer(resource))
> @@ -600,8 +662,16 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
>
> heap->desc.SizeInBytes = vk_memory_size;
> }
> + else if (heap->buffer_resource)
> + {
> + hr = vkd3d_allocate_buffer_memory(device, heap->buffer_resource->u.vk_buffer,
> + &heap->desc.Properties, heap->desc.Flags,
> + &heap->vk_memory, &heap->vk_memory_type, &vk_memory_size);
> + }
> else
> {
> + /* Allocate generic memory which should hopefully match up with whatever resources
> + * we want to place here. */
> memory_requirements.size = heap->desc.SizeInBytes;
> memory_requirements.alignment = heap->desc.Alignment;
> memory_requirements.memoryTypeBits = ~(uint32_t)0;
> @@ -614,6 +684,11 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap,
> {
> vkd3d_private_store_destroy(&heap->private_store);
> pthread_mutex_destroy(&heap->mutex);
> + if (heap->buffer_resource)
> + {
> + d3d12_resource_decref(heap->buffer_resource);
> + heap->buffer_resource = NULL;
> + }
> return hr;
> }
>
> @@ -1003,13 +1078,16 @@ static void d3d12_resource_destroy(struct d3d12_resource *resource, struct d3d12
> if (resource->flags & VKD3D_RESOURCE_EXTERNAL)
> return;
>
> - if (resource->gpu_address)
> - vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
> + if (!(resource->flags & VKD3D_RESOURCE_PLACED_BUFFER))
> + {
> + if (resource->gpu_address)
> + vkd3d_gpu_va_allocator_free(&device->gpu_va_allocator, resource->gpu_address);
>
> - if (d3d12_resource_is_buffer(resource))
> - VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
> - else
> - VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
> + if (d3d12_resource_is_buffer(resource))
> + VK_CALL(vkDestroyBuffer(device->vk_device, resource->u.vk_buffer, NULL));
> + else
> + VK_CALL(vkDestroyImage(device->vk_device, resource->u.vk_image, NULL));
> + }
>
> if (resource->flags & VKD3D_RESOURCE_DEDICATED_HEAP)
> d3d12_heap_destroy(resource->heap);
> @@ -1669,7 +1747,7 @@ static bool d3d12_resource_validate_heap_properties(const struct d3d12_resource
> static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12_device *device,
> const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
> const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> - const D3D12_CLEAR_VALUE *optimized_clear_value)
> + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed)
> {
> HRESULT hr;
>
> @@ -1699,6 +1777,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
>
> resource->gpu_address = 0;
> resource->flags = 0;
> + if (placed && d3d12_resource_is_buffer(resource))
> + resource->flags |= VKD3D_RESOURCE_PLACED_BUFFER;
>
> if (FAILED(hr = d3d12_resource_validate_desc(&resource->desc)))
> return hr;
> @@ -1706,6 +1786,13 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
> switch (desc->Dimension)
> {
> case D3D12_RESOURCE_DIMENSION_BUFFER:
> + /* We'll inherit a VkBuffer reference from the heap with an implied offset. */
> + if (placed)
> + {
> + resource->u.vk_buffer = VK_NULL_HANDLE;
> + break;
> + }
> +
> if (FAILED(hr = vkd3d_create_buffer(device, heap_properties, heap_flags,
> &resource->desc, &resource->u.vk_buffer)))
> return hr;
> @@ -1755,7 +1842,7 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12
> static HRESULT d3d12_resource_create(struct d3d12_device *device,
> const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
> const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
> - const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource)
> + const D3D12_CLEAR_VALUE *optimized_clear_value, bool placed, struct d3d12_resource **resource)
> {
> struct d3d12_resource *object;
> HRESULT hr;
> @@ -1764,7 +1851,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device,
> return E_OUTOFMEMORY;
>
> if (FAILED(hr = d3d12_resource_init(object, device, heap_properties, heap_flags,
> - desc, initial_state, optimized_clear_value)))
> + desc, initial_state, optimized_clear_value, placed)))
> {
> vkd3d_free(object);
> return hr;
> @@ -1806,7 +1893,7 @@ HRESULT d3d12_committed_resource_create(struct d3d12_device *device,
> }
>
> if (FAILED(hr = d3d12_resource_create(device, heap_properties, heap_flags,
> - desc, initial_state, optimized_clear_value, &object)))
> + desc, initial_state, optimized_clear_value, false, &object)))
> return hr;
>
> if (FAILED(hr = vkd3d_allocate_resource_memory(device, object, heap_properties, heap_flags)))
> @@ -1830,6 +1917,16 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device,
> VkMemoryRequirements requirements;
> VkResult vr;
>
> + if (resource->flags & VKD3D_RESOURCE_PLACED_BUFFER)
> + {
> + /* Just inherit the buffer from the heap. */
> + resource->u.vk_buffer = heap->buffer_resource->u.vk_buffer;
> + resource->heap = heap;
> + resource->heap_offset = heap_offset;
> + resource->gpu_address = heap->buffer_resource->gpu_address + heap_offset;
> + return S_OK;
> + }
> +
> if (d3d12_resource_is_buffer(resource))
> VK_CALL(vkGetBufferMemoryRequirements(vk_device, resource->u.vk_buffer, &requirements));
> else
> @@ -1879,7 +1976,7 @@ HRESULT d3d12_placed_resource_create(struct d3d12_device *device, struct d3d12_h
> HRESULT hr;
>
> if (FAILED(hr = d3d12_resource_create(device, &heap->desc.Properties, heap->desc.Flags,
> - desc, initial_state, optimized_clear_value, &object)))
> + desc, initial_state, optimized_clear_value, true, &object)))
> return hr;
>
> if (FAILED(hr = vkd3d_bind_heap_memory(device, object, heap, heap_offset)))
> @@ -1903,7 +2000,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device,
> HRESULT hr;
>
> if (FAILED(hr = d3d12_resource_create(device, NULL, 0,
> - desc, initial_state, optimized_clear_value, &object)))
> + desc, initial_state, optimized_clear_value, false, &object)))
> return hr;
>
> TRACE("Created reserved resource %p.\n", object);
> @@ -2205,7 +2302,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device,
> assert(d3d12_resource_is_buffer(resource));
>
> return vkd3d_create_buffer_view(device, resource->u.vk_buffer,
> - format, offset * element_size, size * element_size, view);
> + format, resource->heap_offset + offset * element_size, size * element_size, view);
> }
>
> static void vkd3d_set_view_swizzle_for_format(VkComponentMapping *components,
> @@ -2807,7 +2904,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
>
> format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
> if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format,
> - desc->u.Buffer.CounterOffsetInBytes, sizeof(uint32_t), &view->vk_counter_view))
> + desc->u.Buffer.CounterOffsetInBytes + resource->heap_offset, sizeof(uint32_t), &view->vk_counter_view))
> {
> WARN("Failed to create counter buffer view.\n");
> view->vk_counter_view = VK_NULL_HANDLE;
> @@ -2821,7 +2918,7 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_
> {
> const struct vkd3d_format *format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
>
> - descriptor->uav.buffer.offset = desc->u.Buffer.FirstElement * format->byte_count;
> + descriptor->uav.buffer.offset = desc->u.Buffer.FirstElement * format->byte_count + resource->heap_offset;
> descriptor->uav.buffer.size = desc->u.Buffer.NumElements * format->byte_count;
> }
> }
> @@ -2913,12 +3010,18 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device,
> {
> const struct vkd3d_format *format;
> struct d3d12_resource *resource;
> + uint64_t range;
> + uint64_t offset;
>
> format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false);
> resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address);
> assert(d3d12_resource_is_buffer(resource));
> +
> + offset = gpu_address - resource->gpu_address;
> + range = min(resource->desc.Width - offset, device->vk_info.device_limits.maxStorageBufferRange);
> +
> return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format,
> - gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view);
> + offset, range, vk_buffer_view);
> }
>
> /* samplers */
> diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
> index a5f7c81..2b1ae30 100644
> --- a/libs/vkd3d/vkd3d_private.h
> +++ b/libs/vkd3d/vkd3d_private.h
> @@ -380,6 +380,7 @@ struct d3d12_heap
> unsigned int map_count;
> uint32_t vk_memory_type;
>
> + struct d3d12_resource *buffer_resource;
> struct d3d12_device *device;
>
> struct vkd3d_private_store private_store;
> @@ -394,6 +395,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) DECLSPEC_HIDDE
> #define VKD3D_RESOURCE_EXTERNAL 0x00000004
> #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008
> #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010
> +#define VKD3D_RESOURCE_PLACED_BUFFER 0x00000020
>
> /* ID3D12Resource */
> struct d3d12_resource
>
More information about the wine-devel
mailing list