[PATCH vkd3d v3 2/2] vkd3d: Back descriptor heaps with Vulkan descriptor sets if descriptor indexing is available.

Henri Verbeet hverbeet at gmail.com
Mon Jan 31 11:08:07 CST 2022


On Thu, 13 Jan 2022 at 06:31, Conor McCarthy <cmccarthy at codeweavers.com> wrote:
>   * VKD3D_CONFIG - a list of options that change the behavior of libvkd3d.
> +    * virtual_heaps - Create descriptors for each D3D12 root signature
> +      descriptor range instead of entire descriptor heaps. Useful when push
> +      constant or bound descriptor limits are exceeded.
>      * vk_debug - enables Vulkan debug extensions.
>
Being able to explicitly select the older implementation is useful,
but it seems unfortunate that it would be required for some
applications. What would be required in order to fall back to the
current implementation at run-time for applications that need it?

> @@ -1873,14 +1873,17 @@ static void d3d12_command_list_invalidate_current_render_pass(struct d3d12_comma
>  static void d3d12_command_list_invalidate_bindings(struct d3d12_command_list *list,
>          struct d3d12_pipeline_state *state)
>  {
> -    if (state && state->uav_counters.binding_count)
> +    if (state)
>      {
>          enum vkd3d_pipeline_bind_point bind_point = (enum vkd3d_pipeline_bind_point)state->vk_bind_point;
>          struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
>
> -        vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size,
> -                state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views));
> -        bindings->uav_counters_dirty = true;
> +        if (state->uav_counters.binding_count)
> +        {
> +            vkd3d_array_reserve((void **)&bindings->vk_uav_counter_views, &bindings->vk_uav_counter_views_size,
> +                    state->uav_counters.binding_count, sizeof(*bindings->vk_uav_counter_views));
> +            bindings->uav_counters_dirty = true;
> +        }
>      }
>  }
>
This change looks superfluous.

> +static unsigned int d3d12_command_list_bind_descriptor_table(struct d3d12_command_list *list,
> +        struct vkd3d_pipeline_bindings *bindings, unsigned int index,
> +        struct d3d12_descriptor_heap **cbv_srv_uav_heap, struct d3d12_descriptor_heap **sampler_heap)
> +{
> +    struct d3d12_descriptor_heap *heap;
> +    const struct d3d12_desc *desc;
> +    unsigned int offset;
> +
> +    if (!(desc = bindings->descriptor_tables[index]))
> +        return 0;
> +
> +    /* AMD, Nvidia and Intel drivers on Windows work if SetDescriptorHeaps()
> +     * is not called, so we bind heaps from the tables instead. No NULL check is
> +     * needed here because it's checked when descriptor tables are set. */
> +    heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc);
> +    offset = desc - (const struct d3d12_desc *)heap->descriptors;
> +
> +    if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
> +    {
> +        if (*cbv_srv_uav_heap)
> +        {
> +            if (heap == *cbv_srv_uav_heap)
> +                return offset;
> +            /* This occurs occasionally in Rise of the Tomb Raider apparently
> +             * due to a race condition (one of several). */
> +            ERR("List %p uses descriptors from more than one CBV/SRV/UAV heap.\n", list);
> +            return ~0u;
> +        }

Things like memory corruption aside, applications should generally not
be able to trigger ERRs. Should this be a WARN instead, or are we
missing some synchronisation somewhere?

> +static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list,
> +        enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap)
> +{
> +    struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point];
> +    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
> +    const struct d3d12_root_signature *rs = bindings->root_signature;
> +    enum vkd3d_vk_descriptor_set_index set;
> +
> +    if (!heap)
> +        return;
> +
> +    if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)
> +    {
> +        if (heap->serial_id == bindings->cbv_srv_uav_heap_id)
> +            return;
> +        bindings->cbv_srv_uav_heap_id = heap->serial_id;
> +    }
> +    else
> +    {
> +        if (heap->serial_id == bindings->sampler_heap_id)
> +            return;
> +        bindings->sampler_heap_id = heap->serial_id;
> +    }
> +
> +    for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set)
> +    {
> +        VkDescriptorSet vk_descriptor_set = heap->vk_descriptor_sets[set].vk_set;
> +
> +        if (!vk_descriptor_set)
> +            continue;
> +
> +        /* These sets can be shared across multiple command lists, and therefore binding must
> +         * be synchronised. On an experimental branch in which caching of Vk descriptor writes
> +         * greatly increased the chance of multiple threads arriving here at the same time,
> +         * GRID 2019 crashed without the mutex lock. */
> +        pthread_mutex_lock(&heap->vk_descriptor_sets[set].mutex);
> +        VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout,
> +                rs->vk_set_count + set, 1, &vk_descriptor_set, 0, NULL));
> +        pthread_mutex_unlock(&heap->vk_descriptor_sets[set].mutex);
> +    }
> +}
> +
Does this need a mutex per descriptor set? It seems like we always
lock/bind all the sets that exist for a particular heap; would it make
sense to have a single mutex per heap?

> +static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index)
> +{
> +    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
> +    VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info;
> +    VkDescriptorSetLayoutCreateInfo set_desc;
> +    VkDescriptorBindingFlagsEXT set_flags;
> +    VkDescriptorSetLayoutBinding binding;
> +    VkResult vr;
> +
> +    binding.binding = 0;
> +    binding.descriptorType = device->vk_descriptor_heap_layouts[index].type;
> +    binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count;
> +    binding.stageFlags = VK_SHADER_STAGE_ALL;
> +    binding.pImmutableSamplers = NULL;
> +
> +    set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
> +    set_desc.pNext = &flags_info;
> +    set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT;
> +    set_desc.bindingCount = 1;
> +    set_desc.pBindings = &binding;
> +
That should use
VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT.
VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT doesn't
exist in Vulkan-Headers 1.1.113 (which is the current minimum required
version), and technically it's part of Vulkan 1.2 instead of
VK_EXT_descriptor_indexing.

> +static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits,
> +        const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties)
> +{
> +    const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2;
> +    unsigned int srv_divisor = 1, uav_divisor = 1;
> +
> +    /* The total number of populated sampled image or storage image descriptors never exceeds the size of
> +     * one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed
> +     * device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance
> +     * (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at
> +     * least, but let's try to stay within them if limits are high enough. */
> +    if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21))
> +    {
> +        srv_divisor = 2;
> +        uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2;
> +    }
> +
> +    limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers,
> +            properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision);
> +    limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages,
> +            properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision);
> +    limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers,
> +            properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision);
> +    limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages,
> +            properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision);
> +    limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers,
> +            properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision);
> +    limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS);
> +}
> +
The descriptor limit changes would probably make sense as a separate
patch. Note that we need these limits for
d3d12_command_allocator_allocate_descriptor_pool() and
vk_binding_count_from_descriptor_range() as well, as pointed out by
Giovanni back in October.

> +const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] =
> +{
> +    VKD3D_SET_INDEX_SAMPLER,
> +    VKD3D_SET_INDEX_COUNT,
> +    VKD3D_SET_INDEX_SAMPLED_IMAGE,
> +    VKD3D_SET_INDEX_STORAGE_IMAGE,
> +    VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER,
> +    VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER,
> +    VKD3D_SET_INDEX_UNIFORM_BUFFER,
> +};
> +
This table is only used by
vkd3d_vk_descriptor_set_index_from_vk_descriptor_type(); we may as
well make it local to that function. Somewhat similarly, it seems like
vkd3d_vk_descriptor_set_index_from_vk_descriptor_type() is only used
in resource.c, so we may as well make it local to that file.

This table is indexed by VkDescriptorType. We could make that more
obvious by either using designated initialisers, or by adding
equivalent comments.

> +static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap,
> +        struct d3d12_device *device, unsigned int set)
> +{
> +    struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
> +    uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors;
> +    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
> +    VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size;
> +    VkDescriptorSetAllocateInfo set_desc;
> +    VkResult vr;
> +
> +    set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
> +    set_desc.pNext = NULL;
> +    set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool;
> +    set_desc.descriptorSetCount = 1;
> +    set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout;
> +    set_desc.pNext = &set_size;
> +    set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT;
> +    set_size.pNext = NULL;
> +    set_size.descriptorSetCount = 1;
> +    set_size.pDescriptorCounts = &variable_binding_size;
> +    if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0)
> +    {
> +        descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set;
> +        return S_OK;
> +    }
> +
> +    ERR("Failed to allocate descriptor set, vr %d.\n", vr);
> +    return hresult_from_vk_result(vr);
> +}
> +
"set_desc.pNext" is written twice above.

> +static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap,
> +        struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
> +{
> +    enum vkd3d_vk_descriptor_set_index set;
> +    HRESULT hr;
> +
> +    descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE;
> +    memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets));
> +
> +    if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV
> +            && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER))
> +        return S_OK;
> +
> +    if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc)))
> +        return hr;
> +
> +    for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set)
> +    {
> +        struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set];
> +
> +        pthread_mutex_init(&descriptor_set->mutex, NULL);
> +
> +        descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
> +        descriptor_set->vk_descriptor_write.pNext = NULL;
> +        descriptor_set->vk_descriptor_write.dstBinding = 0;
> +        descriptor_set->vk_descriptor_write.descriptorCount = 1;
> +        descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type;
> +        descriptor_set->vk_descriptor_write.pImageInfo = NULL;
> +        descriptor_set->vk_descriptor_write.pBufferInfo = NULL;
> +        descriptor_set->vk_descriptor_write.pTexelBufferView = NULL;
> +
> +        switch(device->vk_descriptor_heap_layouts[set].type)
> +        {
> +            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
> +                descriptor_set->write_descriptor = d3d12_descriptor_heap_write_buffer;
> +                break;
> +            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
> +            case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
> +                descriptor_set->write_descriptor = (set == VKD3D_SET_INDEX_UAV_COUNTER)
> +                        ? d3d12_descriptor_heap_write_uav_counter_texel_buffer
> +                        : d3d12_descriptor_heap_write_texel_buffer;
> +                break;
> +            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
> +                descriptor_set->write_descriptor = d3d12_descriptor_heap_write_image;
> +                descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info;
> +                descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE;
> +                descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
> +                break;
> +            case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
> +                descriptor_set->write_descriptor = d3d12_descriptor_heap_write_image;
> +                descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info;
> +                descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE;
> +                descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
> +                break;
> +            case VK_DESCRIPTOR_TYPE_SAMPLER:
> +                descriptor_set->write_descriptor = d3d12_descriptor_heap_write_sampler;
> +                descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info;
> +                descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE;
> +                descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
> +                break;
> +            default:
> +                FIXME("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type);
> +                return E_FAIL;
> +        }
> +        if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type
> +                && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set)))
> +            return hr;
> +    }
> +
> +    return S_OK;
> +}
> +
For small/trivial functions like e.g.
d3d12_descriptor_heap_write_image(), calling them through a function
pointer tends to not be much faster than just using a switch. I don't
think it helps much in terms of clarity either. In terms of
efficiency, it would probably help to write an entire range of
descriptors in a single call, instead of writing single descriptors at
a time.

>  static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap,
>          struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc)
>  {
> +    static LONG serial_id;
>      HRESULT hr;
>
>      descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl;
>      descriptor_heap->refcount = 1;
> +    descriptor_heap->serial_id = InterlockedIncrement(&serial_id);
>
>      descriptor_heap->desc = *desc;
>
>      if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store)))
>          return hr;
>
> +    d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc);
> +
>      d3d12_device_add_ref(descriptor_heap->device = device);
>
>      return S_OK;

"serial_id" above can wrap, and for a 32-bit value that may not even
take all that long.

> +static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature,
> +        const struct vkd3d_descriptor_set_context *context)
> +{
> +    root_signature->descriptor_table_offset = 0;
> +    if ((root_signature->descriptor_table_count = context->push_constant_index))
> +    {
> +        VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL];
> +
> +        root_signature->descriptor_table_offset = (range->size + 15) & ~15;
> +        range->size = root_signature->descriptor_table_offset
> +                + root_signature->descriptor_table_count * sizeof(uint32_t);
> +
Note that we have an align() helper.

> @@ -2448,13 +2649,14 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
>      VkVertexInputBindingDivisorDescriptionEXT *binding_divisor;
>      const struct vkd3d_vulkan_info *vk_info = &device->vk_info;
>      uint32_t instance_divisors[D3D12_VS_INPUT_REGISTER_COUNT];
> +    struct vkd3d_shader_spirv_target_info *stage_target_info;
>      uint32_t aligned_offsets[D3D12_VS_INPUT_REGISTER_COUNT];
> +    struct vkd3d_shader_descriptor_offset_info offset_info;
>      struct vkd3d_shader_parameter ps_shader_parameters[1];
>      struct vkd3d_shader_transform_feedback_info xfb_info;
>      struct vkd3d_shader_spirv_target_info ps_target_info;
>      struct vkd3d_shader_interface_info shader_interface;
> -    struct vkd3d_shader_descriptor_offset_info offset_info;
> -    struct vkd3d_shader_spirv_target_info *target_info;
> +    struct vkd3d_shader_spirv_target_info target_info;
>      const struct d3d12_root_signature *root_signature;
>      struct vkd3d_shader_signature input_signature;
>      bool have_attachment, is_dsv_format_unknown;
> @@ -2659,6 +2861,12 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
>          }
>      }
>
> +    memset(&target_info, 0, sizeof(target_info));
> +    target_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SPIRV_TARGET_INFO;
> +    target_info.environment = VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0;
> +    target_info.extensions = vk_info->shader_extensions;
> +    target_info.extension_count = vk_info->shader_extension_count;
> +
The "target_info" related changes here probably make sense as a separate patch.

> diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
> index 047f4a29..575f538b 100644
> --- a/libs/vkd3d/vkd3d_private.h
> +++ b/libs/vkd3d/vkd3d_private.h
> @@ -59,6 +59,9 @@
>  #define VKD3D_MAX_SHADER_STAGES           5u
>  #define VKD3D_MAX_VK_SYNC_OBJECTS         4u
>  #define VKD3D_MAX_DESCRIPTOR_SETS        64u
> +#define VKD3D_MAX_BOUND_DESCRIPTOR_HEAPS  2u
> +/* D3D12 binding tier 3 has a limit of 2048 samplers. */
> +#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
>
VKD3D_MAX_BOUND_DESCRIPTOR_HEAPS above is unused.

> @@ -1210,6 +1277,9 @@ struct d3d12_device
>      const struct vkd3d_format_compatibility_list *format_compatibility_lists;
>      struct vkd3d_null_resources null_resources;
>      struct vkd3d_uav_clear_state uav_clear_state;
> +
> +    struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
> +    bool use_vk_heaps;
>  };
>
I notice there are a fair amount of "use_vk_heaps" checks sprinkled
through the code. While not the worst thing in the world, it's not
quite ideal either. Anything we could do about that?



More information about the wine-devel mailing list