[PATCH vkd3d 11/11] vkd3d: Implement simple pipeline cache.

Józef Kucia joseph.kucia at gmail.com
Wed Sep 12 08:20:02 CDT 2018


From: Józef Kucia <jkucia at codeweavers.com>

Ideally, we would like to introduce a Vulkan extension that makes vertex
buffer strides and primitive topology dynamic state. Until then, cache the
compiled pipelines, keyed by pipeline state, primitive topology and strides.

Signed-off-by: Józef Kucia <jkucia at codeweavers.com>
---

Such an extension should be quite easy to implement in RADV and Anvil.

---
 libs/vkd3d/command.c       |  53 +++++++---------
 libs/vkd3d/device.c        | 151 ++++++++++++++++++++++++++++++++++++++++++---
 libs/vkd3d/state.c         |  12 +++-
 libs/vkd3d/vkd3d_private.h |  31 ++++++++--
 4 files changed, 201 insertions(+), 46 deletions(-)

diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 2fc564312f8e..5e67e6674e95 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -725,17 +725,6 @@ static bool d3d12_command_allocator_add_framebuffer(struct d3d12_command_allocat
     return true;
 }
 
-static bool d3d12_command_allocator_add_pipeline(struct d3d12_command_allocator *allocator, VkPipeline pipeline)
-{
-    if (!vkd3d_array_reserve((void **)&allocator->pipelines, &allocator->pipelines_size,
-            allocator->pipeline_count + 1, sizeof(*allocator->pipelines)))
-        return false;
-
-    allocator->pipelines[allocator->pipeline_count++] = pipeline;
-
-    return true;
-}
-
 static bool d3d12_command_allocator_add_descriptor_pool(struct d3d12_command_allocator *allocator,
         VkDescriptorPool pool)
 {
@@ -951,12 +940,6 @@ static void d3d12_command_allocator_free_resources(struct d3d12_command_allocato
     }
     allocator->descriptor_pool_count = 0;
 
-    for (i = 0; i < allocator->pipeline_count; ++i)
-    {
-        VK_CALL(vkDestroyPipeline(device->vk_device, allocator->pipelines[i], NULL));
-    }
-    allocator->pipeline_count = 0;
-
     for (i = 0; i < allocator->framebuffer_count; ++i)
     {
         VK_CALL(vkDestroyFramebuffer(device->vk_device, allocator->framebuffers[i], NULL));
@@ -1029,7 +1012,6 @@ static ULONG STDMETHODCALLTYPE d3d12_command_allocator_Release(ID3D12CommandAllo
         vkd3d_free(allocator->views);
         vkd3d_free(allocator->descriptor_pools);
         vkd3d_free(allocator->free_descriptor_pools);
-        vkd3d_free(allocator->pipelines);
         vkd3d_free(allocator->framebuffers);
         vkd3d_free(allocator->passes);
 
@@ -1214,10 +1196,6 @@ static HRESULT d3d12_command_allocator_init(struct d3d12_command_allocator *allo
     allocator->framebuffers_size = 0;
     allocator->framebuffer_count = 0;
 
-    allocator->pipelines = NULL;
-    allocator->pipelines_size = 0;
-    allocator->pipeline_count = 0;
-
     allocator->descriptor_pools = NULL;
     allocator->descriptor_pools_size = 0;
     allocator->descriptor_pool_count = 0;
@@ -1837,8 +1815,8 @@ static bool d3d12_command_list_update_current_framebuffer(struct d3d12_command_l
     return true;
 }
 
-static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_command_list *list,
-        const struct d3d12_graphics_pipeline_state *state)
+static VkPipeline d3d12_command_list_get_or_create_pipeline(struct d3d12_command_list *list,
+        struct d3d12_graphics_pipeline_state *state)
 {
     struct VkVertexInputBindingDescription bindings[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
     const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
@@ -1846,7 +1824,8 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
     struct VkPipelineInputAssemblyStateCreateInfo ia_desc;
     struct VkPipelineColorBlendStateCreateInfo blend_desc;
     struct VkGraphicsPipelineCreateInfo pipeline_desc;
-    const struct d3d12_device *device = list->device;
+    struct d3d12_device *device = list->device;
+    struct vkd3d_pipeline_key pipeline_key;
     size_t binding_count = 0;
     VkPipeline vk_pipeline;
     unsigned int i;
@@ -1878,6 +1857,10 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
         .pDynamicStates = dynamic_states,
     };
 
+    memset(&pipeline_key, 0, sizeof(pipeline_key));
+    pipeline_key.state = state;
+    pipeline_key.topology = list->primitive_topology;
+
     for (i = 0, mask = 0; i < state->attribute_count; ++i)
     {
         struct VkVertexInputBindingDescription *b;
@@ -1902,9 +1885,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
         if (!b->stride)
             FIXME("Invalid stride for input slot %u.\n", binding);
 
+        pipeline_key.strides[binding_count] = list->strides[binding];
+
         ++binding_count;
     }
 
+    if ((vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key)))
+        return vk_pipeline;
+
     input_desc.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
     input_desc.pNext = NULL;
     input_desc.flags = 0;
@@ -1957,13 +1945,14 @@ static VkPipeline d3d12_command_list_create_graphics_pipeline(struct d3d12_comma
         return VK_NULL_HANDLE;
     }
 
-    if (!d3d12_command_allocator_add_pipeline(list->allocator, vk_pipeline))
-    {
-        WARN("Failed to add pipeline.\n");
-        VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL));
-        return VK_NULL_HANDLE;
-    }
+    if (d3d12_device_put_pipeline_to_cache(device, &pipeline_key, vk_pipeline, &state->compiled_pipelines))
+        return vk_pipeline;
 
+    /* Other thread compiled the pipeline before us. */
+    VK_CALL(vkDestroyPipeline(device->vk_device, vk_pipeline, NULL));
+    vk_pipeline = d3d12_device_find_cached_pipeline(device, &pipeline_key);
+    if (!vk_pipeline)
+        ERR("Could not get the pipeline compiled by other thread from the cache.\n");
     return vk_pipeline;
 }
 
@@ -1981,7 +1970,7 @@ static bool d3d12_command_list_update_current_pipeline(struct d3d12_command_list
         return false;
     }
 
-    if (!(vk_pipeline = d3d12_command_list_create_graphics_pipeline(list, &list->state->u.graphics)))
+    if (!(vk_pipeline = d3d12_command_list_get_or_create_pipeline(list, &list->state->u.graphics)))
         return false;
 
     VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, list->state->vk_bind_point, vk_pipeline));
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index cfb17ea17d99..c1db7d30a9e8 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -1169,11 +1169,116 @@ static HRESULT d3d12_device_create_dummy_sampler(struct d3d12_device *device)
     return vkd3d_create_static_sampler(device, &sampler_desc, &device->vk_dummy_sampler);
 }
 
-static void d3d12_device_init_pipeline_cache(struct d3d12_device *device)
+static void destroy_compiled_pipeline(struct vkd3d_compiled_pipeline *pipeline,
+        struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+    VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
+    vkd3d_free(pipeline);
+}
+
+static int compare_pipeline_cache_entry(const void *key, const struct rb_entry *entry)
+{
+    const struct vkd3d_compiled_pipeline *compiled_pipeline;
+    const struct vkd3d_pipeline_key *pipeline_key;
+
+    pipeline_key = key;
+    compiled_pipeline = RB_ENTRY_VALUE(entry, const struct vkd3d_compiled_pipeline, entry);
+    return memcmp(&compiled_pipeline->key, pipeline_key, sizeof(*pipeline_key));
+}
+
+static void destroy_pipeline_cache_entry(struct rb_entry *entry, void *context)
+{
+    struct vkd3d_compiled_pipeline *pipeline;
+    struct d3d12_device *device = context;
+
+    pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry);
+    destroy_compiled_pipeline(pipeline, device);
+}
+
+VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device,
+        const struct vkd3d_pipeline_key *key)
+{
+    VkPipeline vk_pipeline = VK_NULL_HANDLE;
+    struct rb_entry *entry;
+    int rc;
+
+    if (!(rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    {
+        if ((entry = rb_get(&device->pipeline_cache, key)))
+            vk_pipeline = RB_ENTRY_VALUE(entry, struct vkd3d_compiled_pipeline, entry)->vk_pipeline;
+        pthread_mutex_unlock(&device->pipeline_cache_mutex);
+    }
+    else
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+    }
+
+    return vk_pipeline;
+}
+
+bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device,
+        const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list)
+{
+    struct vkd3d_compiled_pipeline *compiled_pipeline;
+    bool ret = true;
+    int rc;
+
+    if (!(compiled_pipeline = vkd3d_malloc(sizeof(*compiled_pipeline))))
+        return false;
+
+    compiled_pipeline->key = *key;
+    compiled_pipeline->vk_pipeline = vk_pipeline;
+
+    if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        vkd3d_free(compiled_pipeline);
+        return false;
+    }
+
+    if (rb_put(&device->pipeline_cache, key, &compiled_pipeline->entry) >= 0)
+    {
+        list_add_tail(list, &compiled_pipeline->list);
+    }
+    else
+    {
+        WARN("Failed to put pipeline to cache.\n");
+        vkd3d_free(compiled_pipeline);
+        ret = false;
+    }
+
+    pthread_mutex_unlock(&device->pipeline_cache_mutex);
+    return ret;
+}
+
+void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list)
+{
+    struct vkd3d_compiled_pipeline *pipeline, *cursor;
+    int rc;
+
+    if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return;
+    }
+
+    LIST_FOR_EACH_ENTRY_SAFE(pipeline, cursor, list, struct vkd3d_compiled_pipeline, list)
+    {
+        rb_remove(&device->pipeline_cache, &pipeline->entry);
+        destroy_compiled_pipeline(pipeline, device);
+    }
+
+    pthread_mutex_unlock(&device->pipeline_cache_mutex);
+}
+
+static HRESULT d3d12_device_init_pipeline_cache(struct d3d12_device *device)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
     VkPipelineCacheCreateInfo cache_info;
     VkResult vr;
+    int rc;
 
     cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
     cache_info.pNext = NULL;
@@ -1183,9 +1288,39 @@ static void d3d12_device_init_pipeline_cache(struct d3d12_device *device)
     if ((vr = VK_CALL(vkCreatePipelineCache(device->vk_device, &cache_info, NULL,
             &device->vk_pipeline_cache))) < 0)
     {
-        ERR("Failed to create pipeline cache, vr %d.\n", vr);
+        ERR("Failed to create Vulkan pipeline cache, vr %d.\n", vr);
         device->vk_pipeline_cache = VK_NULL_HANDLE;
     }
+
+    rb_init(&device->pipeline_cache, compare_pipeline_cache_entry);
+
+    if ((rc = pthread_mutex_init(&device->pipeline_cache_mutex, NULL)))
+    {
+        ERR("Failed to initialize mutex, error %d.\n", rc);
+        return E_FAIL;
+    }
+
+    return S_OK;
+}
+
+static void d3d12_device_destroy_pipeline_cache(struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    int rc;
+
+    if (device->vk_pipeline_cache)
+        VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));
+
+    if ((rc = pthread_mutex_lock(&device->pipeline_cache_mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return;
+    }
+
+    rb_destroy(&device->pipeline_cache, destroy_pipeline_cache_entry, device);
+
+    pthread_mutex_unlock(&device->pipeline_cache_mutex);
+    pthread_mutex_destroy(&device->pipeline_cache_mutex);
 }
 
 D3D12_GPU_VIRTUAL_ADDRESS vkd3d_gpu_va_allocator_allocate(struct vkd3d_gpu_va_allocator *allocator,
@@ -1357,8 +1492,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface)
         vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator);
         vkd3d_fence_worker_stop(&device->fence_worker, device);
         VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL));
-        if (device->vk_pipeline_cache)
-            VK_CALL(vkDestroyPipelineCache(device->vk_device, device->vk_pipeline_cache, NULL));
+        d3d12_device_destroy_pipeline_cache(device);
         d3d12_device_destroy_vkd3d_queues(device);
         VK_CALL(vkDestroyDevice(device->vk_device, NULL));
         if (device->parent)
@@ -2310,18 +2444,21 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
         goto out_free_vk_resources;
     }
 
-    if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
+    if (FAILED(hr = d3d12_device_init_pipeline_cache(device)))
         goto out_free_vk_resources;
 
-    vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
+    if (FAILED(hr = vkd3d_fence_worker_start(&device->fence_worker, device)))
+        goto out_free_pipeline_cache;
 
-    d3d12_device_init_pipeline_cache(device);
+    vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
 
     if ((device->parent = create_info->parent))
         IUnknown_AddRef(device->parent);
 
     return S_OK;
 
+out_free_pipeline_cache:
+    d3d12_device_destroy_pipeline_cache(device);
 out_free_vk_resources:
     vk_procs = &device->vk_procs;
     VK_CALL(vkDestroySampler(device->vk_device, device->vk_dummy_sampler, NULL));
diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c
index 446e1a65029d..b60cebf26654 100644
--- a/libs/vkd3d/state.c
+++ b/libs/vkd3d/state.c
@@ -1095,11 +1095,15 @@ static ULONG STDMETHODCALLTYPE d3d12_pipeline_state_Release(ID3D12PipelineState
 
         if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS)
         {
-            for (i = 0; i < state->u.graphics.stage_count; ++i)
+            struct d3d12_graphics_pipeline_state *graphics = &state->u.graphics;
+
+            for (i = 0; i < graphics->stage_count; ++i)
             {
-                VK_CALL(vkDestroyShaderModule(device->vk_device, state->u.graphics.stages[i].module, NULL));
+                VK_CALL(vkDestroyShaderModule(device->vk_device, graphics->stages[i].module, NULL));
             }
-            VK_CALL(vkDestroyRenderPass(device->vk_device, state->u.graphics.render_pass, NULL));
+            VK_CALL(vkDestroyRenderPass(device->vk_device, graphics->render_pass, NULL));
+
+            d3d12_device_destroy_compiled_pipelines(device, &graphics->compiled_pipelines);
         }
         else if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
         {
@@ -2185,6 +2189,8 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s
 
     graphics->root_signature = root_signature;
 
+    list_init(&graphics->compiled_pipelines);
+
     state->vk_bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
     state->device = device;
     ID3D12Device_AddRef(&device->ID3D12Device_iface);
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 37ad13c132cd..5461cc3d36e1 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -25,6 +25,8 @@
 
 #include "vkd3d_common.h"
 #include "vkd3d_memory.h"
+#include "list.h"
+#include "rbtree.h"
 
 #include "vkd3d.h"
 #include "vkd3d_shader.h"
@@ -489,6 +491,8 @@ struct d3d12_graphics_pipeline_state
     struct VkPipelineDepthStencilStateCreateInfo ds_desc;
 
     const struct d3d12_root_signature *root_signature;
+
+    struct list compiled_pipelines;
 };
 
 struct d3d12_compute_pipeline_state
@@ -532,6 +536,21 @@ bool d3d12_pipeline_state_is_render_pass_compatible(const struct d3d12_pipeline_
         const struct d3d12_pipeline_state *state_b) DECLSPEC_HIDDEN;
 struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12PipelineState *iface) DECLSPEC_HIDDEN;
 
+struct vkd3d_pipeline_key
+{
+    const struct d3d12_graphics_pipeline_state *state;
+    VkPrimitiveTopology topology;
+    uint32_t strides[D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
+};
+
+struct vkd3d_compiled_pipeline
+{
+    struct rb_entry entry;
+    struct vkd3d_pipeline_key key;
+    VkPipeline vk_pipeline;
+    struct list list;
+};
+
 struct vkd3d_buffer
 {
     VkBuffer vk_buffer;
@@ -562,10 +581,6 @@ struct d3d12_command_allocator
     size_t framebuffers_size;
     size_t framebuffer_count;
 
-    VkPipeline *pipelines;
-    size_t pipelines_size;
-    size_t pipeline_count;
-
     VkDescriptorPool *descriptor_pools;
     size_t descriptor_pools_size;
     size_t descriptor_pool_count;
@@ -701,6 +716,8 @@ struct d3d12_device
     struct vkd3d_gpu_va_allocator gpu_va_allocator;
     struct vkd3d_fence_worker fence_worker;
 
+    pthread_mutex_t pipeline_cache_mutex;
+    struct rb_tree pipeline_cache;
     VkPipelineCache vk_pipeline_cache;
 
     /* A sampler used for SpvOpImageFetch. */
@@ -733,6 +750,12 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
         const char *message, ...) VKD3D_PRINTF_FUNC(3, 4) DECLSPEC_HIDDEN;
 struct d3d12_device *unsafe_impl_from_ID3D12Device(ID3D12Device *iface) DECLSPEC_HIDDEN;
 
+void d3d12_device_destroy_compiled_pipelines(struct d3d12_device *device, struct list *list) DECLSPEC_HIDDEN;
+VkPipeline d3d12_device_find_cached_pipeline(struct d3d12_device *device,
+        const struct vkd3d_pipeline_key *key) DECLSPEC_HIDDEN;
+bool d3d12_device_put_pipeline_to_cache(struct d3d12_device *device,
+        const struct vkd3d_pipeline_key *key, VkPipeline vk_pipeline, struct list *list) DECLSPEC_HIDDEN;
+
 HRESULT vkd3d_create_buffer(struct d3d12_device *device,
         const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
         const D3D12_RESOURCE_DESC *desc, VkBuffer *vk_buffer) DECLSPEC_HIDDEN;
-- 
2.16.4




More information about the wine-devel mailing list