Module: Mesa
Branch: main
Commit: 538ca7801aba1712e6fadd64d898dd39d0eb5827
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=538ca7801aba1712e6fadd64d898dd39d0eb5827
Author: Tatsuyuki Ishi <[email protected]> Date: Tue Nov 14 17:17:28 2023 +0900 radv: Use shader part caching helpers for VS prolog and PS/TCS epilog. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26028> --- src/amd/vulkan/radv_cmd_buffer.c | 175 +++++++++------------------------------ src/amd/vulkan/radv_device.c | 142 +++++++++++++++++++------------ src/amd/vulkan/radv_private.h | 24 ++---- src/amd/vulkan/radv_shader.c | 5 ++ 4 files changed, 139 insertions(+), 207 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 287ccdbd222..99027f982a5 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -291,6 +291,33 @@ radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, ui radv_write_data(cmd_buffer, engine_sel, va, size / 4, zeroes, false); } +static void +radv_cmd_buffer_finish_shader_part_cache(struct radv_cmd_buffer *cmd_buffer) +{ + ralloc_free(cmd_buffer->vs_prologs.table); + ralloc_free(cmd_buffer->ps_epilogs.table); + ralloc_free(cmd_buffer->tcs_epilogs.table); +} + +static bool +radv_cmd_buffer_init_shader_part_cache(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer) +{ + if (device->vs_prologs.ops) { + if (!_mesa_set_init(&cmd_buffer->vs_prologs, NULL, device->vs_prologs.ops->hash, device->vs_prologs.ops->equals)) + return false; + } + if (device->tcs_epilogs.ops) { + if (!_mesa_set_init(&cmd_buffer->tcs_epilogs, NULL, device->tcs_epilogs.ops->hash, + device->tcs_epilogs.ops->equals)) + return false; + } + if (device->ps_epilogs.ops) { + if (!_mesa_set_init(&cmd_buffer->ps_epilogs, NULL, device->ps_epilogs.ops->hash, device->ps_epilogs.ops->equals)) + return false; + } + return true; +} + static void radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) { @@ -315,6 +342,8 @@ radv_destroy_cmd_buffer(struct vk_command_buffer *vk_cmd_buffer) if (cmd_buffer->transfer.copy_temp) 
cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->transfer.copy_temp); + radv_cmd_buffer_finish_shader_part_cache(cmd_buffer); + for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set; free(set->mapped_ptr); @@ -346,6 +375,11 @@ radv_create_cmd_buffer(struct vk_command_pool *pool, struct vk_command_buffer ** return result; } + if (!radv_cmd_buffer_init_shader_part_cache(device, cmd_buffer)) { + radv_destroy_cmd_buffer(&cmd_buffer->vk); + return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); + } + list_inithead(&cmd_buffer->upload.list); cmd_buffer->device = device; @@ -3708,22 +3742,6 @@ radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_ return start_index + offset_from_start_index + first; } -uint32_t -radv_hash_vs_prolog(const void *key_) -{ - const struct radv_vs_prolog_key *key = key_; - return _mesa_hash_data(key, sizeof(*key)); -} - -bool -radv_cmp_vs_prolog(const void *a_, const void *b_) -{ - const struct radv_vs_prolog_key *a = a_; - const struct radv_vs_prolog_key *b = b_; - - return memcmp(a, b, sizeof(*a)) == 0; -} - static struct radv_shader_part * lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *vs_shader, uint32_t *nontrivial_divisors) { @@ -3819,36 +3837,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader *v key.next_stage = vs_shader->info.stage; } - uint32_t hash = radv_hash_vs_prolog(&key); - - u_rwlock_rdlock(&device->vs_prologs_lock); - struct hash_entry *prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, &key); - u_rwlock_rdunlock(&device->vs_prologs_lock); - - if (!prolog_entry) { - u_rwlock_wrlock(&device->vs_prologs_lock); - prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, &key); - if (prolog_entry) { - u_rwlock_wrunlock(&device->vs_prologs_lock); - return prolog_entry->data; - } - - prolog = 
radv_create_vs_prolog(device, &key); - struct radv_vs_prolog_key *key2 = malloc(sizeof(key)); - if (!prolog || !key2) { - radv_shader_part_unref(device, prolog); - free(key2); - u_rwlock_wrunlock(&device->vs_prologs_lock); - return NULL; - } - memcpy(key2, &key, sizeof(key)); - _mesa_hash_table_insert_pre_hashed(device->vs_prologs, hash, key2, prolog); - - u_rwlock_wrunlock(&device->vs_prologs_lock); - return prolog; - } - - return prolog_entry->data; + return radv_shader_part_cache_get(device, &device->vs_prologs, &cmd_buffer->vs_prologs, &key); } static void @@ -4179,28 +4168,12 @@ radv_emit_color_blend(struct radv_cmd_buffer *cmd_buffer) } } -uint32_t -radv_hash_ps_epilog(const void *key_) -{ - const struct radv_ps_epilog_key *key = key_; - return _mesa_hash_data(key, sizeof(*key)); -} - -bool -radv_cmp_ps_epilog(const void *a_, const void *b_) -{ - const struct radv_ps_epilog_key *a = a_; - const struct radv_ps_epilog_key *b = b_; - return memcmp(a, b, sizeof(*a)) == 0; -} - static struct radv_shader_part * lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) { const struct radv_rendering_state *render = &cmd_buffer->state.render; const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; struct radv_device *device = cmd_buffer->device; - struct radv_shader_part *epilog = NULL; struct radv_ps_epilog_state state = {0}; state.color_attachment_count = render->color_att_count; @@ -4232,51 +4205,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) } struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state, true); - uint32_t hash = radv_hash_ps_epilog(&key); - - u_rwlock_rdlock(&device->ps_epilogs_lock); - struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key); - u_rwlock_rdunlock(&device->ps_epilogs_lock); - - if (!epilog_entry) { - u_rwlock_wrlock(&device->ps_epilogs_lock); - epilog_entry = _mesa_hash_table_search_pre_hashed(device->ps_epilogs, hash, &key); - if (epilog_entry) { - 
u_rwlock_wrunlock(&device->ps_epilogs_lock); - return epilog_entry->data; - } - - epilog = radv_create_ps_epilog(device, &key, NULL); - struct radv_ps_epilog_key *key2 = malloc(sizeof(*key2)); - if (!epilog || !key2) { - radv_shader_part_unref(device, epilog); - free(key2); - u_rwlock_wrunlock(&device->ps_epilogs_lock); - return NULL; - } - memcpy(key2, &key, sizeof(*key2)); - _mesa_hash_table_insert_pre_hashed(device->ps_epilogs, hash, key2, epilog); - - u_rwlock_wrunlock(&device->ps_epilogs_lock); - return epilog; - } - - return epilog_entry->data; -} - -uint32_t -radv_hash_tcs_epilog(const void *key_) -{ - const struct radv_tcs_epilog_key *key = key_; - return _mesa_hash_data(key, sizeof(*key)); -} - -bool -radv_cmp_tcs_epilog(const void *a_, const void *b_) -{ - const struct radv_tcs_epilog_key *a = a_; - const struct radv_tcs_epilog_key *b = b_; - return memcmp(a, b, sizeof(*a)) == 0; + return radv_shader_part_cache_get(device, &device->ps_epilogs, &cmd_buffer->ps_epilogs, &key); } static struct radv_shader_part * @@ -4285,7 +4214,6 @@ lookup_tcs_epilog(struct radv_cmd_buffer *cmd_buffer) const struct radv_shader *tcs = cmd_buffer->state.shaders[MESA_SHADER_TESS_CTRL]; const struct radv_shader *tes = radv_get_shader(cmd_buffer->state.shaders, MESA_SHADER_TESS_EVAL); struct radv_device *device = cmd_buffer->device; - struct radv_shader_part *epilog = NULL; struct radv_tcs_epilog_key key = { .primitive_mode = tes->info.tes._primitive_mode, @@ -4293,36 +4221,7 @@ lookup_tcs_epilog(struct radv_cmd_buffer *cmd_buffer) .tcs_out_patch_fits_subgroup = tcs->info.wave_size % tcs->info.tcs.tcs_vertices_out == 0, }; - uint32_t hash = radv_hash_tcs_epilog(&key); - - u_rwlock_rdlock(&device->tcs_epilogs_lock); - struct hash_entry *epilog_entry = _mesa_hash_table_search_pre_hashed(device->tcs_epilogs, hash, &key); - u_rwlock_rdunlock(&device->tcs_epilogs_lock); - - if (!epilog_entry) { - u_rwlock_wrlock(&device->tcs_epilogs_lock); - epilog_entry = 
_mesa_hash_table_search_pre_hashed(device->tcs_epilogs, hash, &key); - if (epilog_entry) { - u_rwlock_wrunlock(&device->tcs_epilogs_lock); - return epilog_entry->data; - } - - epilog = radv_create_tcs_epilog(device, &key); - struct radv_tcs_epilog_key *key2 = malloc(sizeof(*key2)); - if (!epilog || !key2) { - radv_shader_part_unref(device, epilog); - free(key2); - u_rwlock_wrunlock(&device->tcs_epilogs_lock); - return NULL; - } - memcpy(key2, &key, sizeof(*key2)); - _mesa_hash_table_insert_pre_hashed(device->tcs_epilogs, hash, key2, epilog); - - u_rwlock_wrunlock(&device->tcs_epilogs_lock); - return epilog; - } - - return epilog_entry->data; + return radv_shader_part_cache_get(device, &device->tcs_epilogs, &cmd_buffer->tcs_epilogs, &key); } static void diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c index 8aef8e62125..1258a5ae2e4 100644 --- a/src/amd/vulkan/radv_device.c +++ b/src/amd/vulkan/radv_device.c @@ -156,12 +156,39 @@ radv_device_finish_border_color(struct radv_device *device) } } +static struct radv_shader_part * +_radv_create_vs_prolog(struct radv_device *device, const void *_key) +{ + struct radv_vs_prolog_key *key = (struct radv_vs_prolog_key *)_key; + return radv_create_vs_prolog(device, key); +} + +static uint32_t +radv_hash_vs_prolog(const void *key_) +{ + const struct radv_vs_prolog_key *key = key_; + return _mesa_hash_data(key, sizeof(*key)); +} + +static bool +radv_cmp_vs_prolog(const void *a_, const void *b_) +{ + const struct radv_vs_prolog_key *a = a_; + const struct radv_vs_prolog_key *b = b_; + + return memcmp(a, b, sizeof(*a)) == 0; +} + +static struct radv_shader_part_cache_ops vs_prolog_ops = { + .create = _radv_create_vs_prolog, + .hash = radv_hash_vs_prolog, + .equals = radv_cmp_vs_prolog, +}; + static VkResult radv_device_init_vs_prologs(struct radv_device *device) { - u_rwlock_init(&device->vs_prologs_lock); - device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog); - if 
(!device->vs_prologs) + if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops)) return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); /* don't pre-compile prologs if we want to print them */ @@ -208,13 +235,8 @@ radv_device_init_vs_prologs(struct radv_device *device) static void radv_device_finish_vs_prologs(struct radv_device *device) { - if (device->vs_prologs) { - hash_table_foreach (device->vs_prologs, entry) { - free((void *)entry->key); - radv_shader_part_unref(device, entry->data); - } - _mesa_hash_table_destroy(device->vs_prologs, NULL); - } + if (device->vs_prologs.ops) + radv_shader_part_cache_finish(device, &device->vs_prologs); for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++) { if (!device->simple_vs_prologs[i]) @@ -231,54 +253,64 @@ radv_device_finish_vs_prologs(struct radv_device *device) } } -static VkResult -radv_device_init_ps_epilogs(struct radv_device *device) +static struct radv_shader_part * +_radv_create_ps_epilog(struct radv_device *device, const void *_key) { - u_rwlock_init(&device->ps_epilogs_lock); - - device->ps_epilogs = _mesa_hash_table_create(NULL, &radv_hash_ps_epilog, &radv_cmp_ps_epilog); - if (!device->ps_epilogs) - return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); - - return VK_SUCCESS; + struct radv_ps_epilog_key *key = (struct radv_ps_epilog_key *)_key; + return radv_create_ps_epilog(device, key, NULL); } -static void -radv_device_finish_ps_epilogs(struct radv_device *device) +static uint32_t +radv_hash_ps_epilog(const void *key_) { - if (device->ps_epilogs) { - hash_table_foreach (device->ps_epilogs, entry) { - free((void *)entry->key); - radv_shader_part_unref(device, entry->data); - } - _mesa_hash_table_destroy(device->ps_epilogs, NULL); - } + const struct radv_ps_epilog_key *key = key_; + return _mesa_hash_data(key, sizeof(*key)); } -static VkResult -radv_device_init_tcs_epilogs(struct radv_device *device) +static bool 
+radv_cmp_ps_epilog(const void *a_, const void *b_) { - u_rwlock_init(&device->tcs_epilogs_lock); + const struct radv_ps_epilog_key *a = a_; + const struct radv_ps_epilog_key *b = b_; - device->tcs_epilogs = _mesa_hash_table_create(NULL, &radv_hash_tcs_epilog, &radv_cmp_tcs_epilog); - if (!device->tcs_epilogs) - return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY); + return memcmp(a, b, sizeof(*a)) == 0; +} - return VK_SUCCESS; +static struct radv_shader_part_cache_ops ps_epilog_ops = { + .create = _radv_create_ps_epilog, + .hash = radv_hash_ps_epilog, + .equals = radv_cmp_ps_epilog, +}; + +static struct radv_shader_part * +_radv_create_tcs_epilog(struct radv_device *device, const void *_key) +{ + struct radv_tcs_epilog_key *key = (struct radv_tcs_epilog_key *)_key; + return radv_create_tcs_epilog(device, key); } -static void -radv_device_finish_tcs_epilogs(struct radv_device *device) +static uint32_t +radv_hash_tcs_epilog(const void *key_) { - if (device->tcs_epilogs) { - hash_table_foreach (device->tcs_epilogs, entry) { - free((void *)entry->key); - radv_shader_part_unref(device, entry->data); - } - _mesa_hash_table_destroy(device->tcs_epilogs, NULL); - } + const struct radv_tcs_epilog_key *key = key_; + return _mesa_hash_data(key, sizeof(*key)); } +static bool +radv_cmp_tcs_epilog(const void *a_, const void *b_) +{ + const struct radv_tcs_epilog_key *a = a_; + const struct radv_tcs_epilog_key *b = b_; + + return memcmp(a, b, sizeof(*a)) == 0; +} + +static struct radv_shader_part_cache_ops tcs_epilog_ops = { + .create = _radv_create_tcs_epilog, + .hash = radv_hash_tcs_epilog, + .equals = radv_cmp_tcs_epilog, +}; + VkResult radv_device_init_vrs_state(struct radv_device *device) { @@ -1087,15 +1119,17 @@ radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCr } if (tcs_epilogs) { - result = radv_device_init_tcs_epilogs(device); - if (result != VK_SUCCESS) + if (!radv_shader_part_cache_init(&device->tcs_epilogs, 
&tcs_epilog_ops)) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; + } } if (ps_epilogs) { - result = radv_device_init_ps_epilogs(device); - if (result != VK_SUCCESS) + if (!radv_shader_part_cache_init(&device->ps_epilogs, &ps_epilog_ops)) { + result = VK_ERROR_OUT_OF_HOST_MEMORY; goto fail; + } } if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS)) @@ -1164,8 +1198,10 @@ fail: radv_device_finish_notifier(device); radv_device_finish_vs_prologs(device); - radv_device_finish_tcs_epilogs(device); - radv_device_finish_ps_epilogs(device); + if (device->tcs_epilogs.ops) + radv_shader_part_cache_finish(device, &device->tcs_epilogs); + if (device->ps_epilogs.ops) + radv_shader_part_cache_finish(device, &device->ps_epilogs); radv_device_finish_border_color(device); radv_destroy_shader_upload_queue(device); @@ -1217,8 +1253,10 @@ radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator) radv_device_finish_notifier(device); radv_device_finish_vs_prologs(device); - radv_device_finish_tcs_epilogs(device); - radv_device_finish_ps_epilogs(device); + if (device->tcs_epilogs.ops) + radv_shader_part_cache_finish(device, &device->tcs_epilogs); + if (device->ps_epilogs.ops) + radv_shader_part_cache_finish(device, &device->ps_epilogs); radv_device_finish_border_color(device); radv_device_finish_vrs_image(device); diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index dc6832ce595..0ca9b5d58e4 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -1099,22 +1099,16 @@ struct radv_device { struct radv_device_memory *mem; } vrs; - struct u_rwlock vs_prologs_lock; - struct hash_table *vs_prologs; - /* Prime blit sdma queue */ struct radv_queue *private_sdma_queue; + struct radv_shader_part_cache vs_prologs; struct radv_shader_part *simple_vs_prologs[MAX_VERTEX_ATTRIBS]; struct radv_shader_part *instance_rate_vs_prologs[816]; - /* PS epilogs */ - struct u_rwlock ps_epilogs_lock; - struct hash_table *ps_epilogs; 
+ struct radv_shader_part_cache ps_epilogs; - /* TCS epilogs */ - struct u_rwlock tcs_epilogs_lock; - struct hash_table *tcs_epilogs; + struct radv_shader_part_cache tcs_epilogs; simple_mtx_t trace_mtx; @@ -1838,6 +1832,10 @@ struct radv_cmd_buffer { uint64_t mec_inv_pred_va; /* For inverted predication when using MEC. */ bool mec_inv_pred_emitted; /* To ensure we don't have to repeat inverting the VA. */ + struct set vs_prologs; + struct set ps_epilogs; + struct set tcs_epilogs; + /** * Gang state. * Used when the command buffer needs work done on a different queue @@ -2004,11 +2002,6 @@ void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer); uint32_t radv_get_vgt_index_size(uint32_t type); unsigned radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs); -uint32_t radv_hash_vs_prolog(const void *key_); -bool radv_cmp_vs_prolog(const void *a_, const void *b_); - -uint32_t radv_hash_ps_epilog(const void *key_); -bool radv_cmp_ps_epilog(const void *a_, const void *b_); struct radv_ps_epilog_state { uint8_t color_attachment_count; @@ -2025,9 +2018,6 @@ struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device * const struct radv_ps_epilog_state *state, bool disable_mrt_compaction); -uint32_t radv_hash_tcs_epilog(const void *key_); -bool radv_cmp_tcs_epilog(const void *a_, const void *b_); - bool radv_needs_null_export_workaround(const struct radv_device *device, const struct radv_shader *ps, unsigned custom_blend_mode); diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index 25a43b1ec08..8ce5c2ba4ca 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2650,6 +2650,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke if (!prolog) goto fail; + prolog->key.vs = *key; prolog->nontrivial_divisors = key->nontrivial_divisors; if (options.dump_shader) { @@ -2704,6 +2705,8 @@ radv_create_ps_epilog(struct radv_device *device, const 
struct radv_ps_epilog_ke if (!epilog) goto fail; + epilog->key.ps = *key; + if (options.dump_shader) { fprintf(stderr, "Fragment epilog"); fprintf(stderr, "\ndisasm:\n%s\n", epilog->disasm_string); @@ -2757,6 +2760,8 @@ radv_create_tcs_epilog(struct radv_device *device, const struct radv_tcs_epilog_ if (!epilog) goto fail; + epilog->key.tcs = *key; + if (options.dump_shader) { fprintf(stderr, "TCS epilog"); fprintf(stderr, "\ndisasm:\n%s\n", epilog->disasm_string);
