Module: Mesa Branch: main Commit: a73e0e9a042045bd2c063bd9d8d7a6ef95cbf5fc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a73e0e9a042045bd2c063bd9d8d7a6ef95cbf5fc
Author: Chia-I Wu <[email protected]> Date: Thu Sep 28 09:40:36 2023 -0700 anv: decompress on upload for emulated formats Add anv_astc_emu_decompress to decompress the raw texel data to the hidden plane. Call anv_astc_emu_decompress from anv_CmdCopyImage2 and anv_CmdCopyBufferToImage2. v2: support transfer queue and add missing flushes (Lionel) Signed-off-by: Chia-I Wu <[email protected]> Reviewed-by: Lionel Landwerlin <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25467> --- src/intel/vulkan/anv_astc_emu.c | 195 ++++++++++++++++++++++++++++++++++++++++ src/intel/vulkan/anv_blorp.c | 90 +++++++++++++++++-- src/intel/vulkan/anv_device.c | 4 + src/intel/vulkan/anv_image.c | 14 ++- src/intel/vulkan/anv_private.h | 11 +++ src/intel/vulkan/meson.build | 1 + 6 files changed, 305 insertions(+), 10 deletions(-) diff --git a/src/intel/vulkan/anv_astc_emu.c b/src/intel/vulkan/anv_astc_emu.c new file mode 100644 index 00000000000..d3014c7cde4 --- /dev/null +++ b/src/intel/vulkan/anv_astc_emu.c @@ -0,0 +1,195 @@ +/* + * Copyright 2023 Google LLC + * SPDX-License-Identifier: MIT + */ + +#include "anv_private.h" + +static void +astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer, + struct anv_image_view *iview, + struct anv_image *image, + VkFormat format, + VkImageUsageFlags usage, + uint32_t level, uint32_t layer) +{ + struct anv_device *device = cmd_buffer->device; + + const VkImageViewCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = &(VkImageViewUsageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, + .usage = usage, + }, + .image = anv_image_to_handle(image), + /* XXX we only need 2D but the shader expects 2D_ARRAY */ + .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY, + .format = format, + .subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = level, + .levelCount = 1, + .baseArrayLayer = layer, + .layerCount = 1, + }, + }; + + memset(iview, 0, 
sizeof(*iview)); + anv_image_view_init(device, iview, &create_info, + &cmd_buffer->surface_state_stream); +} + +static void +astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer, + struct anv_push_descriptor_set *push_set, + const struct vk_texcompress_astc_write_descriptor_set *writes) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_descriptor_set_layout *layout = + anv_descriptor_set_layout_from_handle( + device->texcompress_astc->ds_layout); + + memset(push_set, 0, sizeof(*push_set)); + anv_push_descriptor_set_init(cmd_buffer, push_set, layout); + + anv_descriptor_set_write(device, &push_set->set, + ARRAY_SIZE(writes->descriptor_set), + writes->descriptor_set); +} + +static void +astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer, + VkFormat astc_format, + VkImageLayout layout, + VkImageView src_view, + VkImageView dst_view, + VkRect2D rect) +{ + struct anv_device *device = cmd_buffer->device; + VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer); + + VkPipeline pipeline = + vk_texcompress_astc_get_decode_pipeline(&device->vk, &device->vk.alloc, + device->texcompress_astc, + VK_NULL_HANDLE, astc_format); + if (pipeline == VK_NULL_HANDLE) { + anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_UNKNOWN); + return; + } + + anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + + struct vk_texcompress_astc_write_descriptor_set writes; + vk_texcompress_astc_fill_write_descriptor_sets(device->texcompress_astc, + &writes, src_view, layout, + dst_view, astc_format); + + struct anv_push_descriptor_set push_set; + astc_emu_init_push_descriptor_set(cmd_buffer, &push_set, &writes); + + VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set); + anv_CmdBindDescriptorSets(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, + device->texcompress_astc->p_layout, 0, 1, &set, + 0, NULL); + + const uint32_t push_const[] = { + rect.offset.x, + rect.offset.y, + (rect.offset.x + rect.extent.width) * + 
vk_format_get_blockwidth(astc_format), + (rect.offset.y + rect.extent.height) * + vk_format_get_blockheight(astc_format), + false, /* we don't use VK_IMAGE_VIEW_TYPE_3D */ + }; + anv_CmdPushConstants(cmd_buffer_, device->texcompress_astc->p_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, + sizeof(push_const), push_const); + + /* each workgroup processes 2x2 texel blocks */ + rect.extent.width = DIV_ROUND_UP(rect.extent.width, 2); + rect.extent.height = DIV_ROUND_UP(rect.extent.height, 2); + + anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0, + rect.extent.width, + rect.extent.height, + 1); + + anv_push_descriptor_set_finish(&push_set); +} + +void +anv_astc_emu_decompress(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout layout, + const VkImageSubresourceLayers *subresource, + VkOffset3D block_offset, + VkExtent3D block_extent) +{ + assert(image->emu_plane_format != VK_FORMAT_UNDEFINED); + + const VkRect2D rect = { + .offset = { + .x = block_offset.x, + .y = block_offset.y, + }, + .extent = { + .width = block_extent.width, + .height = block_extent.height, + }, + }; + + /* decompress one layer at a time because anv_image_fill_surface_state + * requires an uncompressed view of a compressed image to be single layer + */ + const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D; + const uint32_t slice_base = is_3d ? + block_offset.z : subresource->baseArrayLayer; + const uint32_t slice_count = is_3d ? 
+ block_extent.depth : subresource->layerCount; + + struct anv_cmd_saved_state saved; + anv_cmd_buffer_save_state(cmd_buffer, + ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE | + ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 | + ANV_CMD_SAVED_STATE_PUSH_CONSTANTS, + &saved); + + for (uint32_t i = 0; i < slice_count; i++) { + struct anv_image_view src_view; + struct anv_image_view dst_view; + astc_emu_init_image_view(cmd_buffer, &src_view, image, + VK_FORMAT_R32G32B32A32_UINT, + VK_IMAGE_USAGE_SAMPLED_BIT, + subresource->mipLevel, slice_base + i); + astc_emu_init_image_view(cmd_buffer, &dst_view, image, + VK_FORMAT_R8G8B8A8_UINT, + VK_IMAGE_USAGE_STORAGE_BIT, + subresource->mipLevel, slice_base + i); + + astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout, + anv_image_view_to_handle(&src_view), + anv_image_view_to_handle(&dst_view), + rect); + } + + anv_cmd_buffer_restore_state(cmd_buffer, &saved); +} + +VkResult +anv_device_init_astc_emu(struct anv_device *device) +{ + if (!device->physical->emu_astc_ldr) + return VK_SUCCESS; + + return vk_texcompress_astc_init(&device->vk, &device->vk.alloc, + VK_NULL_HANDLE, &device->texcompress_astc); +} + +void +anv_device_finish_astc_emu(struct anv_device *device) +{ + if (device->texcompress_astc) { + vk_texcompress_astc_finish(&device->vk, &device->vk.alloc, + device->texcompress_astc); + } +} diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c index 414893d2343..e1d6e988905 100644 --- a/src/intel/vulkan/anv_blorp.c +++ b/src/intel/vulkan/anv_blorp.c @@ -398,6 +398,28 @@ end_main_rcs_cmd_buffer_done(struct anv_cmd_buffer *cmd_buffer, syncpoint); } +static bool +anv_blorp_execute_on_companion(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *dst_image) +{ + /* MSAA images have to be dealt with on the companion RCS command buffer + * for both CCS && BCS engines. 
+ */ + if ((anv_cmd_buffer_is_blitter_queue(cmd_buffer) || + anv_cmd_buffer_is_compute_queue(cmd_buffer)) && + dst_image->vk.samples > 1) + return true; + + /* Emulation of formats is done through a compute shader, so we need + * the companion command buffer for the BCS engine. + */ + if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) && + dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) + return true; + + return false; +} + void anv_CmdCopyImage2( VkCommandBuffer commandBuffer, const VkCopyImageInfo2* pCopyImageInfo) @@ -407,12 +429,9 @@ void anv_CmdCopyImage2( ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage); struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer; - UNUSED struct anv_state rcs_done = ANV_STATE_NULL;; + UNUSED struct anv_state rcs_done = ANV_STATE_NULL; - if (cmd_buffer->device->info->verx10 >= 125 && - dst_image->vk.samples > 1 && - (anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) || - anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) { + if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) { rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer); cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer; } @@ -429,6 +448,28 @@ void anv_CmdCopyImage2( anv_blorp_batch_finish(&batch); + if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) { + assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer)); + const enum anv_pipe_bits pipe_bits = + anv_cmd_buffer_is_compute_queue(cmd_buffer) ? 
+ ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + anv_add_pending_pipe_bits(cmd_buffer, pipe_bits, + "Copy flush before decompression"); + + for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) { + const VkImageCopy2 *region = &pCopyImageInfo->pRegions[r]; + const VkOffset3D block_offset = vk_image_offset_to_elements( + &dst_image->vk, region->dstOffset); + const VkExtent3D block_extent = vk_image_extent_to_elements( + &src_image->vk, region->extent); + anv_astc_emu_decompress(cmd_buffer, dst_image, + pCopyImageInfo->dstImageLayout, + &region->dstSubresource, + block_offset, block_extent); + } + } + if (rcs_done.alloc_size) end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done); } @@ -563,6 +604,14 @@ void anv_CmdCopyBufferToImage2( ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage); + struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer; + UNUSED struct anv_state rcs_done = ANV_STATE_NULL; + + if (anv_blorp_execute_on_companion(cmd_buffer, dst_image)) { + rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer); + cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer; + } + struct blorp_batch batch; anv_blorp_batch_init(cmd_buffer, &batch, 0); @@ -573,6 +622,32 @@ void anv_CmdCopyBufferToImage2( } anv_blorp_batch_finish(&batch); + + if (dst_image->emu_plane_format != VK_FORMAT_UNDEFINED) { + assert(!anv_cmd_buffer_is_blitter_queue(cmd_buffer)); + const enum anv_pipe_bits pipe_bits = + anv_cmd_buffer_is_compute_queue(cmd_buffer) ? 
+ ANV_PIPE_HDC_PIPELINE_FLUSH_BIT : + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT; + anv_add_pending_pipe_bits(cmd_buffer, pipe_bits, + "Copy flush before decompression"); + + for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) { + const VkBufferImageCopy2 *region = + &pCopyBufferToImageInfo->pRegions[r]; + const VkOffset3D block_offset = vk_image_offset_to_elements( + &dst_image->vk, region->imageOffset); + const VkExtent3D block_extent = vk_image_extent_to_elements( + &dst_image->vk, region->imageExtent); + anv_astc_emu_decompress(cmd_buffer, dst_image, + pCopyBufferToImageInfo->dstImageLayout, + &region->imageSubresource, + block_offset, block_extent); + } + } + + if (rcs_done.alloc_size) + end_main_rcs_cmd_buffer_done(main_cmd_buffer, rcs_done); } static void @@ -1018,10 +1093,7 @@ void anv_CmdClearColorImage( struct anv_cmd_buffer *main_cmd_buffer = cmd_buffer; UNUSED struct anv_state rcs_done = ANV_STATE_NULL; - if (cmd_buffer->device->info->verx10 >= 125 && - image->vk.samples > 1 && - (anv_cmd_buffer_is_blitter_queue(main_cmd_buffer) || - anv_cmd_buffer_is_compute_queue(main_cmd_buffer))) { + if (anv_blorp_execute_on_companion(cmd_buffer, image)) { rcs_done = record_main_rcs_cmd_buffer_done(cmd_buffer); cmd_buffer = cmd_buffer->companion_rcs_cmd_buffer; } diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 51f9124ed31..7c3968180c6 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -3546,6 +3546,8 @@ VkResult anv_CreateDevice( anv_device_init_internal_kernels(device); + anv_device_init_astc_emu(device); + anv_device_perf_init(device); anv_device_utrace_init(device); @@ -3672,6 +3674,8 @@ void anv_DestroyDevice( anv_device_finish_rt_shaders(device); + anv_device_finish_astc_emu(device); + anv_device_finish_internal_kernels(device); vk_pipeline_cache_destroy(device->internal_cache, NULL); diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 99e69ee32db..6a27a1d6a3a 
100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -2875,7 +2875,19 @@ anv_image_fill_surface_state(struct anv_device *device, enum anv_image_view_state_flags flags, struct anv_surface_state *state_inout) { - const uint32_t plane = anv_image_aspect_to_plane(image, aspect); + uint32_t plane = anv_image_aspect_to_plane(image, aspect); + if (image->emu_plane_format != VK_FORMAT_UNDEFINED) { + const uint16_t view_bpb = isl_format_get_layout(view_in->format)->bpb; + enum isl_format format = + image->planes[plane].primary_surface.isl.format; + + /* redirect to the hidden plane if not size-compatible */ + if (isl_format_get_layout(format)->bpb != view_bpb) { + plane = image->n_planes; + format = image->planes[plane].primary_surface.isl.format; + assert(isl_format_get_layout(format)->bpb == view_bpb); + } + } const struct anv_surface *surface = &image->planes[plane].primary_surface, *aux_surface = &image->planes[plane].aux_surface; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 8d3a69e0b02..eb21036de22 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1670,6 +1670,8 @@ struct anv_device { * resources but never use them. 
*/ bool using_sparse; + + struct vk_texcompress_astc_state *texcompress_astc; }; static inline uint32_t @@ -5356,6 +5358,15 @@ struct anv_memcpy_state { VkResult anv_device_init_internal_kernels(struct anv_device *device); void anv_device_finish_internal_kernels(struct anv_device *device); +VkResult anv_device_init_astc_emu(struct anv_device *device); +void anv_device_finish_astc_emu(struct anv_device *device); +void anv_astc_emu_decompress(struct anv_cmd_buffer *cmd_buffer, + struct anv_image *image, + VkImageLayout layout, + const VkImageSubresourceLayers *subresource, + VkOffset3D block_offset, + VkExtent3D block_extent); + /* This structure is used in 2 scenarios : * * - copy utrace timestamps from command buffer so that command buffer can diff --git a/src/intel/vulkan/meson.build b/src/intel/vulkan/meson.build index 60058c8cd50..a120e422cb7 100644 --- a/src/intel/vulkan/meson.build +++ b/src/intel/vulkan/meson.build @@ -183,6 +183,7 @@ libanv_files = files( 'anv_private.h', 'anv_queue.c', 'anv_sparse.c', + 'anv_astc_emu.c', 'anv_util.c', 'anv_utrace.c', 'anv_va.c',
