Module: Mesa Branch: main Commit: 46f5359238ed410d7af7a058f46a954f5c5912aa URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=46f5359238ed410d7af7a058f46a954f5c5912aa
Author: Sagar Ghuge <[email protected]> Date: Tue Nov 28 15:14:48 2023 -0800 anv: Invalidate aux map for copy/video engine Make sure to invalidate the aux map table for copy/video engines on platforms that have the aux map. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/9231 Signed-off-by: Sagar Ghuge <[email protected]> Reviewed-by: Lionel Landwerlin <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26409> --- src/intel/vulkan/anv_genX.h | 6 ++ src/intel/vulkan/genX_cmd_buffer.c | 110 +++++++++++++++++++++++++++---------- 2 files changed, 88 insertions(+), 28 deletions(-) diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h index 88770f153b2..c828d97d804 100644 --- a/src/intel/vulkan/anv_genX.h +++ b/src/intel/vulkan/anv_genX.h @@ -106,6 +106,12 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, uint32_t current_pipeline, enum anv_pipe_bits bits, enum anv_pipe_bits *emitted_flush_bits); +void +genX(invalidate_aux_map)(struct anv_batch *batch, + struct anv_device *device, + enum intel_engine_class engine_class, + enum anv_pipe_bits bits); + void genX(emit_so_memcpy_init)(struct anv_memcpy_state *state, struct anv_device *device, diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2e7b0e5e37a..3c0fcff3801 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -1361,6 +1361,63 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer, cmd_buffer->state.current_l3_config = cfg; } +ALWAYS_INLINE void +genX(invalidate_aux_map)(struct anv_batch *batch, + struct anv_device *device, + enum intel_engine_class engine_class, + enum anv_pipe_bits bits) +{ +#if GFX_VER == 12 + if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) { + uint32_t register_addr = 0; + switch (engine_class) { + case INTEL_ENGINE_CLASS_COMPUTE: + register_addr = GENX(COMPCS0_CCS_AUX_INV_num); + break; + case 
INTEL_ENGINE_CLASS_COPY: +#if GFX_VERx10 >= 125 + register_addr = GENX(BCS_CCS_AUX_INV_num); +#endif + break; + case INTEL_ENGINE_CLASS_VIDEO: + register_addr = GENX(VD0_CCS_AUX_INV_num); + break; + case INTEL_ENGINE_CLASS_RENDER: + default: + register_addr = GENX(GFX_CCS_AUX_INV_num); + break; + } + + anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { + lri.RegisterOffset = register_addr; + lri.DataDWord = 1; + } + + /* Wa_16018063123 - emit fast color dummy blit before MI_FLUSH_DW. */ + if (intel_needs_workaround(device->info, 16018063123) && + engine_class == INTEL_ENGINE_CLASS_COPY) { + genX(batch_emit_fast_color_dummy_blit)(batch, device); + } + + /* HSD 22012751911: SW Programming sequence when issuing aux invalidation: + * + * "Poll Aux Invalidation bit once the invalidation is set + * (Register 4208 bit 0)" + */ + anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) { + sem.CompareOperation = COMPARE_SAD_EQUAL_SDD; + sem.WaitMode = PollingMode; + sem.RegisterPollMode = true; + sem.SemaphoreDataDword = 0x0; + sem.SemaphoreAddress = + anv_address_from_u64(register_addr); + } + } +#else + assert(!device->info->has_aux_map); +#endif +} + ALWAYS_INLINE enum anv_pipe_bits genX(emit_apply_pipe_flushes)(struct anv_batch *batch, struct anv_device *device, @@ -1642,32 +1699,10 @@ genX(emit_apply_pipe_flushes)(struct anv_batch *batch, genx_batch_emit_pipe_control_write(batch, device->info, current_pipeline, sync_op, addr, 0, bits); -#if GFX_VER == 12 - if ((bits & ANV_PIPE_AUX_TABLE_INVALIDATE_BIT) && device->info->has_aux_map) { - uint64_t register_addr = - current_pipeline == GPGPU ? 
GENX(COMPCS0_CCS_AUX_INV_num) : - GENX(GFX_CCS_AUX_INV_num); - anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { - lri.RegisterOffset = register_addr; - lri.DataDWord = 1; - } - /* HSD 22012751911: SW Programming sequence when issuing aux invalidation: - * - * "Poll Aux Invalidation bit once the invalidation is set - * (Register 4208 bit 0)" - */ - anv_batch_emit(batch, GENX(MI_SEMAPHORE_WAIT), sem) { - sem.CompareOperation = COMPARE_SAD_EQUAL_SDD; - sem.WaitMode = PollingMode; - sem.RegisterPollMode = true; - sem.SemaphoreDataDword = 0x0; - sem.SemaphoreAddress = - anv_address_from_u64(register_addr); - } - } -#else - assert(!device->info->has_aux_map); -#endif + enum intel_engine_class engine_class = + current_pipeline == GPGPU ? INTEL_ENGINE_CLASS_COMPUTE : + INTEL_ENGINE_CLASS_RENDER; + genX(invalidate_aux_map)(batch, device, engine_class, bits); bits &= ~ANV_PIPE_INVALIDATE_BITS; } @@ -1704,8 +1739,16 @@ genX(cmd_buffer_apply_pipe_flushes)(struct anv_cmd_buffer *cmd_buffer) else if (bits == 0) return; - if (anv_cmd_buffer_is_blitter_queue(cmd_buffer)) + if (anv_cmd_buffer_is_blitter_queue(cmd_buffer) || + anv_cmd_buffer_is_video_queue(cmd_buffer)) { + if (bits & ANV_PIPE_INVALIDATE_BITS) { + genX(invalidate_aux_map)(&cmd_buffer->batch, cmd_buffer->device, + cmd_buffer->queue_family->engine_class, bits); + bits &= ~ANV_PIPE_INVALIDATE_BITS; + } + cmd_buffer->state.pending_pipe_bits = bits; return; + } const bool trace_flush = (bits & (ANV_PIPE_FLUSH_BITS | @@ -3340,8 +3383,18 @@ genX(BeginCommandBuffer)( trace_intel_begin_cmd_buffer(&cmd_buffer->trace); if (anv_cmd_buffer_is_video_queue(cmd_buffer) || - anv_cmd_buffer_is_blitter_queue(cmd_buffer)) + anv_cmd_buffer_is_blitter_queue(cmd_buffer)) { + /* Re-emit the aux table register in every command buffer. This way we're + * ensured that we have the table even if this command buffer doesn't + * initialize any images. 
+ */ + if (cmd_buffer->device->info->has_aux_map) { + anv_add_pending_pipe_bits(cmd_buffer, + ANV_PIPE_AUX_TABLE_INVALIDATE_BIT, + "new cmd buffer with aux-tt"); + } return VK_SUCCESS; + } #if GFX_VER >= 12 if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY && @@ -3532,6 +3585,7 @@ end_command_buffer(struct anv_cmd_buffer *cmd_buffer) if (anv_cmd_buffer_is_video_queue(cmd_buffer) || anv_cmd_buffer_is_blitter_queue(cmd_buffer)) { trace_intel_end_cmd_buffer(&cmd_buffer->trace, cmd_buffer->vk.level); + genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer); anv_cmd_buffer_end_batch_buffer(cmd_buffer); return VK_SUCCESS; }
