Before bcae327469 this code was emitting a CP DMA prefetch packet even on SI, but that apparently hasn't caused too many problems. Since that commit, the CP DMA code always sets the CIK+-only bit for prefetch. Just follow radeonsi here and don't try to prefetch at all on SI.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101334 Fixes: bcae327469 "radv: realign cp dma code with radeonsi" Signed-off-by: Grazvydas Ignotas <[email protected]> --- src/amd/vulkan/radv_cmd_buffer.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 1ac9de1..b08f218 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -529,10 +529,18 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer, radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, raster->pa_su_sc_mode_cntl); } +static inline void +radv_emit_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, + unsigned size) +{ + if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) + si_cp_dma_prefetch(cmd_buffer, va, size); +} + static void radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline, struct radv_shader_variant *shader, struct ac_vs_output_info *outinfo) @@ -540,11 +548,11 @@ radv_emit_hw_vs(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); unsigned export_count; ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); export_count = MAX2(1, outinfo->param_exports); radeon_set_context_reg(cmd_buffer->cs, R_0286C4_SPI_VS_OUT_CONFIG, S_0286C4_VS_EXPORT_COUNT(export_count - 1)); @@ -589,11 +597,11 @@ radv_emit_hw_es(struct radv_cmd_buffer *cmd_buffer, { struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_context_reg(cmd_buffer->cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 
outinfo->esgs_itemsize / 4); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); radeon_emit(cmd_buffer->cs, va >> 8); @@ -609,11 +617,11 @@ radv_emit_hw_ls(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); uint32_t rsrc2 = shader->rsrc2; ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); @@ -633,11 +641,11 @@ radv_emit_hw_hs(struct radv_cmd_buffer *cmd_buffer, { struct radeon_winsys *ws = cmd_buffer->device->ws; uint64_t va = ws->buffer_get_va(shader->bo); ws->cs_add_buffer(cmd_buffer->cs, shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); + radv_emit_prefetch(cmd_buffer, va, shader->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); radeon_emit(cmd_buffer->cs, shader->rsrc1); @@ -767,11 +775,12 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer, S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); va = ws->buffer_get_va(gs->bo); ws->cs_add_buffer(cmd_buffer->cs, gs->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, gs->code_size); + radv_emit_prefetch(cmd_buffer, va, gs->code_size); + radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); radeon_emit(cmd_buffer->cs, gs->rsrc1); radeon_emit(cmd_buffer->cs, gs->rsrc2); @@ -808,11 +817,11 @@ radv_emit_fragment_shader(struct radv_cmd_buffer *cmd_buffer, ps = pipeline->shaders[MESA_SHADER_FRAGMENT]; va = ws->buffer_get_va(ps->bo); ws->cs_add_buffer(cmd_buffer->cs, ps->bo, 8); - 
si_cp_dma_prefetch(cmd_buffer, va, ps->code_size); + radv_emit_prefetch(cmd_buffer, va, ps->code_size); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); radeon_emit(cmd_buffer->cs, va >> 8); radeon_emit(cmd_buffer->cs, va >> 40); radeon_emit(cmd_buffer->cs, ps->rsrc1); @@ -2213,11 +2222,11 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer) compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; va = ws->buffer_get_va(compute_shader->bo); ws->cs_add_buffer(cmd_buffer->cs, compute_shader->bo, 8); - si_cp_dma_prefetch(cmd_buffer, va, compute_shader->code_size); + radv_emit_prefetch(cmd_buffer, va, compute_shader->code_size); MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 16); radeon_set_sh_reg_seq(cmd_buffer->cs, R_00B830_COMPUTE_PGM_LO, 2); -- 2.7.4 _______________________________________________ mesa-dev mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/mesa-dev
