From: Dave Airlie <airl...@redhat.com>

This ports the regalloc hang workaround from radeonsi. I'm not 100%
sure whether this is only needed on the GFX queue, as the workaround
mentions that async compute does not require it.

Signed-off-by: Dave Airlie <airl...@redhat.com>
---
 src/amd/vulkan/radv_cmd_buffer.c | 10 ++++++++++
 src/amd/vulkan/radv_pipeline.c   |  8 ++++++++
 src/amd/vulkan/radv_private.h    |  3 +++
 3 files changed, 21 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 4b08781..4415e36 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2290,6 +2290,16 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
        radeon_emit(cmd_buffer->cs,
                    S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
 
+       /* HW bug workaround when CS threadgroups > 256 threads and async
+        * compute isn't used, i.e. only one compute job can run at a time.
+        * If async compute is possible, the threadgroup size must be limited
+        * to 256 threads on all queues to avoid the bug.
+        * Only SI and certain CIK chips are affected.
+        */
+       if (pipeline->compute.regalloc_hang) {
+               cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+                       RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+       }
        assert(cmd_buffer->cs->cdw <= cdw_max);
 }
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 496c06a..fcfe7dc 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2376,6 +2376,14 @@ static VkResult radv_compute_pipeline_create(
                return result;
        }
 
+       if ((device->physical_device->rad_info.chip_class == SI ||
+            device->physical_device->rad_info.family == CHIP_BONAIRE ||
+            device->physical_device->rad_info.family == CHIP_KABINI) &&
+           (pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
+            pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
+            pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2] > 256))
+               pipeline->compute.regalloc_hang = true;
+
        *pPipeline = radv_pipeline_to_handle(pipeline);
 
        if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 8cd5ec0..4eac84c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1094,6 +1094,9 @@ struct radv_pipeline {
                        struct radv_prim_vertex_count prim_vertex_count;
                        bool can_use_guardband;
                } graphics;
+               struct {
+                       bool regalloc_hang;
+               } compute;
        };
 
        unsigned max_waves;
-- 
2.9.4

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to