Module: Mesa
Branch: main
Commit: cbbb85c907a2cc7987ddc701380f088629a02d80
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=cbbb85c907a2cc7987ddc701380f088629a02d80

Author: Timur Kristóf <[email protected]>
Date:   Sat Oct 14 17:17:07 2023 +0200

radv: Allow gang submit use cases other than task shaders.

Signed-off-by: Timur Kristóf <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25796>

---

 src/amd/vulkan/radv_cmd_buffer.c | 18 +++++++++++-------
 src/amd/vulkan/radv_queue.c      | 25 +++++++++++++------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 77ca2582415..1d8f8539751 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -6166,7 +6166,9 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
 
    radv_emit_mip_change_flush_default(cmd_buffer);
 
-   if (cmd_buffer->qf == RADV_QUEUE_GENERAL || cmd_buffer->qf == 
RADV_QUEUE_COMPUTE) {
+   const bool is_gfx_or_ace = cmd_buffer->qf == RADV_QUEUE_GENERAL || 
cmd_buffer->qf == RADV_QUEUE_COMPUTE;
+
+   if (is_gfx_or_ace) {
       if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6)
          cmd_buffer->state.flush_bits |=
             RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 
RADV_CMD_FLAG_WB_L2;
@@ -6189,14 +6191,16 @@ radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
        */
       if (cmd_buffer->gds_needed)
          cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+   }
 
-      /* Finalize the internal compute command stream, if it exists. */
-      if (cmd_buffer->gang.cs) {
-         VkResult result = radv_gang_finalize(cmd_buffer);
-         if (result != VK_SUCCESS)
-            return vk_error(cmd_buffer, result);
-      }
+   /* Finalize the internal compute command stream, if it exists. */
+   if (cmd_buffer->gang.cs) {
+      VkResult result = radv_gang_finalize(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return vk_error(cmd_buffer, result);
+   }
 
+   if (is_gfx_or_ace) {
       si_emit_cache_flush(cmd_buffer);
 
       /* Make sure CP DMA is idle at the end of IBs because the kernel
diff --git a/src/amd/vulkan/radv_queue.c b/src/amd/vulkan/radv_queue.c
index 7894930f015..3160824d461 100644
--- a/src/amd/vulkan/radv_queue.c
+++ b/src/amd/vulkan/radv_queue.c
@@ -53,12 +53,6 @@ radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoKHR *p
    }
 }
 
-static bool
-radv_cmd_buffer_has_follower(const struct radv_cmd_buffer *cmd_buffer)
-{
-   return cmd_buffer->gang.cs && cmd_buffer->task_rings_needed;
-}
-
 static VkResult
 radv_sparse_buffer_bind_memory(struct radv_device *device, const 
VkSparseBufferMemoryBindInfo *bind)
 {
@@ -1171,8 +1165,15 @@ radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device
                       struct vk_command_buffer *const *cmd_buffers, uint32_t 
cmd_buffer_count, bool *use_perf_counters,
                       bool *has_follower)
 {
-   if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE)
+   if (queue->qf != RADV_QUEUE_GENERAL && queue->qf != RADV_QUEUE_COMPUTE) {
+      for (uint32_t j = 0; j < cmd_buffer_count; j++) {
+         struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], 
struct radv_cmd_buffer, vk);
+
+         *has_follower |= !!cmd_buffer->gang.cs;
+      }
+
       return VK_SUCCESS;
+   }
 
    /* Figure out the needs of the current submission.
     * Start by copying the queue's current info.
@@ -1621,14 +1622,14 @@ radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submi
          assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
          const bool can_chain_next = !(cmd_buffer->usage_flags & 
VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
 
-         /* Follower needs to be first because the last CS must match the queue's IP type. */
-         if (radv_cmd_buffer_has_follower(cmd_buffer)) {
+         /* Follower needs to be before the gang leader because the last CS must match the queue's IP type. */
+         if (cmd_buffer->gang.cs) {
             queue->device->ws->cs_unchain(cmd_buffer->gang.cs);
             if (!chainable_ace || !queue->device->ws->cs_chain(chainable_ace, 
cmd_buffer->gang.cs, false)) {
                cs_array[num_submitted_cs++] = cmd_buffer->gang.cs;
-               /* Reset chaining for GFX when the cmdbuf has GFX+ACE because the follower CS (ACE)
-                * must always be before the leader CS (GFX). Otherwise, the GFX CS might be chained
-                * to previous one and ordering would be incorrect.
+
+               /* Prevent chaining the gang leader when the follower couldn't be chained.
+                * Otherwise, they would be in the wrong order.
                 */
                chainable = NULL;
             }

Reply via email to