Module: Mesa
Branch: main
Commit: e59a16bbb8fb770bc11bbacfa2254d6a958c7cb8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e59a16bbb8fb770bc11bbacfa2254d6a958c7cb8

Author: Samuel Pitoiset <[email protected]>
Date:   Fri Nov 10 17:47:16 2023 +0100

radv: use an indirect draw when IBO isn't updated as part of DGC

To remove the dependency on the cmd buffer state.

Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26172>

---

 src/amd/vulkan/radv_device_generated_commands.c | 137 ++++++++++++++++++++----
 1 file changed, 114 insertions(+), 23 deletions(-)

diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c
index 76f730e37b6..336ad9e8aec 100644
--- a/src/amd/vulkan/radv_device_generated_commands.c
+++ b/src/amd/vulkan/radv_device_generated_commands.c
@@ -81,8 +81,13 @@ radv_get_sequence_size_graphics(const struct radv_indirect_command_layout *layou
    }
 
    if (layout->indexed) {
-      /* userdata writes + instance count + indexed draw */
-      *cmd_size += (5 + 2 + 5) * 4;
+      if (layout->binds_index_buffer) {
+         /* userdata writes + instance count + indexed draw */
+         *cmd_size += (5 + 2 + 5) * 4;
+      } else {
+         /* PKT3_SET_BASE + PKT3_DRAW_{INDEX}_INDIRECT_MULTI */
+         *cmd_size += (4 + (pipeline->uses_drawid ? 10 : 5)) * 4;
+      }
    } else {
       /* userdata writes + instance count + non-indexed draw */
       *cmd_size += (5 + 2 + 3) * 4;
@@ -188,7 +193,7 @@ struct radv_dgc_params {
    /* draw info */
    uint16_t draw_indexed;
    uint16_t draw_params_offset;
-   uint16_t base_index_size;
+   uint16_t binds_index_buffer;
    uint16_t vtx_base_sgpr;
    uint32_t max_index_count;
 
@@ -197,7 +202,7 @@ struct radv_dgc_params {
    uint16_t dispatch_params_offset;
    uint16_t grid_base_sgpr;
 
-   /* bind index buffer info. Valid if base_index_size == 0 && draw_indexed */
+   /* bind index buffer info. Valid if binds_index_buffer == true && 
draw_indexed */
    uint16_t index_buffer_offset;
 
    uint8_t vbo_cnt;
@@ -467,6 +472,88 @@ dgc_emit_grid_size_pointer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *grid_
    dgc_emit(b, cs, nir_vec(b, values, 4));
 }
 
+/* Emits PKT3_SET_BASE (4 dwords): header, the constant 1, then the low and high 32 bits of va. */
+static void
+dgc_emit_pkt3_set_base(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *va)
+{
+   nir_def *addr_lo = nir_unpack_64_2x32_split_x(b, va);
+   nir_def *addr_hi = nir_unpack_64_2x32_split_y(b, va);
+   nir_def *header = nir_imm_int(b, PKT3(PKT3_SET_BASE, 2, false));
+   nir_def *packet[4] = {header, nir_imm_int(b, 1), addr_lo, addr_hi};
+   dgc_emit(b, cs, nir_vec(b, packet, 4));
+}
+
+/* Emits PKT3_DRAW_{INDEX_}INDIRECT, or the _MULTI form when DGC_USES_DRAWID is set in vtx_base_sgpr. */
+static void
+dgc_emit_pkt3_draw_indirect(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *vtx_base_sgpr, bool indexed)
+{
+   const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
+   /* Test the DGC_USES_* flags first: the 0x3FFF mask keeps only the low 14 bits and would drop any flag above them. */
+   vtx_base_sgpr = nir_u2u32(b, vtx_base_sgpr);
+   nir_def *has_drawid = nir_test_mask(b, vtx_base_sgpr, DGC_USES_DRAWID);
+   nir_def *has_baseinstance = nir_test_mask(b, vtx_base_sgpr, DGC_USES_BASEINSTANCE);
+   vtx_base_sgpr = nir_iand_imm(b, vtx_base_sgpr, 0x3FFF);
+
+   /* vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2 */
+   nir_def *vertex_offset_reg = vtx_base_sgpr;
+
+   /* start_instance_reg = (base_reg + (draw_id_enable ? 8 : 4) - SI_SH_REG_OFFSET) >> 2 */
+   nir_def *start_instance_offset = nir_bcsel(b, has_drawid, nir_imm_int(b, 2), nir_imm_int(b, 1));
+   nir_def *start_instance_reg = nir_iadd(b, vtx_base_sgpr, start_instance_offset);
+
+   /* draw_id_reg = (base_reg + 4 - SI_SH_REG_OFFSET) >> 2 */
+   nir_def *draw_id_reg = nir_iadd(b, vtx_base_sgpr, nir_imm_int(b, 1));
+
+   nir_if *if_drawid = nir_push_if(b, has_drawid);
+   {
+      const unsigned pkt3_op = indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI;
+
+      nir_def *values[8];
+      values[0] = nir_imm_int(b, PKT3(pkt3_op, 8, false));
+      values[1] = nir_imm_int(b, 0);
+      values[2] = vertex_offset_reg;
+      values[3] = nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0));
+      values[4] = nir_ior(b, draw_id_reg, nir_imm_int(b, S_2C3_DRAW_INDEX_ENABLE(1)));
+      values[5] = nir_imm_int(b, 1); /* draw count */
+      values[6] = nir_imm_int(b, 0); /* count va low */
+      values[7] = nir_imm_int(b, 0); /* count va high */
+
+      dgc_emit(b, cs, nir_vec(b, values, 8));
+
+      values[0] = nir_imm_int(b, 0); /* stride */
+      values[1] = nir_imm_int(b, di_src_sel); /* same index source selection as the non-MULTI packet below */
+      dgc_emit(b, cs, nir_vec(b, values, 2));
+   }
+   nir_push_else(b, if_drawid);
+   {
+      const unsigned pkt3_op = indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT;
+
+      nir_def *values[5];
+      values[0] = nir_imm_int(b, PKT3(pkt3_op, 3, false));
+      values[1] = nir_imm_int(b, 0);
+      values[2] = vertex_offset_reg;
+      values[3] = nir_bcsel(b, has_baseinstance, start_instance_reg, nir_imm_int(b, 0));
+      values[4] = nir_imm_int(b, di_src_sel);
+
+      dgc_emit(b, cs, nir_vec(b, values, 5));
+   }
+   nir_pop_if(b, if_drawid);
+}
+
+/* Points SET_BASE at stream_addr + stream_base + draw_params_offset, then emits the indirect draw packet. */
+static void
+dgc_emit_draw_indirect(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_base, nir_def *draw_params_offset,
+                       bool indexed)
+{
+   nir_def *sgpr_info = load_param16(b, vtx_base_sgpr);
+   nir_def *params_offset = nir_iadd(b, draw_params_offset, stream_base);
+   nir_def *stream_va = load_param64(b, stream_addr);
+   /* Widen the offset to 64 bits (nir_u2u64) before adding it to the stream_addr parameter. */
+   nir_def *params_va = nir_iadd(b, stream_va, nir_u2u64(b, params_offset));
+   dgc_emit_pkt3_set_base(b, cs, params_va);
+   dgc_emit_pkt3_draw_indirect(b, cs, sgpr_info, indexed);
+}
+
 static nir_def *
 dgc_cmd_buf_size(nir_builder *b, nir_def *sequence_count, const struct 
radv_device *device)
 {
@@ -663,7 +750,7 @@ dgc_emit_draw_indexed(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
 static void
 dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def 
*stream_buf, nir_def *stream_base,
                       nir_def *index_buffer_offset, nir_def *ibo_type_32, 
nir_def *ibo_type_8,
-                      nir_variable *index_size_var, nir_variable 
*max_index_count_var, const struct radv_device *device)
+                      nir_variable *max_index_count_var, const struct 
radv_device *device)
 {
    nir_def *index_stream_offset = nir_iadd(b, index_buffer_offset, 
stream_base);
    nir_def *data = nir_load_ssbo(b, 4, 32, stream_buf, index_stream_offset);
@@ -674,7 +761,6 @@ dgc_emit_index_buffer(nir_builder *b, struct dgc_cmdbuf *cs, nir_def *stream_buf
    index_type = nir_bcsel(b, nir_ieq(b, vk_index_type, ibo_type_8), 
nir_imm_int(b, V_028A7C_VGT_INDEX_8), index_type);
 
    nir_def *index_size = nir_iand_imm(b, nir_ushr(b, nir_imm_int(b, 0x142), 
nir_imul_imm(b, index_type, 4)), 0xf);
-   nir_store_var(b, index_size_var, index_size, 0x1);
 
    nir_def *max_index_count = nir_udiv(b, nir_channel(b, data, 2), index_size);
    nir_store_var(b, max_index_count_var, max_index_count, 0x1);
@@ -1107,26 +1193,31 @@ build_dgc_prepare_shader(struct radv_device *dev)
          }
          nir_push_else(&b, NULL);
          {
-            nir_variable *index_size_var =
-               nir_variable_create(b.shader, nir_var_shader_temp, 
glsl_uint_type(), "index_size");
-            nir_store_var(&b, index_size_var, load_param16(&b, 
base_index_size), 0x1);
-            nir_variable *max_index_count_var =
-               nir_variable_create(b.shader, nir_var_shader_temp, 
glsl_uint_type(), "max_index_count");
-            nir_store_var(&b, max_index_count_var, load_param32(&b, 
max_index_count), 0x1);
-
-            nir_def *bind_index_buffer = nir_ieq_imm(&b, nir_load_var(&b, 
index_size_var), 0);
-            nir_push_if(&b, bind_index_buffer);
+            /* Emit direct draws when index buffers are also updated by DGC. 
Otherwise, emit
+             * indirect draws to remove the dependency on the cmdbuf state in 
order to enable
+             * preprocessing.
+             */
+            nir_def *binds_index_buffer = nir_ine_imm(&b, load_param16(&b, 
binds_index_buffer), 0);
+            nir_push_if(&b, binds_index_buffer);
             {
+               nir_variable *max_index_count_var =
+                  nir_variable_create(b.shader, nir_var_shader_temp, 
glsl_uint_type(), "max_index_count");
+
                dgc_emit_index_buffer(&b, &cmd_buf, stream_buf, stream_base, 
load_param16(&b, index_buffer_offset),
-                                     load_param32(&b, ibo_type_32), 
load_param32(&b, ibo_type_8), index_size_var,
-                                     max_index_count_var, dev);
-            }
-            nir_pop_if(&b, NULL);
+                                     load_param32(&b, ibo_type_32), 
load_param32(&b, ibo_type_8), max_index_count_var,
+                                     dev);
+
+               nir_def *max_index_count = nir_load_var(&b, 
max_index_count_var);
 
-            nir_def *max_index_count = nir_load_var(&b, max_index_count_var);
+               dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base, 
load_param16(&b, draw_params_offset),
+                                     sequence_id, max_index_count, dev);
+            }
+            nir_push_else(&b, NULL);
+            {
+               dgc_emit_draw_indirect(&b, &cmd_buf, stream_base, 
load_param16(&b, draw_params_offset), true);
+            }
 
-            dgc_emit_draw_indexed(&b, &cmd_buf, stream_buf, stream_base, 
load_param16(&b, draw_params_offset),
-                                  sequence_id, max_index_count, dev);
+            nir_pop_if(&b, NULL);
          }
          nir_pop_if(&b, NULL);
       }
@@ -1496,7 +1587,7 @@ radv_prepare_dgc_graphics(struct radv_cmd_buffer *cmd_buffer, const VkGeneratedC
 
    params->draw_indexed = layout->indexed;
    params->draw_params_offset = layout->draw_params_offset;
-   params->base_index_size = layout->binds_index_buffer ? 0 : 
radv_get_vgt_index_size(cmd_buffer->state.index_type);
+   params->binds_index_buffer = layout->binds_index_buffer;
    params->vtx_base_sgpr = vtx_base_sgpr;
    params->max_index_count = cmd_buffer->state.max_index_count;
    params->index_buffer_offset = layout->index_buffer_offset;

Reply via email to