Module: Mesa
Branch: main
Commit: 6a92af158dc132eee449c175bdee66d92c68d191
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a92af158dc132eee449c175bdee66d92c68d191

Author: Lionel Landwerlin <[email protected]>
Date:   Wed Dec 13 11:45:31 2023 +0200

anv: wait for CS write completion before executing secondary

Got a error state on DG2 with a jump to secondary. The secondary is
empty and padded with MI_NOOPs to workaround the CS prefetching.
According to the error state, the return jump address from the
secondary to the primary is 0x0. The ACTHD register value is 0x10, so
it seems that the command streamer indeed jumped to 0x0 and hanged on
a few dwords after that.

The return address should have been set edited by a previous
MI_STORE_DATA_IMM instruction. So it appears it did not complete in
time for the command stream to catch it. On Gfx12+ this can happend if
we do not set ForceWriteCompletionCheck.

This change also takes the opportunity to remove the padding MI_NOOPs
at the end of secondaries on Gfx12+ by using disabling the prefetching
just before jumping into secondaries and reenabling it at the
beginning of each secondary.

Signed-off-by: Lionel Landwerlin <[email protected]>
Cc: mesa-stable
Reviewed-by: Ivan Briano <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26665>

---

 src/intel/vulkan/anv_batch_chain.c | 36 ++++-------------------
 src/intel/vulkan/anv_genX.h        |  6 ++++
 src/intel/vulkan/genX_cmd_buffer.c | 60 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index d511b9623e4..7aa96bb2984 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1055,27 +1055,9 @@ anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer 
*cmd_buffer)
       const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
       if (cmd_buffer->device->physical->use_call_secondary) {
          cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
-         /* If the secondary command buffer begins & ends in the same BO and
-          * its length is less than the length of CS prefetch, add some NOOPs
-          * instructions so the last MI_BATCH_BUFFER_START is outside the CS
-          * prefetch.
-          */
-         if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
-            const enum intel_engine_class engine_class = 
cmd_buffer->queue_family->engine_class;
-            /* Careful to have everything in signed integer. */
-            int32_t prefetch_len = 
devinfo->engine_class_prefetch[engine_class];
-            int32_t batch_len = cmd_buffer->batch.next - 
cmd_buffer->batch.start;
-
-            for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
-               anv_batch_emit(&cmd_buffer->batch, GFX9_MI_NOOP, noop);
-         }
 
          void *jump_addr =
-            anv_batch_emitn(&cmd_buffer->batch,
-                            GFX9_MI_BATCH_BUFFER_START_length,
-                            GFX9_MI_BATCH_BUFFER_START,
-                            .AddressSpaceIndicator = ASI_PPGTT,
-                            .SecondLevelBatchBuffer = Firstlevelbatch) +
+            anv_genX(devinfo, batch_emit_return)(&cmd_buffer->batch) +
             (GFX9_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
          cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, 
jump_addr);
 
@@ -1195,18 +1177,10 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer 
*primary,
       struct anv_batch_bo *first_bbo =
          list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
 
-      uint64_t *write_return_addr =
-         anv_batch_emitn(&primary->batch,
-                         GFX9_MI_STORE_DATA_IMM_length + 1 /* QWord write */,
-                         GFX9_MI_STORE_DATA_IMM,
-                         .Address = secondary->return_addr)
-         + (GFX9_MI_STORE_DATA_IMM_ImmediateData_start / 8);
-
-      emit_batch_buffer_start(&primary->batch, first_bbo->bo, 0);
-
-      *write_return_addr =
-         anv_address_physical(anv_batch_address(&primary->batch,
-                                                primary->batch.next));
+      anv_genX(primary->device->info, batch_emit_secondary_call)(
+         &primary->batch,
+         (struct anv_address) { .bo = first_bbo->bo },
+         secondary->return_addr);
 
       anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
       break;
diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
index c828d97d804..94cb208a103 100644
--- a/src/intel/vulkan/anv_genX.h
+++ b/src/intel/vulkan/anv_genX.h
@@ -179,6 +179,12 @@ void genX(cmd_buffer_dispatch_kernel)(struct 
anv_cmd_buffer *cmd_buffer,
 void genX(blorp_exec)(struct blorp_batch *batch,
                       const struct blorp_params *params);
 
+void genX(batch_emit_secondary_call)(struct anv_batch *batch,
+                                     struct anv_address secondary_addr,
+                                     struct anv_address secondary_return_addr);
+
+void *genX(batch_emit_return)(struct anv_batch *batch);
+
 void genX(cmd_emit_timestamp)(struct anv_batch *batch,
                               struct anv_device *device,
                               struct anv_address addr,
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 3c0fcff3801..428603fb712 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -3376,6 +3376,19 @@ genX(BeginCommandBuffer)(
    if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY)
       cmd_buffer->usage_flags &= 
~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
 
+#if GFX_VER >= 12
+   /* Reenable prefetching at the beginning of secondary command buffers. We
+    * do this so that the return instruction edition is not prefetched before
+    * completion.
+    */
+   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      anv_batch_emit(&cmd_buffer->batch, GENX(MI_ARB_CHECK), arb) {
+         arb.PreParserDisableMask = true;
+         arb.PreParserDisable = false;
+      }
+   }
+#endif
+
    /* Assume the viewport has already been set in primary command buffers. */
    if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
       cmd_buffer->state.gfx.viewport_set = true;
@@ -8512,6 +8525,53 @@ void genX(cmd_emit_timestamp)(struct anv_batch *batch,
    }
 }
 
+void genX(batch_emit_secondary_call)(struct anv_batch *batch,
+                                     struct anv_address secondary_addr,
+                                     struct anv_address secondary_return_addr)
+{
+   /* Emit a write to change the return address of the secondary */
+   uint64_t *write_return_addr =
+      anv_batch_emitn(batch,
+                      GENX(MI_STORE_DATA_IMM_length) + 1 /* QWord write */,
+                      GENX(MI_STORE_DATA_IMM),
+#if GFX_VER >= 12
+                      .ForceWriteCompletionCheck = true,
+#endif
+                      .Address = secondary_return_addr) +
+      GENX(MI_STORE_DATA_IMM_ImmediateData_start) / 8;
+
+#if GFX_VER >= 12
+   /* Disable prefetcher before jumping into a secondary */
+   anv_batch_emit(batch, GENX(MI_ARB_CHECK), arb) {
+      arb.PreParserDisableMask = true;
+      arb.PreParserDisable = true;
+   }
+#endif
+
+   /* Jump into the secondary */
+   anv_batch_emit(batch, GENX(MI_BATCH_BUFFER_START), bbs) {
+      bbs.AddressSpaceIndicator = ASI_PPGTT;
+      bbs.SecondLevelBatchBuffer = Firstlevelbatch;
+      bbs.BatchBufferStartAddress = secondary_addr;
+   }
+
+   /* Replace the return address written by the MI_STORE_DATA_IMM above with
+    * the primary's current batch address (immediately after the jump).
+    */
+   *write_return_addr =
+      anv_address_physical(anv_batch_current_address(batch));
+}
+
+void *
+genX(batch_emit_return)(struct anv_batch *batch)
+{
+   return anv_batch_emitn(batch,
+                          GENX(MI_BATCH_BUFFER_START_length),
+                          GENX(MI_BATCH_BUFFER_START),
+                          .AddressSpaceIndicator = ASI_PPGTT,
+                          .SecondLevelBatchBuffer = Firstlevelbatch);
+}
+
 void
 genX(batch_emit_post_3dprimitive_was)(struct anv_batch *batch,
                                       const struct anv_device *device,

Reply via email to