Module: Mesa
Branch: main
Commit: 716847a77dd3f10bf79c61c9c3328da6126c16b1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=716847a77dd3f10bf79c61c9c3328da6126c16b1

Author: Iago Toral Quiroga <[email protected]>
Date:   Thu Dec 14 09:47:48 2023 +0100

broadcom: disable perquad tmu loads after discards

Otherwise we may emit a load with an invalid offset from
a lane that was discarded.

This prevents a simulator assert from triggering when
executing:
dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_null_pointer_load

That test emits a conditional kill followed by a buffer load
whose offsets are invalid for the lanes that were killed. Since
the buffer load is in uniform control flow we were incorrectly
emitting a full per-quad load, including disabled lanes, which
prompted the simulator to assert on the invalid offsets coming
from the lanes that had been killed in the shader.

Reviewed-by: Alejandro Piñeiro <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26683>
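
To make the failure mode concrete, here is a minimal standalone C sketch
(this is not Mesa code; every identifier in it is made up for illustration)
of a 2x2 quad in which one lane has executed a discard: a per-pixel lookup
skips the inactive lane, while a per-quad lookup still reads its stale
offset.

#include <stdbool.h>
#include <stdio.h>

#define QUAD_LANES 4

/* Print which offsets a lookup touches in a 2x2 quad. A per-pixel lookup
 * skips inactive lanes; a per-quad lookup reads every lane's offset
 * regardless of the active mask.
 */
static void
simulate_lookup(const unsigned offsets[QUAD_LANES],
                const bool active[QUAD_LANES], bool per_quad)
{
        for (int lane = 0; lane < QUAD_LANES; lane++) {
                if (!per_quad && !active[lane])
                        continue;
                printf("  lane %d: reads offset 0x%08x%s\n", lane,
                       offsets[lane],
                       active[lane] ? "" : "  <- lane was discarded");
        }
}

int
main(void)
{
        /* Lane 2 executed a discard, so its offset was never computed and
         * holds garbage; a per-quad lookup dereferences it anyway.
         */
        const unsigned offsets[QUAD_LANES] = { 0x00, 0x04, 0xdeadbeef, 0x0c };
        const bool active[QUAD_LANES] = { true, true, false, true };

        printf("per-pixel lookup:\n");
        simulate_lookup(offsets, active, false);
        printf("per-quad lookup:\n");
        simulate_lookup(offsets, active, true);
        return 0;
}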

---

 src/broadcom/compiler/nir_to_vir.c   | 13 +++++++++----
 src/broadcom/compiler/v3d_compiler.h |  6 ++++++
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index c1228c6760c..724543b93a0 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -648,10 +648,14 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                                 v3d_tmu_get_type_from_op(tmu_op, !is_load) ==
                                 V3D_TMU_OP_TYPE_ATOMIC;
 
+                        /* Only load per-quad if we can be certain that all
+                         * lanes in the quad are active.
+                         */
                         uint32_t perquad =
-                                is_load && !vir_in_nonuniform_control_flow(c)
-                                ? GENERAL_TMU_LOOKUP_PER_QUAD
-                                : GENERAL_TMU_LOOKUP_PER_PIXEL;
+                                is_load && !vir_in_nonuniform_control_flow(c) &&
+                                !c->emitted_discard ?
+                                GENERAL_TMU_LOOKUP_PER_QUAD :
+                                GENERAL_TMU_LOOKUP_PER_PIXEL;
                         config = 0xffffff00 | tmu_op << 3 | perquad;
 
                         if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) {
@@ -3436,6 +3440,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                         vir_SETMSF_dest(c, vir_nop_reg(),
                                         vir_uniform_ui(c, 0));
                 }
+                c->emitted_discard = true;
                 break;
 
         case nir_intrinsic_discard_if: {
@@ -3456,7 +3461,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
                 vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
                                              vir_uniform_ui(c, 0)), cond);
-
+                c->emitted_discard = true;
                 break;
         }
 
diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h
index 5ee00eb6487..18281e42b12 100644
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -914,6 +914,12 @@ struct v3d_compile {
 
         bool tmu_dirty_rcl;
         bool has_global_address;
+
+        /* If we have processed a discard/terminate instruction. This may
+         * cause some lanes to be inactive even during uniform control
+         * flow.
+         */
+        bool emitted_discard;
 };
 
 struct v3d_uniform_list {
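
Read together, the two hunks make the per-quad decision depend on three
conditions. The hypothetical helper below (the enum and function do not
exist in Mesa; they merely restate the expression added to
ntq_emit_tmu_general) condenses that logic:

#include <stdbool.h>
#include <stdio.h>

enum lookup_mode {
        LOOKUP_PER_PIXEL,
        LOOKUP_PER_QUAD,
};

/* Per-quad TMU lookups are only safe when every lane of the quad is
 * guaranteed active: the access must be a load, control flow must be
 * uniform, and no discard/terminate may have been emitted earlier in
 * the shader.
 */
static enum lookup_mode
choose_lookup_mode(bool is_load, bool nonuniform_cf, bool emitted_discard)
{
        return (is_load && !nonuniform_cf && !emitted_discard) ?
               LOOKUP_PER_QUAD : LOOKUP_PER_PIXEL;
}

int
main(void)
{
        /* The dEQP case above: a load in uniform control flow, but after a
         * conditional terminate, now falls back to a per-pixel lookup.
         */
        printf("%s\n", choose_lookup_mode(true, false, true) ==
               LOOKUP_PER_PIXEL ? "per-pixel" : "per-quad");
        return 0;
}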
