Module: Mesa Branch: main Commit: 716847a77dd3f10bf79c61c9c3328da6126c16b1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=716847a77dd3f10bf79c61c9c3328da6126c16b1
Author: Iago Toral Quiroga <[email protected]> Date: Thu Dec 14 09:47:48 2023 +0100 broadcom: disable perquad tmu loads after discards Otherwise we may emit a load from an invalid offset from a lane that was discarded. This prevents a simulator assert from triggering when executing: dEQP-VK.spirv_assembly.instruction.terminate_invocation.terminate.no_null_pointer_load That test emits a conditional kill and then a buffer load which would have invalid offsets for the lanes killed. Since the buffer load is in uniform control flow we were incorrectly emitting a full quad load, including disabled lanes, which would prompt the simulator to assert on invalid offsets being loaded from the lanes that had been killed in the shader. Reviewed-by: Alejandro Piñeiro <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26683> --- src/broadcom/compiler/nir_to_vir.c | 13 +++++++++---- src/broadcom/compiler/v3d_compiler.h | 6 ++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index c1228c6760c..724543b93a0 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -648,10 +648,14 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr, v3d_tmu_get_type_from_op(tmu_op, !is_load) == V3D_TMU_OP_TYPE_ATOMIC; + /* Only load per-quad if we can be certain that all + * lines in the quad are active. + */ uint32_t perquad = - is_load && !vir_in_nonuniform_control_flow(c) - ? GENERAL_TMU_LOOKUP_PER_QUAD - : GENERAL_TMU_LOOKUP_PER_PIXEL; + is_load && !vir_in_nonuniform_control_flow(c) && + !c->emitted_discard ? 
+ GENERAL_TMU_LOOKUP_PER_QUAD : + GENERAL_TMU_LOOKUP_PER_PIXEL; config = 0xffffff00 | tmu_op << 3 | perquad; if (tmu_op == V3D_TMU_OP_WRITE_CMPXCHG_READ_FLUSH) { @@ -3436,6 +3440,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_SETMSF_dest(c, vir_nop_reg(), vir_uniform_ui(c, 0)); } + c->emitted_discard = true; break; case nir_intrinsic_discard_if: { @@ -3456,7 +3461,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr) vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(), vir_uniform_ui(c, 0)), cond); - + c->emitted_discard = true; break; } diff --git a/src/broadcom/compiler/v3d_compiler.h b/src/broadcom/compiler/v3d_compiler.h index 5ee00eb6487..18281e42b12 100644 --- a/src/broadcom/compiler/v3d_compiler.h +++ b/src/broadcom/compiler/v3d_compiler.h @@ -914,6 +914,12 @@ struct v3d_compile { bool tmu_dirty_rcl; bool has_global_address; + + /* If we have processed a discard/terminate instruction. This may + * cause some lanes to be inactive even during uniform control + * flow. + */ + bool emitted_discard; }; struct v3d_uniform_list {
