Module: Mesa
Branch: main
Commit: 88afbbba1152bd9dd8dd7058eba18540f485fbf8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=88afbbba1152bd9dd8dd7058eba18540f485fbf8

Author: Daniel Schürmann <[email protected]>
Date:   Wed Nov  1 16:37:46 2023 +0100

nir: optimize open-coded quadVote* directly to new nir_quad intrinsics

Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/218>

---

 src/amd/compiler/aco_instruction_selection.cpp | 6 ++----
 src/compiler/nir/nir_intrinsics.py             | 5 +----
 src/compiler/nir/nir_opt_intrinsics.c          | 8 +++++---
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 0f2c632a501..9472d2e5193 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7888,7 +7888,7 @@ emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr)
 
       Temp thread_count =
          bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm));
-      set_wqm(ctx, nir_intrinsic_include_helpers(instr));
+      set_wqm(ctx);
 
       emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count);
    } else {
@@ -8606,8 +8606,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0;
       cluster_size = util_next_power_of_two(
          MIN2(cluster_size ? cluster_size : ctx->program->wave_size, ctx->program->wave_size));
-      bool create_helpers =
-         instr->intrinsic == nir_intrinsic_reduce && nir_intrinsic_include_helpers(instr);
 
       if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size &&
           instr->def.bit_size != 1) {
@@ -8667,7 +8665,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          else
             emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size, Definition(dst), src);
       }
-      set_wqm(ctx, create_helpers);
+      set_wqm(ctx);
       break;
    }
    case nir_intrinsic_quad_broadcast:
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index bc1f81ed0cb..2a67586afd6 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -172,9 +172,6 @@ index("unsigned", "reduction_op")
 # Cluster size for reduction operations
 index("unsigned", "cluster_size")
 
-# Requires that the operation creates and includes helper invocations
-index("bool", "include_helpers")
-
 # Parameter index for a load_param intrinsic
 index("unsigned", "param_idx")
 
@@ -510,7 +507,7 @@ intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0,
           indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]);
 
 intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0,
-          indices=[REDUCTION_OP, CLUSTER_SIZE, INCLUDE_HELPERS], flags=[CAN_ELIMINATE])
+          indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE])
 intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
           indices=[REDUCTION_OP], flags=[CAN_ELIMINATE])
 intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0,
diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
index 08866e1751b..b114d1f7289 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -206,9 +206,11 @@ try_opt_quad_vote(nir_builder *b, nir_alu_instr *alu, bool block_has_discard)
    if (lanes_read != 0xffff)
       return NULL;
 
-   /* Create reduction. */
-   return nir_reduce(b, quad_broadcasts[0]->src[0].ssa, .reduction_op = alu->op, .cluster_size = 4,
-                     .include_helpers = true);
+   /* Create quad vote. */
+   if (alu->op == nir_op_iand)
+      return nir_quad_vote_all(b, 1, quad_broadcasts[0]->src[0].ssa);
+   else
+      return nir_quad_vote_any(b, 1, quad_broadcasts[0]->src[0].ssa);
 }
 
 static bool

Reply via email to