Module: Mesa
Branch: main
Commit: 88afbbba1152bd9dd8dd7058eba18540f485fbf8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=88afbbba1152bd9dd8dd7058eba18540f485fbf8
Author: Daniel Schürmann <[email protected]> Date: Wed Nov 1 16:37:46 2023 +0100 nir: optimize open-coded quadVote* directly to new nir_quad intrinsics Reviewed-by: Georg Lehmann <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/218> --- src/amd/compiler/aco_instruction_selection.cpp | 6 ++---- src/compiler/nir/nir_intrinsics.py | 5 +---- src/compiler/nir/nir_opt_intrinsics.c | 8 +++++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index 0f2c632a501..9472d2e5193 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -7888,7 +7888,7 @@ emit_uniform_reduce(isel_context* ctx, nir_intrinsic_instr* instr) Temp thread_count = bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), Operand(exec, bld.lm)); - set_wqm(ctx, nir_intrinsic_include_helpers(instr)); + set_wqm(ctx); emit_addition_uniform_reduce(ctx, op, dst, instr->src[0], thread_count); } else { @@ -8606,8 +8606,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) instr->intrinsic == nir_intrinsic_reduce ? nir_intrinsic_cluster_size(instr) : 0; cluster_size = util_next_power_of_two( MIN2(cluster_size ? 
cluster_size : ctx->program->wave_size, ctx->program->wave_size)); - bool create_helpers = - instr->intrinsic == nir_intrinsic_reduce && nir_intrinsic_include_helpers(instr); if (!nir_src_is_divergent(instr->src[0]) && cluster_size == ctx->program->wave_size && instr->def.bit_size != 1) { @@ -8667,7 +8665,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) else emit_reduction_instr(ctx, aco_op, reduce_op, cluster_size, Definition(dst), src); } - set_wqm(ctx, create_helpers); + set_wqm(ctx); break; } case nir_intrinsic_quad_broadcast: diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py index bc1f81ed0cb..2a67586afd6 100644 --- a/src/compiler/nir/nir_intrinsics.py +++ b/src/compiler/nir/nir_intrinsics.py @@ -172,9 +172,6 @@ index("unsigned", "reduction_op") # Cluster size for reduction operations index("unsigned", "cluster_size") -# Requires that the operation creates and includes helper invocations -index("bool", "include_helpers") - # Parameter index for a load_param intrinsic index("unsigned", "param_idx") @@ -510,7 +507,7 @@ intrinsic("rotate", src_comp=[0, 1], dest_comp=0, bit_sizes=src0, indices=[EXECUTION_SCOPE, CLUSTER_SIZE], flags=[CAN_ELIMINATE]); intrinsic("reduce", src_comp=[0], dest_comp=0, bit_sizes=src0, - indices=[REDUCTION_OP, CLUSTER_SIZE, INCLUDE_HELPERS], flags=[CAN_ELIMINATE]) + indices=[REDUCTION_OP, CLUSTER_SIZE], flags=[CAN_ELIMINATE]) intrinsic("inclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0, indices=[REDUCTION_OP], flags=[CAN_ELIMINATE]) intrinsic("exclusive_scan", src_comp=[0], dest_comp=0, bit_sizes=src0, diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c index 08866e1751b..b114d1f7289 100644 --- a/src/compiler/nir/nir_opt_intrinsics.c +++ b/src/compiler/nir/nir_opt_intrinsics.c @@ -206,9 +206,11 @@ try_opt_quad_vote(nir_builder *b, nir_alu_instr *alu, bool block_has_discard) if (lanes_read != 0xffff) return NULL; - /* Create reduction. 
*/ - return nir_reduce(b, quad_broadcasts[0]->src[0].ssa, .reduction_op = alu->op, .cluster_size = 4, - .include_helpers = true); + /* Create quad vote. */ + if (alu->op == nir_op_iand) + return nir_quad_vote_all(b, 1, quad_broadcasts[0]->src[0].ssa); + else + return nir_quad_vote_any(b, 1, quad_broadcasts[0]->src[0].ssa); } static bool
