> Pengxuan Zheng <quic_pzh...@quicinc.com> writes: > > Similar to the canonicalization done in combine, we canonicalize > > vec_merge with swap_commutative_operands_p in > simplify_ternary_operation too. > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64-protos.h (aarch64_exact_log2_inverse): > New. > > * config/aarch64/aarch64-simd.md > (aarch64_simd_vec_set_zero<mode>): > > Update pattern accordingly. > > * config/aarch64/aarch64.cc (aarch64_exact_log2_inverse): New. > > * simplify-rtx.cc (simplify_context::simplify_ternary_operation): > > Canonicalize vec_merge. > > OK for GCC 16, thanks. aarch64_exact_log2_inverse isn't really target-specific, > but I can't think of a target-independent set of interfaces that it would > naturally fit. > > Richard
Thanks, pushed the patch as r16-459-g9b13bea07706a. Pengxuan > > > > > Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com> > > --- > > gcc/config/aarch64/aarch64-protos.h | 1 + > > gcc/config/aarch64/aarch64-simd.md | 10 ++++++---- > > gcc/config/aarch64/aarch64.cc | 10 ++++++++++ > > gcc/simplify-rtx.cc | 7 +++++++ > > 4 files changed, 24 insertions(+), 4 deletions(-) > > > > diff --git a/gcc/config/aarch64/aarch64-protos.h > > b/gcc/config/aarch64/aarch64-protos.h > > index 4235f4a0ca5..2391b99cacd 100644 > > --- a/gcc/config/aarch64/aarch64-protos.h > > +++ b/gcc/config/aarch64/aarch64-protos.h > > @@ -1051,6 +1051,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, > > rtx *, rtx *, rtx *); > > void aarch64_expand_subvti (rtx, rtx, rtx, > > rtx, rtx, rtx, rtx, bool); > > +int aarch64_exact_log2_inverse (unsigned int, rtx); > > > > > > /* Initialize builtins for SIMD intrinsics. */ diff --git > > a/gcc/config/aarch64/aarch64-simd.md > > b/gcc/config/aarch64/aarch64-simd.md > > index e2afe87e513..1099e742cbf 100644 > > --- a/gcc/config/aarch64/aarch64-simd.md > > +++ b/gcc/config/aarch64/aarch64-simd.md > > @@ -1193,12 +1193,14 @@ (define_insn > "@aarch64_simd_vec_set<mode>" > > (define_insn "aarch64_simd_vec_set_zero<mode>" > > [(set (match_operand:VALL_F16 0 "register_operand" "=w") > > (vec_merge:VALL_F16 > > - (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "") > > - (match_operand:VALL_F16 3 "register_operand" "0") > > + (match_operand:VALL_F16 1 "register_operand" "0") > > + (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "") > > (match_operand:SI 2 "immediate_operand" "i")))] > > - "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0" > > + "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) > >= 0" > > { > > - int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); > > + int elt = ENDIAN_LANE_N (<nunits>, > > + aarch64_exact_log2_inverse (<nunits>, > > + operands[2])); > > operands[2] = GEN_INT ((HOST_WIDE_INT) 
1 << elt); > > return "ins\\t%0.<Vetype>[%p2], <vwcore>zr"; > > } > > diff --git a/gcc/config/aarch64/aarch64.cc > > b/gcc/config/aarch64/aarch64.cc index f5f23f6ff4b..103a00915e5 100644 > > --- a/gcc/config/aarch64/aarch64.cc > > +++ b/gcc/config/aarch64/aarch64.cc > > @@ -23682,6 +23682,16 @@ aarch64_strided_registers_p (rtx *operands, > unsigned int num_operands, > > return true; > > } > > > > +/* Return the base 2 logarithm of the bit inverse of OP masked by the > lowest > > + NELTS bits, if OP is a power of 2. Otherwise, returns -1. */ > > + > > +int > > +aarch64_exact_log2_inverse (unsigned int nelts, rtx op) { > > + return exact_log2 ((~INTVAL (op)) > > + & ((HOST_WIDE_INT_1U << nelts) - 1)); } > > + > > /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and > > HIGH (exclusive). */ > > void > > diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index > > c478bd060fc..22002d1e1ab 100644 > > --- a/gcc/simplify-rtx.cc > > +++ b/gcc/simplify-rtx.cc > > @@ -7307,6 +7307,13 @@ simplify_context::simplify_ternary_operation > (rtx_code code, machine_mode mode, > > return gen_rtx_CONST_VECTOR (mode, v); > > } > > > > + if (swap_commutative_operands_p (op0, op1) > > + /* Two operands have same precedence, then first bit of mask > > + select first operand. */ > > + || (!swap_commutative_operands_p (op1, op0) && !(sel & 1))) > > + return simplify_gen_ternary (code, mode, mode, op1, op0, > > + GEN_INT (~sel & mask)); > > + > > /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) > > if no element from a appears in the result. */ > > if (GET_CODE (op0) == VEC_MERGE)