https://gcc.gnu.org/g:9b13bea07706a7cae0185f8a860d67209308c050
commit r16-459-g9b13bea07706a7cae0185f8a860d67209308c050 Author: Pengxuan Zheng <quic_pzh...@quicinc.com> Date: Thu Feb 6 16:16:32 2025 -0800 Canonicalize vec_merge in simplify_ternary_operation Similar to the canonicalization done in combine, we canonicalize vec_merge with swap_commutative_operands_p in simplify_ternary_operation too. gcc/ChangeLog: * config/aarch64/aarch64-protos.h (aarch64_exact_log2_inverse): New. * config/aarch64/aarch64-simd.md (aarch64_simd_vec_set_zero<mode>): Update pattern accordingly. * config/aarch64/aarch64.cc (aarch64_exact_log2_inverse): New. * simplify-rtx.cc (simplify_context::simplify_ternary_operation): Canonicalize vec_merge. Signed-off-by: Pengxuan Zheng <quic_pzh...@quicinc.com> Diff: --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-simd.md | 10 ++++++---- gcc/config/aarch64/aarch64.cc | 10 ++++++++++ gcc/simplify-rtx.cc | 7 +++++++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index c83c35c6d71e..c935e7bcf33d 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1055,6 +1055,7 @@ void aarch64_subvti_scratch_regs (rtx, rtx, rtx *, rtx *, rtx *, rtx *); void aarch64_expand_subvti (rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); +int aarch64_exact_log2_inverse (unsigned int, rtx); /* Initialize builtins for SIMD intrinsics. 
*/ diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index e2afe87e5130..1099e742cbf7 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1193,12 +1193,14 @@ (define_insn "aarch64_simd_vec_set_zero<mode>" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 - (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "") - (match_operand:VALL_F16 3 "register_operand" "0") + (match_operand:VALL_F16 1 "register_operand" "0") + (match_operand:VALL_F16 3 "aarch64_simd_imm_zero" "") (match_operand:SI 2 "immediate_operand" "i")))] - "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0" + "TARGET_SIMD && aarch64_exact_log2_inverse (<nunits>, operands[2]) >= 0" { - int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); + int elt = ENDIAN_LANE_N (<nunits>, + aarch64_exact_log2_inverse (<nunits>, + operands[2])); operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); return "ins\\t%0.<Vetype>[%p2], <vwcore>zr"; } diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 2dc5f4c4b59d..9e3f2885bccb 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -23914,6 +23914,16 @@ aarch64_strided_registers_p (rtx *operands, unsigned int num_operands, return true; } +/* Return the base 2 logarithm of the bit inverse of OP masked by the lowest + NELTS bits, if OP is a power of 2. Otherwise, returns -1. */ + +int +aarch64_exact_log2_inverse (unsigned int nelts, rtx op) +{ + return exact_log2 ((~INTVAL (op)) + & ((HOST_WIDE_INT_1U << nelts) - 1)); +} + /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). 
*/ void diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index 7bcbe11370fa..b34fd2f4b9ea 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -7387,6 +7387,13 @@ simplify_context::simplify_ternary_operation (rtx_code code, machine_mode mode, return gen_rtx_CONST_VECTOR (mode, v); } + if (swap_commutative_operands_p (op0, op1) + /* Two operands have same precedence, then first bit of mask + select first operand. */ + || (!swap_commutative_operands_p (op1, op0) && !(sel & 1))) + return simplify_gen_ternary (code, mode, mode, op1, op0, + GEN_INT (~sel & mask)); + /* Replace (vec_merge (vec_merge a b m) c n) with (vec_merge b c n) if no element from a appears in the result. */ if (GET_CODE (op0) == VEC_MERGE)