This patch transforms RTL expressions of the form (subreg (not X)) into (not (subreg X)) when the subreg is non-paradoxical and is an operand of a binary logical operation. Moving the NOT outside the subreg exposes opportunities to combine it with that binary logical operation.
For example, it improves the codegen of the following AArch64 NEON
intrinsics:

vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
	  vreinterpretq_s64_s32(b));

from:

	not	v0.16b, v0.16b
	and	v0.16b, v0.16b, v1.16b

to:

	bic	v0.16b, v1.16b, v0.16b

Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
aarch64-linux-gnu.

gcc/ChangeLog:

	* simplify-rtx.cc (non_paradoxical_subreg_not_p): New function
	for pattern match of (subreg (not X)).
	(simplify_with_subreg_not): New function for simplification.
	(simplify_context::simplify_binary_operation_1): Call
	simplify_with_subreg_not.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/simd/bic_orn_1.c: New test.
---
 gcc/simplify-rtx.cc                           | 59 +++++++++++++++++++
 .../gcc.target/aarch64/simd/bic_orn_1.c       | 17 ++++++
 2 files changed, 76 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 06b52ca8003..5a6c1a9c039 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3032,6 +3032,50 @@ match_plus_neg_pattern (rtx op0, rtx op1, machine_mode mode)
   return false;
 }
 
+/* Return true if OP is a non-paradoxical SUBREG whose inner expression is
+   a NOT, i.e. OP matches (subreg (not X)).  */
+
+static bool
+non_paradoxical_subreg_not_p (rtx op)
+{
+  return GET_CODE (op) == SUBREG
+	 && !paradoxical_subreg_p (op)
+	 && GET_CODE (SUBREG_REG (op)) == NOT;
+}
+
+/* Convert (binop (subreg (not X)) Y) into (binop (not (subreg X)) Y), or
+   (binop X (subreg (not Y))) into (binop X (not (subreg Y))) to expose
+   opportunities to combine another binary logical operation with NOT.
+   BINOP is the code of the binary operation, MODE its mode, and OP0 and
+   OP1 its operands.  Return the simplified expression, or NULL_RTX if
+   neither operand matches or the new subreg cannot be generated.  */
+
+static rtx
+simplify_with_subreg_not (rtx_code binop, machine_mode mode, rtx op0, rtx op1)
+{
+  rtx opn = NULL_RTX;
+  if (non_paradoxical_subreg_not_p (op0))
+    opn = op0;
+  else if (non_paradoxical_subreg_not_p (op1))
+    opn = op1;
+
+  if (opn == NULL_RTX)
+    return NULL_RTX;
+
+  rtx new_subreg = simplify_gen_subreg (mode,
+					XEXP (SUBREG_REG (opn), 0),
+					GET_MODE (SUBREG_REG (opn)),
+					SUBREG_BYTE (opn));
+
+  /* simplify_gen_subreg returns NULL_RTX if no valid subreg can be formed;
+     give up rather than pass a null operand to simplify_gen_unary.  */
+  if (!new_subreg)
+    return NULL_RTX;
+
+  rtx new_not = simplify_gen_unary (NOT, mode, new_subreg, mode);
+  if (opn == op0)
+    return simplify_gen_binary (binop, mode, new_not, op1);
+  else
+    return simplify_gen_binary (binop, mode, op0, new_not);
+}
+
 /* Subroutine of simplify_binary_operation.  Simplify a binary operation
    CODE with result mode MODE, operating on OP0 and OP1.  If OP0 and/or
    OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
@@ -3749,6 +3796,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
 	return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+	return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
 	return tem;
@@ -4017,6 +4068,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	  && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
 	return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+	return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
 	return tem;
@@ -4285,6 +4340,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
 	    return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP (op0, 1));
 	}
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+	return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
 	return tem;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
new file mode 100644
index 00000000000..1c66f21424e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* A NOT behind a vector reinterpret should still combine into BIC/ORN.  */
+#include <arm_neon.h>
+
+int64x2_t bic_16b (int32x4_t a, int32x4_t b) {
+  return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
+		    vreinterpretq_s64_s32 (b));
+}
+
+int16x4_t orn_8b (int32x2_t a, int32x2_t b) {
+  return vorr_s16 (vreinterpret_s16_s32 (a),
+		   vreinterpret_s16_s32 (vmvn_s32 (b)));
+}
+
+/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
+/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */
-- 
2.43.0