[PATCH v2] simplify-rtx: Combine bitwise operations in more cases

Pengfei Li Mon, 28 Apr 2025 03:42:10 -0700

This patch transforms RTL expressions of the form (subreg (not X)) into
(not (subreg X)) if the subreg is non-paradoxical and is an operand of
another binary logical operation. This transformation can expose
opportunities to combine more logical operations.


For example, it improves the codegen of the following AArch64 NEON
intrinsics:
        vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
                  vreinterpretq_s64_s32(b));
from:
        not     v0.16b, v0.16b
        and     v0.16b, v0.16b, v1.16b
to:
        bic     v0.16b, v1.16b, v0.16b

Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
aarch64-linux-gnu.

gcc/ChangeLog:

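        * simplify-rtx.cc (non_paradoxical_subreg_not_p): New function
        for pattern match of (subreg (not X)).
        (simplify_with_subreg_not): New function for simplification.
        (simplify_context::simplify_binary_operation_1): Use it.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/simd/bic_orn_1.c: New test.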
---
 gcc/simplify-rtx.cc                           | 50 +++++++++++++++++++
 .../gcc.target/aarch64/simd/bic_orn_1.c       | 17 +++++++
 2 files changed, 67 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 06b52ca8003..5a6c1a9c039 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3032,6 +3032,44 @@ match_plus_neg_pattern (rtx op0, rtx op1, machine_mode 
mode)
   return false;
 }
 
+/* Return true if OP is a non-paradoxical SUBREG whose inner expression
+   is a NOT, i.e. OP has the form (subreg (not X)).  */
+
+static bool
+non_paradoxical_subreg_not_p (rtx op)
+{
+  if (GET_CODE (op) != SUBREG || paradoxical_subreg_p (op))
+    return false;
+  return GET_CODE (SUBREG_REG (op)) == NOT;
+}
+
+/* Convert (binop (subreg (not X)) Y) into (binop (not (subreg X)) Y), or
+   (binop X (subreg (not Y))) into (binop X (not (subreg Y))) to expose
+   opportunities to combine another binary logical operation with NOT.
+   Return NULL_RTX if no operand matches or the subreg cannot be formed.  */
+
+static rtx
+simplify_with_subreg_not (rtx_code binop, machine_mode mode, rtx op0, rtx op1)
+{
+  rtx opn = NULL_RTX;
+  if (non_paradoxical_subreg_not_p (op0))
+    opn = op0;
+  else if (non_paradoxical_subreg_not_p (op1))
+    opn = op1;
+  if (opn == NULL_RTX)
+    return NULL_RTX;
+
+  rtx new_subreg = simplify_gen_subreg (mode, XEXP (SUBREG_REG (opn), 0),
+                                       GET_MODE (SUBREG_REG (opn)),
+                                       SUBREG_BYTE (opn));
+  if (!new_subreg)  /* simplify_gen_subreg can fail; do not build NOT.  */
+    return NULL_RTX;
+  rtx new_not = simplify_gen_unary (NOT, mode, new_subreg, mode);
+  if (opn == op0)
+    return simplify_gen_binary (binop, mode, new_not, op1);
+  return simplify_gen_binary (binop, mode, op0, new_not);
+}
+
 /* Subroutine of simplify_binary_operation.  Simplify a binary operation
    CODE with result mode MODE, operating on OP0 and OP1.  If OP0 and/or
    OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
@@ -3749,6 +3787,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
          && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
        return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+       return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
        return tem;
@@ -4017,6 +4059,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
          && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
        return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+       return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
        return tem;
@@ -4285,6 +4331,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code 
code,
            return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP 
(op0, 1));
        }
 
+      tem = simplify_with_subreg_not (code, mode, op0, op1);
+      if (tem)
+       return tem;
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
        return tem;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c 
b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
new file mode 100644
index 00000000000..1c66f21424e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+int64x2_t bic_16b (int32x4_t a, int32x4_t b) { /* (~a) & b; expects BIC.  */
+  return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
+                   vreinterpretq_s64_s32 (b));
+}
+
+int16x4_t orn_8b (int32x2_t a, int32x2_t b) { /* a | (~b); expects ORN.  */
+  return vorr_s16 (vreinterpret_s16_s32 (a),
+                  vreinterpret_s16_s32 (vmvn_s32 (b)));
+}
+
+/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
+/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */
-- 
2.43.0

[PATCH v2] simplify-rtx: Combine bitwise operations in more cases

Reply via email to