This patch transforms RTL expressions of the form (subreg (not X) off)
into (not (subreg X off)) when the subreg is the first operand of a
bitwise AND or OR (IOR). This transformation can expose opportunities to
combine the NOT operation with the bitwise AND/OR.

For example, it improves the codegen of the following AArch64 NEON
intrinsics:
        vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
                  vreinterpretq_s64_s32(b));
from:
        not     v0.16b, v0.16b
        and     v0.16b, v0.16b, v1.16b
to:
        bic     v0.16b, v1.16b, v0.16b

Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
aarch64-linux-gnu.

gcc/ChangeLog:

        * simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
          Add RTX simplification for bitwise AND/OR.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/simd/bic_orn_1.c: New test.
---
 gcc/simplify-rtx.cc                           | 24 +++++++++++++++++++
 .../gcc.target/aarch64/simd/bic_orn_1.c       | 17 +++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 88d31a71c05..ed620ef5d45 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3738,6 +3738,18 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
          && rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
        return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
 
+      /* Convert (ior (subreg (not X) off) Y) into (ior (not (subreg X off)) Y)
+        when the inner subreg is valid, to help combine IOR and NOT.  */
+      if (GET_CODE (op0) == SUBREG
+         && GET_CODE (SUBREG_REG (op0)) == NOT
+         && (tem = simplify_gen_subreg (mode, XEXP (SUBREG_REG (op0), 0),
+                                        GET_MODE (SUBREG_REG (op0)),
+                                        SUBREG_BYTE (op0))) != NULL_RTX)
+       {
+         rtx new_not = simplify_gen_unary (NOT, mode, tem, mode);
+         return simplify_gen_binary (IOR, mode, new_not, op1);
+       }
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
        return tem;
@@ -4274,6 +4286,18 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
            return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP (op0, 1));
        }
 
+      /* Convert (and (subreg (not X) off) Y) into (and (not (subreg X off)) Y)
+        when the inner subreg is valid, to help combine AND and NOT.  */
+      if (GET_CODE (op0) == SUBREG
+         && GET_CODE (SUBREG_REG (op0)) == NOT
+         && (tem = simplify_gen_subreg (mode, XEXP (SUBREG_REG (op0), 0),
+                                        GET_MODE (SUBREG_REG (op0)),
+                                        SUBREG_BYTE (op0))) != NULL_RTX)
+       {
+         rtx new_not = simplify_gen_unary (NOT, mode, tem, mode);
+         return simplify_gen_binary (AND, mode, new_not, op1);
+       }
+
       tem = simplify_byte_swapping_operation (code, mode, op0, op1);
       if (tem)
        return tem;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
new file mode 100644
index 00000000000..1c66f21424e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+int64x2_t bic_16b (int32x4_t a, int32x4_t b) {
+  return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
+                   vreinterpretq_s64_s32 (b));
+}
+
+int16x4_t orn_8b (int32x2_t a, int32x2_t b) {
+  return vorr_s16 (vreinterpret_s16_s32 (a),
+                  vreinterpret_s16_s32 (vmvn_s32 (b)));
+}
+
+/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
+/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */
-- 
2.43.0

Reply via email to