This patch transforms RTL expressions of the form (subreg (not X) off)
into (not (subreg X off)) when the subreg is the first operand (op0) of a
bitwise AND or IOR. Hoisting the NOT out of the subreg makes it a direct
operand of the AND/IOR, which exposes opportunities to combine the two
operations into a single instruction.
For example, it improves the code generated for the following combination
of AArch64 NEON intrinsics:
vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
vreinterpretq_s64_s32(b));
from:
not v0.16b, v0.16b
and v0.16b, v0.16b, v1.16b
to:
bic v0.16b, v1.16b, v0.16b
Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
aarch64-linux-gnu.
gcc/ChangeLog:
* simplify-rtx.cc (simplify_context::simplify_binary_operation_1):
Add RTX simplification for bitwise AND/OR.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/simd/bic_orn_1.c: New test.
---
gcc/simplify-rtx.cc | 24 +++++++++++++++++++
.../gcc.target/aarch64/simd/bic_orn_1.c | 17 +++++++++++++
2 files changed, 41 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 88d31a71c05..ed620ef5d45 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3738,6 +3738,18 @@ simplify_context::simplify_binary_operation_1 (rtx_code
code,
&& rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+      /* Convert (ior (subreg (not X) off) Y) into (ior (not (subreg X off)) Y)
+         if (subreg X off) is valid, exposing chances to combine IOR and NOT.  */
+      if (GET_CODE (op0) == SUBREG
+          && GET_CODE (SUBREG_REG (op0)) == NOT
+          && (tem = simplify_gen_subreg (mode, XEXP (SUBREG_REG (op0), 0),
+                                         GET_MODE (SUBREG_REG (op0)),
+                                         SUBREG_BYTE (op0))))
+        {
+          tem = simplify_gen_unary (NOT, mode, tem, mode);
+          return simplify_gen_binary (IOR, mode, tem, op1);
+        }
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -4274,6 +4286,18 @@ simplify_context::simplify_binary_operation_1 (rtx_code
code,
return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP
(op0, 1));
}
+      /* Convert (and (subreg (not X) off) Y) into (and (not (subreg X off)) Y)
+         if (subreg X off) is valid, exposing chances to combine AND and NOT.  */
+      if (GET_CODE (op0) == SUBREG
+          && GET_CODE (SUBREG_REG (op0)) == NOT
+          && (tem = simplify_gen_subreg (mode, XEXP (SUBREG_REG (op0), 0),
+                                         GET_MODE (SUBREG_REG (op0)),
+                                         SUBREG_BYTE (op0))))
+        {
+          tem = simplify_gen_unary (NOT, mode, tem, mode);
+          return simplify_gen_binary (AND, mode, tem, op1);
+        }
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
new file mode 100644
index 00000000000..1c66f21424e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+/* AND of a NOT result viewed through a reinterpret; should emit BIC (.16b).  */
+int64x2_t bic_16b (int32x4_t a, int32x4_t b) {
+ return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)),
+ vreinterpretq_s64_s32 (b));
+}
+/* OR with a NOT result viewed through a reinterpret; should emit ORN (.8b).  */
+int16x4_t orn_8b (int32x2_t a, int32x2_t b) {
+ return vorr_s16 (vreinterpret_s16_s32 (a),
+ vreinterpret_s16_s32 (vmvn_s32 (b)));
+}
+
+/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
+/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */
--
2.43.0