To better create RTL directly from GIMPLE, we can use
the internal functions that already exist in GIMPLE.
That is simplify `a & ~b` into BIT_ANDN.
Likewise `a | ~b` into BIT_IORN.
We only want to do this late, after vectorization, as some
targets (e.g. aarch64 SVE) have BIT_IORN on scalars but not on
some vector modes, even though the vectorizer could expand it back.
Note that a few testcases which scan the optimized dump need to be
changed, since they would now see BIT_ANDN and BIT_IORN there.
Move their scanning to the earlier forwprop2 dump, before this
simplification happens.
Built and tested on aarch64-linux-gnu with no regressions.
PR target/115086
gcc/ChangeLog:
* match.pd (`a & ~b`, `a | ~b`): New pattern.
(BIT_ANDN/BIT_IORN with CST): New pattern.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/bic-cst-1.c: New test.
* gcc.target/aarch64/bic_simd-1.c: New test.
* gcc.dg/tree-ssa/bitops-1.c: Move testing from optimized to forwprop2.
* gcc.dg/tree-ssa/bitops-6.c: Likewise.
* gcc.dg/tree-ssa/bitops-8.c: Likewise.
* gcc.dg/tree-ssa/cmpbit-4.c: Likewise.
* gcc.dg/tree-ssa/pr110637-2.c: Likewise.
* gcc.dg/tree-ssa/pr94880.c: Likewise.
* gcc.dg/tree-ssa/pr96671-1.c: Likewise.
Signed-off-by: Andrew Pinski <[email protected]>
---
gcc/match.pd | 17 ++++++++++
gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c | 10 +++---
gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c | 12 +++----
gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c | 8 ++---
gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c | 12 +++----
gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c | 8 ++---
gcc/testsuite/gcc.dg/tree-ssa/pr94880.c | 6 ++--
gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c | 8 ++---
gcc/testsuite/gcc.target/aarch64/bic-cst-1.c | 31 ++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/bic_simd-1.c | 32 +++++++++++++++++++
10 files changed, 112 insertions(+), 32 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
diff --git a/gcc/match.pd b/gcc/match.pd
index cf359b0ec0f..56f631dfeec 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -9979,6 +9979,23 @@ and,
(cond_op:s @1 @2 @3 @4 @5) @5)
(cond_op (bit_and @1 @0) @2 @3 @4 @5)))
+#if GIMPLE
+/* Create BIT_ANDN and BIT_IORN internal functions. */
+(for bitop (bit_and bit_ior)
+ bitopc (IFN_BIT_ANDN IFN_BIT_IORN)
+ (simplify
+ (bitop:c (bit_not:s @0) @1)
+ (if (canonicalize_math_after_vectorization_p ()
+ && direct_internal_fn_supported_p (as_internal_fn (bitopc),
+ type, OPTIMIZE_FOR_BOTH))
+ (bitopc @1 @0)))
+ /* If the second operand is a constant, then reduce it back to `a & ~cst`
+ (or `a | ~cst`) if the bit_not simplifies. */
+ (simplify
+ (bitopc @0 CONSTANT_CLASS_P@1)
+ (bitop (bit_not! @1) @0)))
+#endif
+
/* For pointers @0 and @2 and nonnegative constant offset @1, look for
expressions like:
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
index cf2823deb62..3a394b1f188 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-1.c
@@ -1,5 +1,5 @@
/* { dg-do run } */
-/* { dg-options "-O -fdump-tree-optimized-raw" } */
+/* { dg-options "-O -fdump-tree-forwprop2-raw" } */
#define DECLS(n,VOL) \
__attribute__((noinline,noclone)) \
@@ -66,7 +66,7 @@ int main(){
}
}
-/* { dg-final { scan-tree-dump-times "bit_not_expr" 12 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr" 9 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_ior_expr" 10 "optimized"} } */
-/* { dg-final { scan-tree-dump-times "bit_xor_expr" 9 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 12 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 9 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 10 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 9 "forwprop2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
index e6ab2fd6c71..e08132e2ab5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
/* PR tree-optimization/111282 */
@@ -25,9 +25,9 @@ int fcmp(int x, int y)
return a & (b ^ !a); // (x == 2) & (y == 1)
}
-/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-not "ne_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times "eq_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 4 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-not "ne_expr, " "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "eq_expr, " 2 "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
index 40f756e4455..52c2f394222 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bitops-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
/* PR tree-optimization/115224 */
int f1(int a, int b)
@@ -9,7 +9,7 @@ int f1(int a, int b)
return c | (a ^ b);
// ~((a ^ 1) & b) or (a ^ -2) | ~b
}
-/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "bit_xor_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_ior_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 1 "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
index cdba5d623af..627dcc57cc7 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cmpbit-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized-raw" } */
+/* { dg-options "-O2 -fdump-tree-forwprop2-raw" } */
int g(int x, int y)
{
@@ -40,8 +40,8 @@ _Bool gbi0(int a, int b)
}
/* All of these should be optimized to `x & y` or `~x & y` */
-/* { dg-final { scan-tree-dump-times "le_expr, " 3 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "optimized" } } */
-/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "le_expr, " 3 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "gt_expr, " 1 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-not "bit_xor_expr, " "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_and_expr, " 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times "bit_not_expr, " 2 "forwprop2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
index f1c5b90353a..81d6a092508 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr110637-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O1 -fdump-tree-forwprop2" } */
int f(int a)
{
int b = a & 1;
@@ -8,6 +8,6 @@ int f(int a)
}
/* This should be optimized to just return `(a&1) ^ 1` or `(~a) & 1`. */
-/* { dg-final { scan-tree-dump-not " == " "optimized"} } */
-/* { dg-final { scan-tree-dump-times "~a" 1 "optimized"} } */
-/* { dg-final { scan-tree-dump-times " & 1" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-not " == " "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times "~a" 1 "forwprop2"} } */
+/* { dg-final { scan-tree-dump-times " & 1" 1 "forwprop2"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
index f7216618147..72a14b915a5 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94880.c
@@ -1,8 +1,8 @@
/* PR tree-optimization/94786 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "optimized" } } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+/* { dg-final { scan-tree-dump-times "= ~\[xy\]_" 4 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " & \[xy\]_" 4 "forwprop2" } } */
unsigned
foo_u(unsigned x, unsigned y)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
index 42c5b27b53f..cf977b55cc2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96671-1.c
@@ -1,9 +1,9 @@
/* PR tree-optimization/96671 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-times " \\^ " 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " ~" 6 "optimized" } } */
-/* { dg-final { scan-tree-dump-times " & " 6 "optimized" } } */
+/* { dg-options "-O2 -fdump-tree-forwprop2" } */
+/* { dg-final { scan-tree-dump-times " \\^ " 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " ~" 6 "forwprop2" } } */
+/* { dg-final { scan-tree-dump-times " & " 6 "forwprop2" } } */
int
foo (int a, int b, int c)
diff --git a/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
new file mode 100644
index 00000000000..1c25de3ba84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic-cst-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/**
+**bar1:
+** mov w([0-9]+), 4
+** bic w0, w\1, w1
+** ret
+*/
+int bar1(int a, int c)
+{
+ int b = 4 & ~c;
+ return b;
+}
+
+/**
+**foo1:
+** mov w([0-9]+), 4
+** orn w0, w\1, w1
+** ret
+*/
+int foo1(int a, int c)
+{
+ int b = 4 | ~c;
+ return b;
+}
+
+/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
+/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
new file mode 100644
index 00000000000..e2a69272456
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_simd-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+/**
+**bar1:
+** movi v([0-9]+).2s, 0x4
+** bic v0.8b, v\1.8b, v1.8b
+** ret
+*/
+#define vect8 __attribute__((vector_size(8)))
+vect8 int bar1(vect8 int a, vect8 int c)
+{
+ vect8 int b = 4 & ~c;
+ return b;
+}
+
+/**
+**foo1:
+** movi v([0-9]+).2s, 0x4
+** orn v0.8b, v\1.8b, v1.8b
+** ret
+*/
+vect8 int foo1(vect8 int a, vect8 int c)
+{
+ vect8 int b = 4 | ~c;
+ return b;
+}
+
+/* { dg-final { scan-tree-dump ".BIT_ANDN " "optimized" } } */
+/* { dg-final { scan-tree-dump ".BIT_IORN " "optimized" } } */
+
--
2.43.0