Generalize existing scalar gimple_fold rules to apply the same
bitwise comparison simplifications to vector types. Previously, an
expression like
(x < y) && (x > y)
would fold to `false` if x and y are scalars, but equivalent vector
comparisons were left untouched. This patch enables folding of
patterns of the form
(cmp1 x y) bit_and (cmp2 x y)
(cmp1 x y) bit_ior (cmp2 x y)
(cmp1 x y) bit_xor (cmp2 x y)
for vector operands as well, ensuring consistent optimization across
all data types.
PR tree-optimization/119196
gcc/ChangeLog:
* match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply
to vectors.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/vector-compare-5.c: Add new test for vector compare
simplification.
Signed-off-by: Icen Zeyada <[email protected]>
---
gcc/match.pd | 16 ++++-
.../gcc.target/aarch64/vector-compare-5.c | 67 +++++++++++++++++++
2 files changed, 81 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 611f05ef9f9c..7a7df6aeb6c5 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3635,6 +3635,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if ((TREE_CODE (@1) == INTEGER_CST
&& TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ || (VECTOR_TYPE_P (TREE_TYPE (@1))
+ && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
|| POINTER_TYPE_P (TREE_TYPE (@1)))
&& bitwise_equal_p (@1, @2)))
(with
@@ -3712,6 +3714,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if ((TREE_CODE (@1) == INTEGER_CST
&& TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ || (VECTOR_TYPE_P (TREE_TYPE (@1))
+ && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
|| POINTER_TYPE_P (TREE_TYPE (@1)))
&& operand_equal_p (@1, @2)))
(with
@@ -3762,6 +3766,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if ((TREE_CODE (@1) == INTEGER_CST
&& TREE_CODE (@2) == INTEGER_CST)
|| ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
+ || (VECTOR_TYPE_P (TREE_TYPE (@1))
+ && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
|| POINTER_TYPE_P (TREE_TYPE (@1)))
&& bitwise_equal_p (@1, @2)))
(with
@@ -3885,7 +3891,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
rcmp (ne le gt ne lt ge)
(simplify
(op:c (cmp1:c @0 @1) (cmp2 @0 @1))
- (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || POINTER_TYPE_P (TREE_TYPE (@0))
+ || (VECTOR_TYPE_P (TREE_TYPE (@0))
+ && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp)))
(rcmp @0 @1)))))
/* Optimize (a CMP b) == (a CMP b) */
@@ -3894,7 +3903,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
rcmp (eq gt le eq ge lt)
(simplify
(eq:c (cmp1:c @0 @1) (cmp2 @0 @1))
- (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
+ (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || POINTER_TYPE_P (TREE_TYPE (@0))
+ || (VECTOR_TYPE_P (TREE_TYPE (@0))
+ && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp)))
(rcmp @0 @1))))
/* (type)([0,1]@a != 0) -> (type)a
diff --git a/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
new file mode 100644
index 000000000000..a1a601dc1958
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-additional-options "-fdump-tree-original-all" } */
+
+typedef int v4i __attribute__((vector_size(4*sizeof(int))));
+
+/* Ensure we can simplify `VEC_COND_EXPR(a OP1 b) OP2 VEC_COND_EXPR(a OP3 b)`
+ * into `VEC_COND_EXPR(a OP4 b)`
+ */
+
+void use (v4i const *z);
+
+void
+g (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x > *y | *x == *y; // expect x >= y
+ *t = *x > *y | *x <= *y; // expect all lanes true (-1)
+}
+
+void
+h (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x <= *y & *x >= *y; // expect x == y
+ *t = *x <= *y & *x != *y; // expect x < y
+}
+
+void
+i (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x == *y | *x != *y; // expect all lanes true (-1)
+ *t = *x == *y & *x != *y; // expect all lanes false (0)
+}
+
+void
+k (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x < *y | *x == *y; // expect x <= y
+ *t = *x < *y & *x > *y; // expect all lanes false (0)
+}
+
+void
+m (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x <= *y ^ *x >= *y; /* expect x != y */
+ *t = *x <= *y ^ *x != *y; /* expect x >= y */
+}
+
+void
+n (v4i *x, v4i const *y, v4i *z, v4i *t)
+{
+ *z = *x == *y ^ *x != *y; /* expect all lanes true (-1) */
+ *t = *x == *y ^ *x == *y; /* expect all lanes false (0) */
+}
+
+
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*==\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*!=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
+/* { dg-final { scan-tree-dump ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
--
2.43.0