On Thu, 3 Jul 2025, Icen Zeyada wrote: > Generalize existing scalar gimple_fold rules to apply the same > bitwise comparison simplifications to vector types. Previously, an > expression like > > (x < y) && (x > y) > > would fold to `false` if x and y are scalars, but equivalent vector > comparisons were left untouched. This patch enables folding of > patterns of the form > > (cmp x y) bit_and (cmp x y) > (cmp x y) bit_ior (cmp x y) > (cmp x y) bit_xor (cmp x y) > > for vector operands as well, ensuring consistent optimization across > all data types.
This patch looks good to me. Thanks, Richard. > PR tree-optimization/119196 > > gcc/ChangeLog: > > * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply > to vectors. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/vector-compare-5.c: Add new test for vector > compare simplification. > > Signed-off-by: Icen Zeyada <icen.zeya...@arm.com> > --- > gcc/match.pd | 57 +++++++++++++--- > .../gcc.target/aarch64/vector-compare-5.c | 67 +++++++++++++++++++ > 2 files changed, 113 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c > > diff --git a/gcc/match.pd b/gcc/match.pd > index 36317b9128f..80c02a0ab02 100644 > --- a/gcc/match.pd > +++ b/gcc/match.pd > @@ -3674,6 +3674,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if ((TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) > + || (VECTOR_TYPE_P (TREE_TYPE (@1)) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2)) > || POINTER_TYPE_P (TREE_TYPE (@1))) > && bitwise_equal_p (@1, @2))) > (with > @@ -3712,27 +3714,39 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (code1 == EQ_EXPR && val) @3) > (if (code1 == EQ_EXPR && !val) { constant_boolean_node (false, type); > }) > (if (code1 == NE_EXPR && !val && allbits) @4) > - (if (code1 == NE_EXPR > + (if ((code1 == NE_EXPR > && code2 == GE_EXPR > && cmp == 0 > && allbits) > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (gt @c0 (convert @1))) > - (if (code1 == NE_EXPR > + (if ((code1 == NE_EXPR > && code2 == LE_EXPR > && cmp == 0 > && allbits) > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (lt @c0 (convert @1))) > /* (a != (b+1)) & (a > b) -> a > (b+1) */ > - (if (code1 == NE_EXPR > + (if ((code1 == NE_EXPR > && code2 == GT_EXPR > && one_after > && allbits) > + && 
((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GT_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (gt @c0 (convert @1))) > /* (a != (b-1)) & (a < b) -> a < (b-1) */ > - (if (code1 == NE_EXPR > + (if ((code1 == NE_EXPR > && code2 == LT_EXPR > && one_before > && allbits) > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LT_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (lt @c0 (convert @1))) > ) > ) > @@ -3751,6 +3765,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if ((TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) > + || (VECTOR_TYPE_P (TREE_TYPE (@1)) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2)) > || POINTER_TYPE_P (TREE_TYPE (@1))) > && operand_equal_p (@1, @2))) > (with > @@ -3801,6 +3817,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if ((TREE_CODE (@1) == INTEGER_CST > && TREE_CODE (@2) == INTEGER_CST) > || ((INTEGRAL_TYPE_P (TREE_TYPE (@1)) > + || (VECTOR_TYPE_P (TREE_TYPE (@1))) > || POINTER_TYPE_P (TREE_TYPE (@1))) > && bitwise_equal_p (@1, @2))) > (with > @@ -3842,24 +3859,36 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > (if (code1 == EQ_EXPR > && code2 == GT_EXPR > && cmp == 0 > - && allbits) > + && allbits > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GE_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (ge @c0 @2)) > (if (code1 == EQ_EXPR > && code2 == LT_EXPR > && cmp == 0 > - && allbits) > + && allbits > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LE_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (le @c0 @2)) > /* (a == (b-1)) | (a >= b) -> a >= (b-1) */ > (if (code1 == EQ_EXPR > && code2 == GE_EXPR > && one_before > - && allbits) > + && allbits > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, GE_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (ge @c0 (convert @1))) > /* (a == 
(b+1)) | (a <= b) -> a <= (b+1) */ > (if (code1 == EQ_EXPR > && code2 == LE_EXPR > && one_after > - && allbits) > + && allbits > + && ((VECTOR_BOOLEAN_TYPE_P (type) > + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, LE_EXPR)) > + || !VECTOR_TYPE_P (TREE_TYPE (@1)))) > (le @c0 (convert @1))) > ) > ) > @@ -3924,7 +3953,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > rcmp (ne le gt ne lt ge) > (simplify > (op:c (cmp1:c @0 @1) (cmp2 @0 @1)) > - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) > + || POINTER_TYPE_P (TREE_TYPE (@0)) > + || (VECTOR_TYPE_P (TREE_TYPE (@1)) > + && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp))) > (rcmp @0 @1))))) > > /* Optimize (a CMP b) == (a CMP b) */ > @@ -3933,7 +3965,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) > rcmp (eq gt le eq ge lt) > (simplify > (eq:c (cmp1:c @0 @1) (cmp2 @0 @1)) > - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0))) > + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) > + || POINTER_TYPE_P (TREE_TYPE (@0)) > + || (VECTOR_TYPE_P (TREE_TYPE (@0)) > + && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp))) > (rcmp @0 @1)))) > > /* (type)([0,1]@a != 0) -> (type)a > @@ -11601,4 +11636,4 @@ and, > && VECTOR_TYPE_P (type) > && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, > OPTIMIZE_FOR_BOTH)) > (IFN_AVG_CEIL @0 @2))) > -#endif > +#endif > \ No newline at end of file > diff --git a/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c > b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c > new file mode 100644 > index 00000000000..a1a601dc195 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c > @@ -0,0 +1,67 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-additional-options "-fdump-tree-original-all" } */ > + > +typedef int v4i __attribute__((vector_size(4*sizeof(int)))); > + > +/* Ensure we can simplify `VEC_COND_EXPR(a OP1 b) OP2 VEC_COND_EXPR(a OP3 b)` > + * into `VEC_COND_EXPR(a OP4 
b)` > + */ > + > +void use (v4i const *z); > + > +void > +g (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x > *y | *x == *y; // expect >= > + *t = *x > *y | *x <= *y; // expect true > +} > + > +void > +h (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x <= *y & *x >= *y; // expect x == y > + *t = *x <= *y & *x != *y; // expect x<y > +} > + > +void > +i (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x == *y | *x != *y; // expect true > + *t = *x == *y & *x != *y; // expect false > +} > + > +void > +k (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x < *y | *x == *y; // x <= y > + *t = *x < *y & *x > *y; // expect false > +} > + > +void > +m (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x <= *y ^ *x >= *y; /* expect x != y */ > + *t = *x <= *y ^ *x != *y; /* expect x >= y */ > +} > + > +void > +n (v4i *x, v4i const *y, v4i *z, v4i *t) > +{ > + *z = *x == *y ^ *x != *y; /* expect true */ > + *t = *x == *y ^ *x == *y; /* expect false */ > +} > + > + > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*==\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */ > +/* { 
dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*!=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;" > "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */ > +/* { dg-final { scan-tree-dump > ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */ > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)