On Wed, 21 May 2025, Icen Zeyada wrote:
> Generalize existing scalar gimple_fold rules to apply the same
> bitwise comparison simplifications to vector types. Previously, an
> expression like
>
> (x < y) && (x > y)
>
> would fold to `false` if x and y are scalars, but equivalent vector
> comparisons were left untouched. This patch enables folding of
> patterns of the form
>
> (cmp x y) bit_and (cmp x y)
> (cmp x y) bit_ior (cmp x y)
> (cmp x y) bit_xor (cmp x y)
>
> for vector operands as well, ensuring consistent optimization across
> all data types.
>
> PR tree-optimization/119196
>
> gcc/ChangeLog:
>
> * match.pd: Allow scalar optimizations with bitwise AND/OR/XOR to apply
> to vectors.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/vector-compare-5.c: Add new test for vector
> compare simplification.
>
> Signed-off-by: Icen Zeyada <[email protected]>
> ---
> gcc/match.pd | 16 ++++-
> .../gcc.target/aarch64/vector-compare-5.c | 67 +++++++++++++++++++
> 2 files changed, 81 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 611f05ef9f9c..7a7df6aeb6c5 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -3635,6 +3635,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
> && TREE_CODE (@2) == INTEGER_CST)
> || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> + || (VECTOR_TYPE_P (TREE_TYPE (@1))
Note this does not verify we are doing a vector compare; our IL
allows vector ==/!= vector compares that produce a scalar bool. The
appropriate test is VECTOR_BOOLEAN_TYPE_P (type) to check for
a vector compare (to gate an expand_vec_cmp_expr_p check).
For the bitwise_equal_p guard your change looks OK.
> + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
The expand_vec_cmp_expr_p check is misplaced - we do not generate 'code2'
itself but a comparison code that depends on it, as for
(if (code1 == NE_EXPR
&& code2 == LE_EXPR
&& cmp == 0
&& allbits)
(lt @c0 (convert @1)))
so here you'd want to verify we can do LT_EXPR for the types involved.
The cases which simplify to constant_boolean_node do not need
any such check. Possibly the same issue applies to the cases below;
I did not verify.
Thanks,
Richard.
> || POINTER_TYPE_P (TREE_TYPE (@1)))
> && bitwise_equal_p (@1, @2)))
> (with
> @@ -3712,6 +3714,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
> && TREE_CODE (@2) == INTEGER_CST)
> || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> + || (VECTOR_TYPE_P (TREE_TYPE (@1))
> + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
> || POINTER_TYPE_P (TREE_TYPE (@1)))
> && operand_equal_p (@1, @2)))
> (with
> @@ -3762,6 +3766,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (if ((TREE_CODE (@1) == INTEGER_CST
> && TREE_CODE (@2) == INTEGER_CST)
> || ((INTEGRAL_TYPE_P (TREE_TYPE (@1))
> + || (VECTOR_TYPE_P (TREE_TYPE (@1))
> + && expand_vec_cmp_expr_p (TREE_TYPE (@1), type, code2))
> || POINTER_TYPE_P (TREE_TYPE (@1)))
> && bitwise_equal_p (@1, @2)))
> (with
> @@ -3885,7 +3891,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> rcmp (ne le gt ne lt ge)
> (simplify
> (op:c (cmp1:c @0 @1) (cmp2 @0 @1))
> - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + || POINTER_TYPE_P (TREE_TYPE (@0))
> + || (VECTOR_TYPE_P (TREE_TYPE (@1))
> + && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp)))
> (rcmp @0 @1)))))
>
> /* Optimize (a CMP b) == (a CMP b) */
> @@ -3894,7 +3903,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> rcmp (eq gt le eq ge lt)
> (simplify
> (eq:c (cmp1:c @0 @1) (cmp2 @0 @1))
> - (if (INTEGRAL_TYPE_P (TREE_TYPE (@0)) || POINTER_TYPE_P (TREE_TYPE (@0)))
> + (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
> + || POINTER_TYPE_P (TREE_TYPE (@0))
> + || (VECTOR_TYPE_P (TREE_TYPE (@0))
> + && expand_vec_cmp_expr_p (TREE_TYPE (@0), type, rcmp)))
> (rcmp @0 @1))))
>
> /* (type)([0,1]@a != 0) -> (type)a
> diff --git a/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> new file mode 100644
> index 000000000000..a1a601dc1958
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vector-compare-5.c
> @@ -0,0 +1,67 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-additional-options "-fdump-tree-original-all" } */
> +
> +typedef int v4i __attribute__((vector_size(4*sizeof(int))));
> +
> +/* Ensure we can simplify `VEC_COND_EXPR(a OP1 b) OP2 VEC_COND_EXPR(a OP3 b)`
> + * into `VEC_COND_EXPR(a OP4 b)`
> + */
> +
> +void use (v4i const *z);
> +
> +void
> +g (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x > *y | *x == *y; // expect >=
> + *t = *x > *y | *x <= *y; // expect true
> +}
> +
> +void
> +h (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x <= *y & *x >= *y; // expect x == y
> + *t = *x <= *y & *x != *y; // expect x<y
> +}
> +
> +void
> +i (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x == *y | *x != *y; // expect true
> + *t = *x == *y & *x != *y; // expect false
> +}
> +
> +void
> +k (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x < *y | *x == *y; // x <= y
> + *t = *x < *y & *x > *y; // expect false
> +}
> +
> +void
> +m (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x <= *y ^ *x >= *y; /* expect x != y */
> + *t = *x <= *y ^ *x != *y; /* expect x <= y */
> +}
> +
> +void
> +n (v4i *x, v4i const *y, v4i *z, v4i *t)
> +{
> + *z = *x == *y ^ *x != *y; /* expect true */
> + *t = *x == *y ^ *x == *y; /* expect false */
> +}
> +
> +
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*==\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*<=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*!=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*VEC_COND_EXPR\\s*<\\s*\\*xD\\.\\d+\\s*>=\\s*VIEW_CONVERT_EXPR<v4iD\\.\\d+>\\(\\*yD\\.\\d+\\)\\s*,\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*,\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*>\\s*;"
> "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*zD\\.\\d+\\s*=\\s*\\{\\s*-1(,\\s*-1){3}\\s*\\}\\s*;" "original" } } */
> +/* { dg-final { scan-tree-dump
> ".*\\*tD\\.\\d+\\s*=\\s*\\{\\s*0(,\\s*0){3}\\s*\\}\\s*;" "original" } } */
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)