On Tue, Dec 19, 2023 at 6:39 AM liuhongt <[email protected]> wrote:
>
> Similar for A < B ? B : A to MAX_EXPR.
> There is code in the frontend to optimize such patterns, but it fails to
> handle the testcase in the PR since the pattern is only exposed at gimple
> level when folding backend builtins.
>
> pr95906 can now be optimized to MAX_EXPR, as requested by the FIXME
> comment in the testcase.
>
> // FIXME: this should further optimize to a MAX_EXPR
> typedef signed char v16i8 __attribute__((vector_size(16)));
> v16i8 f(v16i8 a, v16i8 b)
>
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk? (or maybe wait for GCC 15).
I wonder if you can amend the existing patterns instead by iterating
over cond/vec_cond. There are quite a few (look for uses of
minmax_from_comparison) that could be adapted to vectors.
The ones matching the simple form you match are
#if GIMPLE
/* A >= B ? A : B -> max (A, B) and friends. The code is still
in fold_cond_expr_with_comparison for GENERIC folding with
some extra constraints. */
(for cmp (eq ne le lt unle unlt ge gt unge ungt uneq ltgt)
(simplify
(cond (cmp:c (nop_convert1?@c0 @0) (nop_convert2?@c1 @1))
(convert3? @0) (convert4? @1))
(if (!HONOR_SIGNED_ZEROS (type)
...
I think. Consider at least placing the new patterns next to that.
> gcc/ChangeLog:
>
> PR target/104401
> * match.pd (A < B ? A : B -> MIN_EXPR): New pattern match.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr104401.c: New test.
> * gcc.dg/tree-ssa/pr95906.c: Adjust testcase.
> ---
> gcc/match.pd | 20 ++++++++++++++++++
> gcc/testsuite/gcc.dg/tree-ssa/pr95906.c | 3 +--
> gcc/testsuite/gcc.target/i386/pr104401.c | 27 ++++++++++++++++++++++++
> 3 files changed, 48 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/i386/pr104401.c
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index d57e29bfe1d..9584a70aa3d 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -5263,6 +5263,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> (view_convert:type
> (vec_cond @4 (view_convert:vtype @2) (view_convert:vtype @3)))))))
>
> +/* Optimize A < B ? A : B to MIN (A, B)
> + A > B ? A : B to MAX (A, B). */
> +(for cmp (lt le gt ge)
> + minmax (min min max max)
> + MINMAX (MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR)
> + (simplify
> + (vec_cond (cmp @0 @1) @0 @1)
> + (if (VECTOR_INTEGER_TYPE_P (type)
> + && target_supports_op_p (type, MINMAX, optab_vector))
> + (minmax @0 @1))))
> +
> +(for cmp (lt le gt ge)
> + minmax (max max min min)
> + MINMAX (MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR)
> + (simplify
> + (vec_cond (cmp @0 @1) @1 @0)
> + (if (VECTOR_INTEGER_TYPE_P (type)
> + && target_supports_op_p (type, MINMAX, optab_vector))
> + (minmax @0 @1))))
> +
> /* c1 ? c2 ? a : b : b --> (c1 & c2) ? a : b */
> (simplify
> (vec_cond @0 (vec_cond:s @1 @2 @3) @3)
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> index 3d820a58e93..d15670f3e9e 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
> @@ -1,7 +1,6 @@
> /* { dg-do compile } */
> /* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
>
> -// FIXME: this should further optimize to a MAX_EXPR
> typedef signed char v16i8 __attribute__((vector_size(16)));
> v16i8 f(v16i8 a, v16i8 b)
> {
> @@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
> }
>
> /* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
> -/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
> +/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c
> b/gcc/testsuite/gcc.target/i386/pr104401.c
> new file mode 100644
> index 00000000000..8ce7ff88d9e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr104401.c
> @@ -0,0 +1,27 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse4.1" } */
> +/* { dg-final { scan-assembler-times "pminsd" 2 } } */
> +/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
> +
> +#include <smmintrin.h>
> +
> +__m128i min32(__m128i value, __m128i input)
> +{
> + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
> +}
> +
> +__m128i max32(__m128i value, __m128i input)
> +{
> + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
> +}
> +
> +__m128i min32_1(__m128i value, __m128i input)
> +{
> + return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
> +}
> +
> +__m128i max32_1(__m128i value, __m128i input)
> +{
> + return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
> +}
> +
> --
> 2.31.1
>