On Thu, Oct 30, 2025 at 12:03 AM Andrew Pinski
<[email protected]> wrote:
>
> It was noticed that `.VEC_SHL_INSERT ({ 0, ... }, 0)` was not being
> simplified to just `{ 0, ... }`. This was generated by the autovectorizer
> (maybe even by accident, see PR tree-optimization/116081).
>
> This adds a few SVE testcases to see if this is optimized since the
> auto-vectorizer or intrinsics are the only two ways of getting this
> produced.
>
> Changes since:
> * v1: Move the constant case over to fold-const-call.cc.
>       Simplify the match pattern to handle vec_duplicate.
>
> Built and tested for aarch64-linux-gnu with no regressions.

OK.

Richard.

>         PR target/116075
>
> gcc/ChangeLog:
>
>         * fold-const-call.cc (fold_const_vec_shl_insert): New function.
>         (fold_const_call): Call fold_const_vec_shl_insert for
>         CFN_VEC_SHL_INSERT.
>         * match.pd (`VEC_SHL_INSERT (dup (A), A)`): New pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/sve/dup-insr-1.c: New test.
>         * gcc.target/aarch64/sve/dup-insr-2.c: New test.
>
> Signed-off-by: Andrew Pinski <[email protected]>
> ---
>  gcc/fold-const-call.cc                        | 22 ++++++++++++++++
>  gcc/match.pd                                  |  5 ++++
>  .../gcc.target/aarch64/sve/dup-insr-1.c       | 26 +++++++++++++++++++
>  .../gcc.target/aarch64/sve/dup-insr-2.c       | 26 +++++++++++++++++++
>  4 files changed, 79 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
>
> diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc
> index 439bf8044f5..89c1c28b60d 100644
> --- a/gcc/fold-const-call.cc
> +++ b/gcc/fold-const-call.cc
> @@ -1440,6 +1440,25 @@ fold_const_fold_left (tree type, tree arg0, tree arg1, 
> tree_code code)
>    return arg0;
>  }
>
> +/* Fold a call to IFN_VEC_SHL_INSERT (ARG0, ARG1), returning a value
> +   of type TYPE.  */
> +
> +static tree
> +fold_const_vec_shl_insert (tree, tree arg0, tree arg1)
> +{
> +  if (TREE_CODE (arg0) != VECTOR_CST)
> +    return NULL_TREE;
> +
> +  /* vec_shl_insert ( dup(CST), CST) -> dup (CST). */
> +  if (tree elem = uniform_vector_p (arg0))
> +    {
> +      if (operand_equal_p (elem, arg1))
> +       return arg0;
> +    }
> +
> +  return NULL_TREE;
> +}
> +
>  /* Try to evaluate:
>
>        *RESULT = FN (*ARG0, *ARG1)
> @@ -1843,6 +1862,9 @@ fold_const_call (combined_fn fn, tree type, tree arg0, 
> tree arg1)
>      case CFN_FOLD_LEFT_PLUS:
>        return fold_const_fold_left (type, arg0, arg1, PLUS_EXPR);
>
> +    case CFN_VEC_SHL_INSERT:
> +      return fold_const_vec_shl_insert (type, arg0, arg1);
> +
>      case CFN_UBSAN_CHECK_ADD:
>      case CFN_ADD_OVERFLOW:
>        subcode = PLUS_EXPR;
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 00493d6ad99..caa32545081 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -12021,3 +12021,8 @@ and,
>        && direct_internal_fn_supported_p (IFN_AVG_CEIL, type, 
> OPTIMIZE_FOR_BOTH))
>        (IFN_AVG_CEIL @0 @2)))
>  #endif
> +
> +/* vec shift left insert (dup (A), A) -> dup(A) */
> +(simplify
> + (IFN_VEC_SHL_INSERT (vec_duplicate@1 @0) @0)
> +  @1)
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> new file mode 100644
> index 00000000000..41dcbba45cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-1.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +/* PR target/116075 */
> +
> +#include <arm_sve.h>
> +
> +svint8_t f(void)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (0);
> +  tt = svinsr (tt, 0);
> +  return tt;
> +}
> +
> +svint8_t f1(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (t);
> +  tt = svinsr (tt, t);
> +  return tt;
> +}
> +
> +/* The above 2 functions should have removed the VEC_SHL_INSERT. */
> +
> +/* { dg-final { scan-tree-dump-not ".VEC_SHL_INSERT " "optimized" } } */
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> new file mode 100644
> index 00000000000..8eafe974624
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/dup-insr-2.c
> @@ -0,0 +1,26 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-optimized" } */
> +/* PR target/116075 */
> +
> +#include <arm_sve.h>
> +
> +svint8_t f(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (0);
> +  tt = svinsr (tt, t);
> +  return tt;
> +}
> +
> +svint8_t f1(int8_t t)
> +{
> +  svint8_t tt;
> +  tt = svdup_s8 (t);
> +  tt = svinsr (tt, 0);
> +  return tt;
> +}
> +
> +/* The above 2 functions should not have removed the VEC_SHL_INSERT. */
> +
> +/* { dg-final { scan-tree-dump-times ".VEC_SHL_INSERT " 2 "optimized" } } */
> +
> --
> 2.43.0
>

Reply via email to