Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes:
> gcc/ChangeLog:
>       * tree-ssa-forwprop.cc (is_combined_permutation_identity): Try to
>       simplify two successive VEC_PERM_EXPRs with single operand and same
>       mask, where mask chooses elements in reverse order.
>
> gcc/testsuite/ChangeLog:
>       * gcc.target/aarch64/sve/acle/general/rev-1.c: New test.
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
> new file mode 100644
> index 00000000000..e57ee67d716
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/rev-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fdump-tree-optimized" } */
> +
> +#include <arm_sve.h>
> +
> +svint32_t f(svint32_t v)
> +{
> +  return svrev_s32 (svrev_s32 (v));
> +}
> +
> +/* { dg-final { scan-tree-dump "return v_1\\(D\\)" "optimized" } } */
> +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
> diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
> index 9b567440ba4..61df7efe82c 100644
> --- a/gcc/tree-ssa-forwprop.cc
> +++ b/gcc/tree-ssa-forwprop.cc
> @@ -2541,6 +2541,27 @@ is_combined_permutation_identity (tree mask1, tree mask2)
>  
>    gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST
>                      && TREE_CODE (mask2) == VECTOR_CST);
> +
> +  /* For VLA masks, check for the following pattern:
> +     v1 = VEC_PERM_EXPR (v0, v0, mask1)
> +     v2 = VEC_PERM_EXPR (v1, v1, mask2)

Maybe blank out the second operands using "...":

     v1 = VEC_PERM_EXPR (v0, ..., mask1)
     v2 = VEC_PERM_EXPR (v1, ..., mask2)

to make it clear that they don't matter.

OK with that change, thanks.

Richard

> +     -->
> +     v2 = v0
> +     if mask1 == mask2 == {nelts - 1, nelts - 2, ...}.  */
> +
> +  if (operand_equal_p (mask1, mask2, 0)
> +      && !VECTOR_CST_NELTS (mask1).is_constant ())
> +    {
> +      vec_perm_builder builder;
> +      if (tree_to_vec_perm_builder (&builder, mask1))
> +     {
> +       poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask1));
> +       vec_perm_indices sel (builder, 1, nelts);
> +       if (sel.series_p (0, 1, nelts - 1, -1))
> +         return 1;
> +     }
> +    }
> +
>    mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2);
>    if (mask == NULL_TREE || TREE_CODE (mask) != VECTOR_CST)
>      return 0;

Reply via email to