Richard Biener <[email protected]> writes:
> The following adds a function like tree_to_vec_perm_builder but
> for building vec_perm_indices directly from all operands of a
> VEC_PERM_EXPR.  This avoids errors such as shown in PR123175
> and allows us to extract information from the permuted inputs
> to implement vec_shl/vec_shr recognition in can_vec_perm_const_p,
> making that fully cover what vector lowering will assess.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu and aarch64-linux.
>
> I went for a toplevel function rather than a method to provide
> better mitigation against PR123175 and to elide the builder which
> in most cases isn't necessary to expose.
>
> OK for trunk?

LGTM FWIW.

Very minor, but on the formatting of the inline function definitions:
I thought the convention was not to indent the "{", to match what we do
for non-inline functions.  I realise the codebase isn't consistent though.

Thanks,
Richard


>
> Thanks,
> Richard.
>
>       * vec-perm-indices.h (vec_perm_indices::new_vector): New overload.
>       (vec_perm_indices::input_bitwise_zero_p): New method.
>       (vec_perm_indices::m_input0_bitwise_zero_p,
>       vec_perm_indices::m_input1_bitwise_zero_p): New members.
>       (vec_perm_indices::vec_perm_indices): Adjust.
>       (tree_to_vec_perm_indices): Declare.
>       * vec-perm-indices.cc (vec_perm_indices::new_vector): New overload.
>       (vec_perm_indices::new_expanded_vector): Adjust.
>       (tree_to_vec_perm_indices): New function.
>       * optabs-query.cc (can_vec_perm_const_p): Handle permute
>       patterns mapping to vec_shl/vec_shr.
>       * tree-vect-generic.cc (lower_vec_perm): Move vec_shl/vec_shr
>       detection to can_vec_perm_const_p and simplify.
> ---
>  gcc/optabs-query.cc      | 53 ++++++++++++++++++++++++++++++----
>  gcc/tree-vect-generic.cc | 62 ++--------------------------------------
>  gcc/vec-perm-indices.cc  | 25 ++++++++++++++++
>  gcc/vec-perm-indices.h   | 21 ++++++++++++--
>  4 files changed, 94 insertions(+), 67 deletions(-)
>
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 58842e40ed6..da0d37cf5df 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -415,12 +415,7 @@ can_vec_perm_var_p (machine_mode mode)
>  /* Return true if the target directly supports VEC_PERM_EXPRs on vectors
>     of mode OP_MODE and result vector of mode MODE using the selector SEL.
>     ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a
> -   register and use a variable permute (if the target supports that).
> -
> -   Note that additional permutations representing whole-vector shifts may
> -   also be handled via the vec_shr or vec_shl optab, but only where the
> -   second input vector is entirely constant zeroes; this case is not dealt
> -   with here.  */
> +   register and use a variable permute (if the target supports that).  */
>  
>  bool
>  can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
> @@ -465,6 +460,52 @@ can_vec_perm_const_p (machine_mode mode, machine_mode 
> op_mode,
>        into integer operations.  */
>      }
>  
> +  unsigned elements;
> +  if (mode == op_mode
> +      && GET_MODE_NUNITS (mode).is_constant (&elements))
> +    {
> +      if (sel.input_bitwise_zero_p (0)
> +       && can_implement_p (vec_shl_optab, mode))
> +     {
> +       unsigned int first = 0, i;
> +       for (i = 0; i < elements; ++i)
> +         if (known_eq (poly_uint64 (sel[i]), elements))
> +           {
> +             if (i == 0 || first)
> +               break;
> +             first = i;
> +           }
> +         else if (first
> +                  ? maybe_ne (poly_uint64 (sel[i]),
> +                              elements + i - first)
> +                  : maybe_ge (poly_uint64 (sel[i]), elements))
> +           break;
> +       if (first && i == elements)
> +         return true;
> +     }
> +      if (sel.input_bitwise_zero_p (1)
> +       && maybe_ne (sel[0], 0)
> +       && known_lt (sel[0], elements)
> +       && can_implement_p (vec_shr_optab, mode))
> +     {
> +       if (sel.series_p (0, 1, sel[0], 1))
> +         return true;
> +       unsigned i;
> +       for (i = 1; i < elements; ++i)
> +         {
> +           poly_uint64 actual = sel[i];
> +           poly_uint64 expected = i + sel[0];
> +           /* Indices into the second vector are all equivalent.  */
> +           if (maybe_lt (actual, elements)
> +               ? maybe_ne (actual, expected)
> +               : maybe_lt (expected, elements))
> +             break;
> +         }
> +       if (i == elements)
> +         return true;
> +     }
> +    }
> +
>    return false;
>  }
>  
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index fddb44bfe86..a8c31974973 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -1640,76 +1640,20 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
>       mask = gimple_assign_rhs1 (def_stmt);
>      }
>  
> -  vec_perm_builder sel_int;
>  
> +  vec_perm_indices indices;
>    if (TREE_CODE (mask) == VECTOR_CST
> -      && tree_to_vec_perm_builder (&sel_int, mask))
> +      && tree_to_vec_perm_indices (&indices, vec0, vec1, mask))
>      {
> -      vec_perm_indices indices (sel_int, 2, in_elements);
>        machine_mode vmode = TYPE_MODE (vect_type);
>        tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
>        machine_mode lhs_mode = TYPE_MODE (lhs_type);
> -      if (can_vec_perm_const_p (lhs_mode, vmode, indices))
> +      if (can_vec_perm_const_p (lhs_mode, vmode, indices, true))
>       {
>         gimple_assign_set_rhs3 (stmt, mask);
>         update_stmt (stmt);
>         return;
>       }
> -      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
> -      vector as VEC1 and a right element shift MASK.  */
> -      if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
> -       && TREE_CODE (vec1) == VECTOR_CST
> -       && initializer_zerop (vec1)
> -       && maybe_ne (indices[0], 0)
> -       && known_lt (poly_uint64 (indices[0]), elements))
> -     {
> -       bool ok_p = indices.series_p (0, 1, indices[0], 1);
> -       if (!ok_p)
> -         {
> -           for (i = 1; i < elements; ++i)
> -             {
> -               poly_uint64 actual = indices[i];
> -               poly_uint64 expected = i + indices[0];
> -               /* Indices into the second vector are all equivalent.  */
> -               if (maybe_lt (actual, elements)
> -                   ? maybe_ne (actual, expected)
> -                   : maybe_lt (expected, elements))
> -                 break;
> -             }
> -           ok_p = i == elements;
> -         }
> -       if (ok_p)
> -         {
> -           gimple_assign_set_rhs3 (stmt, mask);
> -           update_stmt (stmt);
> -           return;
> -         }
> -     }
> -      /* And similarly vec_shl pattern.  */
> -      if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
> -       && TREE_CODE (vec0) == VECTOR_CST
> -       && initializer_zerop (vec0))
> -     {
> -       unsigned int first = 0;
> -       for (i = 0; i < elements; ++i)
> -         if (known_eq (poly_uint64 (indices[i]), elements))
> -           {
> -             if (i == 0 || first)
> -               break;
> -             first = i;
> -           }
> -         else if (first
> -                  ? maybe_ne (poly_uint64 (indices[i]),
> -                                           elements + i - first)
> -                  : maybe_ge (poly_uint64 (indices[i]), elements))
> -           break;
> -       if (first && i == elements)
> -         {
> -           gimple_assign_set_rhs3 (stmt, mask);
> -           update_stmt (stmt);
> -           return;
> -         }
> -     }
>      }
>    else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
>      return;
> diff --git a/gcc/vec-perm-indices.cc b/gcc/vec-perm-indices.cc
> index 5b54a84caee..6bb2f39ff91 100644
> --- a/gcc/vec-perm-indices.cc
> +++ b/gcc/vec-perm-indices.cc
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "selftest.h"
>  #include "rtx-vector-builder.h"
>  
> +
>  /* Switch to a new permutation vector that selects between NINPUTS vector
>     inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
>     new permutation vector from ELEMENTS, clamping each one to be in range.  
> */
> @@ -38,10 +39,14 @@ along with GCC; see the file COPYING3.  If not see
>  void
>  vec_perm_indices::new_vector (const vec_perm_builder &elements,
>                             unsigned int ninputs,
> +                           bool input0_bitwise_zero_p,
> +                           bool input1_bitwise_zero_p,
>                             poly_uint64 nelts_per_input)
>  {
>    m_ninputs = ninputs;
>    m_nelts_per_input = nelts_per_input;
> +  m_input0_bitwise_zero_p = input0_bitwise_zero_p;
> +  m_input1_bitwise_zero_p = input1_bitwise_zero_p;
>    /* If the vector has a constant number of elements, expand the
>       encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
>       wrap halfway if there is only one vector input, and we want
> @@ -87,6 +92,8 @@ vec_perm_indices::new_expanded_vector (const 
> vec_perm_indices &orig,
>                                      unsigned int factor)
>  {
>    m_ninputs = orig.m_ninputs;
> +  m_input0_bitwise_zero_p = orig.m_input0_bitwise_zero_p;
> +  m_input1_bitwise_zero_p = orig.m_input1_bitwise_zero_p;
>    m_nelts_per_input = orig.m_nelts_per_input * factor;
>    m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
>                        orig.m_encoding.npatterns () * factor,
> @@ -301,6 +308,24 @@ tree_to_vec_perm_builder (vec_perm_builder *builder, 
> tree cst)
>    return true;
>  }
>  
> +/* Try to read the contents of VECTOR_CST PERM_CST as a constant permutation
> +   vector permuting OP0 and OP1.  Return true and populate INDICES on 
> success,
> +   otherwise return false without modifying INDICES.  */
> +
> +bool
> +tree_to_vec_perm_indices (vec_perm_indices *indices, tree op0, tree op1,
> +                       tree perm_cst)
> +{
> +  vec_perm_builder builder;
> +  if (!tree_to_vec_perm_builder (&builder, perm_cst))
> +    return false;
> +  indices->new_vector (builder, op0 == op1 ? 1 : 2,
> +                    initializer_zerop (op0), initializer_zerop (op1),
> +                    TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)));
> +  return true;
> +}
> +
> +
>  /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  
> */
>  
>  tree
> diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
> index 5e68f7f62ec..b7bf5aa3f05 100644
> --- a/gcc/vec-perm-indices.h
> +++ b/gcc/vec-perm-indices.h
> @@ -55,7 +55,12 @@ public:
>    vec_perm_indices ();
>    vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
>  
> -  void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
> +  void new_vector (const vec_perm_builder &b, unsigned int ni, poly_uint64 
> ne)
> +    {
> +      new_vector (b, ni, false, false, ne);
> +    }
> +  void new_vector (const vec_perm_builder &, unsigned int, bool, bool,
> +                poly_uint64);
>    void new_expanded_vector (const vec_perm_indices &, unsigned int);
>    bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
>    void rotate_inputs (int delta);
> @@ -70,6 +75,13 @@ public:
>    /* Return the number of input vectors being permuted.  */
>    unsigned int ninputs () const { return m_ninputs; }
>  
> +  /* Return whether the input N is known bitwise zero.  */
> +  bool input_bitwise_zero_p (unsigned n) const
> +    {
> +      return (n == 0 ? m_input0_bitwise_zero_p
> +           : (n == 1 ? m_input1_bitwise_zero_p : false));
> +    }
> +
>    /* Return the number of elements in each input vector.  */
>    poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
>  
> @@ -87,16 +99,21 @@ private:
>  
>    vec_perm_builder m_encoding;
>    unsigned int m_ninputs;
> +  bool m_input0_bitwise_zero_p;
> +  bool m_input1_bitwise_zero_p;
>    poly_uint64 m_nelts_per_input;
>  };
>  
>  bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
> +bool tree_to_vec_perm_indices (vec_perm_indices *, tree, tree, tree);
>  tree vec_perm_indices_to_tree (tree, const vec_perm_indices &);
>  rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &);
>  
>  inline
>  vec_perm_indices::vec_perm_indices ()
>    : m_ninputs (0),
> +    m_input0_bitwise_zero_p (false),
> +    m_input1_bitwise_zero_p (false),
>      m_nelts_per_input (0)
>  {
>  }
> @@ -110,7 +127,7 @@ vec_perm_indices::vec_perm_indices (const 
> vec_perm_builder &elements,
>                                   unsigned int ninputs,
>                                   poly_uint64 nelts_per_input)
>  {
> -  new_vector (elements, ninputs, nelts_per_input);
> +  new_vector (elements, ninputs, false, false, nelts_per_input);
>  }
>  
>  /* Return the canonical value for permutation vector element ELT,

Reply via email to