The following adds a function like tree_to_vec_perm_builder, but one
that builds vec_perm_indices directly from all operands of a
VEC_PERM_EXPR.  This avoids errors such as the one shown in PR123175
and allows us to extract information from the permuted inputs
to implement vec_shl/vec_shr recognition in can_vec_perm_const_p,
so that it fully covers what vector lowering will assess.

Bootstrapped and tested on x86_64-unknown-linux-gnu and aarch64-linux.

I went for a top-level function rather than a method to provide
better mitigation against PR123175 and to elide the builder, which
in most cases does not need to be exposed.

OK for trunk?

Thanks,
Richard.

        * vec-perm-indices.h (vec_perm_indices::new_vector): New overload.
        (vec_perm_indices::input_bitwise_zero_p): New method.
        (vec_perm_indices::m_input0_bitwise_zero_p,
        vec_perm_indices::m_input1_bitwise_zero_p): New members.
        (vec_perm_indices::vec_perm_indices): Adjust.
        (tree_to_vec_perm_indices): Declare.
        * vec-perm-indices.cc (vec_perm_indices::new_vector): New overload.
        (vec_perm_indices::new_expanded_vector): Adjust.
        (tree_to_vec_perm_indices): New function.
        * optabs-query.cc (can_vec_perm_const_p): Handle permute
        patterns mapping to vec_shl/vec_shr.
        * tree-vect-generic.cc (lower_vec_perm): Move vec_shl/vec_shr
        detection to can_vec_perm_const_p and simplify.
---
 gcc/optabs-query.cc      | 53 ++++++++++++++++++++++++++++++----
 gcc/tree-vect-generic.cc | 62 ++--------------------------------------
 gcc/vec-perm-indices.cc  | 25 ++++++++++++++++
 gcc/vec-perm-indices.h   | 21 ++++++++++++--
 4 files changed, 94 insertions(+), 67 deletions(-)

diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 58842e40ed6..da0d37cf5df 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -415,12 +415,7 @@ can_vec_perm_var_p (machine_mode mode)
 /* Return true if the target directly supports VEC_PERM_EXPRs on vectors
    of mode OP_MODE and result vector of mode MODE using the selector SEL.
    ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a
-   register and use a variable permute (if the target supports that).
-
-   Note that additional permutations representing whole-vector shifts may
-   also be handled via the vec_shr or vec_shl optab, but only where the
-   second input vector is entirely constant zeroes; this case is not dealt
-   with here.  */
+   register and use a variable permute (if the target supports that).  */
 
 bool
 can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
@@ -465,6 +460,52 @@ can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
         into integer operations.  */
     }
 
+  unsigned elements;
+  if (mode == op_mode
+      && GET_MODE_NUNITS (mode).is_constant (&elements))
+    {
+      if (sel.input_bitwise_zero_p (0)
+         && can_implement_p (vec_shl_optab, mode))
+       {
+         unsigned int first = 0, i;
+         for (i = 0; i < elements; ++i)
+           if (known_eq (poly_uint64 (sel[i]), elements))
+             {
+               if (i == 0 || first)
+                 break;
+               first = i;
+             }
+           else if (first
+                    ? maybe_ne (poly_uint64 (sel[i]),
+                                elements + i - first)
+                    : maybe_ge (poly_uint64 (sel[i]), elements))
+             break;
+         if (first && i == elements)
+           return true;
+       }
+      if (sel.input_bitwise_zero_p (1)
+         && maybe_ne (sel[0], 0)
+         && known_lt (sel[0], elements)
+         && can_implement_p (vec_shr_optab, mode))
+       {
+         if (sel.series_p (0, 1, sel[0], 1))
+           return true;
+         unsigned i;
+         for (i = 1; i < elements; ++i)
+           {
+             poly_uint64 actual = sel[i];
+             poly_uint64 expected = i + sel[0];
+             /* Indices into the second vector are all equivalent.  */
+             if (maybe_lt (actual, elements)
+                 ? maybe_ne (actual, expected)
+                 : maybe_lt (expected, elements))
+               break;
+           }
+         if (i == elements)
+           return true;
+       }
+    }
+
   return false;
 }
 
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index fddb44bfe86..a8c31974973 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1640,76 +1640,20 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
        mask = gimple_assign_rhs1 (def_stmt);
     }
 
-  vec_perm_builder sel_int;
 
+  vec_perm_indices indices;
   if (TREE_CODE (mask) == VECTOR_CST
-      && tree_to_vec_perm_builder (&sel_int, mask))
+      && tree_to_vec_perm_indices (&indices, vec0, vec1, mask))
     {
-      vec_perm_indices indices (sel_int, 2, in_elements);
       machine_mode vmode = TYPE_MODE (vect_type);
       tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
       machine_mode lhs_mode = TYPE_MODE (lhs_type);
-      if (can_vec_perm_const_p (lhs_mode, vmode, indices))
+      if (can_vec_perm_const_p (lhs_mode, vmode, indices, true))
        {
          gimple_assign_set_rhs3 (stmt, mask);
          update_stmt (stmt);
          return;
        }
-      /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
-        vector as VEC1 and a right element shift MASK.  */
-      if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
-         && TREE_CODE (vec1) == VECTOR_CST
-         && initializer_zerop (vec1)
-         && maybe_ne (indices[0], 0)
-         && known_lt (poly_uint64 (indices[0]), elements))
-       {
-         bool ok_p = indices.series_p (0, 1, indices[0], 1);
-         if (!ok_p)
-           {
-             for (i = 1; i < elements; ++i)
-               {
-                 poly_uint64 actual = indices[i];
-                 poly_uint64 expected = i + indices[0];
-                 /* Indices into the second vector are all equivalent.  */
-                 if (maybe_lt (actual, elements)
-                     ? maybe_ne (actual, expected)
-                     : maybe_lt (expected, elements))
-                   break;
-               }
-             ok_p = i == elements;
-           }
-         if (ok_p)
-           {
-             gimple_assign_set_rhs3 (stmt, mask);
-             update_stmt (stmt);
-             return;
-           }
-       }
-      /* And similarly vec_shl pattern.  */
-      if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
-         && TREE_CODE (vec0) == VECTOR_CST
-         && initializer_zerop (vec0))
-       {
-         unsigned int first = 0;
-         for (i = 0; i < elements; ++i)
-           if (known_eq (poly_uint64 (indices[i]), elements))
-             {
-               if (i == 0 || first)
-                 break;
-               first = i;
-             }
-           else if (first
-                    ? maybe_ne (poly_uint64 (indices[i]),
-                                             elements + i - first)
-                    : maybe_ge (poly_uint64 (indices[i]), elements))
-             break;
-         if (first && i == elements)
-           {
-             gimple_assign_set_rhs3 (stmt, mask);
-             update_stmt (stmt);
-             return;
-           }
-       }
     }
   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
     return;
diff --git a/gcc/vec-perm-indices.cc b/gcc/vec-perm-indices.cc
index 5b54a84caee..6bb2f39ff91 100644
--- a/gcc/vec-perm-indices.cc
+++ b/gcc/vec-perm-indices.cc
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "selftest.h"
 #include "rtx-vector-builder.h"
 
+
 /* Switch to a new permutation vector that selects between NINPUTS vector
    inputs that have NELTS_PER_INPUT elements each.  Take the elements of the
    new permutation vector from ELEMENTS, clamping each one to be in range.  */
@@ -38,10 +39,14 @@ along with GCC; see the file COPYING3.  If not see
 void
 vec_perm_indices::new_vector (const vec_perm_builder &elements,
                              unsigned int ninputs,
+                             bool input0_bitwise_zero_p,
+                             bool input1_bitwise_zero_p,
                              poly_uint64 nelts_per_input)
 {
   m_ninputs = ninputs;
   m_nelts_per_input = nelts_per_input;
+  m_input0_bitwise_zero_p = input0_bitwise_zero_p;
+  m_input1_bitwise_zero_p = input1_bitwise_zero_p;
   /* If the vector has a constant number of elements, expand the
      encoding and clamp each element.  E.g. { 0, 2, 4, ... } might
      wrap halfway if there is only one vector input, and we want
@@ -87,6 +92,8 @@ vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
                                       unsigned int factor)
 {
   m_ninputs = orig.m_ninputs;
+  m_input0_bitwise_zero_p = orig.m_input0_bitwise_zero_p;
+  m_input1_bitwise_zero_p = orig.m_input1_bitwise_zero_p;
   m_nelts_per_input = orig.m_nelts_per_input * factor;
   m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
                         orig.m_encoding.npatterns () * factor,
@@ -301,6 +308,24 @@ tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
   return true;
 }
 
+/* Try to read the contents of VECTOR_CST PERM_CST as a constant permutation
+   vector permuting OP0 and OP1.  Return true and populate INDICES on success,
+   otherwise return false without modifying INDICES.  */
+
+bool
+tree_to_vec_perm_indices (vec_perm_indices *indices, tree op0, tree op1,
+                         tree perm_cst)
+{
+  vec_perm_builder builder;
+  if (!tree_to_vec_perm_builder (&builder, perm_cst))
+    return false;
+  indices->new_vector (builder, op0 == op1 ? 1 : 2,
+                      initializer_zerop (op0), initializer_zerop (op1),
+                      TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)));
+  return true;
+}
+
+
 /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.  */
 
 tree
diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
index 5e68f7f62ec..b7bf5aa3f05 100644
--- a/gcc/vec-perm-indices.h
+++ b/gcc/vec-perm-indices.h
@@ -55,7 +55,12 @@ public:
   vec_perm_indices ();
   vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
 
-  void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
+  void new_vector (const vec_perm_builder &b, unsigned int ni, poly_uint64 ne)
+    {
+      new_vector (b, ni, false, false, ne);
+    }
+  void new_vector (const vec_perm_builder &, unsigned int, bool, bool,
+                  poly_uint64);
   void new_expanded_vector (const vec_perm_indices &, unsigned int);
   bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
   void rotate_inputs (int delta);
@@ -70,6 +75,13 @@ public:
   /* Return the number of input vectors being permuted.  */
   unsigned int ninputs () const { return m_ninputs; }
 
+  /* Return whether the input N is known bitwise zero.  */
+  bool input_bitwise_zero_p (unsigned n) const
+    {
+      return (n == 0 ? m_input0_bitwise_zero_p
+             : (n == 1 ? m_input1_bitwise_zero_p : false));
+    }
+
   /* Return the number of elements in each input vector.  */
   poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
 
@@ -87,16 +99,21 @@ private:
 
   vec_perm_builder m_encoding;
   unsigned int m_ninputs;
+  bool m_input0_bitwise_zero_p;
+  bool m_input1_bitwise_zero_p;
   poly_uint64 m_nelts_per_input;
 };
 
 bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
+bool tree_to_vec_perm_indices (vec_perm_indices *, tree, tree, tree);
 tree vec_perm_indices_to_tree (tree, const vec_perm_indices &);
 rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &);
 
 inline
 vec_perm_indices::vec_perm_indices ()
   : m_ninputs (0),
+    m_input0_bitwise_zero_p (false),
+    m_input1_bitwise_zero_p (false),
     m_nelts_per_input (0)
 {
 }
@@ -110,7 +127,7 @@ vec_perm_indices::vec_perm_indices (const vec_perm_builder &elements,
                                    unsigned int ninputs,
                                    poly_uint64 nelts_per_input)
 {
-  new_vector (elements, ninputs, nelts_per_input);
+  new_vector (elements, ninputs, false, false, nelts_per_input);
 }
 
 /* Return the canonical value for permutation vector element ELT,
-- 
2.51.0

Reply via email to