The following adds a function like tree_to_vec_perm_builder, but one
that builds vec_perm_indices directly from all operands of a
VEC_PERM_EXPR. This avoids errors such as the one shown in PR123175
and allows us to extract information from the permuted inputs
to implement vec_shl/vec_shr recognition in can_vec_perm_const_p,
so that it fully covers what vector lowering will assess.
Bootstrapped and tested on x86_64-unknown-linux-gnu and aarch64-linux.
I went for a top-level function rather than a method to provide
better mitigation against PR123175 and to elide the builder, which
in most cases does not need to be exposed.
OK for trunk?
Thanks,
Richard.
* vec-perm-indices.h (vec_perm_indices::new_vector): New overload.
(vec_perm_indices::input_bitwise_zero_p): New method.
(vec_perm_indices::m_input0_bitwise_zero_p,
vec_perm_indices::m_input1_bitwise_zero_p): New members.
(vec_perm_indices::vec_perm_indices): Adjust.
(tree_to_vec_perm_indices): Declare.
* vec-perm-indices.cc (vec_perm_indices::new_vector): New overload.
(vec_perm_indices::new_expanded_vector): Adjust.
(tree_to_vec_perm_indices): New function.
* optabs-query.cc (can_vec_perm_const_p): Handle permute
patterns mapping to vec_shl/vec_shr.
* tree-vect-generic.cc (lower_vec_perm): Move vec_shl/vec_shr
detection to can_vec_perm_const_p and simplify.
---
gcc/optabs-query.cc | 53 ++++++++++++++++++++++++++++++----
gcc/tree-vect-generic.cc | 62 ++--------------------------------------
gcc/vec-perm-indices.cc | 25 ++++++++++++++++
gcc/vec-perm-indices.h | 21 ++++++++++++--
4 files changed, 94 insertions(+), 67 deletions(-)
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 58842e40ed6..da0d37cf5df 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -415,12 +415,7 @@ can_vec_perm_var_p (machine_mode mode)
/* Return true if the target directly supports VEC_PERM_EXPRs on vectors
of mode OP_MODE and result vector of mode MODE using the selector SEL.
ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a
- register and use a variable permute (if the target supports that).
-
- Note that additional permutations representing whole-vector shifts may
- also be handled via the vec_shr or vec_shl optab, but only where the
- second input vector is entirely constant zeroes; this case is not dealt
- with here. */
+ register and use a variable permute (if the target supports that). */
bool
can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
@@ -465,6 +460,52 @@ can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
into integer operations. */
}
+ unsigned elements;
+ if (mode == op_mode
+ && GET_MODE_NUNITS (mode).is_constant (&elements))
+ {
+ if (sel.input_bitwise_zero_p (0)
+ && can_implement_p (vec_shl_optab, mode))
+ {
+ unsigned int first = 0, i;
+ for (i = 0; i < elements; ++i)
+ if (known_eq (poly_uint64 (sel[i]), elements))
+ {
+ if (i == 0 || first)
+ break;
+ first = i;
+ }
+ else if (first
+ ? maybe_ne (poly_uint64 (sel[i]),
+ elements + i - first)
+ : maybe_ge (poly_uint64 (sel[i]), elements))
+ break;
+ if (first && i == elements)
+ return true;
+ }
+ if (sel.input_bitwise_zero_p (1)
+ && maybe_ne (sel[0], 0)
+ && known_lt (sel[0], elements)
+ && can_implement_p (vec_shr_optab, mode))
+ {
+ if (sel.series_p (0, 1, sel[0], 1))
+ return true;
+ unsigned i;
+ for (i = 1; i < elements; ++i)
+ {
+ poly_uint64 actual = sel[i];
+ poly_uint64 expected = i + sel[0];
+ /* Indices into the second vector are all equivalent. */
+ if (maybe_lt (actual, elements)
+ ? maybe_ne (actual, expected)
+ : maybe_lt (expected, elements))
+ break;
+ }
+ if (i == elements)
+ return true;
+ }
+ }
+
return false;
}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index fddb44bfe86..a8c31974973 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1640,76 +1640,20 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
mask = gimple_assign_rhs1 (def_stmt);
}
- vec_perm_builder sel_int;
+ vec_perm_indices indices;
if (TREE_CODE (mask) == VECTOR_CST
- && tree_to_vec_perm_builder (&sel_int, mask))
+ && tree_to_vec_perm_indices (&indices, vec0, vec1, mask))
{
- vec_perm_indices indices (sel_int, 2, in_elements);
machine_mode vmode = TYPE_MODE (vect_type);
tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
machine_mode lhs_mode = TYPE_MODE (lhs_type);
- if (can_vec_perm_const_p (lhs_mode, vmode, indices))
+ if (can_vec_perm_const_p (lhs_mode, vmode, indices, true))
{
gimple_assign_set_rhs3 (stmt, mask);
update_stmt (stmt);
return;
}
- /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
- vector as VEC1 and a right element shift MASK. */
- if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
- && TREE_CODE (vec1) == VECTOR_CST
- && initializer_zerop (vec1)
- && maybe_ne (indices[0], 0)
- && known_lt (poly_uint64 (indices[0]), elements))
- {
- bool ok_p = indices.series_p (0, 1, indices[0], 1);
- if (!ok_p)
- {
- for (i = 1; i < elements; ++i)
- {
- poly_uint64 actual = indices[i];
- poly_uint64 expected = i + indices[0];
- /* Indices into the second vector are all equivalent. */
- if (maybe_lt (actual, elements)
- ? maybe_ne (actual, expected)
- : maybe_lt (expected, elements))
- break;
- }
- ok_p = i == elements;
- }
- if (ok_p)
- {
- gimple_assign_set_rhs3 (stmt, mask);
- update_stmt (stmt);
- return;
- }
- }
- /* And similarly vec_shl pattern. */
- if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
- && TREE_CODE (vec0) == VECTOR_CST
- && initializer_zerop (vec0))
- {
- unsigned int first = 0;
- for (i = 0; i < elements; ++i)
- if (known_eq (poly_uint64 (indices[i]), elements))
- {
- if (i == 0 || first)
- break;
- first = i;
- }
- else if (first
- ? maybe_ne (poly_uint64 (indices[i]),
- elements + i - first)
- : maybe_ge (poly_uint64 (indices[i]), elements))
- break;
- if (first && i == elements)
- {
- gimple_assign_set_rhs3 (stmt, mask);
- update_stmt (stmt);
- return;
- }
- }
}
else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
return;
diff --git a/gcc/vec-perm-indices.cc b/gcc/vec-perm-indices.cc
index 5b54a84caee..6bb2f39ff91 100644
--- a/gcc/vec-perm-indices.cc
+++ b/gcc/vec-perm-indices.cc
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
#include "selftest.h"
#include "rtx-vector-builder.h"
+
/* Switch to a new permutation vector that selects between NINPUTS vector
inputs that have NELTS_PER_INPUT elements each. Take the elements of the
new permutation vector from ELEMENTS, clamping each one to be in range. */
@@ -38,10 +39,14 @@ along with GCC; see the file COPYING3. If not see
void
vec_perm_indices::new_vector (const vec_perm_builder &elements,
unsigned int ninputs,
+ bool input0_bitwise_zero_p,
+ bool input1_bitwise_zero_p,
poly_uint64 nelts_per_input)
{
m_ninputs = ninputs;
m_nelts_per_input = nelts_per_input;
+ m_input0_bitwise_zero_p = input0_bitwise_zero_p;
+ m_input1_bitwise_zero_p = input1_bitwise_zero_p;
/* If the vector has a constant number of elements, expand the
encoding and clamp each element. E.g. { 0, 2, 4, ... } might
wrap halfway if there is only one vector input, and we want
@@ -87,6 +92,8 @@ vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
unsigned int factor)
{
m_ninputs = orig.m_ninputs;
+ m_input0_bitwise_zero_p = orig.m_input0_bitwise_zero_p;
+ m_input1_bitwise_zero_p = orig.m_input1_bitwise_zero_p;
m_nelts_per_input = orig.m_nelts_per_input * factor;
m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
orig.m_encoding.npatterns () * factor,
@@ -301,6 +308,24 @@ tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
return true;
}
+/* Try to read the contents of VECTOR_CST PERM_CST as a constant permutation
+ vector permuting OP0 and OP1. Return true and populate INDICES on success,
+ otherwise return false without modifying INDICES. */
+
+bool
+tree_to_vec_perm_indices (vec_perm_indices *indices, tree op0, tree op1,
+ tree perm_cst)
+{
+ vec_perm_builder builder;
+ if (!tree_to_vec_perm_builder (&builder, perm_cst))
+ return false;
+ indices->new_vector (builder, op0 == op1 ? 1 : 2,
+ initializer_zerop (op0), initializer_zerop (op1),
+ TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)));
+ return true;
+}
+
+
/* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
tree
diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
index 5e68f7f62ec..b7bf5aa3f05 100644
--- a/gcc/vec-perm-indices.h
+++ b/gcc/vec-perm-indices.h
@@ -55,7 +55,12 @@ public:
vec_perm_indices ();
vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
- void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
+ void new_vector (const vec_perm_builder &b, unsigned int ni, poly_uint64 ne)
+ {
+ new_vector (b, ni, false, false, ne);
+ }
+ void new_vector (const vec_perm_builder &, unsigned int, bool, bool,
+ poly_uint64);
void new_expanded_vector (const vec_perm_indices &, unsigned int);
bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
void rotate_inputs (int delta);
@@ -70,6 +75,13 @@ public:
/* Return the number of input vectors being permuted. */
unsigned int ninputs () const { return m_ninputs; }
+ /* Return whether the input N is known bitwise zero. */
+ bool input_bitwise_zero_p (unsigned n) const
+ {
+ return (n == 0 ? m_input0_bitwise_zero_p
+ : (n == 1 ? m_input1_bitwise_zero_p : false));
+ }
+
/* Return the number of elements in each input vector. */
poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
@@ -87,16 +99,21 @@ private:
vec_perm_builder m_encoding;
unsigned int m_ninputs;
+ bool m_input0_bitwise_zero_p;
+ bool m_input1_bitwise_zero_p;
poly_uint64 m_nelts_per_input;
};
bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
+bool tree_to_vec_perm_indices (vec_perm_indices *, tree, tree, tree);
tree vec_perm_indices_to_tree (tree, const vec_perm_indices &);
rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &);
inline
vec_perm_indices::vec_perm_indices ()
: m_ninputs (0),
+ m_input0_bitwise_zero_p (false),
+ m_input1_bitwise_zero_p (false),
m_nelts_per_input (0)
{
}
@@ -110,7 +127,7 @@ vec_perm_indices::vec_perm_indices (const vec_perm_builder &elements,
unsigned int ninputs,
poly_uint64 nelts_per_input)
{
- new_vector (elements, ninputs, nelts_per_input);
+ new_vector (elements, ninputs, false, false, nelts_per_input);
}
/* Return the canonical value for permutation vector element ELT,
--
2.51.0