Richard Biener <[email protected]> writes:
> The following adds a function like tree_to_vec_perm_builder but
> for building vec_perm_indices directly from all operands of a
> VEC_PERM_EXPR. This avoids errors such as shown in PR123175
> and allows us to extract information from the permuted inputs
> to implement vec_shl/vec_shr recognition in can_vec_perm_const_p,
> making that fully cover what vector lowering will assess.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu and aarch64-linux.
>
> I went for a toplevel function rather than a method to provide
> better mitigation against PR123175 and to elide the builder which
> in most cases isn't necessary to expose.
>
> OK for trunk?
LGTM FWIW.
Very minor, but on the formatting of the inline function definitions:
I thought the convention was not to indent the "{", to match what we do
for non-inline functions. I realise the codebase isn't consistent though.
Thanks,
Richard
>
> Thanks,
> Richard.
>
> * vec-perm-indices.h (vec_perm_indices::new_vector): New overload.
> (vec_perm_indices::input_bitwise_zero_p): New method.
> (vec_perm_indices::m_input0_bitwise_zero_p,
> vec_perm_indices::m_input1_bitwise_zero_p): New members.
> (vec_perm_indices::vec_perm_indices): Adjust.
> (tree_to_vec_perm_indices): Declare.
> * vec-perm-indices.cc (vec_perm_indices::new_vector): New overload.
> (vec_perm_indices::new_expanded_vector): Adjust.
> (tree_to_vec_perm_indices): New function.
> * optabs-query.cc (can_vec_perm_const_p): Handle permute
> patterns mapping to vec_shl/vec_shr.
> * tree-vect-generic.cc (lower_vec_perm): Move vec_shl/vec_shr
> detection to can_vec_perm_const_p and simplify.
> ---
> gcc/optabs-query.cc | 53 ++++++++++++++++++++++++++++++----
> gcc/tree-vect-generic.cc | 62 ++--------------------------------------
> gcc/vec-perm-indices.cc | 25 ++++++++++++++++
> gcc/vec-perm-indices.h | 21 ++++++++++++--
> 4 files changed, 94 insertions(+), 67 deletions(-)
>
> diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
> index 58842e40ed6..da0d37cf5df 100644
> --- a/gcc/optabs-query.cc
> +++ b/gcc/optabs-query.cc
> @@ -415,12 +415,7 @@ can_vec_perm_var_p (machine_mode mode)
> /* Return true if the target directly supports VEC_PERM_EXPRs on vectors
> of mode OP_MODE and result vector of mode MODE using the selector SEL.
> ALLOW_VARIABLE_P is true if it is acceptable to force the selector into a
> - register and use a variable permute (if the target supports that).
> -
> - Note that additional permutations representing whole-vector shifts may
> - also be handled via the vec_shr or vec_shl optab, but only where the
> - second input vector is entirely constant zeroes; this case is not dealt
> - with here. */
> + register and use a variable permute (if the target supports that). */
>
> bool
> can_vec_perm_const_p (machine_mode mode, machine_mode op_mode,
> @@ -465,6 +460,52 @@ can_vec_perm_const_p (machine_mode mode, machine_mode
> op_mode,
> into integer operations. */
> }
>
> + unsigned elements;
> + if (mode == op_mode
> + && GET_MODE_NUNITS (mode).is_constant (&elements))
> + {
> + if (sel.input_bitwise_zero_p (0)
> + && can_implement_p (vec_shl_optab, mode))
> + {
> + unsigned int first = 0, i;
> + for (i = 0; i < elements; ++i)
> + if (known_eq (poly_uint64 (sel[i]), elements))
> + {
> + if (i == 0 || first)
> + break;
> + first = i;
> + }
> + else if (first
> + ? maybe_ne (poly_uint64 (sel[i]),
> + elements + i - first)
> + : maybe_ge (poly_uint64 (sel[i]), elements))
> + break;
> + if (first && i == elements)
> + return true;
> + }
> + if (sel.input_bitwise_zero_p (1)
> + && maybe_ne (sel[0], 0)
> + && known_lt (sel[0], elements)
> + && can_implement_p (vec_shr_optab, mode))
> + {
> + if (sel.series_p (0, 1, sel[0], 1))
> + return true;
> + unsigned i;
> + for (i = 1; i < elements; ++i)
> + {
> + poly_uint64 actual = sel[i];
> + poly_uint64 expected = i + sel[0];
> + /* Indices into the second vector are all equivalent. */
> + if (maybe_lt (actual, elements)
> + ? maybe_ne (actual, expected)
> + : maybe_lt (expected, elements))
> + break;
> + }
> + if (i == elements)
> + return true;
> + }
> + }
> +
> return false;
> }
>
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index fddb44bfe86..a8c31974973 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -1640,76 +1640,20 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
> mask = gimple_assign_rhs1 (def_stmt);
> }
>
> - vec_perm_builder sel_int;
>
> + vec_perm_indices indices;
> if (TREE_CODE (mask) == VECTOR_CST
> - && tree_to_vec_perm_builder (&sel_int, mask))
> + && tree_to_vec_perm_indices (&indices, vec0, vec1, mask))
> {
> - vec_perm_indices indices (sel_int, 2, in_elements);
> machine_mode vmode = TYPE_MODE (vect_type);
> tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
> machine_mode lhs_mode = TYPE_MODE (lhs_type);
> - if (can_vec_perm_const_p (lhs_mode, vmode, indices))
> + if (can_vec_perm_const_p (lhs_mode, vmode, indices, true))
> {
> gimple_assign_set_rhs3 (stmt, mask);
> update_stmt (stmt);
> return;
> }
> - /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
> - vector as VEC1 and a right element shift MASK. */
> - if (can_implement_p (vec_shr_optab, TYPE_MODE (vect_type))
> - && TREE_CODE (vec1) == VECTOR_CST
> - && initializer_zerop (vec1)
> - && maybe_ne (indices[0], 0)
> - && known_lt (poly_uint64 (indices[0]), elements))
> - {
> - bool ok_p = indices.series_p (0, 1, indices[0], 1);
> - if (!ok_p)
> - {
> - for (i = 1; i < elements; ++i)
> - {
> - poly_uint64 actual = indices[i];
> - poly_uint64 expected = i + indices[0];
> - /* Indices into the second vector are all equivalent. */
> - if (maybe_lt (actual, elements)
> - ? maybe_ne (actual, expected)
> - : maybe_lt (expected, elements))
> - break;
> - }
> - ok_p = i == elements;
> - }
> - if (ok_p)
> - {
> - gimple_assign_set_rhs3 (stmt, mask);
> - update_stmt (stmt);
> - return;
> - }
> - }
> - /* And similarly vec_shl pattern. */
> - if (can_implement_p (vec_shl_optab, TYPE_MODE (vect_type))
> - && TREE_CODE (vec0) == VECTOR_CST
> - && initializer_zerop (vec0))
> - {
> - unsigned int first = 0;
> - for (i = 0; i < elements; ++i)
> - if (known_eq (poly_uint64 (indices[i]), elements))
> - {
> - if (i == 0 || first)
> - break;
> - first = i;
> - }
> - else if (first
> - ? maybe_ne (poly_uint64 (indices[i]),
> - elements + i - first)
> - : maybe_ge (poly_uint64 (indices[i]), elements))
> - break;
> - if (first && i == elements)
> - {
> - gimple_assign_set_rhs3 (stmt, mask);
> - update_stmt (stmt);
> - return;
> - }
> - }
> }
> else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
> return;
> diff --git a/gcc/vec-perm-indices.cc b/gcc/vec-perm-indices.cc
> index 5b54a84caee..6bb2f39ff91 100644
> --- a/gcc/vec-perm-indices.cc
> +++ b/gcc/vec-perm-indices.cc
> @@ -31,6 +31,7 @@ along with GCC; see the file COPYING3. If not see
> #include "selftest.h"
> #include "rtx-vector-builder.h"
>
> +
> /* Switch to a new permutation vector that selects between NINPUTS vector
> inputs that have NELTS_PER_INPUT elements each. Take the elements of the
> new permutation vector from ELEMENTS, clamping each one to be in range.
> */
> @@ -38,10 +39,14 @@ along with GCC; see the file COPYING3. If not see
> void
> vec_perm_indices::new_vector (const vec_perm_builder &elements,
> unsigned int ninputs,
> + bool input0_bitwise_zero_p,
> + bool input1_bitwise_zero_p,
> poly_uint64 nelts_per_input)
> {
> m_ninputs = ninputs;
> m_nelts_per_input = nelts_per_input;
> + m_input0_bitwise_zero_p = input0_bitwise_zero_p;
> + m_input1_bitwise_zero_p = input1_bitwise_zero_p;
> /* If the vector has a constant number of elements, expand the
> encoding and clamp each element. E.g. { 0, 2, 4, ... } might
> wrap halfway if there is only one vector input, and we want
> @@ -87,6 +92,8 @@ vec_perm_indices::new_expanded_vector (const
> vec_perm_indices &orig,
> unsigned int factor)
> {
> m_ninputs = orig.m_ninputs;
> + m_input0_bitwise_zero_p = orig.m_input0_bitwise_zero_p;
> + m_input1_bitwise_zero_p = orig.m_input1_bitwise_zero_p;
> m_nelts_per_input = orig.m_nelts_per_input * factor;
> m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
> orig.m_encoding.npatterns () * factor,
> @@ -301,6 +308,24 @@ tree_to_vec_perm_builder (vec_perm_builder *builder,
> tree cst)
> return true;
> }
>
> +/* Try to read the contents of VECTOR_CST PERM_CST as a constant permutation
> + vector permuting OP0 and OP1. Return true and populate INDICES on
> success,
> + otherwise return false without modifying INDICES. */
> +
> +bool
> +tree_to_vec_perm_indices (vec_perm_indices *indices, tree op0, tree op1,
> + tree perm_cst)
> +{
> + vec_perm_builder builder;
> + if (!tree_to_vec_perm_builder (&builder, perm_cst))
> + return false;
> + indices->new_vector (builder, op0 == op1 ? 1 : 2,
> + initializer_zerop (op0), initializer_zerop (op1),
> + TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)));
> + return true;
> +}
> +
> +
> /* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES.
> */
>
> tree
> diff --git a/gcc/vec-perm-indices.h b/gcc/vec-perm-indices.h
> index 5e68f7f62ec..b7bf5aa3f05 100644
> --- a/gcc/vec-perm-indices.h
> +++ b/gcc/vec-perm-indices.h
> @@ -55,7 +55,12 @@ public:
> vec_perm_indices ();
> vec_perm_indices (const vec_perm_builder &, unsigned int, poly_uint64);
>
> - void new_vector (const vec_perm_builder &, unsigned int, poly_uint64);
> + void new_vector (const vec_perm_builder &b, unsigned int ni, poly_uint64
> ne)
> + {
> + new_vector (b, ni, false, false, ne);
> + }
> + void new_vector (const vec_perm_builder &, unsigned int, bool, bool,
> + poly_uint64);
> void new_expanded_vector (const vec_perm_indices &, unsigned int);
> bool new_shrunk_vector (const vec_perm_indices &, unsigned int);
> void rotate_inputs (int delta);
> @@ -70,6 +75,13 @@ public:
> /* Return the number of input vectors being permuted. */
> unsigned int ninputs () const { return m_ninputs; }
>
> + /* Return whether the input N is known bitwise zero. */
> + bool input_bitwise_zero_p (unsigned n) const
> + {
> + return (n == 0 ? m_input0_bitwise_zero_p
> + : (n == 1 ? m_input1_bitwise_zero_p : false));
> + }
> +
> /* Return the number of elements in each input vector. */
> poly_uint64 nelts_per_input () const { return m_nelts_per_input; }
>
> @@ -87,16 +99,21 @@ private:
>
> vec_perm_builder m_encoding;
> unsigned int m_ninputs;
> + bool m_input0_bitwise_zero_p;
> + bool m_input1_bitwise_zero_p;
> poly_uint64 m_nelts_per_input;
> };
>
> bool tree_to_vec_perm_builder (vec_perm_builder *, tree);
> +bool tree_to_vec_perm_indices (vec_perm_indices *, tree, tree, tree);
> tree vec_perm_indices_to_tree (tree, const vec_perm_indices &);
> rtx vec_perm_indices_to_rtx (machine_mode, const vec_perm_indices &);
>
> inline
> vec_perm_indices::vec_perm_indices ()
> : m_ninputs (0),
> + m_input0_bitwise_zero_p (false),
> + m_input1_bitwise_zero_p (false),
> m_nelts_per_input (0)
> {
> }
> @@ -110,7 +127,7 @@ vec_perm_indices::vec_perm_indices (const
> vec_perm_builder &elements,
> unsigned int ninputs,
> poly_uint64 nelts_per_input)
> {
> - new_vector (elements, ninputs, nelts_per_input);
> + new_vector (elements, ninputs, false, false, nelts_per_input);
> }
>
> /* Return the canonical value for permutation vector element ELT,