On Wed, 2 Oct 2024, Andrew Pinski wrote:
> On Tue, Oct 1, 2024 at 5:04 AM Richard Biener <[email protected]> wrote:
> >
> > The following adds SLP support for vectorizing single-lane inductions
> > with variable length vectors.
>
> This introduces a bootstrap failure on aarch64 due to a maybe
> uninitialized variable.
>
> inlined from ‘bool vectorizable_induction(loop_vec_info,
> stmt_vec_info, gimple**, slp_tree, stmt_vector_for_cost*)’ at
> /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10718:33:
> /home/linaro/src/upstream-gcc/gcc/gcc/gimple-fold.h:183:25: error:
> ‘vec_init’ may be used uninitialized [-Werror=maybe-uninitialized]
> 183 | return gimple_convert (&gsi, false, GSI_CONTINUE_LINKING,
> | ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> 184 | UNKNOWN_LOCATION, type, op);
> | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
> /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc: In function
> ‘bool vectorizable_induction(loop_vec_info, stmt_vec_info, gimple**,
> slp_tree, stmt_vector_for_cost*)’:
> /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10281:17:
> note: ‘vec_init’ was declared here
> 10281 | tree new_vec, vec_init, vec_step, t;
> | ^~~~~~~~
>
>
> The issue is around line 10718:
> if (init_node)
> vec_init = vect_get_slp_vect_def (init_node, ivn);
> if (!nested_in_vect_loop
> && step_mul
> && !integer_zerop (step_mul))
> {
> gcc_assert (invariant);
> vec_def = gimple_convert (&init_stmts, step_vectype, vec_init);
>
> it is hard to follow the code to see if it is actually uninitialized or not.
It is indeed uninitialized in that case.  I agree it's a bit hard to
follow - I was torn between interweaving the code like I did and copying
the whole SLP handling for the VLA case ...
I'll push the obvious fix.
Richard.
> Thanks,
> Andrew
>
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> >
> > PR tree-optimization/116566
> > * tree-vect-loop.cc (vectorizable_induction): Handle single-lane
> > SLP for VLA vectors.
> > ---
> > gcc/tree-vect-loop.cc | 247 ++++++++++++++++++++++++++++++++----------
> > 1 file changed, 189 insertions(+), 58 deletions(-)
> >
> > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > index a5a44613cb2..f5ecf0bdb80 100644
> > --- a/gcc/tree-vect-loop.cc
> > +++ b/gcc/tree-vect-loop.cc
> > @@ -10283,7 +10283,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > gimple *new_stmt;
> > gphi *induction_phi;
> > tree induc_def, vec_dest;
> > - tree init_expr, step_expr;
> > poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
> > unsigned i;
> > tree expr;
> > @@ -10369,7 +10368,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > iv_loop = loop;
> > gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
> >
> > - if (slp_node && !nunits.is_constant ())
> > + if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) !=
> > 1))
> > {
> > /* The current SLP code creates the step value element-by-element.
> > */
> > if (dump_enabled_p ())
> > @@ -10387,7 +10386,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > return false;
> > }
> >
> > - step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
> > + tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
> > gcc_assert (step_expr != NULL_TREE);
> > if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
> > && !type_has_mode_precision_p (TREE_TYPE (step_expr)))
> > @@ -10475,9 +10474,6 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */
> > if (slp_node)
> > {
> > - /* Enforced above. */
> > - unsigned int const_nunits = nunits.to_constant ();
> > -
> > /* The initial values are vectorized, but any lanes > group_size
> > need adjustment. */
> > slp_tree init_node
> > @@ -10499,11 +10495,12 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> >
> > /* Now generate the IVs. */
> > unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
> > - gcc_assert ((const_nunits * nvects) % group_size == 0);
> > + gcc_assert (multiple_p (nunits * nvects, group_size));
> > unsigned nivs;
> > + unsigned HOST_WIDE_INT const_nunits;
> > if (nested_in_vect_loop)
> > nivs = nvects;
> > - else
> > + else if (nunits.is_constant (&const_nunits))
> > {
> > /* Compute the number of distinct IVs we need. First reduce
> > group_size if it is a multiple of const_nunits so we get
> > @@ -10514,21 +10511,43 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > nivs = least_common_multiple (group_sizep,
> > const_nunits) / const_nunits;
> > }
> > + else
> > + {
> > + gcc_assert (SLP_TREE_LANES (slp_node) == 1);
> > + nivs = 1;
> > + }
> > + gimple_seq init_stmts = NULL;
> > tree stept = TREE_TYPE (step_vectype);
> > tree lupdate_mul = NULL_TREE;
> > if (!nested_in_vect_loop)
> > {
> > - /* The number of iterations covered in one vector iteration. */
> > - unsigned lup_mul = (nvects * const_nunits) / group_size;
> > - lupdate_mul
> > - = build_vector_from_val (step_vectype,
> > - SCALAR_FLOAT_TYPE_P (stept)
> > - ? build_real_from_wide (stept, lup_mul,
> > - UNSIGNED)
> > - : build_int_cstu (stept, lup_mul));
> > + if (nunits.is_constant (&const_nunits))
> > + {
> > + /* The number of iterations covered in one vector iteration.
> > */
> > + unsigned lup_mul = (nvects * const_nunits) / group_size;
> > + lupdate_mul
> > + = build_vector_from_val (step_vectype,
> > + SCALAR_FLOAT_TYPE_P (stept)
> > + ? build_real_from_wide (stept,
> > lup_mul,
> > + UNSIGNED)
> > + : build_int_cstu (stept, lup_mul));
> > + }
> > + else
> > + {
> > + if (SCALAR_FLOAT_TYPE_P (stept))
> > + {
> > + tree tem = build_int_cst (integer_type_node, vf);
> > + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
> > + stept, tem);
> > + }
> > + else
> > + lupdate_mul = build_int_cst (stept, vf);
> > + lupdate_mul = gimple_build_vector_from_val (&init_stmts,
> > + step_vectype,
> > + lupdate_mul);
> > + }
> > }
> > tree peel_mul = NULL_TREE;
> > - gimple_seq init_stmts = NULL;
> > if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo))
> > {
> > if (SCALAR_FLOAT_TYPE_P (stept))
> > @@ -10540,44 +10559,105 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > peel_mul = gimple_build_vector_from_val (&init_stmts,
> > step_vectype, peel_mul);
> > }
> > + tree step_mul = NULL_TREE;
> > unsigned ivn;
> > auto_vec<tree> vec_steps;
> > for (ivn = 0; ivn < nivs; ++ivn)
> > {
> > - tree_vector_builder step_elts (step_vectype, const_nunits, 1);
> > - tree_vector_builder init_elts (vectype, const_nunits, 1);
> > - tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
> > - for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
> > + gimple_seq stmts = NULL;
> > + bool invariant = true;
> > + if (nunits.is_constant (&const_nunits))
> > {
> > - /* The scalar steps of the IVs. */
> > - tree elt = steps[(ivn*const_nunits + eltn) % group_size];
> > - elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype),
> > elt);
> > - step_elts.quick_push (elt);
> > + tree_vector_builder step_elts (step_vectype, const_nunits, 1);
> > + tree_vector_builder init_elts (vectype, const_nunits, 1);
> > + tree_vector_builder mul_elts (step_vectype, const_nunits, 1);
> > + for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
> > + {
> > + /* The scalar steps of the IVs. */
> > + tree elt = steps[(ivn*const_nunits + eltn) % group_size];
> > + elt = gimple_convert (&init_stmts,
> > + TREE_TYPE (step_vectype), elt);
> > + step_elts.quick_push (elt);
> > + if (!init_node)
> > + {
> > + /* The scalar inits of the IVs if not vectorized. */
> > + elt = inits[(ivn*const_nunits + eltn) % group_size];
> > + if (!useless_type_conversion_p (TREE_TYPE (vectype),
> > + TREE_TYPE (elt)))
> > + elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> > + TREE_TYPE (vectype), elt);
> > + init_elts.quick_push (elt);
> > + }
> > + /* The number of steps to add to the initial values. */
> > + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
> > + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
> > + ? build_real_from_wide (stept,
> > mul_elt,
> > + UNSIGNED)
> > + : build_int_cstu (stept, mul_elt));
> > + }
> > + vec_step = gimple_build_vector (&init_stmts, &step_elts);
> > + step_mul = gimple_build_vector (&init_stmts, &mul_elts);
> > if (!init_node)
> > + vec_init = gimple_build_vector (&init_stmts, &init_elts);
> > + }
> > + else
> > + {
> > + if (init_node)
> > + ;
> > + else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0])))
> > + {
> > + new_name = gimple_convert (&init_stmts, stept, inits[0]);
> > + /* Build the initial value directly as a VEC_SERIES_EXPR.
> > */
> > + vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR,
> > + step_vectype, new_name,
> > steps[0]);
> > + if (!useless_type_conversion_p (vectype, step_vectype))
> > + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> > + vectype, vec_init);
> > + }
> > + else
> > {
> > - /* The scalar inits of the IVs if not vectorized. */
> > - elt = inits[(ivn*const_nunits + eltn) % group_size];
> > - if (!useless_type_conversion_p (TREE_TYPE (vectype),
> > - TREE_TYPE (elt)))
> > - elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> > - TREE_TYPE (vectype), elt);
> > - init_elts.quick_push (elt);
> > + /* Build:
> > + [base, base, base, ...]
> > + + (vectype) [0, 1, 2, ...] * [step, step, step,
> > ...]. */
> > + gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0])));
> > + gcc_assert (flag_associative_math);
> > + tree index = build_index_vector (step_vectype, 0, 1);
> > + new_name = gimple_convert (&init_stmts, TREE_TYPE
> > (steps[0]),
> > + inits[0]);
> > + tree base_vec = gimple_build_vector_from_val (&init_stmts,
> > +
> > step_vectype,
> > + new_name);
> > + tree step_vec = gimple_build_vector_from_val (&init_stmts,
> > +
> > step_vectype,
> > + steps[0]);
> > + vec_init = gimple_build (&init_stmts, FLOAT_EXPR,
> > + step_vectype, index);
> > + vec_init = gimple_build (&init_stmts, MULT_EXPR,
> > + step_vectype, vec_init,
> > step_vec);
> > + vec_init = gimple_build (&init_stmts, PLUS_EXPR,
> > + step_vectype, vec_init,
> > base_vec);
> > + if (!useless_type_conversion_p (vectype, step_vectype))
> > + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR,
> > + vectype, vec_init);
> > }
> > - /* The number of steps to add to the initial values. */
> > - unsigned mul_elt = (ivn*const_nunits + eltn) / group_size;
> > - mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept)
> > - ? build_real_from_wide (stept,
> > - mul_elt,
> > UNSIGNED)
> > - : build_int_cstu (stept, mul_elt));
> > + /* iv_loop is nested in the loop to be vectorized. Generate:
> > + vec_step = [S, S, S, S] */
> > + t = unshare_expr (steps[0]);
> > + gcc_assert (CONSTANT_CLASS_P (t)
> > + || TREE_CODE (t) == SSA_NAME);
> > + vec_step = gimple_build_vector_from_val (&init_stmts,
> > + step_vectype, t);
> > }
> > - vec_step = gimple_build_vector (&init_stmts, &step_elts);
> > vec_steps.safe_push (vec_step);
> > - tree step_mul = gimple_build_vector (&init_stmts, &mul_elts);
> > if (peel_mul)
> > - step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype,
> > - step_mul, peel_mul);
> > - if (!init_node)
> > - vec_init = gimple_build_vector (&init_stmts, &init_elts);
> > + {
> > + if (!step_mul)
> > + step_mul = peel_mul;
> > + else
> > + step_mul = gimple_build (&init_stmts,
> > + MINUS_EXPR, step_vectype,
> > + step_mul, peel_mul);
> > + }
> >
> > /* Create the induction-phi that defines the induction-operand.
> > */
> > vec_dest = vect_get_new_vect_var (vectype, vect_simple_var,
> > @@ -10588,9 +10668,38 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > /* Create the iv update inside the loop */
> > tree up = vec_step;
> > if (lupdate_mul)
> > - up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
> > - vec_step, lupdate_mul);
> > - gimple_seq stmts = NULL;
> > + {
> > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> > + {
> > +          /* When we're using loop_len produced by SELECT_VL, the
> > + non-final iterations are not always processing VF
> > + elements. So vectorize induction variable instead of
> > +
> > + _21 = vect_vec_iv_.6_22 + { VF, ... };
> > +
> > + We should generate:
> > +
> > + _35 = .SELECT_VL (ivtmp_33, VF);
> > + vect_cst__22 = [vec_duplicate_expr] _35;
> > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */
> > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
> > + tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1,
> > + vectype, 0, 0);
> > + if (SCALAR_FLOAT_TYPE_P (stept))
> > + expr = gimple_build (&stmts, FLOAT_EXPR, stept, len);
> > + else
> > + expr = gimple_convert (&stmts, stept, len);
> > + lupdate_mul = gimple_build_vector_from_val (&stmts,
> > + step_vectype,
> > + expr);
> > + up = gimple_build (&stmts, MULT_EXPR,
> > + step_vectype, vec_step, lupdate_mul);
> > + }
> > + else
> > + up = gimple_build (&init_stmts,
> > + MULT_EXPR, step_vectype,
> > + vec_step, lupdate_mul);
> > + }
> > vec_def = gimple_convert (&stmts, step_vectype, induc_def);
> > vec_def = gimple_build (&stmts,
> > PLUS_EXPR, step_vectype, vec_def, up);
> > @@ -10602,8 +10711,10 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > if (init_node)
> > vec_init = vect_get_slp_vect_def (init_node, ivn);
> > if (!nested_in_vect_loop
> > + && step_mul
> > && !integer_zerop (step_mul))
> > {
> > + gcc_assert (invariant);
> > vec_def = gimple_convert (&init_stmts, step_vectype,
> > vec_init);
> > up = gimple_build (&init_stmts, MULT_EXPR, step_vectype,
> > vec_step, step_mul);
> > @@ -10620,8 +10731,11 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > if (!nested_in_vect_loop)
> > {
> > /* Fill up to the number of vectors we need for the whole group.
> > */
> > - nivs = least_common_multiple (group_size,
> > - const_nunits) / const_nunits;
> > + if (nunits.is_constant (&const_nunits))
> > + nivs = least_common_multiple (group_size,
> > + const_nunits) / const_nunits;
> > + else
> > + nivs = 1;
> > vec_steps.reserve (nivs-ivn);
> > for (; ivn < nivs; ++ivn)
> > {
> > @@ -10634,14 +10748,31 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > stmts by adding VF' * stride to the IVs generated above. */
> > if (ivn < nvects)
> > {
> > - unsigned vfp
> > - = least_common_multiple (group_size, const_nunits) / group_size;
> > - tree lupdate_mul
> > - = build_vector_from_val (step_vectype,
> > - SCALAR_FLOAT_TYPE_P (stept)
> > - ? build_real_from_wide (stept,
> > - vfp, UNSIGNED)
> > - : build_int_cstu (stept, vfp));
> > + if (nunits.is_constant (&const_nunits))
> > + {
> > + unsigned vfp = (least_common_multiple (group_size,
> > const_nunits)
> > + / group_size);
> > + lupdate_mul
> > + = build_vector_from_val (step_vectype,
> > + SCALAR_FLOAT_TYPE_P (stept)
> > + ? build_real_from_wide (stept,
> > + vfp,
> > UNSIGNED)
> > + : build_int_cstu (stept, vfp));
> > + }
> > + else
> > + {
> > + if (SCALAR_FLOAT_TYPE_P (stept))
> > + {
> > + tree tem = build_int_cst (integer_type_node, nunits);
> > + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR,
> > + stept, tem);
> > + }
> > + else
> > + lupdate_mul = build_int_cst (stept, nunits);
> > + lupdate_mul = gimple_build_vector_from_val (&init_stmts,
> > + step_vectype,
> > + lupdate_mul);
> > + }
> > for (; ivn < nvects; ++ivn)
> > {
> > gimple *iv
> > @@ -10673,7 +10804,7 @@ vectorizable_induction (loop_vec_info loop_vinfo,
> > return true;
> > }
> >
> > - init_expr = vect_phi_initial_value (phi);
> > + tree init_expr = vect_phi_initial_value (phi);
> >
> > gimple_seq stmts = NULL;
> > if (!nested_in_vect_loop)
> > --
> > 2.43.0
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)