On Wed, 2 Oct 2024, Andrew Pinski wrote: > On Tue, Oct 1, 2024 at 5:04 AM Richard Biener <rguent...@suse.de> wrote: > > > > The following adds SLP support for vectorizing single-lane inductions > > with variable length vectors. > > This introduces a bootstrap failure on aarch64 due to a maybe > uninitialized variable. > > inlined from ‘bool vectorizable_induction(loop_vec_info, > stmt_vec_info, gimple**, slp_tree, stmt_vector_for_cost*)’ at > /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10718:33: > /home/linaro/src/upstream-gcc/gcc/gcc/gimple-fold.h:183:25: error: > ‘vec_init’ may be used uninitialized [-Werror=maybe-uninitialized] > 183 | return gimple_convert (&gsi, false, GSI_CONTINUE_LINKING, > | ~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > 184 | UNKNOWN_LOCATION, type, op); > | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ > /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc: In function > ‘bool vectorizable_induction(loop_vec_info, stmt_vec_info, gimple**, > slp_tree, stmt_vector_for_cost*)’: > /home/linaro/src/upstream-gcc/gcc/gcc/tree-vect-loop.cc:10281:17: > note: ‘vec_init’ was declared here > 10281 | tree new_vec, vec_init, vec_step, t; > | ^~~~~~~~ > > > The issue is around line 10718: > if (init_node) > vec_init = vect_get_slp_vect_def (init_node, ivn); > if (!nested_in_vect_loop > && step_mul > && !integer_zerop (step_mul)) > { > gcc_assert (invariant); > vec_def = gimple_convert (&init_stmts, step_vectype, vec_init); > > it is hard to follow the code to see if it is actually uninitialized or not.
It is, I agree it's a bit hard to follow - I was torn between interweaving the code like I did and copying the whole SLP handling for the VLA case ... I'll push the obvious fix. Richard. > Thanks, > Andrew > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu. > > > > PR tree-optimization/116566 > > * tree-vect-loop.cc (vectorizable_induction): Handle single-lane > > SLP for VLA vectors. > > --- > > gcc/tree-vect-loop.cc | 247 ++++++++++++++++++++++++++++++++---------- > > 1 file changed, 189 insertions(+), 58 deletions(-) > > > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc > > index a5a44613cb2..f5ecf0bdb80 100644 > > --- a/gcc/tree-vect-loop.cc > > +++ b/gcc/tree-vect-loop.cc > > @@ -10283,7 +10283,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > gimple *new_stmt; > > gphi *induction_phi; > > tree induc_def, vec_dest; > > - tree init_expr, step_expr; > > poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); > > unsigned i; > > tree expr; > > @@ -10369,7 +10368,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > iv_loop = loop; > > gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); > > > > - if (slp_node && !nunits.is_constant ()) > > + if (slp_node && (!nunits.is_constant () && SLP_TREE_LANES (slp_node) != > > 1)) > > { > > /* The current SLP code creates the step value element-by-element. > > */ > > if (dump_enabled_p ()) > > @@ -10387,7 +10386,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > return false; > > } > > > > - step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); > > + tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info); > > gcc_assert (step_expr != NULL_TREE); > > if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)) > > && !type_has_mode_precision_p (TREE_TYPE (step_expr))) > > @@ -10475,9 +10474,6 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > [i2 + 2*S2, i0 + 3*S0, i1 + 3*S1, i2 + 3*S2]. */ > > if (slp_node) > > { > > - /* Enforced above. */ > > - unsigned int const_nunits = nunits.to_constant (); > > - > > /* The initial values are vectorized, but any lanes > group_size > > need adjustment. */ > > slp_tree init_node > > @@ -10499,11 +10495,12 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > > > /* Now generate the IVs. */ > > unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); > > - gcc_assert ((const_nunits * nvects) % group_size == 0); > > + gcc_assert (multiple_p (nunits * nvects, group_size)); > > unsigned nivs; > > + unsigned HOST_WIDE_INT const_nunits; > > if (nested_in_vect_loop) > > nivs = nvects; > > - else > > + else if (nunits.is_constant (&const_nunits)) > > { > > /* Compute the number of distinct IVs we need. First reduce > > group_size if it is a multiple of const_nunits so we get > > @@ -10514,21 +10511,43 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > nivs = least_common_multiple (group_sizep, > > const_nunits) / const_nunits; > > } > > + else > > + { > > + gcc_assert (SLP_TREE_LANES (slp_node) == 1); > > + nivs = 1; > > + } > > + gimple_seq init_stmts = NULL; > > tree stept = TREE_TYPE (step_vectype); > > tree lupdate_mul = NULL_TREE; > > if (!nested_in_vect_loop) > > { > > - /* The number of iterations covered in one vector iteration. */ > > - unsigned lup_mul = (nvects * const_nunits) / group_size; > > - lupdate_mul > > - = build_vector_from_val (step_vectype, > > - SCALAR_FLOAT_TYPE_P (stept) > > - ? build_real_from_wide (stept, lup_mul, > > - UNSIGNED) > > - : build_int_cstu (stept, lup_mul)); > > + if (nunits.is_constant (&const_nunits)) > > + { > > + /* The number of iterations covered in one vector iteration. > > */ > > + unsigned lup_mul = (nvects * const_nunits) / group_size; > > + lupdate_mul > > + = build_vector_from_val (step_vectype, > > + SCALAR_FLOAT_TYPE_P (stept) > > + ? build_real_from_wide (stept, > > lup_mul, > > + UNSIGNED) > > + : build_int_cstu (stept, lup_mul)); > > + } > > + else > > + { > > + if (SCALAR_FLOAT_TYPE_P (stept)) > > + { > > + tree tem = build_int_cst (integer_type_node, vf); > > + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, > > + stept, tem); > > + } > > + else > > + lupdate_mul = build_int_cst (stept, vf); > > + lupdate_mul = gimple_build_vector_from_val (&init_stmts, > > + step_vectype, > > + lupdate_mul); > > + } > > } > > tree peel_mul = NULL_TREE; > > - gimple_seq init_stmts = NULL; > > if (LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo)) > > { > > if (SCALAR_FLOAT_TYPE_P (stept)) > > @@ -10540,44 +10559,105 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > peel_mul = gimple_build_vector_from_val (&init_stmts, > > step_vectype, peel_mul); > > } > > + tree step_mul = NULL_TREE; > > unsigned ivn; > > auto_vec<tree> vec_steps; > > for (ivn = 0; ivn < nivs; ++ivn) > > { > > - tree_vector_builder step_elts (step_vectype, const_nunits, 1); > > - tree_vector_builder init_elts (vectype, const_nunits, 1); > > - tree_vector_builder mul_elts (step_vectype, const_nunits, 1); > > - for (unsigned eltn = 0; eltn < const_nunits; ++eltn) > > + gimple_seq stmts = NULL; > > + bool invariant = true; > > + if (nunits.is_constant (&const_nunits)) > > { > > - /* The scalar steps of the IVs. */ > > - tree elt = steps[(ivn*const_nunits + eltn) % group_size]; > > - elt = gimple_convert (&init_stmts, TREE_TYPE (step_vectype), > > elt); > > - step_elts.quick_push (elt); > > + tree_vector_builder step_elts (step_vectype, const_nunits, 1); > > + tree_vector_builder init_elts (vectype, const_nunits, 1); > > + tree_vector_builder mul_elts (step_vectype, const_nunits, 1); > > + for (unsigned eltn = 0; eltn < const_nunits; ++eltn) > > + { > > + /* The scalar steps of the IVs. */ > > + tree elt = steps[(ivn*const_nunits + eltn) % group_size]; > > + elt = gimple_convert (&init_stmts, > > + TREE_TYPE (step_vectype), elt); > > + step_elts.quick_push (elt); > > + if (!init_node) > > + { > > + /* The scalar inits of the IVs if not vectorized. */ > > + elt = inits[(ivn*const_nunits + eltn) % group_size]; > > + if (!useless_type_conversion_p (TREE_TYPE (vectype), > > + TREE_TYPE (elt))) > > + elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, > > + TREE_TYPE (vectype), elt); > > + init_elts.quick_push (elt); > > + } > > + /* The number of steps to add to the initial values. */ > > + unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; > > + mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) > > + ? build_real_from_wide (stept, > > mul_elt, > > + UNSIGNED) > > + : build_int_cstu (stept, mul_elt)); > > + } > > + vec_step = gimple_build_vector (&init_stmts, &step_elts); > > + step_mul = gimple_build_vector (&init_stmts, &mul_elts); > > if (!init_node) > > + vec_init = gimple_build_vector (&init_stmts, &init_elts); > > + } > > + else > > + { > > + if (init_node) > > + ; > > + else if (INTEGRAL_TYPE_P (TREE_TYPE (steps[0]))) > > + { > > + new_name = gimple_convert (&init_stmts, stept, inits[0]); > > + /* Build the initial value directly as a VEC_SERIES_EXPR. > > */ > > + vec_init = gimple_build (&init_stmts, VEC_SERIES_EXPR, > > + step_vectype, new_name, > > steps[0]); > > + if (!useless_type_conversion_p (vectype, step_vectype)) > > + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, > > + vectype, vec_init); > > + } > > + else > > { > > - /* The scalar inits of the IVs if not vectorized. */ > > - elt = inits[(ivn*const_nunits + eltn) % group_size]; > > - if (!useless_type_conversion_p (TREE_TYPE (vectype), > > - TREE_TYPE (elt))) > > - elt = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, > > - TREE_TYPE (vectype), elt); > > - init_elts.quick_push (elt); > > + /* Build: > > + [base, base, base, ...] > > + + (vectype) [0, 1, 2, ...] * [step, step, step, > > ...]. */ > > + gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (steps[0]))); > > + gcc_assert (flag_associative_math); > > + tree index = build_index_vector (step_vectype, 0, 1); > > + new_name = gimple_convert (&init_stmts, TREE_TYPE > > (steps[0]), > > + inits[0]); > > + tree base_vec = gimple_build_vector_from_val (&init_stmts, > > + > > step_vectype, > > + new_name); > > + tree step_vec = gimple_build_vector_from_val (&init_stmts, > > + > > step_vectype, > > + steps[0]); > > + vec_init = gimple_build (&init_stmts, FLOAT_EXPR, > > + step_vectype, index); > > + vec_init = gimple_build (&init_stmts, MULT_EXPR, > > + step_vectype, vec_init, > > step_vec); > > + vec_init = gimple_build (&init_stmts, PLUS_EXPR, > > + step_vectype, vec_init, > > base_vec); > > + if (!useless_type_conversion_p (vectype, step_vectype)) > > + vec_init = gimple_build (&init_stmts, VIEW_CONVERT_EXPR, > > + vectype, vec_init); > > } > > - /* The number of steps to add to the initial values. */ > > - unsigned mul_elt = (ivn*const_nunits + eltn) / group_size; > > - mul_elts.quick_push (SCALAR_FLOAT_TYPE_P (stept) > > - ? build_real_from_wide (stept, > > - mul_elt, > > UNSIGNED) > > - : build_int_cstu (stept, mul_elt)); > > + /* iv_loop is nested in the loop to be vectorized. Generate: > > + vec_step = [S, S, S, S] */ > > + t = unshare_expr (steps[0]); > > + gcc_assert (CONSTANT_CLASS_P (t) > > + || TREE_CODE (t) == SSA_NAME); > > + vec_step = gimple_build_vector_from_val (&init_stmts, > > + step_vectype, t); > > } > > - vec_step = gimple_build_vector (&init_stmts, &step_elts); > > vec_steps.safe_push (vec_step); > > - tree step_mul = gimple_build_vector (&init_stmts, &mul_elts); > > if (peel_mul) > > - step_mul = gimple_build (&init_stmts, MINUS_EXPR, step_vectype, > > - step_mul, peel_mul); > > - if (!init_node) > > - vec_init = gimple_build_vector (&init_stmts, &init_elts); > > + { > > + if (!step_mul) > > + step_mul = peel_mul; > > + else > > + step_mul = gimple_build (&init_stmts, > > + MINUS_EXPR, step_vectype, > > + step_mul, peel_mul); > > + } > > > > /* Create the induction-phi that defines the induction-operand. > > */ > > vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, > > @@ -10588,9 +10668,38 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > /* Create the iv update inside the loop */ > > tree up = vec_step; > > if (lupdate_mul) > > - up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, > > - vec_step, lupdate_mul); > > - gimple_seq stmts = NULL; > > + { > > + if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo)) > > + { > > + /* When we're using loop_len produced by SELEC_VL, the > > + non-final iterations are not always processing VF > > + elements. So vectorize induction variable instead of > > + > > + _21 = vect_vec_iv_.6_22 + { VF, ... }; > > + > > + We should generate: > > + > > + _35 = .SELECT_VL (ivtmp_33, VF); > > + vect_cst__22 = [vec_duplicate_expr] _35; > > + _21 = vect_vec_iv_.6_22 + vect_cst__22; */ > > + vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); > > + tree len = vect_get_loop_len (loop_vinfo, NULL, lens, 1, > > + vectype, 0, 0); > > + if (SCALAR_FLOAT_TYPE_P (stept)) > > + expr = gimple_build (&stmts, FLOAT_EXPR, stept, len); > > + else > > + expr = gimple_convert (&stmts, stept, len); > > + lupdate_mul = gimple_build_vector_from_val (&stmts, > > + step_vectype, > > + expr); > > + up = gimple_build (&stmts, MULT_EXPR, > > + step_vectype, vec_step, lupdate_mul); > > + } > > + else > > + up = gimple_build (&init_stmts, > > + MULT_EXPR, step_vectype, > > + vec_step, lupdate_mul); > > + } > > vec_def = gimple_convert (&stmts, step_vectype, induc_def); > > vec_def = gimple_build (&stmts, > > PLUS_EXPR, step_vectype, vec_def, up); > > @@ -10602,8 +10711,10 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > if (init_node) > > vec_init = vect_get_slp_vect_def (init_node, ivn); > > if (!nested_in_vect_loop > > + && step_mul > > && !integer_zerop (step_mul)) > > { > > + gcc_assert (invariant); > > vec_def = gimple_convert (&init_stmts, step_vectype, > > vec_init); > > up = gimple_build (&init_stmts, MULT_EXPR, step_vectype, > > vec_step, step_mul); > > @@ -10620,8 +10731,11 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > if (!nested_in_vect_loop) > > { > > /* Fill up to the number of vectors we need for the whole group. > > */ > > - nivs = least_common_multiple (group_size, > > - const_nunits) / const_nunits; > > + if (nunits.is_constant (&const_nunits)) > > + nivs = least_common_multiple (group_size, > > + const_nunits) / const_nunits; > > + else > > + nivs = 1; > > vec_steps.reserve (nivs-ivn); > > for (; ivn < nivs; ++ivn) > > { > > @@ -10634,14 +10748,31 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > stmts by adding VF' * stride to the IVs generated above. */ > > if (ivn < nvects) > > { > > - unsigned vfp > > - = least_common_multiple (group_size, const_nunits) / group_size; > > - tree lupdate_mul > > - = build_vector_from_val (step_vectype, > > - SCALAR_FLOAT_TYPE_P (stept) > > - ? build_real_from_wide (stept, > > - vfp, UNSIGNED) > > - : build_int_cstu (stept, vfp)); > > + if (nunits.is_constant (&const_nunits)) > > + { > > + unsigned vfp = (least_common_multiple (group_size, > > const_nunits) > > + / group_size); > > + lupdate_mul > > + = build_vector_from_val (step_vectype, > > + SCALAR_FLOAT_TYPE_P (stept) > > + ? build_real_from_wide (stept, > > + vfp, > > UNSIGNED) > > + : build_int_cstu (stept, vfp)); > > + } > > + else > > + { > > + if (SCALAR_FLOAT_TYPE_P (stept)) > > + { > > + tree tem = build_int_cst (integer_type_node, nunits); > > + lupdate_mul = gimple_build (&init_stmts, FLOAT_EXPR, > > + stept, tem); > > + } > > + else > > + lupdate_mul = build_int_cst (stept, nunits); > > + lupdate_mul = gimple_build_vector_from_val (&init_stmts, > > + step_vectype, > > + lupdate_mul); > > + } > > for (; ivn < nvects; ++ivn) > > { > > gimple *iv > > @@ -10673,7 +10804,7 @@ vectorizable_induction (loop_vec_info loop_vinfo, > > return true; > > } > > > > - init_expr = vect_phi_initial_value (phi); > > + tree init_expr = vect_phi_initial_value (phi); > > > > gimple_seq stmts = NULL; > > if (!nested_in_vect_loop) > > -- > > 2.43.0 > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)