On Thu, Jul 8, 2021 at 2:42 PM Richard Sandiford via Gcc-patches
<[email protected]> wrote:
>
> This patch constructs an array_slice of the scalar statements that
> produce live-out reduction results in the original unvectorised loop.
> There are three cases:
>
> - SLP reduction chains: the final SLP stmt is live-out
> - full SLP reductions: all SLP stmts are live-out
> - non-SLP reductions: the single scalar stmt is live-out
>
> This is a slight simplification on its own, mostly because it means
> “group_size” has a consistent meaning throughout the function.
> The main justification though is that it helps with later patches.
OK
> gcc/
> * tree-vect-loop.c (vect_create_epilog_for_reduction): Truncate
> scalar_results to group_size elements after reducing down from
> N*group_size elements. Construct an array_slice of the live-out
> stmts and assert that there is one stmt per scalar result.
> ---
> gcc/tree-vect-loop.c | 61 +++++++++++++++-----------------------------
> 1 file changed, 21 insertions(+), 40 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 7c3e3352b43..8390ac80ca0 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -5010,7 +5010,12 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> auto_vec<tree> scalar_results;
> unsigned int group_size = 1, k;
> auto_vec<gimple *> phis;
> - bool slp_reduc = false;
> + /* SLP reduction without reduction chain, e.g.,
> + # a1 = phi <a2, a0>
> + # b1 = phi <b2, b0>
> + a2 = operation (a1)
> + b2 = operation (b1) */
> + bool slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
> bool direct_slp_reduc;
> tree new_phi_result;
> tree induction_index = NULL_TREE;
> @@ -5050,6 +5055,16 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info);
> }
>
> + stmt_vec_info single_live_out_stmt[] = { stmt_info };
> + array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt;
> + if (slp_reduc)
> + /* All statements produce live-out values. */
> + live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node);
> + else if (slp_node)
> + /* The last statement in the reduction chain produces the live-out
> + value. */
> + single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
> +
> unsigned vec_num;
> int ncopies;
> if (slp_node)
> @@ -5248,13 +5263,6 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
> bitsize = TYPE_SIZE (scalar_type);
>
> - /* SLP reduction without reduction chain, e.g.,
> - # a1 = phi <a2, a0>
> - # b1 = phi <b2, b0>
> - a2 = operation (a1)
> - b2 = operation (b1) */
> - slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
> -
> /* True if we should implement SLP_REDUC using native reduction operations
> instead of scalar operations. */
> direct_slp_reduc = (reduc_fn != IFN_LAST
> @@ -5877,6 +5885,7 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> first_res, res);
> scalar_results[j % group_size] = new_res;
> }
> + scalar_results.truncate (group_size);
> for (k = 0; k < group_size; k++)
> scalar_results[k] = gimple_convert (&stmts, scalar_type,
> scalar_results[k]);
> @@ -5969,39 +5978,11 @@ vect_create_epilog_for_reduction (loop_vec_info
> loop_vinfo,
> use <s_out4>
> use <s_out4> */
>
> -
> - /* In SLP reduction chain we reduce vector results into one vector if
> - necessary, hence we set here REDUC_GROUP_SIZE to 1. SCALAR_DEST is the
> - LHS of the last stmt in the reduction chain, since we are looking for
> - the loop exit phi node. */
> - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
> - {
> - stmt_vec_info dest_stmt_info
> - = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
> - scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt);
> - group_size = 1;
> - }
> -
> - /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
> - case that REDUC_GROUP_SIZE is greater than vectorization factor).
> - Therefore, we need to match SCALAR_RESULTS with corresponding statements.
> - The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results
> - correspond to the first vector stmt, etc.
> - (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */
> - if (group_size > new_phis.length ())
> - gcc_assert (!(group_size % new_phis.length ()));
> -
> - for (k = 0; k < group_size; k++)
> + gcc_assert (live_out_stmts.size () == scalar_results.length ());
> + for (k = 0; k < live_out_stmts.size (); k++)
> {
> - if (slp_reduc)
> - {
> - stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
> -
> - orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info);
> - /* SLP statements can't participate in patterns. */
> - gcc_assert (!orig_stmt_info);
> - scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
> - }
> + stmt_vec_info scalar_stmt_info = vect_orig_stmt (live_out_stmts[k]);
> + scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
>
> phis.create (3);
> /* Find the loop-closed-use at the loop exit of the original scalar