On Tue, Aug 22, 2023 at 10:44 AM Kewen.Lin <[email protected]> wrote:
>
> Hi,
>
> Now we use DR_GROUP_STORE_COUNT to record how many stores
> in a group have been transformed and only do the actual
> transform when encountering the last one. I'm making
> patches to move costing next to the transform code, it's
> awkward to use this DR_GROUP_STORE_COUNT for both costing
> and transforming. This patch is to introduce last_element
> to record the last element to be transformed in the group
> rather than to sum up the store number we have seen, then
> we can only check the given stmt is the last or not. It
> can make it work simply for both costing and transforming.
>
> Bootstrapped and regtested on x86_64-redhat-linux,
> aarch64-linux-gnu and powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?
This is all (existing) gross, so ... can't we do sth like the following
instead? Going to test this further besides the quick single
testcase I verified.
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 33f62b77710..67de19d9ce5 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8437,16 +8437,6 @@ vectorizable_store (vec_info *vinfo,
/* FORNOW */
gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
- /* We vectorize all the stmts of the interleaving group when we
- reach the last stmt in the group. */
- if (DR_GROUP_STORE_COUNT (first_stmt_info)
- < DR_GROUP_SIZE (first_stmt_info)
- && !slp)
- {
- *vec_stmt = NULL;
- return true;
- }
-
if (slp)
{
grouped_store = false;
@@ -12487,21 +12477,21 @@ vect_transform_stmt (vec_info *vinfo,
break;
case store_vec_info_type:
- done = vectorizable_store (vinfo, stmt_info,
- gsi, &vec_stmt, slp_node, NULL);
- gcc_assert (done);
- if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && !slp_node
+ && DR_GROUP_NEXT_ELEMENT (stmt_info))
+ /* In case of interleaving, the whole chain is vectorized when the
+ last store in the chain is reached. Store stmts before the last
+ one are skipped, and there vec_stmt_info shouldn't be freed
+ meanwhile. */
+ ;
+ else
{
- /* In case of interleaving, the whole chain is vectorized when the
- last store in the chain is reached. Store stmts before the last
- one are skipped, and there vec_stmt_info shouldn't be freed
- meanwhile. */
- stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
- if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
- is_store = true;
+ done = vectorizable_store (vinfo, stmt_info,
+ gsi, &vec_stmt, slp_node, NULL);
+ gcc_assert (done);
+ is_store = true;
}
- else
- is_store = true;
break;
case condition_vec_info_type:
> BR,
> Kewen
> -----
>
> gcc/ChangeLog:
>
> * tree-vect-data-refs.cc (vect_set_group_last_element): New function.
> (vect_analyze_group_access): Call new function
> vect_set_group_last_element.
> * tree-vect-stmts.cc (vectorizable_store): Replace
> DR_GROUP_STORE_COUNT
> uses with DR_GROUP_LAST_ELEMENT.
> (vect_transform_stmt): Likewise.
> * tree-vect-slp.cc (vect_split_slp_store_group): Likewise.
> (vect_build_slp_instance): Likewise.
> * tree-vectorizer.h (DR_GROUP_LAST_ELEMENT): New macro.
> (DR_GROUP_STORE_COUNT): Remove.
> (class _stmt_vec_info::store_count): Remove.
> (class _stmt_vec_info::last_element): New class member.
> (vect_set_group_last_element): New function declaration.
> ---
> gcc/tree-vect-data-refs.cc | 30 ++++++++++++++++++++++++++++++
> gcc/tree-vect-slp.cc | 13 +++++++++----
> gcc/tree-vect-stmts.cc | 9 +++------
> gcc/tree-vectorizer.h | 12 +++++++-----
> 4 files changed, 49 insertions(+), 15 deletions(-)
>
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 3e9a284666c..c4a495431d5 100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -2832,6 +2832,33 @@ vect_analyze_group_access_1 (vec_info *vinfo,
> dr_vec_info *dr_info)
> return true;
> }
>
> +/* Given vectorization information VINFO, set the last element in the
> + group led by FIRST_STMT_INFO. For now, it's only used for loop
> + vectorization and stores, since for loop-vect the grouped stores
> + are only transformed till encounting its last one. */
> +
> +void
> +vect_set_group_last_element (vec_info *vinfo, stmt_vec_info first_stmt_info)
> +{
> + if (first_stmt_info
> + && is_a<loop_vec_info> (vinfo)
> + && DR_IS_WRITE (STMT_VINFO_DATA_REF (first_stmt_info)))
> + {
> + stmt_vec_info stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
> + stmt_vec_info last_stmt_info = first_stmt_info;
> + while (stmt_info)
> + {
> + gimple *stmt = stmt_info->stmt;
> + gimple *last_stmt = last_stmt_info->stmt;
> + gcc_assert (gimple_bb (stmt) == gimple_bb (last_stmt));
> + if (gimple_uid (stmt) > gimple_uid (last_stmt))
> + last_stmt_info = stmt_info;
> + stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
> + }
> + DR_GROUP_LAST_ELEMENT (first_stmt_info) = last_stmt_info;
> + }
> +}
> +
> /* Analyze groups of accesses: check that DR_INFO belongs to a group of
> accesses of legal size, step, etc. Detect gaps, single element
> interleaving, and other special cases. Set grouped access info.
> @@ -2853,6 +2880,9 @@ vect_analyze_group_access (vec_info *vinfo, dr_vec_info
> *dr_info)
> }
> return false;
> }
> +
> + stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
> + vect_set_group_last_element (vinfo, first_stmt_info);
> return true;
> }
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 89c3216afac..e9b64efe125 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -2827,7 +2827,8 @@ vect_find_first_scalar_stmt_in_slp (slp_tree node)
> Return the first stmt in the second group. */
>
> static stmt_vec_info
> -vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
> +vect_split_slp_store_group (vec_info *vinfo, stmt_vec_info first_vinfo,
> + unsigned group1_size)
> {
> gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
> gcc_assert (group1_size > 0);
> @@ -2860,6 +2861,9 @@ vect_split_slp_store_group (stmt_vec_info first_vinfo,
> unsigned group1_size)
> /* DR_GROUP_GAP of the first group now has to skip over the second group
> too. */
> DR_GROUP_GAP (first_vinfo) += group2_size;
>
> + vect_set_group_last_element (vinfo, first_vinfo);
> + vect_set_group_last_element (vinfo, group2);
> +
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
> group1_size, group2_size);
> @@ -3321,7 +3325,7 @@ vect_build_slp_instance (vec_info *vinfo,
> if (dump_enabled_p ())
> dump_printf_loc (MSG_NOTE, vect_location,
> "Splitting SLP group at stmt %u\n", i);
> - stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
> + stmt_vec_info rest = vect_split_slp_store_group (vinfo,
> stmt_info,
> group1_size);
> bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
> kind, max_tree_size,
> @@ -3334,7 +3338,8 @@ vect_build_slp_instance (vec_info *vinfo,
> || i - group1_size > 1))
> {
> stmt_vec_info rest2 = rest;
> - rest = vect_split_slp_store_group (rest, i - group1_size);
> + rest = vect_split_slp_store_group (vinfo, rest,
> + i - group1_size);
> if (i - group1_size > 1)
> res |= vect_analyze_slp_instance (vinfo, bst_map, rest2,
> kind, max_tree_size,
> @@ -3361,7 +3366,7 @@ vect_build_slp_instance (vec_info *vinfo,
> dump_printf_loc (MSG_NOTE, vect_location,
> "Splitting SLP group at stmt %u\n", i);
>
> - stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
> + stmt_vec_info rest = vect_split_slp_store_group (vinfo, stmt_info,
> group1_size);
> /* Loop vectorization cannot handle gaps in stores, make sure
> the split group appears as strided. */
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 33f62b77710..1580a396301 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -8429,9 +8429,6 @@ vectorizable_store (vec_info *vinfo,
> else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
> return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt,
> ncopies);
>
> - if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
> - DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
> -
> if (grouped_store)
> {
> /* FORNOW */
> @@ -8439,8 +8436,8 @@ vectorizable_store (vec_info *vinfo,
>
> /* We vectorize all the stmts of the interleaving group when we
> reach the last stmt in the group. */
> - if (DR_GROUP_STORE_COUNT (first_stmt_info)
> - < DR_GROUP_SIZE (first_stmt_info)
> + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
> + && stmt_info != DR_GROUP_LAST_ELEMENT (first_stmt_info)
> && !slp)
> {
> *vec_stmt = NULL;
> @@ -12497,7 +12494,7 @@ vect_transform_stmt (vec_info *vinfo,
> one are skipped, and there vec_stmt_info shouldn't be freed
> meanwhile. */
> stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
> - if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
> + if (stmt_info == DR_GROUP_LAST_ELEMENT (group_info))
> is_store = true;
> }
> else
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 53a3d78d545..6817e113b56 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1285,11 +1285,12 @@ public:
> stmt_vec_info first_element;
> /* Pointer to the next element in the group. */
> stmt_vec_info next_element;
> + /* Pointer to the last element in the group, for now it's only used
> + for loop-vect stores since they only get transformed till the
> + last one is being transformed. */
> + stmt_vec_info last_element;
> /* The size of the group. */
> unsigned int size;
> - /* For stores, number of stores from this group seen. We vectorize the last
> - one. */
> - unsigned int store_count;
> /* For loads only, the gap from the previous load. For consecutive loads,
> GAP
> is 1. */
> unsigned int gap;
> @@ -1500,10 +1501,10 @@ struct gather_scatter_info {
> (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
> #define DR_GROUP_NEXT_ELEMENT(S) \
> (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
> +#define DR_GROUP_LAST_ELEMENT(S) \
> + (gcc_checking_assert ((S)->dr_aux.dr), (S)->last_element)
> #define DR_GROUP_SIZE(S) \
> (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
> -#define DR_GROUP_STORE_COUNT(S) \
> - (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
> #define DR_GROUP_GAP(S) \
> (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
>
> @@ -2317,6 +2318,7 @@ extern tree vect_get_new_ssa_name (tree, enum
> vect_var_kind,
> extern tree vect_create_addr_base_for_vector_ref (vec_info *,
> stmt_vec_info, gimple_seq *,
> tree);
> +extern void vect_set_group_last_element (vec_info *, stmt_vec_info);
>
> /* In tree-vect-loop.cc. */
> extern tree neutral_op_for_reduction (tree, code_helper, tree);
> --
> 2.31.1