Hi,
Updated to v3 to follow the param change made in patch 2/4.
BR,
Kewen
-----------
gcc/ChangeLog
2020-MM-DD Kewen Lin <[email protected]>
* tree-ssa-loop-ivopts.c (struct iv_group): New field reg_offset_p.
(struct iv_cand): New field reg_offset_p.
(struct ivopts_data): New field consider_reg_offset_for_unroll_p.
(dump_groups): Dump group with reg_offset_p.
(record_group): Initialize reg_offset_p.
(mark_reg_offset_groups): New function.
(find_interesting_uses): Call mark_reg_offset_groups.
(add_candidate_1): Update reg_offset_p if derived from reg_offset_p
group.
(set_group_iv_cost): Scale up group cost with estimate_unroll_factor if
consider_reg_offset_for_unroll_p.
(determine_iv_cost): Increase step cost with estimate_unroll_factor if
consider_reg_offset_for_unroll_p.
(tree_ssa_iv_optimize_loop): Call estimate_unroll_factor, update
consider_reg_offset_for_unroll_p.
On 2020/2/25 5:48 PM, Kewen.Lin wrote:
> Hi,
>
> Following the proposed hook changes, this version has these main changes:
> 1) Check with addr_offset_valid_p instead.
> 2) Check the 1st and the last use for the whole address group.
> 3) Scale up group costs accordingly.
>
> Bootstrapped/regtested on powerpc64le-linux-gnu (LE).
>
> BR,
> Kewen
> -----------
>
> gcc/ChangeLog
>
> 2020-02-25 Kewen Lin <[email protected]>
>
> * tree-ssa-loop-ivopts.c (struct iv_group): New field reg_offset_p.
> (struct iv_cand): New field reg_offset_p.
> (struct ivopts_data): New field consider_reg_offset_for_unroll_p.
> (dump_groups): Dump group with reg_offset_p.
> (record_group): Initialize reg_offset_p.
> (mark_reg_offset_groups): New function.
> (find_interesting_uses): Call mark_reg_offset_groups.
> (add_candidate_1): Update reg_offset_p if derived from reg_offset_p
> group.
> (set_group_iv_cost): Scale up group cost with estimate_unroll_factor if
> consider_reg_offset_for_unroll_p.
> (determine_iv_cost): Increase step cost with estimate_unroll_factor if
> consider_reg_offset_for_unroll_p.
> (tree_ssa_iv_optimize_loop): Call estimate_unroll_factor, update
> consider_reg_offset_for_unroll_p.
>
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 1d2697ae1ba..1b7e4621f37 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -432,6 +432,8 @@ struct iv_group
struct iv_cand *selected;
/* To indicate this is a doloop use group. */
bool doloop_p;
+ /* To indicate this group is reg_offset valid. */
+ bool reg_offset_p;
/* Uses in the group. */
vec<struct iv_use *> vuses;
};
@@ -473,6 +475,7 @@ struct iv_cand
struct iv *orig_iv; /* The original iv if this cand is added from biv with
smaller type. */
bool doloop_p; /* Whether this is a doloop candidate. */
+ bool reg_offset_p; /* Derived from one reg_offset valid group. */
};
/* Hashtable entry for common candidate derived from iv uses. */
@@ -653,6 +656,10 @@ struct ivopts_data
/* Whether the loop has doloop comparison use. */
bool doloop_use_p;
+
+ /* Whether need to consider register offset addressing mode for the loop with
+ upcoming unrolling by estimated unroll factor. */
+ bool consider_reg_offset_for_unroll_p;
};
/* An assignment of iv candidates to uses. */
@@ -840,6 +847,11 @@ dump_groups (FILE *file, struct ivopts_data *data)
gcc_assert (group->type == USE_COMPARE);
fprintf (file, " Type:\tCOMPARE\n");
}
+ if (group->reg_offset_p)
+ {
+ gcc_assert (address_p (group->type));
+ fprintf (file, " reg_offset_p: true\n");
+ }
for (j = 0; j < group->vuses.length (); j++)
dump_use (file, group->vuses[j]);
}
@@ -1582,6 +1594,7 @@ record_group (struct ivopts_data *data, enum use_type type)
group->related_cands = BITMAP_ALLOC (NULL);
group->vuses.create (1);
group->doloop_p = false;
+ group->reg_offset_p = false;
data->vgroups.safe_push (group);
return group;
@@ -2731,6 +2744,60 @@ split_address_groups (struct ivopts_data *data)
}
}
+/* Mark address type groups whose offsets remain valid after unrolling as
+ reg_offset_p; clear consider_reg_offset_for_unroll_p if there is none. */
+
+static void
+mark_reg_offset_groups (struct ivopts_data *data)
+{
+ class loop *loop = data->current_loop;
+ gcc_assert (data->current_loop->estimated_unroll > 1);
+ bool any_reg_offset_p = false;
+
+ for (unsigned i = 0; i < data->vgroups.length (); i++)
+ {
+ struct iv_group *group = data->vgroups[i];
+ if (address_p (group->type))
+ {
+ struct iv_use *head_use = group->vuses[0];
+ if (!tree_fits_poly_int64_p (head_use->iv->step))
+ continue;
+
+ bool found = true;
+ poly_int64 step = tree_to_poly_int64 (head_use->iv->step);
+ /* Max extra offset for the group head: (estimated_unroll - 1) steps. */
+ poly_int64 max_increase = (loop->estimated_unroll - 1) * step;
+ /* Check whether the head offset is still valid with this increment. */
+ if (!addr_offset_valid_p (head_use, max_increase))
+ found = false;
+
+ unsigned group_size = group->vuses.length ();
+ /* With several uses, the rest of the group must be checked too. */
+ if (group_size > 1)
+ {
+ /* Only the last use needs checking: if both the head and the last
+ use are valid, the ones in between should be fine as well. */
+ struct iv_use *last_use = group->vuses[group_size - 1];
+ poly_int64 max_delta
+ = last_use->addr_offset - head_use->addr_offset;
+ poly_int64 max_offset = max_delta + max_increase;
+ if (maybe_ne (max_delta, 0)
+ && !addr_offset_valid_p (head_use, max_offset))
+ found = false;
+ }
+
+ if (found)
+ {
+ group->reg_offset_p = true;
+ any_reg_offset_p = true;
+ }
+ }
+ }
+
+ if (!any_reg_offset_p)
+ data->consider_reg_offset_for_unroll_p = false;
+}
+
/* Finds uses of the induction variables that are interesting. */
static void
@@ -2762,6 +2829,9 @@ find_interesting_uses (struct ivopts_data *data)
split_address_groups (data);
+ if (data->consider_reg_offset_for_unroll_p)
+ mark_reg_offset_groups (data);
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\n<IV Groups>:\n");
@@ -3147,6 +3217,7 @@ add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
cand->important = important;
cand->incremented_at = incremented_at;
cand->doloop_p = doloop;
+ cand->reg_offset_p = false;
data->vcands.safe_push (cand);
if (!poly_int_tree_p (step))
@@ -3183,7 +3254,11 @@ add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
/* Relate candidate to the group for which it is added. */
if (use)
- bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
+ {
+ bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
+ if (data->vgroups[use->group_id]->reg_offset_p)
+ cand->reg_offset_p = true;
+ }
return cand;
}
@@ -3654,6 +3729,14 @@ set_group_iv_cost (struct ivopts_data *data,
return;
}
+ /* Since we priced more on non reg_offset IV cand step cost, we should scale
+ up the appropriate IV group costs. Simply consider USE_COMPARE at the
+ loop exit, FIXME if multiple exits supported or no loop exit comparisons
+ matter. */
+ if (data->consider_reg_offset_for_unroll_p
+ && group->vuses[0]->type != USE_COMPARE)
+ cost *= (HOST_WIDE_INT) data->current_loop->estimated_unroll;
+
if (data->consider_all_candidates)
{
group->cost_map[cand->id].cand = cand;
@@ -5890,6 +5973,10 @@ determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
cost = cost_step + adjust_setup_cost (data, cost_base.cost);
+ /* Consider additional step updates during unrolling. */
+ if (data->consider_reg_offset_for_unroll_p && !cand->reg_offset_p)
+ cost += (data->current_loop->estimated_unroll - 1) * cost_step;
+
/* Prefer the original ivs unless we may gain something by replacing it.
The reason is to make debugging simpler; so this is not relevant for
artificial ivs created by other optimization passes. */
@@ -7976,6 +8063,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
data->current_loop = loop;
data->loop_loc = find_loop_location (loop).get_location_t ();
data->speed = optimize_loop_for_speed_p (loop);
+ data->consider_reg_offset_for_unroll_p = false;
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -8008,6 +8096,16 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
if (!find_induction_variables (data))
goto finish;
+ if (param_iv_consider_reg_offset_for_unroll != 0 && exit)
+ {
+ tree_niter_desc *desc = niter_for_exit (data, exit);
+ estimate_unroll_factor (loop, desc);
+ data->consider_reg_offset_for_unroll_p = loop->estimated_unroll > 1;
+ if (dump_file && (dump_flags & TDF_DETAILS)
+ && data->consider_reg_offset_for_unroll_p)
+ fprintf (dump_file, "estimated_unroll:%u\n", loop->estimated_unroll);
+ }
+
/* Finds interesting uses (item 1). */
find_interesting_uses (data);
if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)