Hi Juzhe,
general remark upfront: Please add function-level comments for all
functions. This makes reading and reviewing much easier. I had to sweep
back and forth quite a bit.
> +
> +static int
> +get_last_live_range (const vec<var_live_range> &live_ranges, tree var)
> +{
> + unsigned int ix;
> + var_live_range *live_range;
> + FOR_EACH_VEC_ELT_REVERSE (live_ranges, ix, live_range)
> + if (live_range->var == var)
> + return ix;
> + return -1;
> +}
>From reading the usage site of this function it looks like we could benefit
from having the live ranges be a hash_map as well? That way we wouldn't
need to scan through the list every time. Something like
hash_map<tree, pair<int, int>>. It looks like we only consider the range
end anyway.
> + int index = get_last_live_range (live_ranges, var);
That way we could avoid some worst-case behavior here for pathological
inputs.
> + if (index == -1)
> + {
> + var_live_range range = {var, 0, point};
> + live_ranges.safe_push (range);
> + }
Please add a comment that we assume the variable is live from the start
of this block.
> + else
> + live_ranges[index].end = point;
And here a comment that we will grow the live range for each use.
> +static bool
> +live_range_conflict_p (const var_live_range &live_range1,
> + const var_live_range &live_range2)
> +{
> + if (live_range1.start >= live_range2.end)
> + return false;
> + if (live_range1.end <= live_range2.start)
> + return false;
> + if (live_range2.start >= live_range1.end)
> + return false;
> + if (live_range2.end <= live_range1.start)
> + return false;
> + return true;
> +}
Rename to live_range_overlap_p and simplify to
return a.end >= b.start || b.end >= a.start;
> +
> +static unsigned int
> +max_number_of_live_regs (const basic_block bb,
> + const vec<var_live_range> &live_ranges,
> + machine_mode biggest_mode, int lmul)
> +{
> + unsigned int max_nregs = 0;
> + unsigned int i, j, k;
> + unsigned int live_point = 0;
> + for (i = 0; i < live_ranges.length (); i++)
> + {
> + auto_vec<var_live_range> conflict_live_ranges;
> + var_live_range live_range = live_ranges[i];
> + conflict_live_ranges.safe_push (live_range);
> + unsigned int min_point = live_range.start;
> + unsigned int max_point = live_range.end;
> + for (j = 0; j < live_ranges.length (); j++)
> + {
> + if (j == i)
> + continue;
> + if (live_range_conflict_p (live_range, live_ranges[j]))
> + {
> + conflict_live_ranges.safe_push (live_ranges[j]);
> + min_point
> + = std::min (min_point, (unsigned int) live_ranges[j].start);
> + max_point
> + = std::max (max_point, (unsigned int) live_ranges[j].end);
> + }
> + }
> + for (j = min_point; j <= max_point; j++)
> + {
> + unsigned int nregs = 0;
> + for (k = 0; k < conflict_live_ranges.length (); k++)
> + {
> + if (j >= (unsigned int) conflict_live_ranges[k].start
> + && j <= (unsigned int) conflict_live_ranges[k].end)
> + {
> + machine_mode mode
> + = TYPE_MODE (TREE_TYPE (conflict_live_ranges[k].var));
> + nregs += compute_nregs_for_mode (mode, biggest_mode, lmul);
> + }
> + }
> + if (nregs > max_nregs)
> + {
> + max_nregs = nregs;
> + live_point = j;
> + }
> + }
> + }
This looks pretty quadratic in the number of live ranges (or even cubic?).
Can't it be done more efficiently using a sliding-window approach by sorting
the live ranges according to their start point before?
Also std::min/max -> MIN/MAX.
> +
> + /* Collect user explicit RVV type. */
> + hash_set<basic_block> all_preds = get_all_predecessors (bb);
> + hash_set<basic_block> all_succs = get_all_successors (bb);
As mentioned before, maybe dominator info could help here?
> + for (i = 0; i < cfun->gimple_df->ssa_names->length (); i++)
> + {
> + tree t = ssa_name (i);
> + if (!t)
> + continue;
> + machine_mode mode = TYPE_MODE (TREE_TYPE (t));
> + if (!lookup_vector_type_attribute (TREE_TYPE (t))
> + && !riscv_v_ext_vls_mode_p (mode))
> + continue;
> +
> + gimple *def = SSA_NAME_DEF_STMT (t);
> + if (gimple_bb (def) && !all_preds.contains (gimple_bb (def)))
> + continue;
> + const ssa_use_operand_t *const head = &(SSA_NAME_IMM_USE_NODE (t));
> + const ssa_use_operand_t *ptr;
> +
> + for (ptr = head->next; ptr != head; ptr = ptr->next)
> + {
> + if (USE_STMT (ptr) && !is_gimple_debug (USE_STMT (ptr)))
> + {
> + if (all_succs.contains (gimple_bb (USE_STMT (ptr))))
> + {
Reverse the conditions and continue, i.e. if (!USE_STMT || is_gimple_debug
|| !all_succs.contains).
> +
> +static int
> +compute_lmul (class loop *loop)
> +{
> + unsigned int current_lmul = loop_autovec_infos.get (loop)->current_lmul;
> + return current_lmul;
> +}
return loop_autovec_infos.get (loop)->current_lmul;
Besides, the function does not compute so maybe change its name?
And assert that we have loop_autovec_infos for that loop.
Or maybe inline it altogether.
> +bool
> +costs::prefer_new_lmul_p (const vector_costs *uncast_other) const
> +{
I don't remember how often the cost hooks are actually called so maybe
this is unnecessary: Does it make sense to cache the computation results
here in case we are called more than once for the same loop?
Regarding tests: I would like to have at least one test with global
variables that overlap local ones.
Regards
Robin