On Tue, 13 Jul 2021, Jiufu Guo wrote:
> Major changes from v1:
> * Add target hook to query preferred doloop mode.
> * Recompute doloop iv base from niter under preferred mode.
>
> Currently, doloop.xx variable is using the type as niter which may shorter
> than word size. For some cases, it would be better to use word size type.
> For example, on 64bit system, to access 32bit value, subreg maybe used.
> Then using 64bit type maybe better for niter if it can be present in
> both 32bit and 64bit.
>
> This patch add target hook for querg perferred mode for doloop iv.
> And update doloop iv mode accordingly.
>
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>
> BR.
> Jiufu
>
> gcc/ChangeLog:
>
> 2021-07-13 Jiufu Guo <[email protected]>
>
> PR target/61837
> * config/rs6000/rs6000.c (TARGET_PREFERRED_DOLOOP_MODE): New hook.
> (rs6000_preferred_doloop_mode): New hook.
> * doc/tm.texi: Regenerated.
> * doc/tm.texi.in: Add hook preferred_doloop_mode.
> * target.def (preferred_doloop_mode): New hook.
> * targhooks.c (default_preferred_doloop_mode): New hook.
> * targhooks.h (default_preferred_doloop_mode): New hook.
> * tree-ssa-loop-ivopts.c (compute_doloop_base_on_mode): New function.
> (add_iv_candidate_for_doloop): Call targetm.preferred_doloop_mode
> and compute_doloop_base_on_mode.
>
> gcc/testsuite/ChangeLog:
>
> 2021-07-13 Jiufu Guo <[email protected]>
>
> PR target/61837
> * gcc.target/powerpc/pr61837.c: New test.
> ---
> gcc/config/rs6000/rs6000.c | 9 +++
> gcc/doc/tm.texi | 4 ++
> gcc/doc/tm.texi.in | 2 +
> gcc/target.def | 7 +++
> gcc/targhooks.c | 8 +++
> gcc/targhooks.h | 2 +
> gcc/testsuite/gcc.target/powerpc/pr61837.c | 16 ++++++
> gcc/tree-ssa-loop-ivopts.c | 66 +++++++++++++++++++++-
> 8 files changed, 112 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr61837.c
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 9a5db63d0ef..444f3c49288 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1700,6 +1700,9 @@ static const struct attribute_spec
> rs6000_attribute_table[] =
> #undef TARGET_DOLOOP_COST_FOR_ADDRESS
> #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
>
> +#undef TARGET_PREFERRED_DOLOOP_MODE
> +#define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
> +
> #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
> #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
>
> @@ -27867,6 +27870,12 @@ rs6000_predict_doloop_p (struct loop *loop)
> return true;
> }
>
> +static machine_mode
> +rs6000_preferred_doloop_mode (machine_mode)
> +{
> + return word_mode;
> +}
> +
> /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
>
> static bool
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 2a41ae5fba1..3f5881220f8 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11984,6 +11984,10 @@ By default, the RTL loop optimizer does not use a
> present doloop pattern for
> loops containing function calls or branch on table instructions.
> @end deftypefn
>
> +@deftypefn {Target Hook} machine_mode TARGET_PREFERRED_DOLOOP_MODE
> (machine_mode @var{mode})
> +This hook returns a more preferred mode or the @var{mode} itself.
> +@end deftypefn
> +
> @deftypefn {Target Hook} bool TARGET_LEGITIMATE_COMBINED_INSN (rtx_insn
> *@var{insn})
> Take an instruction in @var{insn} and return @code{false} if the instruction
> is not appropriate as a combination of two or more instructions. The
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index f881cdabe9e..38215149a92 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7917,6 +7917,8 @@ to by @var{ce_info}.
>
> @hook TARGET_INVALID_WITHIN_DOLOOP
>
> +@hook TARGET_PREFERRED_DOLOOP_MODE
> +
> @hook TARGET_LEGITIMATE_COMBINED_INSN
>
> @hook TARGET_CAN_FOLLOW_JUMP
> diff --git a/gcc/target.def b/gcc/target.def
> index c009671c583..91a96150e50 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4454,6 +4454,13 @@ loops containing function calls or branch on table
> instructions.",
> const char *, (const rtx_insn *insn),
> default_invalid_within_doloop)
>
> +DEFHOOK
> +(preferred_doloop_mode,
> + "This hook returns a more preferred mode or the @var{mode} itself.",
> + machine_mode,
> + (machine_mode mode),
> + default_preferred_doloop_mode)
> +
> /* Returns true for a legitimate combined insn. */
> DEFHOOK
> (legitimate_combined_insn,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 44a1facedcf..eb5190910dc 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -660,6 +660,14 @@ default_predict_doloop_p (class loop *loop
> ATTRIBUTE_UNUSED)
> return false;
> }
>
> +/* By default, just use the input MODE itself. */
> +
> +machine_mode
> +default_preferred_doloop_mode (machine_mode mode)
> +{
> + return mode;
> +}
> +
> /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns
> an error message.
>
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index f70a307d26c..f0560b6ae34 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -88,6 +88,8 @@ extern bool default_fixed_point_supported_p (void);
> extern bool default_has_ifunc_p (void);
>
> extern bool default_predict_doloop_p (class loop *);
> +extern machine_mode
> +default_preferred_doloop_mode (machine_mode);
> extern const char * default_invalid_within_doloop (const rtx_insn *);
>
> extern tree default_builtin_vectorized_function (unsigned int, tree, tree);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr61837.c
> b/gcc/testsuite/gcc.target/powerpc/pr61837.c
> new file mode 100644
> index 00000000000..dc44eb9cb41
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr61837.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +void foo(int *p1, long *p2, int s)
> +{
> + int n, v, i;
> +
> + v = 0;
> + for (n = 0; n <= 100; n++) {
> + for (i = 0; i < s; i++)
> + if (p2[i] == n)
> + p1[i] = v;
> + v += 88;
> + }
> +}
> +
> +/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
> diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
> index 12a8a49a307..92767a09b6c 100644
> --- a/gcc/tree-ssa-loop-ivopts.c
> +++ b/gcc/tree-ssa-loop-ivopts.c
> @@ -5657,6 +5657,56 @@ relate_compare_use_with_all_cands (struct ivopts_data
> *data)
> }
> }
>
> +/* If PREFERRED_MODE is suitable and profitable, use the preferred
> + PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
> +
> +static tree
> +compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
> + const widest_int &iterations_max)
> +{
> + tree ntype = TREE_TYPE (niter);
> + tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
It's unlikely but type_for_mode can return NULL, so you shouldn't
assert but fall back to using the original type. The assert for
TYPE_UNSIGNED is superfluous - you asked for an unsigned type already.
Otherwise the IVOPTs changes and the new target hook look OK, please
address Seghers comments though.
Thanks,
Richard.
> + gcc_assert (pref_type && TYPE_UNSIGNED (ntype));
> +
> + int prec = TYPE_PRECISION (ntype);
> + int pref_prec = TYPE_PRECISION (pref_type);
> +
> + tree base;
> +
> + /* Check if the PREFERRED_MODED is able to present niter. */
> + if (pref_prec > prec
> + || wi::ltu_p (iterations_max,
> + widest_int::from (wi::max_value (pref_prec, UNSIGNED),
> + UNSIGNED)))
> + {
> + /* No wrap, it is safe to use preferred type after niter + 1. */
> + if (wi::ltu_p (iterations_max,
> + widest_int::from (wi::max_value (prec, UNSIGNED),
> + UNSIGNED)))
> + {
> + /* This could help to optimize "-1 +1" pair when niter looks
> + like "n-1": n is in original mode. "base = (n - 1) + 1"
> + in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
> + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> + build_int_cst (ntype, 1));
> + base = fold_convert (pref_type, base);
> + }
> +
> + /* To avoid wrap, need fold niter to preferred type before plus 1. */
> + else
> + {
> + niter = fold_convert (pref_type, niter);
> + base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
> + build_int_cst (pref_type, 1));
> + }
> + }
> + else
> + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> + build_int_cst (ntype, 1));
> + return base;
> +}
> +
> /* Add one doloop dedicated IV candidate:
> - Base is (may_be_zero ? 1 : (niter + 1)).
> - Step is -1. */
> @@ -5688,8 +5738,20 @@ add_iv_candidate_for_doloop (struct ivopts_data *data)
> return;
> }
>
> - tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> - build_int_cst (ntype, 1));
> + machine_mode mode = TYPE_MODE (ntype);
> + machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
> +
> + tree base;
> + if (mode != pref_mode)
> + {
> + base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
> + ntype = TREE_TYPE (base);
> + }
> + else
> + base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
> + build_int_cst (ntype, 1));
> +
> +
> add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL,
> true);
> }
>
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)