On Mon, Feb 3, 2025 at 7:23 AM H.J. Lu <[email protected]> wrote:
>
> commit 3b9b8d6cfdf59337f4b7ce10ce92a98044b2657b
> Author: Surya Kumari Jangala <[email protected]>
> Date: Tue Jun 25 08:37:49 2024 -0500
>
> ira: Scale save/restore costs of callee save registers with block
> frequency
>
> scales the cost of saving/restoring a callee-save hard register in epilogue
> and prologue with the entry block frequency, which, if not optimizing for
> size, is 10000, for all targets. As the result, callee-saved registers
> may not be used to preserve local variable values across calls on some
> targets, like x86. Add a target hook for the callee-saved register cost
> scale in epilogue and prologue used by IRA. The default version of this
> target hook returns 1 if optimizing for size, otherwise returns the entry
> block frequency. Add an x86 version of this target hook to restore the
> old behavior prior to the above commit.
>
> PR rtl-optimization/111673
> PR rtl-optimization/115932
> PR rtl-optimization/116028
> PR rtl-optimization/117081
> PR rtl-optimization/117082
> PR rtl-optimization/118497
> * ira-color.cc (assign_hard_reg): Call the target hook for the
> callee-saved register cost scale in epilogue and prologue.
> * target.def (ira_callee_saved_register_cost_scale): New target
> hook.
> * targhooks.cc (default_ira_callee_saved_register_cost_scale):
> New.
> * targhooks.h (default_ira_callee_saved_register_cost_scale):
> Likewise.
> * config/i386/i386.cc (ix86_ira_callee_saved_register_cost_scale):
> New.
> (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): Likewise.
> * doc/tm.texi: Regenerated.
> * doc/tm.texi.in (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE):
> New.
>
> Signed-off-by: H.J. Lu <[email protected]>
> ---
> gcc/config/i386/i386.cc | 11 +++++++++++
> gcc/doc/tm.texi | 8 ++++++++
> gcc/doc/tm.texi.in | 2 ++
> gcc/ira-color.cc | 3 +--
> gcc/target.def | 12 ++++++++++++
> gcc/targhooks.cc | 8 ++++++++
> gcc/targhooks.h | 1 +
> 7 files changed, 43 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index f89201684a8..3128973ba79 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -20600,6 +20600,14 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
> return false;
> }
>
> +/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */
> +
> +static int
> +ix86_ira_callee_saved_register_cost_scale (int)
> +{
> + return 1;
> +}
> +
> /* Return true if a set of DST by the expression SRC should be allowed.
> This prevents complex sets of likely_spilled hard regs before split1. */
>
> @@ -27078,6 +27086,9 @@ ix86_libgcc_floating_mode_supported_p
> #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
> ix86_preferred_output_reload_class
> #undef TARGET_CLASS_LIKELY_SPILLED_P
> #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
> +#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
> +#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \
> + ix86_ira_callee_saved_register_cost_scale
>
> #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
> #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 0de24eda6f0..9f42913a4ef 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -3047,6 +3047,14 @@ A target hook which can change allocno class for given
> pseudo from
> The default version of this target hook always returns given class.
> @end deftypefn
>
> +@deftypefn {Target Hook} int TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
> (int @var{hard_regno})
> +A target hook which returns the callee-saved register @var{hard_regno}
> +cost scale in epilogue and prologue used by IRA.
> +
> +The default version of this target hook returns 1 if optimizing for
> +size, otherwise returns the entry block frequency.
> +@end deftypefn
> +
> @deftypefn {Target Hook} bool TARGET_LRA_P (void)
> A target hook which returns true if we use LRA instead of reload pass.
>
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 631d04131e3..6dbe22581ca 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -2388,6 +2388,8 @@ in the reload pass.
>
> @hook TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
>
> +@hook TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
> +
> @hook TARGET_LRA_P
>
> @hook TARGET_REGISTER_PRIORITY
> diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
> index 0699b349a1a..233060e1587 100644
> --- a/gcc/ira-color.cc
> +++ b/gcc/ira-color.cc
> @@ -2180,8 +2180,7 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
> + ira_memory_move_cost[mode][rclass][1])
> * saved_nregs / hard_regno_nregs (hard_regno,
> mode) - 1)
> - * (optimize_size ? 1 :
> - REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
> + * targetm.ira_callee_saved_register_cost_scale
> (hard_regno);
> cost += add_cost;
> full_cost += add_cost;
> }
> diff --git a/gcc/target.def b/gcc/target.def
> index 4050b2ebdd4..c348b15815a 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -5714,6 +5714,18 @@ DEFHOOK
> reg_class_t, (int, reg_class_t, reg_class_t),
> default_ira_change_pseudo_allocno_class)
>
> +/* Scale of callee-saved register cost in epilogue and prologue used by
> + IRA. */
> +DEFHOOK
> +(ira_callee_saved_register_cost_scale,
> + "A target hook which returns the callee-saved register @var{hard_regno}\n\
> +cost scale in epilogue and prologue used by IRA.\n\
> +\n\
> +The default version of this target hook returns 1 if optimizing for\n\
> +size, otherwise returns the entry block frequency.",
> + int, (int hard_regno),
> + default_ira_callee_saved_register_cost_scale)
> +
> /* Return true if we use LRA instead of reload. */
> DEFHOOK
> (lra_p,
> diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
> index f80dc8b2e7e..344075efa41 100644
> --- a/gcc/targhooks.cc
> +++ b/gcc/targhooks.cc
> @@ -1305,6 +1305,14 @@ default_ira_change_pseudo_allocno_class (int regno
> ATTRIBUTE_UNUSED,
> return cl;
> }
>
> +int
> +default_ira_callee_saved_register_cost_scale (int)
> +{
> + return (optimize_size
> + ? 1
Why use optimize_size when REG_FREQ_FROM_BB uses
((optimize_function_for_size_p (cfun) \
|| !cfun->cfg->count_max.initialized_p ())
? You are changing behavior for all targets this way? Did you consider
altering REG_FREQ_FROM_BB to
#define REG_FREQ_FROM_BB(bb, size_value)
((optimize_function_for_size_p (cfun) \
|| !cfun->cfg->count_max.initialized_p ()) \
? default \
: ((bb)->count.to_frequency (cfun) \
* REG_FREQ_MAX / BB_FREQ_MAX) \
? ((bb)->count.to_frequency (cfun) \
* REG_FREQ_MAX / BB_FREQ_MAX) \
: 1)
so return a specified value for the optimize_size case? (see the
other thread where
I wonder about that odd !cfun->cfg->count_max.initialized_p () check)
IMO at this point a new target hook should preserve existing behavior by default
or alternatively the original patch should be reverted as causing
regressions and
a new patch introducing the target hook should be installed in next stage1.
Richard.
> + : REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
> +}
> +
> extern bool
> default_lra_p (void)
> {
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 7d15f632c23..8871e01430c 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -173,6 +173,7 @@ extern void default_emit_call_builtin___clear_cache (rtx,
> rtx);
> extern poly_int64 default_return_pops_args (tree, tree, poly_int64);
> extern reg_class_t default_ira_change_pseudo_allocno_class (int, reg_class_t,
> reg_class_t);
> +extern int default_ira_callee_saved_register_cost_scale (int);
> extern bool default_lra_p (void);
> extern int default_register_priority (int);
> extern bool default_register_usage_leveling_p (void);
> --
> 2.48.1
>