On 29/10/14 12:55, Wilco Dijkstra wrote:
> This patch adds the TARGET_SCHED_REASSOCIATION_WIDTH hook. Separate settings
> for integer, floating-point and vector modes are supported via the CPU tuning
> parameters. Setting the FP reassociation width to 4 improves FP performance
> on SPEC2000 by ~1.3%.
>
> OK for commit?
>
> ChangeLog:
> 2014-10-29 Wilco Dijkstra <[email protected]>
>
> * gcc/config/aarch64/aarch64-protos.h (tune_params):
> Add reassociation tuning parameters.
> * gcc/config/aarch64/aarch64.c (TARGET_SCHED_REASSOCIATION_WIDTH):
> Define. (aarch64_reassociation_width): New function.
> (generic_tunings): Add reassociation tuning parameters.
> (cortexa53_tunings): Likewise.
> (cortexa57_tunings): Likewise.
> (thunderx_tunings): Likewise.
>
If all cores seem to benefit from FP reassociation set to 4, then it
seems odd that 4 is not also the default for generic.
Andrew, you may need to pick a target-specific value for ThunderX; I
think Wilco has just picked something that seems plausible because he
needs to put a real value in there.
What happens if the integer and vector numbers are bumped up? I'd have
thought that integer numbers >1 would be appropriate on all dual-issue
or greater cores.
R.
> ---
> gcc/config/aarch64/aarch64-protos.h | 3 +++
> gcc/config/aarch64/aarch64.c | 34 +++++++++++++++++++++++++++++++---
> 2 files changed, 34 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h
> b/gcc/config/aarch64/aarch64-protos.h
> index 810644c..9c03f7b 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -170,6 +170,9 @@ struct tune_params
> const struct cpu_vector_cost *const vec_costs;
> const int memmov_cost;
> const int issue_rate;
> + const int int_reassoc_width;
> + const int fp_reassoc_width;
> + const int vec_reassoc_width;
> };
>
> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index e6cd5eb..4d67722 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -309,7 +309,10 @@ static const struct tune_params generic_tunings =
> &generic_regmove_cost,
> &generic_vector_cost,
> NAMED_PARAM (memmov_cost, 4),
> - NAMED_PARAM (issue_rate, 2)
> + NAMED_PARAM (issue_rate, 2),
> + 1, /* int_reassoc_width. */
> + 1, /* fp_reassoc_width. */
> + 1 /* vec_reassoc_width. */
> };
>
> static const struct tune_params cortexa53_tunings =
> @@ -319,7 +322,10 @@ static const struct tune_params cortexa53_tunings =
> &cortexa53_regmove_cost,
> &generic_vector_cost,
> NAMED_PARAM (memmov_cost, 4),
> - NAMED_PARAM (issue_rate, 2)
> + NAMED_PARAM (issue_rate, 2),
> + 1, /* int_reassoc_width. */
> + 4, /* fp_reassoc_width. */
> + 1 /* vec_reassoc_width. */
> };
>
> static const struct tune_params cortexa57_tunings =
> @@ -329,7 +335,10 @@ static const struct tune_params cortexa57_tunings =
> &cortexa57_regmove_cost,
> &cortexa57_vector_cost,
> NAMED_PARAM (memmov_cost, 4),
> - NAMED_PARAM (issue_rate, 3)
> + NAMED_PARAM (issue_rate, 3),
> + 1, /* int_reassoc_width. */
> + 4, /* fp_reassoc_width. */
> + 1 /* vec_reassoc_width. */
> };
>
> static const struct tune_params thunderx_tunings =
> @@ -340,6 +349,9 @@ static const struct tune_params thunderx_tunings =
> &generic_vector_cost,
> NAMED_PARAM (memmov_cost, 6),
> NAMED_PARAM (issue_rate, 2)
> + 1, /* int_reassoc_width. */
> + 4, /* fp_reassoc_width. */
> + 1 /* vec_reassoc_width. */
> };
>
> /* A processor implementing AArch64. */
> @@ -429,6 +441,19 @@ static const char * const aarch64_condition_codes[] =
> "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
> };
>
> +static int
> +aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
> + enum machine_mode mode)
> +{
> + if (VECTOR_MODE_P (mode))
> + return aarch64_tune_params->vec_reassoc_width;
> + if (INTEGRAL_MODE_P (mode))
> + return aarch64_tune_params->int_reassoc_width;
> + if (FLOAT_MODE_P (mode))
> + return aarch64_tune_params->fp_reassoc_width;
> + return 1;
> +}
> +
> /* Provide a mapping from gcc register numbers to dwarf register numbers. */
> unsigned
> aarch64_dbx_register_number (unsigned regno)
> @@ -10147,6 +10172,9 @@ aarch64_asan_shadow_offset (void)
> #undef TARGET_PREFERRED_RELOAD_CLASS
> #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
>
> +#undef TARGET_SCHED_REASSOCIATION_WIDTH
> +#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width
> +
> #undef TARGET_SECONDARY_RELOAD
> #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
>
>