Hi Wilco, > On 14 Nov 2024, at 18:44, Wilco Dijkstra <wilco.dijks...@arm.com> wrote: > > > Add AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS and > AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > to the baseline tuning since all modern cores use it. Fix the neoverse512tvb > tuning to be > like Neoverse V1/V2. >
This would make USE_NEW_VECTOR_COSTS effectively the default. Jennifer has been trying to do that as well, and then to remove the flag (since it would always be true), but there are some codegen regressions that still need to be addressed. See the threads “[RFC][PATCH] AArch64: Remove AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS” from October and September. Do those regressions go away if you also specify AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT at the same time? Thanks, Kyrill > gcc/ChangeLog: > > * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNE_BASE): Update. > * config/aarch64/tuning_models/cortexx925.h: Update. > * config/aarch64/tuning_models/fujitsu_monaka.h: Likewise. > * config/aarch64/tuning_models/generic_armv8_a.h: Likewise. > * config/aarch64/tuning_models/generic_armv9_a.h: Likewise. > * config/aarch64/tuning_models/neoverse512tvb.h: Likewise. > * config/aarch64/tuning_models/neoversen2.h: Likewise. > * config/aarch64/tuning_models/neoversen3.h: Likewise. > * config/aarch64/tuning_models/neoversev1.h: Likewise. > * config/aarch64/tuning_models/neoversev2.h: Likewise. > * config/aarch64/tuning_models/neoversev3.h: Likewise. > * config/aarch64/tuning_models/neoversev3ae.h: Likewise. > > --- > > diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def > b/gcc/config/aarch64/aarch64-tuning-flags.def > index > 1d8abee1e263706e3930e4d39c59faefef8cfe41..94ab968dcab999300ce4a01be939b3d9d0a7d910 > 100644 > --- a/gcc/config/aarch64/aarch64-tuning-flags.def > +++ b/gcc/config/aarch64/aarch64-tuning-flags.def > @@ -52,6 +52,8 @@ AARCH64_EXTRA_TUNING_OPTION ("avoid_pred_rmw", > AVOID_PRED_RMW) > > /* Baseline tuning settings suitable for all modern cores. 
*/ > #define AARCH64_EXTRA_TUNE_BASE (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND \ > - | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA) > + | AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA \ > + | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS \ > + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT) > > #undef AARCH64_EXTRA_TUNING_OPTION > diff --git a/gcc/config/aarch64/tuning_models/cortexx925.h > b/gcc/config/aarch64/tuning_models/cortexx925.h > index > b2ff716157a452f4ff0260c5be8ddc0355e1a9e1..ab7504a367ed0b0f8b0e59f3ad0230b172d94fa0 > 100644 > --- a/gcc/config/aarch64/tuning_models/cortexx925.h > +++ b/gcc/config/aarch64/tuning_models/cortexx925.h > @@ -219,8 +219,6 @@ static const struct tune_params cortexx925_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > diff --git a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h > b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h > index > 2d704ecd1100b5ed04a81c297f4d1508089fa78b..feb512811ee7fdb542c8d41578c40267eab0dea5 > 100644 > --- a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h > +++ b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h > @@ -54,9 +54,7 @@ static const struct tune_params fujitsu_monaka_tunings = > 2, /* min_div_recip_mul_df. */ > 0, /* max_case_values. */ > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > - (AARCH64_EXTRA_TUNE_BASE > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ > + (AARCH64_EXTRA_TUNE_BASE), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. 
*/ > diff --git a/gcc/config/aarch64/tuning_models/generic_armv8_a.h > b/gcc/config/aarch64/tuning_models/generic_armv8_a.h > index > bdd309ab03d7737a38c2b12b16db669424d43b3a..7529848fe1569944be862fdc267c8c5e7f8512a0 > 100644 > --- a/gcc/config/aarch64/tuning_models/generic_armv8_a.h > +++ b/gcc/config/aarch64/tuning_models/generic_armv8_a.h > @@ -183,8 +183,7 @@ static const struct tune_params generic_armv8_a_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ > + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ > diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h > b/gcc/config/aarch64/tuning_models/generic_armv9_a.h > index > a05a9ab92a27e8f24949aa2ffa5b5512c1487518..1ef8bd43e1efb44137f4fa7a85383e85dbd68725 > 100644 > --- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h > +++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h > @@ -248,9 +248,7 @@ static const struct tune_params generic_armv9_a_tunings = > 2, /* min_div_recip_mul_df. */ > 0, /* max_case_values. */ > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > - (AARCH64_EXTRA_TUNE_BASE > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ > + (AARCH64_EXTRA_TUNE_BASE), /* tune_flags. */ > &generic_armv9a_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. 
*/ > diff --git a/gcc/config/aarch64/tuning_models/neoverse512tvb.h > b/gcc/config/aarch64/tuning_models/neoverse512tvb.h > index > c407b89a22f1aecbfd594b493be4fbaf1f9b0437..4186697a7230280e6a912574ea35e42173018ba1 > 100644 > --- a/gcc/config/aarch64/tuning_models/neoverse512tvb.h > +++ b/gcc/config/aarch64/tuning_models/neoverse512tvb.h > @@ -155,9 +155,9 @@ static const struct tune_params neoverse512tvb_tunings = > 2, /* min_div_recip_mul_df. */ > 0, /* max_case_values. */ > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > - (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ > + (AARCH64_EXTRA_TUNE_BASE > + | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ > diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h > b/gcc/config/aarch64/tuning_models/neoversen2.h > index > fd5f8f3737054e037428527206101f9bc726116d..494c19649c256b83b1751106f2542f5522be92e6 > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversen2.h > +++ b/gcc/config/aarch64/tuning_models/neoversen2.h > @@ -219,8 +219,6 @@ static const struct tune_params neoversen2_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. 
*/ > diff --git a/gcc/config/aarch64/tuning_models/neoversen3.h > b/gcc/config/aarch64/tuning_models/neoversen3.h > index > 8b156c2fe4d27c5df70446c4eb7d9153e8082268..0eabfe8021a74ba78d3f271c6aff2513a1ca1247 > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversen3.h > +++ b/gcc/config/aarch64/tuning_models/neoversen3.h > @@ -218,9 +218,7 @@ static const struct tune_params neoversen3_tunings = > 0, /* max_case_values. */ > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > - | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */ > + | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */ > diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h > b/gcc/config/aarch64/tuning_models/neoversev1.h > index > 23c121d865248c38c2bfef5e34c314207014649b..40e24041cb3da1b9a165cb67759e8254eb853dda > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversev1.h > +++ b/gcc/config/aarch64/tuning_models/neoversev1.h > @@ -228,8 +228,6 @@ static const struct tune_params neoversev1_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. 
*/ > diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h > b/gcc/config/aarch64/tuning_models/neoversev2.h > index > 43baeafd646bafadb739376160eaaf268d0542a8..523dfe9fe56cdff67c17909d92e94f3942888ea6 > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversev2.h > +++ b/gcc/config/aarch64/tuning_models/neoversev2.h > @@ -232,8 +232,6 @@ static const struct tune_params neoversev2_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &neoversev2_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ > diff --git a/gcc/config/aarch64/tuning_models/neoversev3.h > b/gcc/config/aarch64/tuning_models/neoversev3.h > index > d65d74bfecfb69eac6c523870a372fc9d32e8687..34fc69f9e5fddbc8630de6b1da41af6fc1162818 > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversev3.h > +++ b/gcc/config/aarch64/tuning_models/neoversev3.h > @@ -219,8 +219,6 @@ static const struct tune_params neoversev3_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. 
*/ > diff --git a/gcc/config/aarch64/tuning_models/neoversev3ae.h > b/gcc/config/aarch64/tuning_models/neoversev3ae.h > index > 7b7fa0b4b081bb3bd17c8418b8ce9fc598cb19ce..7046ab66eb8c021be3b34934070458241082935f > 100644 > --- a/gcc/config/aarch64/tuning_models/neoversev3ae.h > +++ b/gcc/config/aarch64/tuning_models/neoversev3ae.h > @@ -219,8 +219,6 @@ static const struct tune_params neoversev3ae_tunings = > tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ > (AARCH64_EXTRA_TUNE_BASE > | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS > - | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS > - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT > | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */ > &generic_prefetch_tune, > AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */ >