On 30/11/2023 08:27, Di Zhao OS wrote:
This patch modifies tunings for ampere1/ampere1a/ampere1b, to:

1. Allow reassociation on FP additions.
2. Avoid generating loop-dependant FMA chains. Added a tuning
option for this.

Bootstrapped and tested. Is this ok for trunk?

Thanks,
Di Zhao

gcc/ChangeLog:

         * config/aarch64/aarch64-tuning-flags.def 
(AARCH64_EXTRA_TUNING_OPTION):
         New tuing option to avoid cross-loop FMA.

typo: tuning

         * config/aarch64/aarch64.cc (aarch64_override_options_internal): Set
         param_avoid_fma_max_bits according to tuning option.
         * config/aarch64/tuning_models/ampere1.h: Modify tunings related with
         FMA.
         * config/aarch64/tuning_models/ampere1a.h: Modify tunings related with
         FMA.
         * config/aarch64/tuning_models/ampere1b.h: Modify tunings related with
         FMA.


You need to mention the name of the structure you're modifying. Also we usually just write 'Likewise.' if the change is identical in effect to the change immediately above, so

        * config/aarch64/tuning_models/ampere1.h (ampere1_tunings):
        Modify tunings related with FMA.
        * config/aarch64/tuning_models/ampere1a.h (ampere1a_tunings):
        Likewise.
        * config/aarch64/tuning_models/ampere1b.h (ampere1b_tunings):
        Likewise.

Finally, watch your line length. The total length of the line should not go beyond column 72 in commit log entries, unless that involves breaking a single word on a line.

Otherwise, this is OK.

R.

---
  gcc/config/aarch64/aarch64-tuning-flags.def | 2 ++
  gcc/config/aarch64/aarch64.cc               | 6 ++++++
  gcc/config/aarch64/tuning_models/ampere1.h  | 2 +-
  gcc/config/aarch64/tuning_models/ampere1a.h | 4 ++--
  gcc/config/aarch64/tuning_models/ampere1b.h | 5 +++--
  5 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def 
b/gcc/config/aarch64/aarch64-tuning-flags.def
index 774568e9106..f28a73839a6 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -47,4 +47,6 @@ AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", 
USE_NEW_VECTOR_COSTS)
AARCH64_EXTRA_TUNING_OPTION ("matched_vector_throughput", MATCHED_VECTOR_THROUGHPUT) +AARCH64_EXTRA_TUNING_OPTION ("avoid_cross_loop_fma", AVOID_CROSS_LOOP_FMA)
+
  #undef AARCH64_EXTRA_TUNING_OPTION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 64684258b7b..28bc70a787f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -16083,6 +16083,12 @@ aarch64_override_options_internal (struct gcc_options 
*opts)
        && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
      opts->x_flag_prefetch_loop_arrays = 1;
+ /* Avoid loop-dependant FMA chains. */
+  if (aarch64_tune_params.extra_tuning_flags
+      & AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA)
+    SET_OPTION_IF_UNSET (opts, &global_options_set, param_avoid_fma_max_bits,
+                        512);
+
    aarch64_override_options_after_change_1 (opts);
  }
diff --git a/gcc/config/aarch64/tuning_models/ampere1.h b/gcc/config/aarch64/tuning_models/ampere1.h
index 8d2a1c69610..a144e8f94b3 100644
--- a/gcc/config/aarch64/tuning_models/ampere1.h
+++ b/gcc/config/aarch64/tuning_models/ampere1.h
@@ -104,7 +104,7 @@ static const struct tune_params ampere1_tunings =
    2,  /* min_div_recip_mul_df.  */
    0,  /* max_case_values.  */
    tune_params::AUTOPREFETCHER_WEAK,   /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
    &ampere1_prefetch_tune,
    AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
    AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1a.h 
b/gcc/config/aarch64/tuning_models/ampere1a.h
index c419ffb3c1a..f688ed08a79 100644
--- a/gcc/config/aarch64/tuning_models/ampere1a.h
+++ b/gcc/config/aarch64/tuning_models/ampere1a.h
@@ -50,13 +50,13 @@ static const struct tune_params ampere1a_tunings =
    "32:16",  /* loop_align.  */
    2,  /* int_reassoc_width.  */
    4,  /* fp_reassoc_width.  */
-  1,   /* fma_reassoc_width.  */
+  4,   /* fma_reassoc_width.  */
    2,  /* vec_reassoc_width.  */
    2,  /* min_div_recip_mul_sf.  */
    2,  /* min_div_recip_mul_df.  */
    0,  /* max_case_values.  */
    tune_params::AUTOPREFETCHER_WEAK,   /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
    &ampere1_prefetch_tune,
    AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
    AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */
diff --git a/gcc/config/aarch64/tuning_models/ampere1b.h 
b/gcc/config/aarch64/tuning_models/ampere1b.h
index c4928f50d29..a98b6a980f7 100644
--- a/gcc/config/aarch64/tuning_models/ampere1b.h
+++ b/gcc/config/aarch64/tuning_models/ampere1b.h
@@ -99,13 +99,14 @@ static const struct tune_params ampere1b_tunings =
    "32:16",  /* loop_align.  */
    2,  /* int_reassoc_width.  */
    4,  /* fp_reassoc_width.  */
-  1,   /* fma_reassoc_width.  */
+  4,   /* fma_reassoc_width.  */
    2,  /* vec_reassoc_width.  */
    2,  /* min_div_recip_mul_sf.  */
    2,  /* min_div_recip_mul_df.  */
    0,  /* max_case_values.  */
    tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),     /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND |
+   AARCH64_EXTRA_TUNE_AVOID_CROSS_LOOP_FMA),   /* tune_flags.  */
    &ampere1b_prefetch_tune,
    AARCH64_LDP_STP_POLICY_ALIGNED,   /* ldp_policy_model.  */
    AARCH64_LDP_STP_POLICY_ALIGNED    /* stp_policy_model.  */

Reply via email to