From a882c45b74a9480c9b3f52d731f3ab0abeca8a97 Mon Sep 17 00:00:00 2001
From: Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
Date: Thu, 15 Dec 2016 12:50:07 +0000
Subject: [PATCH 4/6] Enable -fprefetch-loop-arrays at -O3 for cores that
 benefit from prefetching.

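	* config/aarch64/aarch64-protos.h (struct tune_params): Add
	prefetch.default_opt_level field.
	* config/aarch64/aarch64.c (AARCH64_PREFETCH_NOT_BENEFICIAL)
	(AARCH64_PREFETCH_BENEFICIAL): Initialize default_opt_level.
	(exynosm1_tunings, qdf24xx_tunings, thunderx2t99_tunings): Update
	for the new macro argument.
	(aarch64_override_options_internal): Set flag_prefetch_loop_arrays,
	unless it was set explicitly or we optimize for size, when the
	optimization level (plus 1 if flag_profile_use is set) reaches the
	tuning's non-negative prefetch.default_opt_level.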

Change-Id: Id41411e671e0a55dc7268e0ad0a4e8ff1421c90a
---
 gcc/config/aarch64/aarch64-protos.h |  1 +
 gcc/config/aarch64/aarch64.c        | 25 +++++++++++++++++++------
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index edca3e2..27725e2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -231,6 +231,7 @@ struct tune_params
       int l1_cache_size;
       int l1_cache_line_size;
       int l2_cache_size;
+      int default_opt_level;
     } prefetch;
 /* An enum specifying how to take into account CPU autoprefetch capabilities
    during instruction scheduling:
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a87b216..35db3f2 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -526,13 +526,15 @@ static const cpu_approx_modes xgene1_approx_modes =
   AARCH64_APPROX_ALL	/* recip_sqrt  */
 };
 
-#define AARCH64_PREFETCH_NOT_BENEFICIAL { 0, -1, -1, -1 }
-#define AARCH64_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size,l2_size) \
+#define AARCH64_PREFETCH_NOT_BENEFICIAL { 0, -1, -1, -1, -1 }
+#define AARCH64_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size,l2_size, \
+				    opt_level)				\
   {									\
     num_slots,								\
     l1_size,								\
     l1_line_size,							\
-    l2_size								\
+    l2_size,								\
+    opt_level								\
   }
 
 static const struct tune_params generic_tunings =
@@ -710,7 +712,7 @@ static const struct tune_params exynosm1_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   48,	/* max_case_values.  */
-  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1, -1),
   tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
 };
@@ -786,7 +788,7 @@ static const struct tune_params qdf24xx_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1, -1),
   tune_params::AUTOPREFETCHER_STRONG,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)		/* tune_flags.  */
 };
@@ -811,7 +813,7 @@ static const struct tune_params thunderx2t99_tunings =
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
-  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1),
+  AARCH64_PREFETCH_BENEFICIAL (0, -1, 64, -1, -1),
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -8782,6 +8784,17 @@ aarch64_override_options_internal (struct gcc_options *opts)
 			   opts->x_param_values,
 			   global_options_set.x_param_values);
 
+  /* Enable software prefetching at the optimization level specified by
+     the tuning for CPUs whose tuning requests it.  Lower that threshold
+     by 1 when profile data is in use (flag_profile_use).  */
+  if (opts->x_flag_prefetch_loop_arrays < 0
+      && !opts->x_optimize_size
+      && aarch64_tune_params.prefetch.default_opt_level >= 0
+      && (opts->x_optimize
+	  + (opts->x_flag_profile_use
+	     ? 1 : 0)) >= aarch64_tune_params.prefetch.default_opt_level)
+    opts->x_flag_prefetch_loop_arrays = 1;
+
   aarch64_override_options_after_change_1 (opts);
 }
 
-- 
2.7.4

