On 20/01/15 13:26, Maxim Kuvyrkov wrote:
> On Jan 20, 2015, at 1:24 PM, Richard Earnshaw <rearn...@arm.com> wrote:
> ...
>>>> In general, how should someone tuning the compiler for this parameter
>>>> select a value that isn't one of (-1, m_i_q_d+1)?
>>>
>>> From my experiments it seems there are 4 reasonable values for the
>>> parameter: (-1) autopref turned off, (0) turned on in rank_for_schedule,
>>> (m_i_q_d+1) turned on everywhere. If there is a static constructor
>>> generated for tune tables and it is a problem to have it -- I can shrink
>>> acceptable values to these 3 and call it a day.
>>>
>>
>> You only mention 3 values: what was the fourth?
>
> Typo. No fourth.
>
>> It might be better then
>> to define a set of values that represent each of these cases and only
>> allow the tuning parameters to select one of those. The init code then
>> uses that set to select how to set up the various parameters to meet
>> those goals.
>>
>> So something like
>>
>> ARM_SCHED_AUTOPREF_OFF
>> ARM_SCHED_AUTOPREF_RANK
>> ARM_SCHED_AUTOPREF_FULL
>
> A patch is attached. I bootstrapped it on arm-linux-gnueabihf. OK to apply?
>
OK. Thanks. R. > -- > Maxim Kuvyrkov > www.linaro.org > > > 0001-Use-enum-for-sched_autopref-tune-settings.patch > > > From 9d9ee7c33210960970d0d78ccc7a16a58b392f85 Mon Sep 17 00:00:00 2001 > From: Maxim Kuvyrkov <maxim.kuvyr...@linaro.org> > Date: Tue, 20 Jan 2015 12:30:37 +0000 > Subject: [PATCH 1/3] Use enum for sched_autopref tune settings > > * config/arm/arm-protos.h (enum arm_sched_autopref): New constants. > (struct tune_params): Use the enum. > * arm.c (arm_*_tune): Update. > (arm_option_override): Update. > --- > gcc/config/arm/arm-protos.h | 9 +++++++- > gcc/config/arm/arm.c | 51 > +++++++++++++++++++++++++------------------ > 2 files changed, 38 insertions(+), 22 deletions(-) > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 3db7e16..307babb 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -257,6 +257,13 @@ struct cpu_vec_costs { > > struct cpu_cost_table; > > +enum arm_sched_autopref > + { > + ARM_SCHED_AUTOPREF_OFF, > + ARM_SCHED_AUTOPREF_RANK, > + ARM_SCHED_AUTOPREF_FULL > + }; > + > struct tune_params > { > bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); > @@ -292,7 +299,7 @@ struct tune_params > /* Bitfield encoding the fuseable pairs of instructions. */ > unsigned int fuseable_ops; > /* Depth of scheduling queue to check for L2 autoprefetcher. */ > - int sched_autopref_queue_depth; > + enum arm_sched_autopref sched_autopref; > }; > > extern const struct tune_params *current_tune; > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index fddd770..34672ce 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -1697,7 +1697,7 @@ const struct tune_params arm_slowmul_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. 
*/ > }; > > const struct tune_params arm_fastmul_tune = > @@ -1718,7 +1718,7 @@ const struct tune_params arm_fastmul_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* StrongARM has early execution of branches, so a sequence that is worth > @@ -1742,7 +1742,7 @@ const struct tune_params arm_strongarm_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_xscale_tune = > @@ -1763,7 +1763,7 @@ const struct tune_params arm_xscale_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_9e_tune = > @@ -1784,7 +1784,7 @@ const struct tune_params arm_9e_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_v6t2_tune = > @@ -1805,7 +1805,7 @@ const struct tune_params arm_v6t2_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* Generic Cortex tuning. Use more specific tunings if appropriate. 
*/ > @@ -1827,7 +1827,7 @@ const struct tune_params arm_cortex_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a8_tune = > @@ -1848,7 +1848,7 @@ const struct tune_params arm_cortex_a8_tune = > true, /* Prefer Neon for > stringops. */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a7_tune = > @@ -1869,7 +1869,7 @@ const struct tune_params arm_cortex_a7_tune = > true, /* Prefer Neon for > stringops. */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a15_tune = > @@ -1890,7 +1890,7 @@ const struct tune_params arm_cortex_a15_tune = > true, /* Prefer Neon for > stringops. */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - max_insn_queue_index + 1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a53_tune = > @@ -1911,7 +1911,7 @@ const struct tune_params arm_cortex_a53_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a57_tune = > @@ -1932,7 +1932,7 @@ const struct tune_params arm_cortex_a57_tune = > false, /* Prefer Neon for stringops. 
> */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of > instructions. */ > - max_insn_queue_index + 1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ > }; > > const struct tune_params arm_xgene1_tune = > @@ -1953,7 +1953,7 @@ const struct tune_params arm_xgene1_tune = > false, /* Prefer Neon for stringops. */ > 32, /* Maximum insns to > inline memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* Branches can be dual-issued on Cortex-A5, so conditional execution is > @@ -1977,7 +1977,7 @@ const struct tune_params arm_cortex_a5_tune = > true, /* Prefer Neon for > stringops. */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a9_tune = > @@ -1998,7 +1998,7 @@ const struct tune_params arm_cortex_a9_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_cortex_a12_tune = > @@ -2019,7 +2019,7 @@ const struct tune_params arm_cortex_a12_tune = > true, /* Prefer Neon for > stringops. */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single > @@ -2047,7 +2047,7 @@ const struct tune_params arm_v7m_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. 
*/ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* Cortex-M7 tuning. */ > @@ -2070,7 +2070,7 @@ const struct tune_params arm_cortex_m7_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than > @@ -2093,7 +2093,7 @@ const struct tune_params arm_v6m_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > const struct tune_params arm_fa726te_tune = > @@ -2114,7 +2114,7 @@ const struct tune_params arm_fa726te_tune = > false, /* Prefer Neon for stringops. > */ > 8, /* Maximum insns to inline > memset. */ > ARM_FUSE_NOTHING, /* Fuseable pairs of > instructions. */ > - -1 /* Sched L2 autopref depth. */ > + ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ > }; > > > @@ -3172,8 +3172,17 @@ arm_option_override (void) > > /* Look through ready list and all of queue for instructions > relevant for L2 auto-prefetcher. */ > + int param_sched_autopref_queue_depth; > + if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) > + param_sched_autopref_queue_depth = -1; > + else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) > + param_sched_autopref_queue_depth = 0; > + else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) > + param_sched_autopref_queue_depth = max_insn_queue_index + 1; > + else > + gcc_unreachable (); > maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, > - current_tune->sched_autopref_queue_depth, > + param_sched_autopref_queue_depth, > global_options.x_param_values, > global_options_set.x_param_values); > >