On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> wrote: > Hi, > > The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with > high branch cost. > Bootstrap and make check are in progress. > The patch boosts (up to 2.5 times improvement) several benchmarks compiled > with "-Ofast" on Silvermont > Spec2000: > +5% gain on 173.applu > +1% gain on 255.vortex > > Is it ok for trunk once it passes bootstrap and make check?
This is only a 20% increase - from 100 to 120. I would instead suggest to explore doing this change unconditionally if it helps that much. Richard. > Thanks, > Evgeny > > 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> > * config/i386/i386.c (ix86_option_override_internal): Increase > PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. > * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. > * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates > CPUs with high branch cost. > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 6337aa5..5ac10eb 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, > opts->x_param_values, > opts_set->x_param_values); > > + /* Extend full peel max insns parameter for CPUs with high branch cost. */ > + if (TARGET_HIGH_BRANCH_COST) > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, > + 120, > + opts->x_param_values, > + opts_set->x_param_values); > + > + > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
*/ > if (opts->x_flag_prefetch_loop_arrays < 0 > && HAVE_prefetch > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 2c64162..da0c57b 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > #define TARGET_INTER_UNIT_CONVERSIONS \ > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] > #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] > +#define TARGET_HIGH_BRANCH_COST > ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] > #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] > #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def > index b6b210e..04d8bf8 100644 > --- a/gcc/config/i386/x86-tune.def > +++ b/gcc/config/i386/x86-tune.def > @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", > m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | > m_ATHLON_K8 | m_AMDFAM10) > > +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This could > be > + used to tune unroll, if-cvt, inline... heuristics. */ > +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", > + m_BONNELL | m_SILVERMONT | m_INTEL) > + > > /*****************************************************************************/ > /* Integer instruction selection tuning > */ > > /*****************************************************************************/