The results are the same for Silvermont. There are no significant changes on Haswell. So I agree with Richard: let's enable this x86-wide.
Bootstrap passed. Make check is in progress. Is it ok? 2014-10-25 Evgeny Stupachenko <evstu...@gmail.com> * config/i386/i386.c (ix86_option_override_internal): Increase PARAM_MAX_COMPLETELY_PEELED_INSNS. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 6337aa5..5ac10eb 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -4081,6 +4081,12 @@ ix86_option_override_internal (bool main_args_p, opts->x_param_values, opts_set->x_param_values); + /* Extend full peel max insns parameter for x86. */ + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, + 120, + opts->x_param_values, + opts_set->x_param_values); + /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. */ if (opts->x_flag_prefetch_loop_arrays < 0 && HAVE_prefetch On Mon, Oct 13, 2014 at 4:23 PM, Jan Hubicka <hubi...@ucw.cz> wrote: >> On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> >> wrote: >> > Hi, >> > >> > The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with >> > high branch cost. >> > Bootstrap and make check are in progress. >> > The patch boosts (up to 2,5 times improve) several benchmarks compiled >> > with "-Ofast" on Silvermont >> > Spec2000: >> > +5% gain on 173.applu >> > +1% gain on 255.vortex >> > >> > Is it ok for trunk when pass bootstrap and make check? >> >> This is only a 20% increase - from 100 to 120. I would instead suggest >> to explore doing this change unconditionally if it helps that much. > > Agreed, I think the value of 100 was set a decade ago by Zdenek and me > completely > artificially. I do not recall any serious tuning of this flag. > > Note that I plan to update > https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so > PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than > tree > changing its meaning somewhat. > > Perhaps I could try to find time this or next week to update the patch so we > do > not need to do the tuning twice. 
> > Honza > >> >> Richard. >> >> > Thanks, >> > Evgeny >> > >> > 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> >> > * config/i386/i386.c (ix86_option_override_internal): Increase >> > PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. >> > * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. >> > * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates >> > CPUs with high branch cost. >> > >> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> > index 6337aa5..5ac10eb 100644 >> > --- a/gcc/config/i386/i386.c >> > +++ b/gcc/config/i386/i386.c >> > @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, >> > opts->x_param_values, >> > opts_set->x_param_values); >> > >> > + /* Extend full peel max insns parameter for CPUs with high branch cost. >> > */ >> > + if (TARGET_HIGH_BRANCH_COST) >> > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, >> > + 120, >> > + opts->x_param_values, >> > + opts_set->x_param_values); >> > + >> > + >> > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
>> > */ >> > if (opts->x_flag_prefetch_loop_arrays < 0 >> > && HAVE_prefetch >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >> > index 2c64162..da0c57b 100644 >> > --- a/gcc/config/i386/i386.h >> > +++ b/gcc/config/i386/i386.h >> > @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; >> > #define TARGET_INTER_UNIT_CONVERSIONS \ >> > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] >> > #define TARGET_FOUR_JUMP_LIMIT >> > ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] >> > +#define TARGET_HIGH_BRANCH_COST >> > ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] >> > #define TARGET_SCHEDULE >> > ix86_tune_features[X86_TUNE_SCHEDULE] >> > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] >> > #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] >> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def >> > index b6b210e..04d8bf8 100644 >> > --- a/gcc/config/i386/x86-tune.def >> > +++ b/gcc/config/i386/x86-tune.def >> > @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", >> > m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | >> > m_ATHLON_K8 | m_AMDFAM10) >> > >> > +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This >> > could be >> > + used to tune unroll, if-cvt, inline... heuristics. */ >> > +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", >> > + m_BONNELL | m_SILVERMONT | m_INTEL) >> > + >> > >> > /*****************************************************************************/ >> > /* Integer instruction selection tuning >> > */ >> > >> > /*****************************************************************************/ On Mon, Oct 13, 2014 at 3:23 PM, Jan Hubicka <hubi...@ucw.cz> wrote: >> On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> >> wrote: >> > Hi, >> > >> > The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with >> > high branch cost. >> > Bootstrap and make check are in progress. 
>> > The patch boosts (up to 2,5 times improve) several benchmarks compiled >> > with "-Ofast" on Silvermont >> > Spec2000: >> > +5% gain on 173.applu >> > +1% gain on 255.vortex >> > >> > Is it ok for trunk when pass bootstrap and make check? >> >> This is only a 20% increase - from 100 to 120. I would instead suggest >> to explore doing this change unconditionally if it helps that much. > > Agreed, I think the value of 100 was set decade ago by Zdenek and me > completely > artifically. I do not recall any serious tuning of this flag. > > Note that I plan to update > https://gcc.gnu.org/ml/gcc-patches/2013-11/msg02270.html to current tree so > PARAM_MAX_COMPLETELY_PEELED_INSNS will be used at gimple level rather than > tree > changing its meaning somewhat. > > Perhaps I could try to find time this or next week to update the patch so we > do > not need to do the tuning twice. > > Honza > >> >> Richard. >> >> > Thanks, >> > Evgeny >> > >> > 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> >> > * config/i386/i386.c (ix86_option_override_internal): Increase >> > PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. >> > * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. >> > * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates >> > CPUs with high branch cost. >> > >> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> > index 6337aa5..5ac10eb 100644 >> > --- a/gcc/config/i386/i386.c >> > +++ b/gcc/config/i386/i386.c >> > @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, >> > opts->x_param_values, >> > opts_set->x_param_values); >> > >> > + /* Extend full peel max insns parameter for CPUs with high branch cost. >> > */ >> > + if (TARGET_HIGH_BRANCH_COST) >> > + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, >> > + 120, >> > + opts->x_param_values, >> > + opts_set->x_param_values); >> > + >> > + >> > /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
>> > */ >> > if (opts->x_flag_prefetch_loop_arrays < 0 >> > && HAVE_prefetch >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >> > index 2c64162..da0c57b 100644 >> > --- a/gcc/config/i386/i386.h >> > +++ b/gcc/config/i386/i386.h >> > @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; >> > #define TARGET_INTER_UNIT_CONVERSIONS \ >> > ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] >> > #define TARGET_FOUR_JUMP_LIMIT >> > ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] >> > +#define TARGET_HIGH_BRANCH_COST >> > ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] >> > #define TARGET_SCHEDULE >> > ix86_tune_features[X86_TUNE_SCHEDULE] >> > #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] >> > #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] >> > diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def >> > index b6b210e..04d8bf8 100644 >> > --- a/gcc/config/i386/x86-tune.def >> > +++ b/gcc/config/i386/x86-tune.def >> > @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", >> > m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | >> > m_ATHLON_K8 | m_AMDFAM10) >> > >> > +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This >> > could be >> > + used to tune unroll, if-cvt, inline... heuristics. */ >> > +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", >> > + m_BONNELL | m_SILVERMONT | m_INTEL) >> > + >> > >> > /*****************************************************************************/ >> > /* Integer instruction selection tuning >> > */ >> > >> > /*****************************************************************************/