I need to collect data from Haswell, but the patch should not help its performance much; it would just increase code size.
On Mon, Oct 13, 2014 at 12:01 PM, Richard Biener <richard.guent...@gmail.com> wrote: > On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> > wrote: >> Hi, >> >> The patch increase PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with >> high branch cost. >> Bootstrap and make check are in progress. >> The patch boosts (up to 2,5 times improve) several benchmarks compiled >> with "-Ofast" on Silvermont >> Spec2000: >> +5% gain on 173.applu >> +1% gain on 255.vortex >> >> Is it ok for trunk when pass bootstrap and make check? > > This is only a 20% increase - from 100 to 120. I would instead suggest > to explore doing this change unconditionally if it helps that much. > > Richard. > >> Thanks, >> Evgeny >> >> 2014-10-10 Evgeny Stupachenko <evstu...@gmail.com> >> * config/i386/i386.c (ix86_option_override_internal): Increase >> PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost. >> * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New. >> * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates >> CPUs with high branch cost. >> >> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >> index 6337aa5..5ac10eb 100644 >> --- a/gcc/config/i386/i386.c >> +++ b/gcc/config/i386/i386.c >> @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p, >> opts->x_param_values, >> opts_set->x_param_values); >> >> + /* Extend full peel max insns parameter for CPUs with high branch cost. >> */ >> + if (TARGET_HIGH_BRANCH_COST) >> + maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, >> + 120, >> + opts->x_param_values, >> + opts_set->x_param_values); >> + >> + >> /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful. 
*/ >> if (opts->x_flag_prefetch_loop_arrays < 0 >> && HAVE_prefetch >> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h >> index 2c64162..da0c57b 100644 >> --- a/gcc/config/i386/i386.h >> +++ b/gcc/config/i386/i386.h >> @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; >> #define TARGET_INTER_UNIT_CONVERSIONS \ >> ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS] >> #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT] >> +#define TARGET_HIGH_BRANCH_COST >> ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST] >> #define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE] >> #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] >> #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] >> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def >> index b6b210e..04d8bf8 100644 >> --- a/gcc/config/i386/x86-tune.def >> +++ b/gcc/config/i386/x86-tune.def >> @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit", >> m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL | >> m_ATHLON_K8 | m_AMDFAM10) >> >> +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost. This >> could be >> + used to tune unroll, if-cvt, inline... heuristics. */ >> +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost", >> + m_BONNELL | m_SILVERMONT | m_INTEL) >> + >> >> /*****************************************************************************/ >> /* Integer instruction selection tuning >> */ >> >> /*****************************************************************************/