I need to collect data from Haswell, but the patch should not help
its performance much; it would just increase code size.

On Mon, Oct 13, 2014 at 12:01 PM, Richard Biener
<richard.guent...@gmail.com> wrote:
> On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> wrote:
>> Hi,
>>
>> The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
>> high branch cost.
>> Bootstrap and make check are in progress.
>> The patch boosts several benchmarks (by up to 2.5 times) when compiled
>> with "-Ofast" on Silvermont
>> Spec2000:
>> +5% gain on 173.applu
>> +1% gain on 255.vortex
>>
>> Is it ok for trunk once it passes bootstrap and make check?
>
> This is only a 20% increase - from 100 to 120.  I would instead suggest
> to explore doing this change unconditionally if it helps that much.
>
> Richard.
>
>> Thanks,
>> Evgeny
>>
>> 2014-10-10  Evgeny Stupachenko  <evstu...@gmail.com>
>>         * config/i386/i386.c (ix86_option_override_internal): Increase
>>         PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
>>         * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
>>         * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
>>         CPUs with high branch cost.
>>
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 6337aa5..5ac10eb 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
>>                          opts->x_param_values,
>>                          opts_set->x_param_values);
>>
>> +  /* Extend full peel max insns parameter for CPUs with high branch cost.  */
>> +  if (TARGET_HIGH_BRANCH_COST)
>> +    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
>> +                          120,
>> +                          opts->x_param_values,
>> +                          opts_set->x_param_values);
>> +
>> +
>>    /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
>>    if (opts->x_flag_prefetch_loop_arrays < 0
>>        && HAVE_prefetch
>> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> index 2c64162..da0c57b 100644
>> --- a/gcc/config/i386/i386.h
>> +++ b/gcc/config/i386/i386.h
>> @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>>  #define TARGET_INTER_UNIT_CONVERSIONS \
>>         ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
>>  #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
>> +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
>>  #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
>>  #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
>>  #define TARGET_USE_INCDEC      ix86_tune_features[X86_TUNE_USE_INCDEC]
>> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
>> index b6b210e..04d8bf8 100644
>> --- a/gcc/config/i386/x86-tune.def
>> +++ b/gcc/config/i386/x86-tune.def
>> @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
>>            m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
>>           m_ATHLON_K8 | m_AMDFAM10)
>>
>> +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This could be
>> +   used to tune unroll, if-cvt, inline... heuristics.  */
>> +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost",
>> +          m_BONNELL | m_SILVERMONT | m_INTEL)
>> +
>>  /*****************************************************************************/
>>  /* Integer instruction selection tuning                                     */
>>  /*****************************************************************************/

Reply via email to