On Fri, Oct 10, 2014 at 5:40 PM, Evgeny Stupachenko <evstu...@gmail.com> wrote:
> Hi,
>
> The patch increases PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with
> high branch cost.
> Bootstrap and make check are in progress.
> The patch boosts several benchmarks compiled with "-Ofast" on
> Silvermont (up to a 2.5x improvement).
> Spec2000:
> +5% gain on 173.applu
> +1% gain on 255.vortex
>
> Is it OK for trunk once bootstrap and make check pass?

This is only a 20% increase - from 100 to 120.  I would instead suggest
exploring making this change unconditionally if it helps that much.

Richard.

> Thanks,
> Evgeny
>
> 2014-10-10  Evgeny Stupachenko  <evstu...@gmail.com>
>         * config/i386/i386.c (ix86_option_override_internal): Increase
>         PARAM_MAX_COMPLETELY_PEELED_INSNS for CPUs with high branch cost.
>         * config/i386/i386.h (TARGET_HIGH_BRANCH_COST): New.
>         * config/i386/x86-tune.def (X86_TUNE_HIGH_BRANCH_COST): Indicates
>         CPUs with high branch cost.
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 6337aa5..5ac10eb 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -4081,6 +4081,14 @@ ix86_option_override_internal (bool main_args_p,
>                          opts->x_param_values,
>                          opts_set->x_param_values);
>
> +  /* Extend full peel max insns parameter for CPUs with high branch cost.  */
> +  if (TARGET_HIGH_BRANCH_COST)
> +    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
> +                          120,
> +                          opts->x_param_values,
> +                          opts_set->x_param_values);
> +
> +
>    /* Enable sw prefetching at -O3 for CPUS that prefetching is helpful.  */
>    if (opts->x_flag_prefetch_loop_arrays < 0
>        && HAVE_prefetch
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 2c64162..da0c57b 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -415,6 +415,7 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
>  #define TARGET_INTER_UNIT_CONVERSIONS \
>         ix86_tune_features[X86_TUNE_INTER_UNIT_CONVERSIONS]
>  #define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
> +#define TARGET_HIGH_BRANCH_COST ix86_tune_features[X86_TUNE_HIGH_BRANCH_COST]
>  #define TARGET_SCHEDULE                ix86_tune_features[X86_TUNE_SCHEDULE]
>  #define TARGET_USE_BT          ix86_tune_features[X86_TUNE_USE_BT]
>  #define TARGET_USE_INCDEC      ix86_tune_features[X86_TUNE_USE_INCDEC]
> diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
> index b6b210e..04d8bf8 100644
> --- a/gcc/config/i386/x86-tune.def
> +++ b/gcc/config/i386/x86-tune.def
> @@ -208,6 +208,11 @@ DEF_TUNE (X86_TUNE_FOUR_JUMP_LIMIT, "four_jump_limit",
>            m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_INTEL |
>           m_ATHLON_K8 | m_AMDFAM10)
>
> +/* X86_TUNE_HIGH_BRANCH_COST: Some CPUs have higher branch cost.  This could be
> +   used to tune unroll, if-cvt, inline... heuristics.  */
> +DEF_TUNE (X86_TUNE_HIGH_BRANCH_COST, "high_branch_cost",
> +          m_BONNELL | m_SILVERMONT | m_INTEL)
> +
>  /*****************************************************************************/
>  /* Integer instruction selection tuning                                      */
>  /*****************************************************************************/

Reply via email to