On Wed, Feb 19, 2025 at 9:06 PM Jan Hubicka <hubi...@ucw.cz> wrote:
>
> Hi,
> this is a variant of a hook I benchmarked on cpu2016 with -Ofast -flto
> and -O2 -flto.  For non -Os and no Windows ABI, it should be practically
> the same as your variant that was simply returning mem_cost - 2.
>
I've tested -O2 and (-Ofast -march=native) with SPEC2017 on SPR; the
results are mostly neutral (small improvement on povray).

> It seems mostly SPEC neutral. With -O2 -flto there is a
> small 4% improvement on povray (which was mentioned earlier) and also a
> 5% regression on perlbench.
>
> I will check to see if I can figure out what is going on with
> perlbench. However, I realized that -flto is probably hiding some of the
> differences because of cross-module inlining and IPA-RA, so I am
> retesting with -O2 alone and -O2 -fno-ipa-ra to stress the costs a little
> more.
>
> I also noticed that move costs for -Os are not really set according to
> size of the instructions, so I will experiment with fixing that
> incrementally.
>
> Honza
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 560e6525b56..3d09448c326 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -20713,12 +20713,27 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
>    return false;
>  }
>
> -/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE.  */
> +/* Implement TARGET_CALLEE_SAVE_COST.  */
>
>  static int
> -ix86_ira_callee_saved_register_cost_scale (int)
> -{
> -  return 1;
> +ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
> +                      unsigned int, int mem_cost, const HARD_REG_SET &, bool)
> +{
> +  /* Account for the fact that push and pop are shorter and do their
> +     own allocation and deallocation.  */
> +  if (GENERAL_REGNO_P (hard_regno))
> +    {
> +      /* push is 1 byte while typical spill is 4-5 bytes.
> +        ??? We probably should adjust size costs accordingly.
> +        Costs are relative to reg-reg move that has 2 bytes for 32bit
> +        and 3 bytes otherwise.  */
> +      if (optimize_function_for_size_p (cfun))
> +       return 1;
> +      /* Be sure that no cost table sets cost to 2, so we end up with 0.  */
> +      gcc_checking_assert (mem_cost > 2);
> +      return mem_cost - 2;
> +    }
> +  return mem_cost;
>  }
>
>  /* Return true if a set of DST by the expression SRC should be allowed.
> @@ -27199,9 +27214,8 @@ ix86_libgcc_floating_mode_supported_p
>  #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
>  #undef TARGET_CLASS_LIKELY_SPILLED_P
>  #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
> -#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
> -#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \
> -  ix86_ira_callee_saved_register_cost_scale
> +#undef TARGET_CALLEE_SAVE_COST
> +#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
>
>  #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
>  #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \



-- 
BR,
Hongtao

Reply via email to