Hi,
this is a variant of a hook I benchmarked on cpu2016 with -Ofast -flto
and -O2 -flto. For non -Os and no Windows ABI it should be practically the
same as your variant that was simply returning mem_cost - 2.
It seems mostly SPEC neutral. With -O2 -flto there is a
small 4% improvement on povray (which was mentioned earlier) and also a
5% regression on perlbench.
I will check to see if I can figure out what is going on with
perlbench. However, I realized that -flto is probably hiding some of
the differences because of cross-module inlining and IPA-RA, so I am
retesting with -O2 alone and -O2 -fno-ipa-ra to stress the costs a little
more.
I also noticed that move costs for -Os are not really set according to
the size of the instructions, so I will experiment with fixing that
incrementally.
Honza
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 560e6525b56..3d09448c326 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20713,12 +20713,27 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
return false;
}
-/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */
+/* Implement TARGET_CALLEE_SAVE_COST. */
static int
-ix86_ira_callee_saved_register_cost_scale (int)
-{
- return 1;
+ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
+ unsigned int, int mem_cost, const HARD_REG_SET &, bool)
+{
+ /* Account for the fact that push and pop are shorter and do their
+ own allocation and deallocation. */
+ if (GENERAL_REGNO_P (hard_regno))
+ {
+ /* push is 1 byte while typical spill is 4-5 bytes.
+ ??? We probably should adjust size costs accordingly.
+ Costs are relative to reg-reg move that has 2 bytes for 32bit
+ and 3 bytes otherwise. */
+ if (optimize_function_for_size_p (cfun))
+ return 1;
+ /* Be sure that no cost table sets cost to 2, so we end up with 0. */
+ gcc_checking_assert (mem_cost > 2);
+ return mem_cost - 2;
+ }
+ return mem_cost;
}
/* Return true if a set of DST by the expression SRC should be allowed.
@@ -27199,9 +27214,8 @@ ix86_libgcc_floating_mode_supported_p
#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
-#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
-#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \
- ix86_ira_callee_saved_register_cost_scale
+#undef TARGET_CALLEE_SAVE_COST
+#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \