This is the last patch switching on LRA for x86/x86-64. The patch also
contains code deciding when to spill general regs into SSE regs
instead of memory.
2012-09-27 Vladimir Makarov <vmaka...@redhat.com>
* config/i386/i386.h (enum ix86_tune_indices): Add
X86_TUNE_GENERAL_REGS_SSE_SPILL.
(TARGET_GENERAL_REGS_SSE_SPILL): New macro.
* config/i386/i386.c (initial_ix86_tune_features): Set up
X86_TUNE_GENERAL_REGS_SSE_SPILL for m_COREI7 and
m_CORE2I7.
(ix86_lra_p, ix86_register_bank): New functions.
(ix86_secondary_reload): Add NON_Q_REGS, SIREG, DIREG.
(inline_secondary_memory_needed): Change assert.
(ix86_spill_class, ix86_spill_class_mode): New functions.
(TARGET_LRA_P, TARGET_REGISTER_BANK, TARGET_SPILL_CLASS): New macros.
(TARGET_SPILL_CLASS_MODE): New macro.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 191771)
+++ config/i386/i386.c (working copy)
@@ -2267,7 +2267,11 @@ static unsigned int initial_ix86_tune_fe
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
during reassociation of fp computation. */
- m_ATOM
+ m_ATOM,
+
+ /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
+ regs instead of memory. */
+ m_COREI7 | m_CORE2I7
};
/* Feature tests against the various architecture variations. */
@@ -31694,6 +31698,38 @@ ix86_free_from_memory (enum machine_mode
}
}
+/* Return true if we use LRA instead of reload pass. */
+static bool
+ix86_lra_p (void)
+{
+ return true;
+}
+
+/* Return a register bank number for hard reg HARD_REGNO. */
+static int
+ix86_register_bank (int hard_regno)
+{
+ /* ebp and r13 as the base always want a displacement, r12 as the
+ base always wants an index. So discourage their usage in an
+ address. */
+ if (hard_regno == R12_REG || hard_regno == R13_REG)
+ return 4;
+ if (hard_regno == BP_REG)
+ return 2;
+ /* New x86-64 int registers result in bigger code size. Discourage
+ them. */
+ if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
+ return 3;
+ /* New x86-64 SSE registers result in bigger code size. Discourage
+ them. */
+ if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
+ return 3;
+ /* Usage of AX register results in smaller code. Prefer it. */
+ if (hard_regno == 0)
+ return 0;
+ return 1;
+}
+
/* Implement TARGET_PREFERRED_RELOAD_CLASS.
Put float CONST_DOUBLE in the constant pool instead of fp regs.
@@ -31827,6 +31863,9 @@ ix86_secondary_reload (bool in_p, rtx x,
&& !in_p && mode == QImode
&& (rclass == GENERAL_REGS
|| rclass == LEGACY_REGS
+ || rclass == NON_Q_REGS
+ || rclass == SIREG
+ || rclass == DIREG
|| rclass == INDEX_REGS))
{
int regno;
@@ -31936,7 +31975,7 @@ inline_secondary_memory_needed (enum reg
|| MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
|| MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
{
- gcc_assert (!strict);
+ gcc_assert (!strict || lra_in_progress);
return true;
}
@@ -40483,6 +40522,39 @@ ix86_autovectorize_vector_sizes (void)
return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
}
+
+
+/* Return class of registers which could be used for pseudo of class
+ RCLASS for spilling instead of memory. Return NO_REGS if it is not
+ possible or non-profitable. */
+static enum reg_class
+ix86_spill_class (enum reg_class rclass)
+{
+ if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL
+ && hard_reg_set_subset_p (reg_class_contents[rclass],
+ reg_class_contents[GENERAL_REGS]))
+ return SSE_REGS;
+ return NO_REGS;
+}
+
+/* Return mode in which pseudo of MODE and RCLASS can be spilled into
+ a register of class SPILL_CLASS. Return VOIDmode if it is not
+ possible. */
+static enum machine_mode
+ix86_spill_class_mode (enum reg_class rclass, enum reg_class spill_class,
+ enum machine_mode mode)
+{
+ if (! TARGET_SSE || ! TARGET_GENERAL_REGS_SSE_SPILL
+ || ! hard_reg_set_subset_p (reg_class_contents[rclass],
+ reg_class_contents[GENERAL_REGS])
+ || spill_class != SSE_REGS)
+ return VOIDmode;
+ if (mode == SImode || (TARGET_64BIT && mode == DImode))
+ return mode;
+ return VOIDmode;
+}
+
+
/* Implement targetm.vectorize.init_cost. */
static void *
@@ -40885,6 +40957,12 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P ix86_lra_p
+
+#undef TARGET_REGISTER_BANK
+#define TARGET_REGISTER_BANK ix86_register_bank
+
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
@@ -40908,6 +40986,12 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
#endif
+#undef TARGET_SPILL_CLASS
+#define TARGET_SPILL_CLASS ix86_spill_class
+
+#undef TARGET_SPILL_CLASS_MODE
+#define TARGET_SPILL_CLASS_MODE ix86_spill_class_mode
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-i386.h"
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h (revision 191771)
+++ config/i386/i386.h (working copy)
@@ -327,6 +327,7 @@ enum ix86_tune_indices {
X86_TUNE_AVX128_OPTIMAL,
X86_TUNE_REASSOC_INT_TO_PARALLEL,
X86_TUNE_REASSOC_FP_TO_PARALLEL,
+ X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_LAST
};
@@ -431,6 +432,8 @@ extern unsigned char ix86_tune_features[
ix86_tune_features[X86_TUNE_REASSOC_INT_TO_PARALLEL]
#define TARGET_REASSOC_FP_TO_PARALLEL \
ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
+#define TARGET_GENERAL_REGS_SSE_SPILL \
+ ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {