This is the last patch, switching on LRA for x86/x86-64. The patch also contains code deciding when to spill general regs into SSE regs instead of memory.

2012-09-27  Vladimir Makarov  <vmaka...@redhat.com>

    * config/i386/i386.h (enum ix86_tune_indices): Add
    X86_TUNE_GENERAL_REGS_SSE_SPILL.
    (TARGET_GENERAL_REGS_SSE_SPILL): New macro.
    * config/i386/i386.c (initial_ix86_tune_features): Set up
    X86_TUNE_GENERAL_REGS_SSE_SPILL for m_COREI7 and
    m_CORE2I7.
    (ix86_lra_p, ix86_register_bank): New functions.
    (ix86_secondary_reload): Add NON_Q_REGS, SIREG, DIREG.
    (inline_secondary_memory_needed): Change assert.
    (ix86_spill_class, ix86_spill_class_mode): New functions.
    (TARGET_LRA_P, TARGET_REGISTER_BANK, TARGET_SPILL_CLASS): New macros.
    (TARGET_SPILL_CLASS_MODE): New macro.

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 191771)
+++ config/i386/i386.c	(working copy)
@@ -2267,7 +2267,11 @@ static unsigned int initial_ix86_tune_fe
 
   /* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
      during reassociation of fp computation.  */
-  m_ATOM
+  m_ATOM,
+
+  /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
+     regs instead of memory.  */
+  m_COREI7 | m_CORE2I7
 };
 
 /* Feature tests against the various architecture variations.  */
@@ -31694,6 +31698,38 @@ ix86_free_from_memory (enum machine_mode
     }
 }
 
+/* Implement TARGET_LRA_P.  Always use LRA instead of the reload pass.  */
+static bool
+ix86_lra_p (void)
+{
+  return true;
+}
+
+/* Implement TARGET_REGISTER_BANK.  Return a bank number for HARD_REGNO.  */
+static int
+ix86_register_bank (int hard_regno)
+{
+  /* ebp and r13 as the base always want a displacement, r12 as the
+     base always wants an index.  So discourage their usage in an
+     address.  */
+  if (hard_regno == R12_REG || hard_regno == R13_REG)
+    return 4;
+  if (hard_regno == BP_REG)
+    return 2;
+  /* New x86-64 int registers result in bigger code size.  Discourage
+     them.  */
+  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
+    return 3;
+  /* New x86-64 SSE registers result in bigger code size.  Discourage
+     them.  */
+  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
+    return 3;
+  /* Usage of AX register results in smaller code.  Prefer it.  */
+  if (hard_regno == 0)
+    return 0;
+  return 1;
+}
+
 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
 
    Put float CONST_DOUBLE in the constant pool instead of fp regs.
@@ -31827,6 +31863,9 @@ ix86_secondary_reload (bool in_p, rtx x,
       && !in_p && mode == QImode
       && (rclass == GENERAL_REGS
 	  || rclass == LEGACY_REGS
+	  || rclass == NON_Q_REGS
+	  || rclass == SIREG
+	  || rclass == DIREG
 	  || rclass == INDEX_REGS))
     {
       int regno;
@@ -31936,7 +31975,7 @@ inline_secondary_memory_needed (enum reg
       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
     {
-      gcc_assert (!strict);
+      gcc_assert (!strict || lra_in_progress);
       return true;
     }
 
@@ -40483,6 +40522,39 @@ ix86_autovectorize_vector_sizes (void)
   return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
 }
 
+
+
+/* Implement TARGET_SPILL_CLASS.  Return the class of registers which can
+   be used instead of memory for spilling a pseudo of class RCLASS, or
+   NO_REGS if that is not possible or not profitable.  */
+static enum reg_class
+ix86_spill_class (enum reg_class rclass)
+{
+  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL
+      && hard_reg_set_subset_p (reg_class_contents[rclass],
+				reg_class_contents[GENERAL_REGS]))
+    return SSE_REGS;
+  return NO_REGS;
+}
+
+/* Implement TARGET_SPILL_CLASS_MODE.  Return the mode in which a pseudo
+   of MODE and RCLASS can be spilled into a register of class
+   SPILL_CLASS, or VOIDmode if that is not possible.  */
+static enum machine_mode
+ix86_spill_class_mode (enum reg_class rclass, enum reg_class spill_class,
+		       enum machine_mode mode)
+{
+  if (! TARGET_SSE || ! TARGET_GENERAL_REGS_SSE_SPILL
+      || ! hard_reg_set_subset_p (reg_class_contents[rclass],
+				  reg_class_contents[GENERAL_REGS])
+      || spill_class != SSE_REGS)
+    return VOIDmode;
+  if (mode == SImode || (TARGET_64BIT && mode == DImode))
+    return mode;
+  return VOIDmode;
+}
+
+
 /* Implement targetm.vectorize.init_cost.  */
 
 static void *
@@ -40885,6 +40957,12 @@ ix86_memmodel_check (unsigned HOST_WIDE_
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P ix86_lra_p
+
+#undef TARGET_REGISTER_BANK
+#define TARGET_REGISTER_BANK ix86_register_bank
+
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
 
@@ -40908,6 +40986,12 @@ ix86_memmodel_check (unsigned HOST_WIDE_
 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
 #endif
 
+#undef TARGET_SPILL_CLASS
+#define TARGET_SPILL_CLASS ix86_spill_class
+
+#undef TARGET_SPILL_CLASS_MODE
+#define TARGET_SPILL_CLASS_MODE ix86_spill_class_mode
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-i386.h"
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 191771)
+++ config/i386/i386.h	(working copy)
@@ -327,6 +327,7 @@ enum ix86_tune_indices {
   X86_TUNE_AVX128_OPTIMAL,
   X86_TUNE_REASSOC_INT_TO_PARALLEL,
   X86_TUNE_REASSOC_FP_TO_PARALLEL,
+  X86_TUNE_GENERAL_REGS_SSE_SPILL,
 
   X86_TUNE_LAST
 };
@@ -431,6 +432,8 @@ extern unsigned char ix86_tune_features[
 	ix86_tune_features[X86_TUNE_REASSOC_INT_TO_PARALLEL]
 #define TARGET_REASSOC_FP_TO_PARALLEL \
 	ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
+#define TARGET_GENERAL_REGS_SSE_SPILL \
+	ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
 
 /* Feature tests against the various architecture variations.  */
 enum ix86_arch_indices {

Reply via email to