LRA on ppc32 had some code size degradation in comparison with the
reload pass. The reason is that LRA systematically moves DFmode values
from memory to memory through two integer registers instead of through
one floating point register, as reload does.
The following patch solves the problem. It does so by preferring the
insn alternative with the smallest number of registers involved when
higher priority rules (like # of needed reloads) give the same
results.
I wish I could also use register pressure information for choosing an
alternative, but unfortunately that would result in a slower LRA because
the info is not available at this subpass (constraints).
Another wish would be to use insn length, but again that needs a
(temporary) transformation to the final result insn, which is not known
yet at this stage because we have not yet assigned hard registers to
reload pseudos or memory to spilled pseudos.
The patch also contains a clean up of function mark_not_eliminable.
The patch was bootstrapped on x86-64 and ppc64.
2011-08-26 Vladimir Makarov <vmaka...@redhat.com>
* lra-constraints.c (best_reload_nregs): New variable.
(process_alt_operands): Add preference for a smaller number of hard
registers involved. Increase reject for all failed non-registers.
* lra-eliminations.c (mark_not_eliminable): Add check on hard
register before looping on eliminations.
Index: lra-constraints.c
===================================================================
--- lra-constraints.c (revision 178120)
+++ lra-constraints.c (working copy)
@@ -1143,6 +1143,10 @@ static int best_losers, best_overall;
/* Number of small register classes used for operands of the best
alternative. */
static int best_small_class_operands_num;
+/* Overall number of hard registers used for reloads. For example, on
+ some targets we need 2 general registers to reload DFmode and only
+ one floating point register. */
+static int best_reload_nregs;
/* Overall number reflecting distances of previous reloading the same
value. It is used to improve inheritance chances. */
static int best_reload_sum;
@@ -1415,7 +1419,7 @@ process_alt_operands (int only_alternati
rtx no_subreg_operand[MAX_RECOG_OPERANDS], operand_reg[MAX_RECOG_OPERANDS];
int hard_regno[MAX_RECOG_OPERANDS];
enum machine_mode biggest_mode[MAX_RECOG_OPERANDS];
- int reload_sum;
+ int reload_nregs, reload_sum;
/* Calculate some data common for all alternatives to speed up the
function. */
@@ -1460,7 +1464,7 @@ process_alt_operands (int only_alternati
(only_alternative >= 0 && nalt != only_alternative))
continue;
- overall = losers = reject = reload_sum = 0;
+ overall = losers = reject = reload_nregs = reload_sum = 0;
for (nop = 0; nop < n_operands; nop++)
reject += (curr_static_id
->operand_alternative[nalt * n_operands + nop].reject);
@@ -2003,7 +2007,7 @@ process_alt_operands (int only_alternati
/* Input reloads can be inherited more often than output
reloads can be removed, so penalize output
reloads. */
- if (curr_static_id->operand[nop].type != OP_IN)
+ if (!REG_P (op) || curr_static_id->operand[nop].type != OP_IN)
reject++;
/* SUBREGS ??? */
if (this_alternative_matches >= 0)
@@ -2012,6 +2016,9 @@ process_alt_operands (int only_alternati
}
else if (no_regs_p && ! this_alternative_offmemok && ! constmemok)
goto fail;
+
+ if (! no_regs_p)
+ reload_nregs += ira_reg_class_max_nregs[this_alternative][mode];
}
if (early_clobber_p)
@@ -2128,7 +2135,9 @@ process_alt_operands (int only_alternati
< best_small_class_operands_num
|| (small_class_operands_num
== best_small_class_operands_num
- && best_reload_sum < reload_sum))))))
+ && (reload_nregs < best_reload_nregs
+ || (reload_nregs == best_reload_nregs
+ && best_reload_sum < reload_sum))))))))
{
for (nop = 0; nop < n_operands; nop++)
{
@@ -2145,6 +2154,7 @@ process_alt_operands (int only_alternati
best_overall = overall;
best_losers = losers;
best_small_class_operands_num = small_class_operands_num;
+ best_reload_nregs = reload_nregs;
best_reload_sum = reload_sum;
goal_alt_number = nalt;
}
Index: lra-eliminations.c
===================================================================
--- lra-eliminations.c (revision 178120)
+++ lra-eliminations.c (working copy)
@@ -671,49 +671,46 @@ mark_not_eliminable (rtx x)
case POST_DEC:
case POST_MODIFY:
case PRE_MODIFY:
- /* If we modify the source of an elimination rule, disable it. */
- for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
- if (ep->from_rtx == XEXP (x, 0)
- || (ep->to_rtx == XEXP (x, 0)
- && ep->to_rtx != hard_frame_pointer_rtx))
- setup_can_eliminate (ep, false);
-
- /* These two aren't unary operators. */
- if (code == POST_MODIFY || code == PRE_MODIFY)
- break;
-
- mark_not_eliminable (XEXP (x, 0));
- return;
-
- case SUBREG:
- mark_not_eliminable (SUBREG_REG (x));
+ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+ /* If we modify the source of an elimination rule, disable it. */
+ for (ep = reg_eliminate;
+ ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+ ep++)
+ if (ep->from_rtx == XEXP (x, 0)
+ || (ep->to_rtx == XEXP (x, 0)
+ && ep->to_rtx != hard_frame_pointer_rtx))
+ setup_can_eliminate (ep, false);
return;
case USE:
- /* If using a register that is the source of an eliminate we still
- think can be performed, note it cannot be performed since we don't
- know how this register is used. */
- for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
- if (ep->from_rtx == XEXP (x, 0) && ep->to_rtx != hard_frame_pointer_rtx)
- setup_can_eliminate (ep, false);
-
- mark_not_eliminable (XEXP (x, 0));
+ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+ /* If using a register that is the source of an eliminate we
+ still think can be performed, note it cannot be performed
+ since we don't know how this register is used. */
+ for (ep = reg_eliminate;
+ ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+ ep++)
+ if (ep->from_rtx == XEXP (x, 0)
+ && ep->to_rtx != hard_frame_pointer_rtx)
+ setup_can_eliminate (ep, false);
return;
case CLOBBER:
- /* If clobbering a register that is the replacement register for an
- elimination we still think can be performed, note that it cannot
- be performed. Otherwise, we need not be concerned about it. */
- for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
- if (ep->to_rtx == XEXP (x, 0) && ep->to_rtx != hard_frame_pointer_rtx)
- setup_can_eliminate (ep, false);
-
- mark_not_eliminable (XEXP (x, 0));
+ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER)
+ /* If clobbering a register that is the replacement register for an
+ elimination we still think can be performed, note that it cannot
+ be performed. Otherwise, we need not be concerned about it. */
+ for (ep = reg_eliminate;
+ ep < &reg_eliminate[NUM_ELIMINABLE_REGS];
+ ep++)
+ if (ep->to_rtx == XEXP (x, 0)
+ && ep->to_rtx != hard_frame_pointer_rtx)
+ setup_can_eliminate (ep, false);
return;
case SET:
/* Check for setting a register that we know about. */
- if (REG_P (SET_DEST (x)))
+ if (REG_P (SET_DEST (x)) && REGNO (SET_DEST (x)) < FIRST_PSEUDO_REGISTER)
{
/* See if this is setting the replacement register for an
elimination.