Generate epilogue using LDRD in Thumb mode when prefer_ldrd_strd is set in
tune_params.
ChangeLog
gcc/
2012-09-13 Sameera Deshpande <sameera.deshpa...@arm.com>
Greta Yorsh <greta.yo...@arm.com>
* config/arm/arm.c (thumb2_emit_ldrd_pop): New function.
(arm_expand_epilogue): Use the new function.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 1212a93..f330da3 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -16150,6 +16150,143 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
REG_NOTES (par) = dwarf;
}
+/* Emit the Thumb-2 epilogue pop of the registers in SAVED_REGS_MASK. Registers
+ are loaded in pairs, each pair as a two-SET PARALLEL meant to be recognized as
+ an LDRD pattern. A single leftover register (which may be PC) is loaded with
+ LDR; a leftover register together with PC is popped by a multi-register pop. */
+static void
+thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
+{
+ int num_regs = 0;
+ int i, j;
+ rtx par = NULL_RTX;
+ rtx dwarf = NULL_RTX;
+ rtx tmp, reg, tmp1;
+ bool return_in_pc;
+
+ return_in_pc = (saved_regs_mask & (1 << PC_REGNUM)) ? true : false;
+ for (i = 0; i <= LAST_ARM_REGNUM; i++)
+ if (saved_regs_mask & (1 << i))
+ num_regs++;
+
+ gcc_assert (num_regs && num_regs <= 16);
+
+ /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
+ to be popped. So, if num_regs is even, now it will become odd,
+ and we can generate pop with PC. If num_regs is odd, it will be
+ even now, and ldr with return can be generated for PC. */
+ if (return_in_pc)
+ num_regs--;
+
+ /* Var j iterates over all the registers to gather all the registers in
+ saved_regs_mask. Var i gives index of saved registers in stack frame.
+ A PARALLEL RTX of register-pair is created here, so that pattern for
+ LDRD can be matched. As PC is always last register to be popped, and
+ we have already decremented num_regs if PC is present, we don't have to
+ worry about PC in this loop. */
+ for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
+ if (saved_regs_mask & (1 << j))
+ {
+ gcc_assert (j != SP_REGNUM);
+
+ /* Create RTX for memory load. */
+ reg = gen_rtx_REG (SImode, j);
+ tmp = gen_rtx_SET (SImode,
+ reg,
+ gen_frame_mem (SImode,
+ plus_constant (Pmode,
+ stack_pointer_rtx, 4 * i)));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+
+ if (i % 2 == 0)
+ {
+ /* When saved-register index (i) is even, the RTX to be emitted is
+ yet to be created. Hence create it first. The LDRD pattern we
+ are generating is :
+ [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
+ (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
+ where target registers need not be consecutive. */
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+ dwarf = NULL_RTX;
+ }
+
+ /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
+ added as 0th element and if i is odd, reg_i is added as 1st element
+ of LDRD pattern shown above. */
+ XVECEXP (par, 0, (i % 2)) = tmp;
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+ if ((i % 2) == 1)
+ {
+ /* When saved-register index (i) is odd, RTXs for both the registers
+ to be loaded are generated in above given LDRD pattern, and the
+ pattern can be emitted now. */
+ par = emit_insn (par);
+ REG_NOTES (par) = dwarf;
+ }
+
+ i++;
+ }
+
+ /* If num_regs (which no longer counts PC) is odd AND return_in_pc is false OR
+ num_regs is even AND return_in_pc is true, exactly one register is left and
+ is popped using LDR. It can be PC as well. Hence, adjust the stack first and
+ then LDR with post increment. */
+
+ /* Increment the stack pointer past the i 4-byte registers that were
+ restored by the paired loads above. */
+ tmp = gen_rtx_SET (VOIDmode,
+ stack_pointer_rtx,
+ plus_constant (Pmode, stack_pointer_rtx, 4 * i));
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ emit_insn (tmp);
+
+ dwarf = NULL_RTX;
+
+ if (((num_regs % 2) == 1 && !return_in_pc)
+ || ((num_regs % 2) == 0 && return_in_pc))
+ {
+ /* Scan for the single register to be popped. Skip until the saved
+ register is found. */
+ for (; (saved_regs_mask & (1 << j)) == 0; j++);
+
+ /* Gen LDR with post increment here. */
+ tmp1 = gen_rtx_MEM (SImode,
+ gen_rtx_POST_INC (SImode,
+ stack_pointer_rtx));
+ set_mem_alias_set (tmp1, get_frame_alias_set ());
+
+ reg = gen_rtx_REG (SImode, j);
+ tmp = gen_rtx_SET (SImode, reg, tmp1);
+ RTX_FRAME_RELATED_P (tmp) = 1;
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+
+ if (return_in_pc)
+ {
+ /* If return_in_pc, j must be PC_REGNUM. */
+ gcc_assert (j == PC_REGNUM);
+ par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+ XVECEXP (par, 0, 0) = ret_rtx;
+ XVECEXP (par, 0, 1) = tmp;
+ par = emit_jump_insn (par);
+ }
+ else
+ {
+ par = emit_insn (tmp);
+ }
+
+ REG_NOTES (par) = dwarf;
+ }
+ else if ((num_regs % 2) == 1 && return_in_pc)
+ {
+ /* There are 2 registers to be popped (the last remaining register and PC).
+ So, generate the pattern pop_multiple_with_stack_update_and_return to pop in PC. */
+ arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
+ }
+
+ return;
+}
+
/* Calculate the size of the return value that is passed in registers. */
static unsigned
arm_size_return_regs (void)
@@ -23102,7 +23239,16 @@ arm_expand_epilogue (bool really_return)
}
else
{
- arm_emit_multi_reg_pop (saved_regs_mask);
+ if (current_tune->prefer_ldrd_strd
+ && !optimize_function_for_size_p (cfun))
+ {
+ if (TARGET_THUMB2)
+ thumb2_emit_ldrd_pop (saved_regs_mask);
+ else
+ arm_emit_multi_reg_pop (saved_regs_mask);
+ }
+ else
+ arm_emit_multi_reg_pop (saved_regs_mask);
}
if (return_in_pc == true)