https://gcc.gnu.org/g:5163cf2ae14c5e7ec730ad72680564001d0d0441

commit r15-7373-g5163cf2ae14c5e7ec730ad72680564001d0d0441
Author: Richard Earnshaw <rearn...@arm.com>
Date:   Thu Dec 19 16:00:48 2024 +0000

    arm: Use POP {pc} to return when returning [PR118089]
    
    When generating thumb2 code,
            LDM SP!, {PC}
    is a two-byte instruction, whereas
            LDR PC, [SP], #4
    is needs 4 bytes.  When optimizing for size, or when there's no obvious
    performance benefit prefer the former.
    
    gcc/ChangeLog:
    
            PR target/118089
            * config/arm/arm.cc (thumb2_expand_return): Use LDM SP!, {PC}
            when optimizing for size, or when there's no performance benefit 
over
            LDR PC, [SP], #4.
            (arm_expand_epilogue): Likewise.

Diff:
---
 gcc/config/arm/arm.cc | 62 +++++++++++++++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 4ee84361dc6e..7e2082101d83 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -27762,35 +27762,40 @@ thumb2_expand_return (bool simple_return)
       /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
         functions or adapt code to handle according to ACLE.  This path should
         not be reachable for cmse_nonsecure_entry functions though we prefer
-        to assert it for now to ensure that future code changes do not silently
-        change this behavior.  */
+        to assert it for now to ensure that future code changes do not
+        silently change this behavior.  */
       gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
       if (arm_current_function_pac_enabled_p ())
-        {
-          gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
-          arm_emit_multi_reg_pop (saved_regs_mask);
-          emit_insn (gen_aut_nop ());
-          emit_jump_insn (simple_return_rtx);
-        }
-      else if (num_regs == 1)
-        {
-          rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
-          rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
-          rtx addr = gen_rtx_MEM (SImode,
-                                  gen_rtx_POST_INC (SImode,
-                                                    stack_pointer_rtx));
-          set_mem_alias_set (addr, get_frame_alias_set ());
-          XVECEXP (par, 0, 0) = ret_rtx;
-          XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
-          RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
-          emit_jump_insn (par);
-        }
+       {
+         gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
+         arm_emit_multi_reg_pop (saved_regs_mask);
+         emit_insn (gen_aut_nop ());
+         emit_jump_insn (simple_return_rtx);
+       }
+      /* Use LDR PC, [sp], #4.  Only do this if not optimizing for size and
+        there's a known performance benefit (we don't know this exactly, but
+        preferring LDRD/STRD over LDM/STM is a reasonable proxy).  */
+      else if (num_regs == 1
+              && !optimize_size
+              && current_tune->prefer_ldrd_strd)
+       {
+         rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
+         rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
+         rtx addr = gen_rtx_MEM (SImode,
+                                 gen_rtx_POST_INC (SImode,
+                                                   stack_pointer_rtx));
+         set_mem_alias_set (addr, get_frame_alias_set ());
+         XVECEXP (par, 0, 0) = ret_rtx;
+         XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
+         RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
+         emit_jump_insn (par);
+       }
       else
-        {
-          saved_regs_mask &= ~ (1 << LR_REGNUM);
-          saved_regs_mask |=   (1 << PC_REGNUM);
-          arm_emit_multi_reg_pop (saved_regs_mask);
-        }
+       {
+         saved_regs_mask &= ~ (1 << LR_REGNUM);
+         saved_regs_mask |=   (1 << PC_REGNUM);
+         arm_emit_multi_reg_pop (saved_regs_mask);
+       }
     }
   else
     {
@@ -28204,7 +28209,10 @@ arm_expand_epilogue (bool really_return)
           return_in_pc = true;
         }
 
-      if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
+      if (num_regs == 1
+         && !optimize_size
+         && current_tune->prefer_ldrd_strd
+         && !(IS_INTERRUPT (func_type) && return_in_pc))
         {
           for (i = 0; i <= LAST_ARM_REGNUM; i++)
             if (saved_regs_mask & (1 << i))

Reply via email to