https://gcc.gnu.org/g:5163cf2ae14c5e7ec730ad72680564001d0d0441
commit r15-7373-g5163cf2ae14c5e7ec730ad72680564001d0d0441 Author: Richard Earnshaw <rearn...@arm.com> Date: Thu Dec 19 16:00:48 2024 +0000 arm: Use POP {pc} to return when returning [PR118089] When generating thumb2 code, LDM SP!, {PC} is a two-byte instruction, whereas LDR PC, [SP], #4 needs 4 bytes. When optimizing for size, or when there's no obvious performance benefit, prefer the former. gcc/ChangeLog: PR target/118089 * config/arm/arm.cc (thumb2_expand_return): Use LDM SP!, {PC} when optimizing for size, or when there's no performance benefit over LDR PC, [SP], #4. (arm_expand_epilogue): Likewise. Diff: --- gcc/config/arm/arm.cc | 62 +++++++++++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 4ee84361dc6e..7e2082101d83 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -27762,35 +27762,40 @@ thumb2_expand_return (bool simple_return) /* TODO: Verify that this path is never taken for cmse_nonsecure_entry functions or adapt code to handle according to ACLE. This path should not be reachable for cmse_nonsecure_entry functions though we prefer - to assert it for now to ensure that future code changes do not silently - change this behavior. */ + to assert it for now to ensure that future code changes do not + silently change this behavior. 
*/ gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ())); if (arm_current_function_pac_enabled_p ()) - { - gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); - arm_emit_multi_reg_pop (saved_regs_mask); - emit_insn (gen_aut_nop ()); - emit_jump_insn (simple_return_rtx); - } - else if (num_regs == 1) - { - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); - rtx reg = gen_rtx_REG (SImode, PC_REGNUM); - rtx addr = gen_rtx_MEM (SImode, - gen_rtx_POST_INC (SImode, - stack_pointer_rtx)); - set_mem_alias_set (addr, get_frame_alias_set ()); - XVECEXP (par, 0, 0) = ret_rtx; - XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr); - RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1; - emit_jump_insn (par); - } + { + gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM))); + arm_emit_multi_reg_pop (saved_regs_mask); + emit_insn (gen_aut_nop ()); + emit_jump_insn (simple_return_rtx); + } + /* Use LDR PC, [sp], #4. Only do this if not optimizing for size and + there's a known performance benefit (we don't know this exactly, but + preferring LDRD/STRD over LDM/STM is a reasonable proxy). 
*/ + else if (num_regs == 1 + && !optimize_size + && current_tune->prefer_ldrd_strd) + { + rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); + rtx reg = gen_rtx_REG (SImode, PC_REGNUM); + rtx addr = gen_rtx_MEM (SImode, + gen_rtx_POST_INC (SImode, + stack_pointer_rtx)); + set_mem_alias_set (addr, get_frame_alias_set ()); + XVECEXP (par, 0, 0) = ret_rtx; + XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr); + RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1; + emit_jump_insn (par); + } else - { - saved_regs_mask &= ~ (1 << LR_REGNUM); - saved_regs_mask |= (1 << PC_REGNUM); - arm_emit_multi_reg_pop (saved_regs_mask); - } + { + saved_regs_mask &= ~ (1 << LR_REGNUM); + saved_regs_mask |= (1 << PC_REGNUM); + arm_emit_multi_reg_pop (saved_regs_mask); + } } else { @@ -28204,7 +28209,10 @@ arm_expand_epilogue (bool really_return) return_in_pc = true; } - if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc)) + if (num_regs == 1 + && !optimize_size + && current_tune->prefer_ldrd_strd + && !(IS_INTERRUPT (func_type) && return_in_pc)) { for (i = 0; i <= LAST_ARM_REGNUM; i++) if (saved_regs_mask & (1 << i))