If the address register is dead after a load/store operation, it looks beneficial to use LDMIA/STMIA instead of a pair of LDR/STR instructions, at least when optimizing for size.
E.g. ldr r0, [r3, #0]; ldr r1, [r3, #4] @ r3 is dead afterwards will be replaced by ldmia r3!, {r0, r1}. It is also legal when the address register is reused as a destination: ldr r2, [r3, #0]; ldr r3, [r3, #4] @ r3 reused will be replaced by ldmia r3, {r2, r3}. However, I know little about Thumb CPUs other than Cortex-M0/M0+. 1. Are there any drawbacks when optimizing for speed? 2. Might it be profitable for Thumb-2? Regarding code size, the patch gives the following for v6-m/nofp: libgcc: -52 bytes / -0.10% Newlib's libc: -68 bytes / -0.03% libm: -96 bytes / -0.10% libstdc++: -140 bytes / -0.02% Also, I have questions regarding testing the patch. It is unclear to me how to do it properly; for now I compile for the arm-none-eabi target, and make check seems to fail on every test that compiles due to missing symbols from libnosys. I guess that arm-gnu-elf is the correct triple, but I would still appreciate advice on the proper commands to build and run the testsuite. Signed-off-by: Siarhei Volkau <lis8...@gmail.com> --- gcc/config/arm/arm-protos.h | 2 +- gcc/config/arm/arm.cc | 7 ++++++- gcc/config/arm/thumb1.md | 10 ++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 2cd560c9925..548bfbaccdc 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -254,7 +254,7 @@ extern int thumb_shiftable_const (unsigned HOST_WIDE_INT); extern enum arm_cond_code maybe_get_arm_condition_code (rtx); extern void thumb1_final_prescan_insn (rtx_insn *); extern void thumb2_final_prescan_insn (rtx_insn *); -extern const char *thumb_load_double_from_address (rtx *); +extern const char *thumb_load_double_from_address (rtx *, rtx_insn *); extern const char *thumb_output_move_mem_multiple (int, rtx *); extern const char *thumb_call_via_reg (rtx); extern void thumb_expand_cpymemqi (rtx *); diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index b8c32db0a1d..73c2478ed77 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -28350,7 +28350,7 @@ 
thumb1_output_interwork (void) a computed memory address. The computed address may involve a register which is overwritten by the load. */ const char * -thumb_load_double_from_address (rtx *operands) +thumb_load_double_from_address (rtx *operands, rtx_insn *insn) { rtx addr; rtx base; @@ -28368,6 +28368,11 @@ thumb_load_double_from_address (rtx *operands) switch (GET_CODE (addr)) { case REG: + if (find_reg_note (insn, REG_DEAD, addr)) + return "ldmia\t%m1!, {%0, %H0}"; + else if (REGNO (addr) == REGNO (operands[0]) + 1) + return "ldmia\t%m1, {%0, %H0}"; + operands[2] = adjust_address (operands[1], SImode, 4); if (REGNO (operands[0]) == REGNO (addr)) diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md index d7074b43f60..8da6887b560 100644 --- a/gcc/config/arm/thumb1.md +++ b/gcc/config/arm/thumb1.md @@ -637,8 +637,11 @@ case 5: return \"stmia\\t%0, {%1, %H1}\"; case 6: - return thumb_load_double_from_address (operands); + return thumb_load_double_from_address (operands, insn); case 7: + if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0)) + && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0))) + return \"stmia\\t%m0!, {%1, %H1}\"; operands[2] = gen_rtx_MEM (SImode, plus_constant (Pmode, XEXP (operands[0], 0), 4)); output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands); @@ -970,8 +973,11 @@ case 2: return \"stmia\\t%0, {%1, %H1}\"; case 3: - return thumb_load_double_from_address (operands); + return thumb_load_double_from_address (operands, insn); case 4: + if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0)) + && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0))) + return \"stmia\\t%m0!, {%1, %H1}\"; operands[2] = gen_rtx_MEM (SImode, plus_constant (Pmode, XEXP (operands[0], 0), 4)); -- 2.45.2