If the address register is dead after a load/store operation, it looks
beneficial to use LDMIA/STMIA instead of a pair of LDR/STR instructions,
at least when optimizing for size.

E.g.
 ldr r0, [r3, #0]
 ldr r1, [r3, #4]  @ r3 is dead after
will be replaced by
 ldmia r3!, {r0, r1}

Also, when the base register is reused as the second destination register:
 ldr r2, [r3, #0]
 ldr r3, [r3, #4] @ r3 reused
will be replaced by
 ldmia r3, {r2, r3}

However, I know little about Thumb CPUs other than Cortex-M0/M0+.
1. Are there any drawbacks when optimizing for speed?
2. Might it be profitable for Thumb-2?

Regarding code size, the patch gives the following for v6-m/nofp:
       libgcc:  -52 bytes / -0.10%
Newlib's libc:  -68 bytes / -0.03%
         libm:  -96 bytes / -0.10%
    libstdc++: -140 bytes / -0.02%

Also, I have questions regarding testing the patch.
It's unclear how to do it properly; for now I compile
for the arm-none-eabi target, and make check seems to fail
on every compilable test due to missing symbols from libnosys.
I guess that arm-gnu-elf is the correct triple, but I would still
appreciate advice on the proper commands to build and run the testsuite.

Signed-off-by: Siarhei Volkau <lis8...@gmail.com>
---
 gcc/config/arm/arm-protos.h |  2 +-
 gcc/config/arm/arm.cc       |  7 ++++++-
 gcc/config/arm/thumb1.md    | 10 ++++++++--
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2cd560c9925..548bfbaccdc 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -254,7 +254,7 @@ extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
 extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
 extern void thumb1_final_prescan_insn (rtx_insn *);
 extern void thumb2_final_prescan_insn (rtx_insn *);
-extern const char *thumb_load_double_from_address (rtx *);
+extern const char *thumb_load_double_from_address (rtx *, rtx_insn *);
 extern const char *thumb_output_move_mem_multiple (int, rtx *);
 extern const char *thumb_call_via_reg (rtx);
 extern void thumb_expand_cpymemqi (rtx *);
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index b8c32db0a1d..73c2478ed77 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -28350,7 +28350,7 @@ thumb1_output_interwork (void)
    a computed memory address.  The computed address may involve a
    register which is overwritten by the load.  */
 const char *
-thumb_load_double_from_address (rtx *operands)
+thumb_load_double_from_address (rtx *operands, rtx_insn *insn)
 {
   rtx addr;
   rtx base;
@@ -28368,6 +28368,11 @@ thumb_load_double_from_address (rtx *operands)
   switch (GET_CODE (addr))
     {
     case REG:
+      if (find_reg_note (insn, REG_DEAD, addr))
+        return "ldmia\t%m1!, {%0, %H0}";
+      else if (REGNO (addr) == REGNO (operands[0]) + 1)
+        return "ldmia\t%m1, {%0, %H0}";
+
       operands[2] = adjust_address (operands[1], SImode, 4);
 
       if (REGNO (operands[0]) == REGNO (addr))
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index d7074b43f60..8da6887b560 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -637,8 +637,11 @@
     case 5:
       return \"stmia\\t%0, {%1, %H1}\";
     case 6:
-      return thumb_load_double_from_address (operands);
+      return thumb_load_double_from_address (operands, insn);
     case 7:
+      if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
+          && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
+        return \"stmia\\t%m0!, {%1, %H1}\";
       operands[2] = gen_rtx_MEM (SImode,
                             plus_constant (Pmode, XEXP (operands[0], 0), 4));
       output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
@@ -970,8 +973,11 @@
     case 2:
       return \"stmia\\t%0, {%1, %H1}\";
     case 3:
-      return thumb_load_double_from_address (operands);
+      return thumb_load_double_from_address (operands, insn);
     case 4:
+      if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
+          && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
+        return \"stmia\\t%m0!, {%1, %H1}\";
       operands[2] = gen_rtx_MEM (SImode,
                                 plus_constant (Pmode,
                                                XEXP (operands[0], 0), 4));
-- 
2.45.2

Reply via email to