Hi Siarhei,
On 16/06/2024 09:51, Siarhei Volkau wrote:
> If the address register is dead after a load/store operation, it looks
> beneficial to use LDMIA/STMIA instead of a pair of LDR/STR instructions,
> at least when optimizing for size.
>
> E.g.
> ldr r0, [r3, #0]
> ldr r1, [r3, #4] @ r3 is dead after
> will be replaced by
> ldmia r3!, {r0, r1}
>
> Also, when the address register is reused as a destination, it is legal to do:
> ldr r2, [r3, #0]
> ldr r3, [r3, #4] @ r3 reused
> will be replaced by
> ldmia r3, {r2, r3}
>
> However, I know little about Thumb CPUs other than the Cortex-M0/M0+.
> 1. Are there any drawbacks when optimizing for speed?
> 2. Might it be profitable for Thumb-2?
I like the idea behind this patch, but I think I'd try first doing this as a
peephole2 rule to rewrite the address in this case. That has the additional
advantage that we then estimate the size of the instruction more accurately.
I think it would then be easy to extend this to thumb2 as well if it looks like
a win (perhaps only for -Os in the thumb2 case).
>
> Regarding code size, the patch gives the following for v6-m/nofp:
> libgcc: -52 bytes / -0.10%
> Newlib's libc: -68 bytes / -0.03%
> libm: -96 bytes / -0.10%
> libstdc++: -140 bytes / -0.02%
>
> Also, I have questions regarding testing the patch.
> It's unclear to me how to do it properly. For now I compile
> for the arm-none-eabi target, and make check seems to fail
> on every compilable test due to missing symbols from libnosys.
> I guess that arm-gnu-elf is the correct triple, but I would still
> appreciate advice on the proper commands to build & run the testsuite.
For testing, I'd start with something like
gcc/testsuite/gcc.target/arm/thumb-andsi.c as a template and adapt that for
your specific case. Matching something like "ldmia\tr[0-7]!," should be enough.
R.
>
> Signed-off-by: Siarhei Volkau <[email protected]>
> ---
> gcc/config/arm/arm-protos.h | 2 +-
> gcc/config/arm/arm.cc | 7 ++++++-
> gcc/config/arm/thumb1.md | 10 ++++++++--
> 3 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 2cd560c9925..548bfbaccdc 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -254,7 +254,7 @@ extern int thumb_shiftable_const (unsigned HOST_WIDE_INT);
> extern enum arm_cond_code maybe_get_arm_condition_code (rtx);
> extern void thumb1_final_prescan_insn (rtx_insn *);
> extern void thumb2_final_prescan_insn (rtx_insn *);
> -extern const char *thumb_load_double_from_address (rtx *);
> +extern const char *thumb_load_double_from_address (rtx *, rtx_insn *);
> extern const char *thumb_output_move_mem_multiple (int, rtx *);
> extern const char *thumb_call_via_reg (rtx);
> extern void thumb_expand_cpymemqi (rtx *);
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index b8c32db0a1d..73c2478ed77 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -28350,7 +28350,7 @@ thumb1_output_interwork (void)
> a computed memory address. The computed address may involve a
> register which is overwritten by the load. */
> const char *
> -thumb_load_double_from_address (rtx *operands)
> +thumb_load_double_from_address (rtx *operands, rtx_insn *insn)
> {
> rtx addr;
> rtx base;
> @@ -28368,6 +28368,11 @@ thumb_load_double_from_address (rtx *operands)
> switch (GET_CODE (addr))
> {
> case REG:
> + if (find_reg_note (insn, REG_DEAD, addr))
> + return "ldmia\t%m1!, {%0, %H0}";
> + else if (REGNO (addr) == REGNO (operands[0]) + 1)
> + return "ldmia\t%m1, {%0, %H0}";
> +
> operands[2] = adjust_address (operands[1], SImode, 4);
>
> if (REGNO (operands[0]) == REGNO (addr))
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index d7074b43f60..8da6887b560 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -637,8 +637,11 @@
> case 5:
> return \"stmia\\t%0, {%1, %H1}\";
> case 6:
> - return thumb_load_double_from_address (operands);
> + return thumb_load_double_from_address (operands, insn);
> case 7:
> + if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
> + && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
> + return \"stmia\\t%m0!, {%1, %H1}\";
> operands[2] = gen_rtx_MEM (SImode,
> plus_constant (Pmode, XEXP (operands[0], 0), 4));
> output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
> @@ -970,8 +973,11 @@
> case 2:
> return \"stmia\\t%0, {%1, %H1}\";
> case 3:
> - return thumb_load_double_from_address (operands);
> + return thumb_load_double_from_address (operands, insn);
> case 4:
> + if (MEM_P (operands[0]) && REG_P (XEXP (operands[0], 0))
> + && find_reg_note (insn, REG_DEAD, XEXP (operands[0], 0)))
> + return \"stmia\\t%m0!, {%1, %H1}\";
> operands[2] = gen_rtx_MEM (SImode,
> plus_constant (Pmode,
> XEXP (operands[0], 0), 4));