On 20/06/2024 08:24, Siarhei Volkau wrote:
> If the address register is dead after load/store operation it looks
> beneficial to use LDMIA/STMIA instead of pair of LDR/STR instructions,
> at least if optimizing for size.
> 
> Changes v2 -> v3:
>  - switching to mixed approach (insn+peep2)
>  - keep memory attributes in peepholes
>  - handle stmia corner cases
> 
> Changes v1 -> v2:
>  - switching to peephole2 approach
>  - added test case
> 
> gcc/ChangeLog:
> 
>         * config/arm/arm.cc (thumb_load_double_from_address): Emit ldmia
>         when address reg rewritten by load.
> 
>         * config/arm/thumb1.md (peephole2 to rewrite DI/DF load): New.
>         (peephole2 to rewrite DI/DF store): New.
> 
>       * config/arm/iterators.md (DIDF): New.
> 
> gcc/testsuite:
> 
>         * gcc.target/arm/thumb1-load-store-64bit.c: Add new test.

I made a couple of cleanups and pushed this.  My testing of the cleanup also 
identified another corner case for the ldm instruciton: if the result of the 
load is not used (but it can't be eliminated because the address is marked 
volatile), then we could end up with
        ldm r0!, {r0, r1}
Which of course is unpredictable.  So we need to test not only that r0 is dead 
but that it isn't written by the load either.

Anyway, thanks for the patch.

R.

> 
> Signed-off-by: Siarhei Volkau <lis8...@gmail.com>
> ---
>  gcc/config/arm/arm.cc                         |  8 ++--
>  gcc/config/arm/iterators.md                   |  3 ++
>  gcc/config/arm/thumb1.md                      | 37 ++++++++++++++++++-
>  .../gcc.target/arm/thumb1-load-store-64bit.c  | 16 ++++++++
>  4 files changed, 58 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c
> 
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index b8c32db0a1d..2734ab3bce1 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -28368,15 +28368,13 @@ thumb_load_double_from_address (rtx *operands)
>    switch (GET_CODE (addr))
>      {
>      case REG:
> -      operands[2] = adjust_address (operands[1], SImode, 4);
> -
> -      if (REGNO (operands[0]) == REGNO (addr))
> +      if (reg_overlap_mentioned_p (addr, operands[0]))
>       {
> -       output_asm_insn ("ldr\t%H0, %2", operands);
> -       output_asm_insn ("ldr\t%0, %1", operands);
> +       output_asm_insn ("ldmia\t%m1, {%0, %H0}", operands);
>       }
>        else
>       {
> +       operands[2] = adjust_address (operands[1], SImode, 4);
>         output_asm_insn ("ldr\t%0, %1", operands);
>         output_asm_insn ("ldr\t%H0, %2", operands);
>       }
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 8d066fcf05d..09046bff83b 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -50,6 +50,9 @@ (define_mode_iterator QHSD [QI HI SI DI])
>  ;; A list of the 32bit and 64bit integer modes
>  (define_mode_iterator SIDI [SI DI])
>  
> +;; A list of the 64bit modes for thumb1.
> +(define_mode_iterator DIDF [DI DF])
> +
>  ;; A list of atomic compare and swap success return modes
>  (define_mode_iterator CCSI [(CC_Z "TARGET_32BIT") (SI "TARGET_THUMB1")])
>  
> diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
> index d7074b43f60..c9a4201fcc9 100644
> --- a/gcc/config/arm/thumb1.md
> +++ b/gcc/config/arm/thumb1.md
> @@ -2055,4 +2055,39 @@ (define_insn "thumb1_stack_protect_test_insn"
>     (set_attr "conds" "clob")
>     (set_attr "type" "multiple")]
>  )
> -
> +
> +;; match patterns usable by ldmia/stmia
> +(define_peephole2
> +  [(set (match_operand:DIDF 0 "low_register_operand" "")
> +     (match_operand:DIDF 1 "memory_operand" ""))]
> +  "TARGET_THUMB1
> +   && REG_P (XEXP (operands[1], 0))
> +   && REGNO_REG_CLASS (REGNO (XEXP (operands[1], 0))) == LO_REGS
> +   && peep2_reg_dead_p (1, XEXP (operands[1], 0))"
> +  [(set (match_dup 0)
> +     (match_dup 1))]
> +  "
> +  {
> +    operands[1] = change_address (operands[1], VOIDmode,
> +                               gen_rtx_POST_INC (SImode,
> +                                                 XEXP (operands[1], 0)));
> +  }"
> +)
> +
> +(define_peephole2
> +  [(set (match_operand:DIDF 1 "memory_operand" "")
> +     (match_operand:DIDF 0 "low_register_operand" ""))]
> +  "TARGET_THUMB1
> +   && REG_P (XEXP (operands[1], 0))
> +   && REGNO_REG_CLASS (REGNO (XEXP (operands[1], 0))) == LO_REGS
> +   && peep2_reg_dead_p (1, XEXP (operands[1], 0))
> +   && REGNO (XEXP (operands[1], 0)) != (REGNO (operands[0]) + 1)"
> +  [(set (match_dup 1)
> +     (match_dup 0))]
> +  "
> +  {
> +    operands[1] = change_address (operands[1], VOIDmode,
> +                               gen_rtx_POST_INC (SImode,
> +                                                 XEXP (operands[1], 0)));
> +  }"
> +)
> diff --git a/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c 
> b/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c
> new file mode 100644
> index 00000000000..167fa9ec876
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mthumb -Os" }  */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +
> +void copy_df(double *dst, const double *src)
> +{
> +    *dst = *src;
> +}
> +
> +void copy_di(unsigned long long *dst, const unsigned long long *src)
> +{
> +    *dst = *src;
> +}
> +
> +/* { dg-final { scan-assembler-times "ldmia\tr\[0-7\]" 2 } } */
> +/* { dg-final { scan-assembler-times "stmia\tr\[0-7\]!" 2 } } */

Reply via email to