If the address register is dead after load/store operation it looks beneficial to use LDMIA/STMIA instead of pair of LDR/STR instructions, at least if optimizing for size.
Changes v2 -> v3: - switching to mixed approach (insn+peep2) - keep memory attributes in peepholes - handle stmia corner cases Changes v1 -> v2: - switching to peephole2 approach - added test case gcc/ChangeLog: * config/arm/arm.cc (thumb_load_double_from_address): Emit ldmia when address reg rewritten by load. * config/arm/thumb1.md (peephole2 to rewrite DI/DF load): New. (peephole2 to rewrite DI/DF store): New. * config/arm/iterators.md (DIDF): New. gcc/testsuite: * gcc.target/arm/thumb1-load-store-64bit.c: Add new test. Signed-off-by: Siarhei Volkau <lis8...@gmail.com> --- gcc/config/arm/arm.cc | 8 ++-- gcc/config/arm/iterators.md | 3 ++ gcc/config/arm/thumb1.md | 37 ++++++++++++++++++- .../gcc.target/arm/thumb1-load-store-64bit.c | 16 ++++++++ 4 files changed, 58 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index b8c32db0a1d..2734ab3bce1 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -28368,15 +28368,13 @@ thumb_load_double_from_address (rtx *operands) switch (GET_CODE (addr)) { case REG: - operands[2] = adjust_address (operands[1], SImode, 4); - - if (REGNO (operands[0]) == REGNO (addr)) + if (reg_overlap_mentioned_p (addr, operands[0])) { - output_asm_insn ("ldr\t%H0, %2", operands); - output_asm_insn ("ldr\t%0, %1", operands); + output_asm_insn ("ldmia\t%m1, {%0, %H0}", operands); } else { + operands[2] = adjust_address (operands[1], SImode, 4); output_asm_insn ("ldr\t%0, %1", operands); output_asm_insn ("ldr\t%H0, %2", operands); } diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 8d066fcf05d..09046bff83b 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -50,6 +50,9 @@ (define_mode_iterator QHSD [QI HI SI DI]) ;; A list of the 32bit and 64bit integer modes (define_mode_iterator SIDI [SI DI]) +;; A list of the 64bit modes for thumb1. +(define_mode_iterator DIDF [DI DF]) + ;; A list of atomic compare and swap success return modes (define_mode_iterator CCSI [(CC_Z "TARGET_32BIT") (SI "TARGET_THUMB1")]) diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md index d7074b43f60..c9a4201fcc9 100644 --- a/gcc/config/arm/thumb1.md +++ b/gcc/config/arm/thumb1.md @@ -2055,4 +2055,39 @@ (define_insn "thumb1_stack_protect_test_insn" (set_attr "conds" "clob") (set_attr "type" "multiple")] ) - + +;; match patterns usable by ldmia/stmia +(define_peephole2 + [(set (match_operand:DIDF 0 "low_register_operand" "") + (match_operand:DIDF 1 "memory_operand" ""))] + "TARGET_THUMB1 + && REG_P (XEXP (operands[1], 0)) + && REGNO_REG_CLASS (REGNO (XEXP (operands[1], 0))) == LO_REGS + && peep2_reg_dead_p (1, XEXP (operands[1], 0))" + [(set (match_dup 0) + (match_dup 1))] + " + { + operands[1] = change_address (operands[1], VOIDmode, + gen_rtx_POST_INC (SImode, + XEXP (operands[1], 0))); + }" +) + +(define_peephole2 + [(set (match_operand:DIDF 1 "memory_operand" "") + (match_operand:DIDF 0 "low_register_operand" ""))] + "TARGET_THUMB1 + && REG_P (XEXP (operands[1], 0)) + && REGNO_REG_CLASS (REGNO (XEXP (operands[1], 0))) == LO_REGS + && peep2_reg_dead_p (1, XEXP (operands[1], 0)) + && REGNO (XEXP (operands[1], 0)) != (REGNO (operands[0]) + 1)" + [(set (match_dup 1) + (match_dup 0))] + " + { + operands[1] = change_address (operands[1], VOIDmode, + gen_rtx_POST_INC (SImode, + XEXP (operands[1], 0))); + }" +) diff --git a/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c b/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c new file mode 100644 index 00000000000..167fa9ec876 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/thumb1-load-store-64bit.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-mthumb -Os" } */ +/* { dg-require-effective-target arm_thumb1_ok } */ + +void copy_df(double *dst, const double *src) +{ + *dst = *src; +} + +void copy_di(unsigned long long *dst, const unsigned long long *src) +{ + *dst = *src; +} + +/* { dg-final { scan-assembler-times "ldmia\tr\[0-7\]" 2 } } */ +/* { dg-final { scan-assembler-times "stmia\tr\[0-7\]!" 2 } } */ -- 2.45.2