https://gcc.gnu.org/g:8ee643e50957904d75affece056a6dd84de343d6

commit 8ee643e50957904d75affece056a6dd84de343d6
Author: Kaz Kojima <kkoj...@rr.iij4u.or.jp>
Date:   Fri Sep 20 18:15:30 2024 +0900

    SH: Try to reduce R0 live ranges
    
    Some move or extend patterns will make long R0 live ranges and could
    confuse LRA.
    
    gcc/ChangeLog:
            * config/sh/sh-protos.h
            (sh_satisfies_constraint_Sid_subreg_index): Declare.
            * config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index):
            New function.
            * config/sh/sh.md (extend<mode>si2_short_mem_disp_z,
            *mov<mode>_store_mem_index, mov<mode>_store_mem_index):
            New insn and insn_and_split patterns.
            (extend<mode>si2, mov<mode>): Use them for LRA.

Diff:
---
 gcc/config/sh/sh-protos.h |  1 +
 gcc/config/sh/sh.cc       | 12 +++++++
 gcc/config/sh/sh.md       | 90 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index b151a7c8fccc..5e5bd0aff7e7 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
 extern void output_pic_addr_const (FILE *, rtx);
 extern bool expand_block_move (rtx *);
+extern bool sh_satisfies_constraint_Sid_subreg_index (rtx);
 extern void prepare_move_operands (rtx[], machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df5830..c9222c3e6ac0 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first)
     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
 }
 
+/* Return true if OP is a MEM whose address is (plus (reg) (subreg (reg))),
+   the Sid form with a SUBREG index.  See also prepare_move_operands.  */
+bool
+sh_satisfies_constraint_Sid_subreg_index (rtx op)
+{
+  return (GET_CODE (op) == MEM
+          && GET_CODE (XEXP (op, 0)) == PLUS
+          && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
+          && GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG
+          && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == REG);
+}
+
 /* Prepare operands for a move define_expand; specifically, one of the
    operands must be in a register.  */
 void
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 7eee12ca6b8a..6d93f5cb816b 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -4801,7 +4801,38 @@
 
 (define_expand "extend<mode>si2"
   [(set (match_operand:SI 0 "arith_reg_dest")
-       (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+       (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]
+  ""
+{
+  /* When the displacement addressing is used, RA will assign r0 to
+     the pseudo register operand for the QI/HImode load.  See
+     the comment in sh.cc:prepare_move_operands and PR target/55212.  */
+  if (! lra_in_progress && ! reload_completed
+      && sh_lra_p ()
+      && ! TARGET_SH2A
+      && arith_reg_dest (operands[0], <MODE>mode)
+      && short_displacement_mem_operand (operands[1], <MODE>mode))
+    {
+      emit_insn (gen_extend<mode>si2_short_mem_disp_z (operands[0],
+                                                       operands[1]));
+      DONE;
+    }
+})
+
+(define_insn_and_split "extend<mode>si2_short_mem_disp_z"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+       (sign_extend:SI
+           (match_operand:QIHI 1 "short_displacement_mem_operand" "m")))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (sign_extend:SI (match_dup  1)))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "load")])
 
 (define_insn_and_split "*extend<mode>si2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
@@ -5343,9 +5374,50 @@
         operands[1] = gen_lowpart (<MODE>mode, reg);
     }
 
+  /* Before register allocation with LRA, expand a register store to a
+     base + index address via mov<mode>_store_mem_index (clobbers R0).  */
+  if (! lra_in_progress && ! reload_completed
+      && sh_lra_p () && ! TARGET_SH2A
+      && arith_reg_operand (operands[1], <MODE>mode)
+      && (satisfies_constraint_Sid (operands[0])
+          || sh_satisfies_constraint_Sid_subreg_index (operands[0])))
+    {
+      rtx adr = XEXP (operands[0], 0);
+      rtx base = XEXP (adr, 0);
+      rtx idx = XEXP (adr, 1);
+      emit_insn (gen_mov<mode>_store_mem_index (base, idx, operands[1]));
+      DONE;
+    }
+
   prepare_move_operands (operands, <MODE>mode);
 })
 
+(define_insn "*mov<mode>_store_mem_index"
+  [(set (mem:QIHI
+               (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+                              (match_operand:SI 1 "arith_reg_operand" "z")))
+          (match_operand:QIHI 2 "arith_reg_operand" "r"))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()
+   && REG_P (operands[1]) && REGNO (operands[1]) == R0_REG"
+  "mov.<bw>    %2,@(%1,%0)"
+  [(set_attr "type" "store")])
+
+(define_insn_and_split "mov<mode>_store_mem_index"
+  [(set (mem:QIHI
+               (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+                              (match_operand:SI 1 "arith_reg_operand" "^zr")))
+          (match_operand:QIHI 2 "arith_reg_operand" "r"))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 3) (match_dup 1))
+    (set (mem:QIHI (plus:SI (match_dup 0) (match_dup 3))) (match_dup 2))]
+{
+  operands[3] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "store")])
+
 ;; The pre-dec and post-inc mems must be captured by the '<' and '>'
 ;; constraints, otherwise wrong code might get generated.
 (define_insn "*mov<mode>_load_predec"
@@ -5631,6 +5703,22 @@
                                           (const_string "double")
                                           (const_string "none")))])
 
+;; LRA will try to satisfy the match_scratch constraints for the memory
+;; displacements, which causes problems on this target.  Use R0 as a
+;; scratch register for the constant load instead.
+(define_insn "movdf_i4_F_z"
+  [(set (match_operand:DF 0 "fp_arith_reg_operand" "=d")
+       (match_operand:DF 1 "const_double_operand" "F"))
+   (use (reg:SI FPSCR_MODES_REG))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_FPU_DOUBLE && sh_lra_p ()"
+  "#"
+  [(set_attr "type" "pcfload")
+   (set (attr "length") (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)))
+   (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+                                          (const_string "double")
+                                          (const_string "none")))])
+
 ;; Moving DFmode between fp/general registers through memory
 ;; (the top of the stack) is faster than moving through fpul even for
 ;; little endian.  Because the type of an instruction is important for its

Reply via email to