https://gcc.gnu.org/g:8ee643e50957904d75affece056a6dd84de343d6
commit 8ee643e50957904d75affece056a6dd84de343d6
Author: Kaz Kojima <kkoj...@rr.iij4u.or.jp>
Date: Fri Sep 20 18:15:30 2024 +0900

    SH: Try to reduce R0 live ranges

    Some move or extend patterns will make long R0 live ranges and could confuse LRA.

    gcc/ChangeLog:

            * config/sh/sh-protos.h
            (sh_satisfies_constraint_Sid_subreg_index): Declare.
            * config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index): New
            function.
            * config/sh/sh.md (extend<mode>si2_short_mem_disp_z,
            *mov<mode>_store_mem_index, mov<mode>_store_mem_index): New insn and
            insn_and_split patterns.
            (extend<mode>si2, mov<mode>): Use them for LRA.

Diff:
---
 gcc/config/sh/sh-protos.h | 1 +
 gcc/config/sh/sh.cc | 12 +++++++
 gcc/config/sh/sh.md | 90 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index b151a7c8fccc..5e5bd0aff7e7 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
 extern void output_pic_addr_const (FILE *, rtx);
 extern bool expand_block_move (rtx *);
+extern bool sh_satisfies_constraint_Sid_subreg_index (rtx);
 extern void prepare_move_operands (rtx[], machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df5830..c9222c3e6ac0 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first)
     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
 }
 
+/* Test Sid constraint with subreg index. See also the comment in
+   prepare_move_operands.  */
+bool
+sh_satisfies_constraint_Sid_subreg_index (rtx op)
+{
+  return ((GET_CODE (op) == MEM)
+          && ((GET_CODE (XEXP (op, 0)) == PLUS)
+              && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG)
+                  && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG)
+                      && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == REG)))));
+}
+
 /* Prepare operands for a move define_expand; specifically, one of the
    operands must be in a register. */
 void
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 7eee12ca6b8a..6d93f5cb816b 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -4801,7 +4801,38 @@
 
 (define_expand "extend<mode>si2"
   [(set (match_operand:SI 0 "arith_reg_dest")
-        (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+        (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]
+  ""
+{
+  /* When the displacement addressing is used, RA will assign r0 to
+     the pseudo register operand for the QI/HImode load. See
+     the comment in sh.cc:prepare_move_operand and PR target/55212. */
+  if (! lra_in_progress && ! reload_completed
+      && sh_lra_p ()
+      && ! TARGET_SH2A
+      && arith_reg_dest (operands[0], <MODE>mode)
+      && short_displacement_mem_operand (operands[1], <MODE>mode))
+    {
+      emit_insn (gen_extend<mode>si2_short_mem_disp_z (operands[0],
+                                                       operands[1]));
+      DONE;
+    }
+})
+
+(define_insn_and_split "extend<mode>si2_short_mem_disp_z"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+        (sign_extend:SI
+          (match_operand:QIHI 1 "short_displacement_mem_operand" "m")))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (sign_extend:SI (match_dup 1)))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "load")])
 
 (define_insn_and_split "*extend<mode>si2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
@@ -5343,9 +5374,50 @@
       operands[1] = gen_lowpart (<MODE>mode, reg);
     }
 
+  if (! lra_in_progress && ! reload_completed
+      && sh_lra_p ()
+      && ! TARGET_SH2A
+      && arith_reg_operand (operands[1], <MODE>mode)
+      && (satisfies_constraint_Sid (operands[0])
+          || sh_satisfies_constraint_Sid_subreg_index (operands[0])))
+    {
+      rtx adr = XEXP (operands[0], 0);
+      rtx base = XEXP (adr, 0);
+      rtx idx = XEXP (adr, 1);
+      emit_insn (gen_mov<mode>_store_mem_index (base, idx,
+                                                operands[1]));
+      DONE;
+    }
+
   prepare_move_operands (operands, <MODE>mode);
 })
 
+(define_insn "*mov<mode>_store_mem_index"
+  [(set (mem:QIHI
+          (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+                   (match_operand:SI 1 "arith_reg_operand" "z")))
+        (match_operand:QIHI 2 "arith_reg_operand" "r"))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()
+   && REG_P (operands[1]) && REGNO (operands[1]) == R0_REG"
+  "mov.<bw> %2,@(%1,%0)"
+  [(set_attr "type" "store")])
+
+(define_insn_and_split "mov<mode>_store_mem_index"
+  [(set (mem:QIHI
+          (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+                   (match_operand:SI 1 "arith_reg_operand" "^zr")))
+        (match_operand:QIHI 2 "arith_reg_operand" "r"))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 3) (match_dup 1))
+   (set (mem:QIHI (plus:SI (match_dup 0) (match_dup 3))) (match_dup 2))]
+{
+  operands[3] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "store")])
+
 ;; The pre-dec and post-inc mems must be captured by the '<' and '>'
 ;; constraints, otherwise wrong code might get generated.
 (define_insn "*mov<mode>_load_predec"
@@ -5631,6 +5703,22 @@
                                        (const_string "double")
                                        (const_string "none")))])
 
+;; LRA will try to satisfy the constraints in match_scratch for the memory
+;; displacements and it will make issues on this target. Use R0 as a scratch
+;; register for the constant load.
+(define_insn "movdf_i4_F_z"
+  [(set (match_operand:DF 0 "fp_arith_reg_operand" "=d")
+        (match_operand:DF 1 "const_double_operand" "F"))
+   (use (reg:SI FPSCR_MODES_REG))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_FPU_DOUBLE && sh_lra_p ()"
+  "#"
+  [(set_attr "type" "pcfload")
+   (set (attr "length") (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 8)))
+   (set (attr "fp_mode") (if_then_else (eq_attr "fmovd" "yes")
+                                       (const_string "double")
+                                       (const_string "none")))])
+
 ;; Moving DFmode between fp/general registers through memory
 ;; (the top of the stack) is faster than moving through fpul even for
 ;; little endian. Because the type of an instruction is important for its