Some sfuncs use hard regs as inputs and clobber them with raw reg
patterns.  It seems that LRA doesn't handle such clobber patterns.
Rewrite these patterns so that they work with LRA.

Committed to master.


gcc/ChangeLog:

        PR target/55212
        * config/sh/predicates.md (hard_reg_r0..r7): New predicates.
        * config/sh/sh.md (udivsi3_i4, udivsi3_i4_single,
        udivsi3_i1): Rewrite with match_operand and match_dup.
        (block_lump_real, block_lump_real_i4): Ditto.
        (udivsi3): Adjust for it.
        * config/sh/sh-mem.cc (expand_block_move): Ditto.

From 933221722a25bc46c5215d40e97a4c0a385a96ba Mon Sep 17 00:00:00 2001
From: Kaz Kojima <[email protected]>
Date: Tue, 24 Sep 2024 18:26:42 +0900
Subject: [PATCH] SH: Pin input args to hard-regs via predicates for sfuncs

Some sfuncs use hard regs as inputs and clobber them with raw reg
patterns.  It seems that LRA doesn't handle such clobber patterns.
Rewrite these patterns so that they work with LRA.

gcc/ChangeLog:

	PR target/55212
	* config/sh/predicates.md (hard_reg_r0..r7): New predicates.
	* config/sh/sh.md (udivsi3_i4, udivsi3_i4_single,
	udivsi3_i1): Rewrite with match_operand and match_dup.
	(block_lump_real, block_lump_real_i4): Ditto.
	(udivsi3): Adjust for it.
	* config/sh/sh-mem.cc (expand_block_move): Ditto.
---
 gcc/config/sh/predicates.md |  33 +++++++++++++++++++++++++++++++++
 gcc/config/sh/sh-mem.cc     |   4 ++--
 gcc/config/sh/sh.md         | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------------------------
 3 files changed, 98 insertions(+), 40 deletions(-)

diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 6964833..100c0ac 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -803,4 +803,37 @@
     return true;
 
   return false;
 })
+
+;; Predicates for pinning operands to hard-regs.
+(define_predicate "hard_reg_r0"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R0_REG")))
+
+(define_predicate "hard_reg_r1"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R1_REG")))
+
+(define_predicate "hard_reg_r2"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R2_REG")))
+
+(define_predicate "hard_reg_r3"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R3_REG")))
+
+(define_predicate "hard_reg_r4"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R4_REG")))
+
+(define_predicate "hard_reg_r5"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R5_REG")))
+
+(define_predicate "hard_reg_r6"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R6_REG")))
+
+(define_predicate "hard_reg_r7"
+  (and (match_code "reg")
+       (match_test "REGNO (op) == R7_REG")))
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 591bdab..0302a4e 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -133,9 +133,9 @@ expand_block_move (rtx *operands)
 	  force_into (XEXP (operands[1], 0), r5);
 
 	  int dwords = bytes >> 3;
 	  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
-	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
+	  emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab, r4, r5, r6));
 	  return true;
 	}
       else
 	return false;
@@ -177,9 +177,9 @@ expand_block_move (rtx *operands)
 
       final_switch = 16 - ((bytes / 4) % 16);
       while_loop = ((bytes / 4) / 16 - 1) * 16;
       emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
-      emit_insn (gen_block_lump_real (func_addr_rtx, lab));
+      emit_insn (gen_block_lump_real (func_addr_rtx, lab, r4, r5, r6));
       return true;
     }
 
   return false;
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index fbffd63..4b12b06 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -2193,37 +2193,51 @@
 ;; gets allocated to a stack slot that needs its address reloaded, then
 ;; there is nothing to prevent reload from using r0 to reload the address.
 ;; This reload would clobber the value in r0 we are trying to store.
 ;; If we let reload allocate r0, then this problem can never happen.
+;;
+;; In addition to that, we must also pin the input regs to hard-regs via the
+;; predicates.  When these insns are instantiated, the expander also emits
+;; the accompanying mov insns to load the hard-regs.  However, subsequent RTL
+;; passes might move things around and reassign the operands to pseudo regs,
+;; which might eventually get allocated to different (wrong) hard-regs.  To
+;; avoid that, only allow matching these insns if the operands are the
+;; expected hard-regs.
 (define_insn "udivsi3_i1"
   [(set (match_operand:SI 0 "register_operand" "=z,z")
-	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+	(udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+		 (match_operand:SI 4 "hard_reg_r5" "=r,r")))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
+   (clobber (match_dup 3))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
    (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
   "@
 	jsr	@%1%#
 	bsrf	%1\n%O2:%#"
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "udivsi3_i4"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+	(udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+		 (match_operand:SI 4 "hard_reg_r5" "=r,r")))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:DF DR4_REG))
    (clobber (reg:SI R0_REG))
    (clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
-   (clobber (reg:SI R5_REG))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
    (clobber (reg:SI FPSCR_STAT_REG))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
    (use (match_operand 2 "" "Z,Ccl"))
    (use (reg:SI FPSCR_MODES_REG))]
   "TARGET_FPU_DOUBLE && ! TARGET_FPU_SINGLE"
@@ -2235,18 +2249,21 @@
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "udivsi3_i4_single"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-	(udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+	(udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+		 (match_operand:SI 4 "hard_reg_r5" "=r,r")))
    (clobber (reg:SI T_REG))
    (clobber (reg:SI PR_REG))
    (clobber (reg:DF DR0_REG))
    (clobber (reg:DF DR2_REG))
    (clobber (reg:DF DR4_REG))
    (clobber (reg:SI R0_REG))
    (clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
-   (clobber (reg:SI R5_REG))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
    (use (match_operand:SI 1 "arith_reg_operand" "r,r"))
    (use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_FPU_ANY && TARGET_FPU_SINGLE"
   "@
@@ -2277,8 +2294,10 @@
   ""
 {
   rtx last;
   rtx func_ptr = gen_reg_rtx (Pmode);
+  rtx r4 = gen_rtx_REG (SImode, R4_REG);
+  rtx r5 = gen_rtx_REG (SImode, R5_REG);
 
   /* Emit the move of the address to a pseudo outside of the libcall.  */
   if (TARGET_DIVIDE_CALL_TABLE)
     {
@@ -2304,26 +2323,26 @@
   else if (TARGET_DIVIDE_CALL_FP)
     {
       rtx lab = function_symbol (func_ptr, "__udivsi3_i4", SFUNC_STATIC).lab;
       if (TARGET_FPU_SINGLE)
-	last = gen_udivsi3_i4_single (operands[0], func_ptr, lab);
+	last = gen_udivsi3_i4_single (operands[0], func_ptr, lab, r4, r5);
       else
-	last = gen_udivsi3_i4 (operands[0], func_ptr, lab);
+	last = gen_udivsi3_i4 (operands[0], func_ptr, lab, r4, r5);
     }
   else if (TARGET_SH2A)
     {
       operands[1] = force_reg (SImode, operands[1]);
       operands[2] = force_reg (SImode, operands[2]);
       emit_insn (gen_udivsi3_sh2a (operands[0], operands[1], operands[2]));
       DONE;
     }
   else
     {
       rtx lab = function_symbol (func_ptr, "__udivsi3", SFUNC_STATIC).lab;
-      last = gen_udivsi3_i1 (operands[0], func_ptr, lab);
+      last = gen_udivsi3_i1 (operands[0], func_ptr, lab, r4, r5);
     }
-  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
-  emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]);
+  emit_move_insn (r4, operands[1]);
+  emit_move_insn (r5, operands[2]);
   emit_insn (last);
   DONE;
 })
 
@@ -8964,19 +8983,22 @@
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "block_lump_real"
-  [(parallel [(set (mem:BLK (reg:SI R4_REG))
-		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
-	      (use (match_operand 1 "" "Z,Ccl"))
-	      (use (reg:SI R6_REG))
-	      (clobber (reg:SI PR_REG))
-	      (clobber (reg:SI T_REG))
-	      (clobber (reg:SI R4_REG))
-	      (clobber (reg:SI R5_REG))
-	      (clobber (reg:SI R6_REG))
-	      (clobber (reg:SI R0_REG))])]
+  [(set (mem:BLK (match_operand:SI 2 "hard_reg_r4" "=r,r"))
+	(mem:BLK (match_operand:SI 3 "hard_reg_r5" "=r,r")))
+   (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+   (use (match_operand 1 "" "Z,Ccl"))
+   (use (match_operand:SI 4 "hard_reg_r6" "=r,r"))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
+   (use (reg:SI R6_REG))
+   (clobber (match_dup 2))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI R0_REG))]
   "TARGET_SH1 && ! TARGET_HARD_SH4"
   "@
 	jsr	@%0%#
 	bsrf	%0\n%O1:%#"
@@ -8999,22 +9021,25 @@
   [(set_attr "type" "sfunc")
    (set_attr "needs_delay_slot" "yes")])
 
 (define_insn "block_lump_real_i4"
-  [(parallel [(set (mem:BLK (reg:SI R4_REG))
-		   (mem:BLK (reg:SI R5_REG)))
-	      (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
-	      (use (match_operand 1 "" "Z,Ccl"))
-	      (use (reg:SI R6_REG))
-	      (clobber (reg:SI PR_REG))
-	      (clobber (reg:SI T_REG))
-	      (clobber (reg:SI R4_REG))
-	      (clobber (reg:SI R5_REG))
-	      (clobber (reg:SI R6_REG))
-	      (clobber (reg:SI R0_REG))
-	      (clobber (reg:SI R1_REG))
-	      (clobber (reg:SI R2_REG))
-	      (clobber (reg:SI R3_REG))])]
+  [(set (mem:BLK (match_operand:SI 2 "hard_reg_r4" "=r,r"))
+	(mem:BLK (match_operand:SI 3 "hard_reg_r5" "=r,r")))
+   (use (match_operand:SI 0 "arith_reg_operand" "r,r"))
+   (use (match_operand 1 "" "Z,Ccl"))
+   (use (match_operand:SI 4 "hard_reg_r6" "=r,r"))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
+   (use (reg:SI R6_REG))
+   (clobber (match_dup 2))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
+   (clobber (reg:SI PR_REG))
+   (clobber (reg:SI T_REG))
+   (clobber (reg:SI R0_REG))
+   (clobber (reg:SI R1_REG))
+   (clobber (reg:SI R2_REG))
+   (clobber (reg:SI R3_REG))]
   "TARGET_HARD_SH4"
   "@
 	jsr	@%0%#
 	bsrf	%0\n%O1:%#"
--
libgit2 1.9.1

Reply via email to