[gcc r15-2183] SH: Fix outage caused by recently added 2nd combine pass after reg alloc

2024-07-20 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:58b78cf068b3b24c11d7812a5f4de865e9cdb8b4

commit r15-2183-g58b78cf068b3b24c11d7812a5f4de865e9cdb8b4
Author: Oleg Endo 
Date:   Sun Jul 21 14:11:21 2024 +0900

SH: Fix outage caused by recently added 2nd combine pass after reg alloc

I've also confirmed on the CSiBE set that the secondary combine pass is
actually beneficial on SH.  It does result in some code size reductions.

gcc/ChangeLog:
* config/sh/sh.md (mov_neg_si_t): Allow insn and split after
register allocation.
(*treg_noop_move): New insn.

Diff:
---
 gcc/config/sh/sh.md | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 3e978254ab0c..7eee12ca6b8a 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -8408,7 +8408,7 @@
   gcc_assert (t_reg_operand (operands[1], VOIDmode));
   return "subc %0,%0";
 }
-  "&& can_create_pseudo_p () && !t_reg_operand (operands[1], VOIDmode)"
+  "&& !t_reg_operand (operands[1], VOIDmode)"
   [(const_int 0)]
 {
   sh_treg_insns ti = sh_split_treg_set_expr (operands[1], curr_insn);
@@ -8421,6 +8421,14 @@
 }
   [(set_attr "type" "arith")])
 
+;; no-op T bit move which can result from other optimizations.
+(define_insn_and_split "*treg_noop_move"
+  [(set (reg:SI T_REG) (reg:SI T_REG))]
+  "TARGET_SH1"
+  "#"
+  "&& 1"
+  [(const_int 0)])
+
 ;; Invert the T bit.
 ;; On SH2A we can use the nott insn.  On anything else this must be done with
 ;; multiple insns like:


[gcc r15-4307] SH: Fix cost estimation of mem load/store

2024-10-13 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:b717c462b96e7870f8081d2bc330e4749a4b0538

commit r15-4307-gb717c462b96e7870f8081d2bc330e4749a4b0538
Author: Oleg Endo 
Date:   Sun Oct 13 11:36:38 2024 +0900

SH: Fix cost estimation of mem load/store

For memory loads/stores (that contain a MEM rtx) sh_rtx_costs would wrongly
report a cost lower than 1 insn which is not accurate as it makes loads/stores
appear cheaper than simple arithmetic insns.  The cost of a load/store insn is
at least 1 insn plus the cost of the address expression (some addressing modes
can be considered more expensive than others due to additional constraints).

gcc/ChangeLog:

PR target/113533
* config/sh/sh.cc (sh_rtx_costs): Adjust cost estimation of MEM rtx
to be always at least COSTS_N_INSNS (1).  Forward speed argument to
sh_address_cost.

Co-authored-by: Roger Sayle 

Diff:
---
 gcc/config/sh/sh.cc | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df5830..6ad202fd4263 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -3231,7 +3231,7 @@ multcosts (rtx x ATTRIBUTE_UNUSED)
 static bool
 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
  int opno ATTRIBUTE_UNUSED,
- int *total, bool speed ATTRIBUTE_UNUSED)
+ int *total, bool speed)
 {
   int code = GET_CODE (x);
 
@@ -3264,10 +3264,12 @@ sh_rtx_costs (rtx x, machine_mode mode 
ATTRIBUTE_UNUSED, int outer_code,
 }
   return false;
 
-/* The cost of a mem access is mainly the cost of the address mode.  */
+/* The cost of a mem access is mainly the cost of the address mode on top
+   of the cost of the load/store insn itself.  */
 case MEM:
   *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
-   true);
+   speed)
+  + COSTS_N_INSNS (1);
   return true;
 
 case IF_THEN_ELSE:
@@ -3317,7 +3319,8 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, 
int outer_code,
{
  *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
GET_MODE (XEXP (x, 0)),
-   MEM_ADDR_SPACE (XEXP (x, 0)), true);
+   MEM_ADDR_SPACE (XEXP (x, 0)), speed)
+  + COSTS_N_INSNS (1);
  return true;
}
   return false;
@@ -3335,7 +3338,8 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, 
int outer_code,
  /* Handle SH2A's movu.b and movu.w insn.  */
  *total = sh_address_cost (XEXP (XEXP (x, 0), 0), 
GET_MODE (XEXP (x, 0)), 
-   MEM_ADDR_SPACE (XEXP (x, 0)), true);
+   MEM_ADDR_SPACE (XEXP (x, 0)), speed)
+  + COSTS_N_INSNS (1);
  return true;
}
   return false;
@@ -3350,14 +3354,16 @@ sh_rtx_costs (rtx x, machine_mode mode 
ATTRIBUTE_UNUSED, int outer_code,
{
  *total = sh_address_cost (XEXP (XEXP (xx, 0), 0), 
GET_MODE (XEXP (xx, 0)),
-   MEM_ADDR_SPACE (XEXP (xx, 0)), true);
+   MEM_ADDR_SPACE (XEXP (xx, 0)), speed);
+  + COSTS_N_INSNS (1);
  return true;
}
  if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
{
  *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
GET_MODE (XEXP (xx, 1)),
-   MEM_ADDR_SPACE (XEXP (xx, 1)), true);
+   MEM_ADDR_SPACE (XEXP (xx, 1)), speed);
+  + COSTS_N_INSNS (1);
  return true;
}
}


[gcc r15-4306] SH: Add -fno-math-errno to fsca,fsrra tests.

2024-10-13 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:7ec8b4bf42fe9e27f7cf65fafae62e57be784db2

commit r15-4306-g7ec8b4bf42fe9e27f7cf65fafae62e57be784db2
Author: Oleg Endo 
Date:   Sun Oct 13 10:33:17 2024 +0900

SH: Add -fno-math-errno to fsca,fsrra tests.

Without -fno-math-errno some of the tests might fail because the expected
insns will not be generated.

gcc/testsuite/ChangeLog:
* gcc.target/sh/pr53512-1.c: Add -fno-math-errno option.
* gcc.target/sh/pr53512-2.c: Likewise.
* gcc.target/sh/pr53512-3.c: Likewise.
* gcc.target/sh/pr53512-4.c: Likewise.
* gcc.target/sh/pr54680.c: Likewise.

Diff:
---
 gcc/testsuite/gcc.target/sh/pr53512-1.c | 2 +-
 gcc/testsuite/gcc.target/sh/pr53512-2.c | 2 +-
 gcc/testsuite/gcc.target/sh/pr53512-3.c | 2 +-
 gcc/testsuite/gcc.target/sh/pr53512-4.c | 2 +-
 gcc/testsuite/gcc.target/sh/pr54680.c   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.target/sh/pr53512-1.c 
b/gcc/testsuite/gcc.target/sh/pr53512-1.c
index 14106c02facd..a03273883eb6 100644
--- a/gcc/testsuite/gcc.target/sh/pr53512-1.c
+++ b/gcc/testsuite/gcc.target/sh/pr53512-1.c
@@ -1,7 +1,7 @@
 /* Verify that the fsca insn is used when specifying -mfsca and
   -funsafe-math-optimizations.  */
 /* { dg-do compile { target { has_fsca } } }  */
-/* { dg-options "-O1 -mfsca -funsafe-math-optimizations" } */
+/* { dg-options "-O1 -mfsca -funsafe-math-optimizations -fno-math-errno" } */
 /* { dg-final { scan-assembler-times "fsca" 3 } } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/sh/pr53512-2.c 
b/gcc/testsuite/gcc.target/sh/pr53512-2.c
index a79e58adb749..48872a1f2d2e 100644
--- a/gcc/testsuite/gcc.target/sh/pr53512-2.c
+++ b/gcc/testsuite/gcc.target/sh/pr53512-2.c
@@ -1,7 +1,7 @@
 /* Verify that the fsca insn is not used when specifying -mno-fsca and
   -funsafe-math-optimizations.  */
 /* { dg-do compile { target { has_fsca } } }  */
-/* { dg-options "-O1 -mno-fsca -funsafe-math-optimizations" } */
+/* { dg-options "-O1 -mno-fsca -funsafe-math-optimizations -fno-math-errno" } 
*/
 /* { dg-final { scan-assembler-not "fsca" } } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/sh/pr53512-3.c 
b/gcc/testsuite/gcc.target/sh/pr53512-3.c
index 19e9ede8156b..b834f3587ff1 100644
--- a/gcc/testsuite/gcc.target/sh/pr53512-3.c
+++ b/gcc/testsuite/gcc.target/sh/pr53512-3.c
@@ -1,7 +1,7 @@
 /* Verify that the fsrra insn is used when specifying -mfsrra and
   -funsafe-math-optimizations and -ffinite-math-only.  */
 /* { dg-do compile { target { has_fsrra } } }  */
-/* { dg-options "-O1 -mfsrra -funsafe-math-optimizations -ffinite-math-only" } 
*/
+/* { dg-options "-O1 -mfsrra -funsafe-math-optimizations -ffinite-math-only 
-fno-math-errno" } */
 /* { dg-final { scan-assembler "fsrra" } } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/sh/pr53512-4.c 
b/gcc/testsuite/gcc.target/sh/pr53512-4.c
index a1d3e8141170..01a981dd8bb6 100644
--- a/gcc/testsuite/gcc.target/sh/pr53512-4.c
+++ b/gcc/testsuite/gcc.target/sh/pr53512-4.c
@@ -1,7 +1,7 @@
 /* Verify that the fsrra insn is not used when specifying -mno-fsrra and
   -funsafe-math-optimizations and -ffinite-math-only.  */
 /* { dg-do compile { target { has_fsrra } } }  */
-/* { dg-options "-O1 -mno-fsrra -funsafe-math-optimizations 
-ffinite-math-only" } */
+/* { dg-options "-O1 -mno-fsrra -funsafe-math-optimizations -ffinite-math-only 
-fno-math-errno" } */
 /* { dg-final { scan-assembler-not "fsrra" } } */
 
 #include 
diff --git a/gcc/testsuite/gcc.target/sh/pr54680.c 
b/gcc/testsuite/gcc.target/sh/pr54680.c
index 7b02de354647..1ca67b7f8362 100644
--- a/gcc/testsuite/gcc.target/sh/pr54680.c
+++ b/gcc/testsuite/gcc.target/sh/pr54680.c
@@ -2,7 +2,7 @@
to int.  Notice that we can't count just "lds" insns because mode switches
use "lds.l".  */
 /* { dg-do compile { target { has_fsca } } }  */
-/* { dg-options "-O2 -mfsca -funsafe-math-optimizations -fno-ipa-icf" }  */
+/* { dg-options "-O2 -mfsca -funsafe-math-optimizations -fno-ipa-icf 
-fno-math-errno" }  */
 /* { dg-final { scan-assembler-times "fsca" 7 } } */
 /* { dg-final { scan-assembler-times "shad" 1 } } */
 /* { dg-final { scan-assembler-times "lds\tr\[0-9\],fpul" 6 } } */


[gcc r15-4411] SH: Fix typo of commit b717c462b96e

2024-10-17 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:2390cbad85cbd122d4e58c94f7891d7c5fde49b3

commit r15-4411-g2390cbad85cbd122d4e58c94f7891d7c5fde49b3
Author: Oleg Endo 
Date:   Thu Oct 17 21:40:14 2024 +0900

SH: Fix typo of commit b717c462b96e

gcc/ChangeLog:
PR target/113533
* config/sh/sh.cc (sh_rtx_costs): Delete wrong semicolon.

Diff:
---
 gcc/config/sh/sh.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 6ad202fd4263..f69ede0edf7f 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -3354,7 +3354,7 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, 
int outer_code,
{
  *total = sh_address_cost (XEXP (XEXP (xx, 0), 0), 
GET_MODE (XEXP (xx, 0)),
-   MEM_ADDR_SPACE (XEXP (xx, 0)), speed);
+   MEM_ADDR_SPACE (XEXP (xx, 0)), speed)
   + COSTS_N_INSNS (1);
  return true;
}
@@ -3362,7 +3362,7 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, 
int outer_code,
{
  *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
GET_MODE (XEXP (xx, 1)),
-   MEM_ADDR_SPACE (XEXP (xx, 1)), speed);
+   MEM_ADDR_SPACE (XEXP (xx, 1)), speed)
   + COSTS_N_INSNS (1);
  return true;
}


[gcc r15-4228] SH: Use softfp for sh-elf

2024-10-09 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:e95512e2d5a317e8c043f232158df4b38186e51c

commit r15-4228-ge95512e2d5a317e8c043f232158df4b38186e51c
Author: Sébastien Michelland 
Date:   Thu Oct 10 09:24:39 2024 +0900

SH: Use softfp for sh-elf

libgcc/ChangeLog:

PR target/29845
* config.host (sh-*-elf*): Replace fdpbit with softfp.
* config/sh/sfp-machine.h: New file.

Signed-off-by: Sébastien Michelland 


Diff:
---
 libgcc/config.host |  2 +-
 libgcc/config/sh/sfp-machine.h | 83 ++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/libgcc/config.host b/libgcc/config.host
index fa001c5e900b..06fae1545b18 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -1414,7 +1414,7 @@ s390x-ibm-tpf*)
md_unwind_header=s390/tpf-unwind.h
;;
 sh-*-elf* | sh[12346l]*-*-elf*)
-   tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-fdpbit"
+   tmake_file="$tmake_file sh/t-sh t-crtstuff-pic t-softfp-sfdf t-softfp"
extra_parts="$extra_parts crt1.o crti.o crtn.o crtbeginS.o crtendS.o \
libic_invalidate_array_4-100.a \
libic_invalidate_array_4-200.a \
diff --git a/libgcc/config/sh/sfp-machine.h b/libgcc/config/sh/sfp-machine.h
new file mode 100644
index ..26f65166976b
--- /dev/null
+++ b/libgcc/config/sh/sfp-machine.h
@@ -0,0 +1,83 @@
+/* Software floating-point machine description for SuperH.
+
+Copyright (C) 2024 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+#define _FP_W_TYPE_SIZE 32
+#define _FP_W_TYPE unsigned long
+#define _FP_WS_TYPE signed long
+#define _FP_I_TYPE long
+
+#define _FP_MUL_MEAT_S(R,X,Y) \
+  _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y) \
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_Q(R,X,Y) \
+  _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+
+#define _FP_DIV_MEAT_S(R,X,Y)  _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)  _FP_DIV_MEAT_2_udiv(D,R,X,Y)
+#define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
+
+#define _FP_NANFRAC_B  _FP_QNANBIT_B
+#define _FP_NANFRAC_H  _FP_QNANBIT_H
+#define _FP_NANFRAC_S  _FP_QNANBIT_S
+#define _FP_NANFRAC_D  _FP_QNANBIT_D, 0
+#define _FP_NANFRAC_Q  _FP_QNANBIT_Q, 0, 0, 0
+
+/* The type of the result of a floating point comparison.  This must
+   match __libgcc_cmp_return__ in GCC for the target.  */
+typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
+#define CMPtype __gcc_CMPtype
+
+#define _FP_NANSIGN_B  0
+#define _FP_NANSIGN_H  0
+#define _FP_NANSIGN_S  0
+#define _FP_NANSIGN_D  0
+#define _FP_NANSIGN_Q  0
+
+#define _FP_KEEPNANFRACP 0
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)  \
+  do {  \
+R##_s = _FP_NANSIGN_##fs;   \
+_FP_FRAC_SET_##wc(R,_FP_NANFRAC_##fs);  \
+R##_c = FP_CLS_NAN; \
+  } while (0)
+
+#define _FP_TININESS_AFTER_ROUNDING 1
+
+#define __LITTLE_ENDIAN 1234
+#define __BIG_ENDIAN 4321
+
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+/* Define ALIASNAME as a strong alias for NAME.  */
+# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+# define _strong_alias(name, aliasname) \
+  extern __typeof (name) aliasname __attribute__ ((alias (#name)));


[gcc/devel/sh-lra] SH: Try to reduce R0 live ranges

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:8ee643e50957904d75affece056a6dd84de343d6

commit 8ee643e50957904d75affece056a6dd84de343d6
Author: Kaz Kojima 
Date:   Fri Sep 20 18:15:30 2024 +0900

SH: Try to reduce R0 live ranges

Some move or extend patterns will make long R0 live ranges and could
confuse LRA.

gcc/ChangeLog:
* config/sh/sh-protos.h
(sh_satisfies_constraint_Sid_subreg_index): Declare.
* config/sh/sh.cc (sh_satisfies_constraint_Sid_subreg_index):
New function.
* config/sh/sh.md (extendsi2_short_mem_disp_z,
*mov_store_mem_index, mov_store_mem_index):
New insn and insn_and_split patterns.
(extendsi2, mov): Use them for LRA.

Diff:
---
 gcc/config/sh/sh-protos.h |  1 +
 gcc/config/sh/sh.cc   | 12 +++
 gcc/config/sh/sh.md   | 90 ++-
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index b151a7c8fccc..5e5bd0aff7e7 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -61,6 +61,7 @@ extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
 extern bool nonpic_symbol_mentioned_p (rtx);
 extern void output_pic_addr_const (FILE *, rtx);
 extern bool expand_block_move (rtx *);
+extern bool sh_satisfies_constraint_Sid_subreg_index (rtx);
 extern void prepare_move_operands (rtx[], machine_mode mode);
 extern bool sh_expand_cmpstr (rtx *);
 extern bool sh_expand_cmpnstr (rtx *);
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8df5830..c9222c3e6ac0 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -1577,6 +1577,18 @@ sh_encode_section_info (tree decl, rtx rtl, int first)
 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
 }
 
+/* Test Sid constraint with subreg index.  See also the comment in
+   prepare_move_operands.  */
+bool
+sh_satisfies_constraint_Sid_subreg_index (rtx op)
+{
+  return ((GET_CODE (op) == MEM)
+ && ((GET_CODE (XEXP (op, 0)) == PLUS)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 0)) == REG)
+ && ((GET_CODE (XEXP (XEXP (op, 0), 1)) == SUBREG)
+ && (GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 0)) == 
REG);
+}
+
 /* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register.  */
 void
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 7eee12ca6b8a..6d93f5cb816b 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -4801,7 +4801,38 @@
 
 (define_expand "extendsi2"
   [(set (match_operand:SI 0 "arith_reg_dest")
-   (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))])
+   (sign_extend:SI (match_operand:QIHI 1 "general_extend_operand")))]
+  ""
+{
+  /* When the displacement addressing is used, RA will assign r0 to
+   the pseudo register operand for the QI/HImode load.  See
+   the comment in sh.cc:prepare_move_operand and PR target/55212.  */
+  if (! lra_in_progress && ! reload_completed
+  && sh_lra_p ()
+  && ! TARGET_SH2A
+  && arith_reg_dest (operands[0], mode)
+  && short_displacement_mem_operand (operands[1], mode))
+{
+  emit_insn (gen_extendsi2_short_mem_disp_z (operands[0],
+   
 operands[1]));
+  DONE;
+}
+})
+
+(define_insn_and_split "extendsi2_short_mem_disp_z"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r")
+   (sign_extend:SI
+   (match_operand:QIHI 1 "short_displacement_mem_operand" "m")))
+   (clobber (reg:SI R0_REG))]
+  "TARGET_SH1 && ! TARGET_SH2A && sh_lra_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 2) (sign_extend:SI (match_dup  1)))
+   (set (match_dup 0) (match_dup 2))]
+{
+  operands[2] = gen_rtx_REG (SImode, R0_REG);
+}
+  [(set_attr "type" "load")])
 
 (define_insn_and_split "*extendsi2_compact_reg"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r")
@@ -5343,9 +5374,50 @@
 operands[1] = gen_lowpart (mode, reg);
 }
 
+  if (! lra_in_progress && ! reload_completed
+  && sh_lra_p ()
+  && ! TARGET_SH2A
+  && arith_reg_operand (operands[1], mode)
+  && (satisfies_constraint_Sid (operands[0])
+  || sh_satisfies_constraint_Sid_subreg_index (operands[0])))
+{
+  rtx adr = XEXP (operands[0], 0);
+  rtx base = XEXP (adr, 0);
+  rtx idx = XEXP (adr, 1);
+  emit_insn (gen_mov_store_mem_index (base, idx,
+   
  operands[1]));
+  DONE;
+}
+
   prepare_move_operands (operands, mode);
 })
 
+(define_insn "*mov_store_mem_index"
+  [(set (mem:QIHI
+   (plus:SI (match_operand:SI 0 "arith_reg_operand" "%r")
+  (match_operand:SI 1 "arith_reg_operand" "z")))
+  (match_operand:QIHI 2 "arith_reg_operan

[gcc] Created branch 'devel/sh-lra'

2024-09-24 Thread Oleg Endo via Gcc-cvs
The branch 'devel/sh-lra' was created pointing to:

 09210f927265... SH: enable LRA by default


[gcc/devel/sh-lra] SH: pin input args to hard-regs via predicates for sfuncs

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:4d28c5b1eb15cff97f07982c73770485e6eaa986

commit 4d28c5b1eb15cff97f07982c73770485e6eaa986
Author: Kaz Kojima 
Date:   Tue Sep 24 18:26:42 2024 +0900

SH: pin input args to hard-regs via predicates for sfuncs

Some sfuncs use a hard reg as input and clobber it with a raw reg pattern.
It seems that LRA doesn't process this kind of clobber pattern.  Rewrite
these patterns so that they work with LRA.

gcc/ChangeLog:
* config/sh/predicates.md (hard_reg_r4, hard_reg_r5,
hard_reg_r6): New predicates.
* config/sh/sh.md (udivsi3_i4, udivsi3_i4_single,
udivsi3_i1): Rewrite with match_operand and match_dup.
(block_lump_real, block_lump_real_i4): Ditto.
(udivsi3): Adjust for it.
* config/sh/sh-mem.cc (expand_block_move): Ditto.

Diff:
---
 gcc/config/sh/predicates.md |  19 +
 gcc/config/sh/sh-mem.cc |   4 +-
 gcc/config/sh/sh.md | 101 +++-
 3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 3732cec9608b..b10af71c280e 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -818,3 +818,22 @@
 
   return false;
 })
+
+;; Predicats for the arguments of sfunc R4, R5 and R6.
+(define_predicate "hard_reg_r4"
+  (match_code "reg")
+{
+  return REGNO (op) == R4_REG;
+})
+
+(define_predicate "hard_reg_r5"
+  (match_code "reg")
+{
+  return REGNO (op) == R5_REG;
+})
+
+(define_predicate "hard_reg_r6"
+  (match_code "reg")
+{
+  return REGNO (op) == R6_REG;
+})
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index e22419912d6f..751c826e84fb 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -134,7 +134,7 @@ expand_block_move (rtx *operands)
 
  int dwords = bytes >> 3;
  emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
- emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx, lab, r4, r5, r6));
  return true;
}
   else
@@ -178,7 +178,7 @@ expand_block_move (rtx *operands)
   final_switch = 16 - ((bytes / 4) % 16);
   while_loop = ((bytes / 4) / 16 - 1) * 16;
   emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
-  emit_insn (gen_block_lump_real (func_addr_rtx, lab));
+  emit_insn (gen_block_lump_real (func_addr_rtx, lab, r4, r5, r6));
   return true;
 }
 
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index c69eda36885f..451ae0b76891 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -2194,13 +2194,24 @@
 ;; there is nothing to prevent reload from using r0 to reload the address.
 ;; This reload would clobber the value in r0 we are trying to store.
 ;; If we let reload allocate r0, then this problem can never happen.
+;;
+;; In addition to that, we also must pin the input regs to hard-regs via the
+;; predicates.  When these insns are instantiated it also emits the
+;; accompanying mov insns to load the hard-regs.  However, subsequent RTL
+;; passes might move things around and reassign the operands to pseudo regs
+;; which might get allocated to different (wrong) hard-regs eventually.  To
+;; avoid that, only allow matching these insns if the operands are the
+;; expected hard-regs.
 (define_insn "udivsi3_i1"
   [(set (match_operand:SI 0 "register_operand" "=z,z")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+(match_operand:SI 4 "hard_reg_r5" "=r,r")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
+   (clobber (match_dup 3))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r,r"))
(use (match_operand 2 "" "Z,Ccl"))]
   "TARGET_SH1 && TARGET_DIVIDE_CALL_DIV1"
@@ -2212,7 +2223,8 @@
 
 (define_insn "udivsi3_i4"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_operand:SI 3 "hard_reg_r4" "=r,r")
+(match_operand:SI 4 "hard_reg_r5" "=r,r")))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))
(clobber (reg:DF DR0_REG))
@@ -2220,9 +2232,11 @@
(clobber (reg:DF DR4_REG))
(clobber (reg:SI R0_REG))
(clobber (reg:SI R1_REG))
-   (clobber (reg:SI R4_REG))
-   (clobber (reg:SI R5_REG))
+   (clobber (match_dup 3))
+   (clobber (match_dup 4))
(clobber (reg:SI FPSCR_STAT_REG))
+   (use (reg:SI R4_REG))
+   (use (reg:SI R5_REG))
(use (match_operand:SI 1 "arith_reg_operand" "r,r"))
(use (match_operand 2 "" "Z,Ccl"))
(use (reg:SI FPSCR_MODES_REG))]
@@ -2236,7 +2250,8 @@
 
 (define_insn "udivsi3_i4_single"
   [(set (match_operand:SI 0 "register_operand" "=y,y")
-   (udiv:SI (reg:SI R4_REG) (reg:SI R5_REG)))
+   (udiv:SI (match_o

[gcc/devel/sh-lra] SH: Fix the condition to use movsf_ie_y pattern.

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:eee3b80ab6217ab792f01280bc6240c55468600a

commit eee3b80ab6217ab792f01280bc6240c55468600a
Author: Kaz Kojima 
Date:   Tue Sep 24 18:35:46 2024 +0900

SH: Fix the condition to use movsf_ie_y pattern.

gcc/ChangeLog:
* config/sh/sh.cc (sh_movsf_ie_y_split_p): Take the subreg
of DImode into account.

Diff:
---
 gcc/config/sh/sh.cc | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index b2ba7488c5de..d9e7e67cba65 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -11462,11 +11462,15 @@ sh_movsf_ie_y_split_p (rtx op0, rtx op1)
 {
   /* f, r */
   if (REG_P (op0)
-  && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode))
+  && (SUBREG_P (op1)
+ && (GET_MODE (SUBREG_REG (op1)) == SImode
+ || GET_MODE (SUBREG_REG (op1)) == DImode)))
 return true;
   /* r, f */
   if (REG_P (op1)
-  && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode))
+  && (SUBREG_P (op0)
+ && (GET_MODE (SUBREG_REG (op0)) == SImode
+ || GET_MODE (SUBREG_REG (op0)) == DImode)))
 return true;
 
   return false;


[gcc/devel/sh-lra] SH: A test case for the wrong-code with -mlra PR55212 c#248.

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:c5a639c84c15adf402d5b664184628ff809a5bb0

commit c5a639c84c15adf402d5b664184628ff809a5bb0
Author: Kaz Kojima 
Date:   Fri Sep 20 18:36:22 2024 +0900

SH: A test case for the wrong-code with -mlra PR55212 c#248.

gcc/testsuite/ChangeLog:
* gcc.target/sh/pr55212-c248.c: New test.

Diff:
---
 gcc/testsuite/gcc.target/sh/pr55212-c248.c | 31 ++
 1 file changed, 31 insertions(+)

diff --git a/gcc/testsuite/gcc.target/sh/pr55212-c248.c 
b/gcc/testsuite/gcc.target/sh/pr55212-c248.c
new file mode 100644
index ..94fd6afaab3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sh/pr55212-c248.c
@@ -0,0 +1,31 @@
+/* { dg-do run }  */
+/* { dg-options "-O2 -m4 -mlra -ffixed-r7 -ffixed-r8 -ffixed-r9 -ffixed-r10 
-ffixed-r11 -ffixed-r12 -ffixed-r13" } */
+#include 
+#include 
+
+typedef struct { int c[64]; } obj;
+obj obj0;
+obj obj1;
+
+void __attribute__ ((noinline))
+bar (int a, int b, int c, int d, obj *q)
+{
+  if (q->c[0] != 0x12345678 || q->c[1] != 0xdeadbeef) 
+abort ();
+}
+
+void foo (obj *p)
+{
+  obj bobj;
+  bobj = *p;
+  bar (0, 0, 0, 0, &bobj);
+}
+
+int
+main ()
+{
+  obj0.c[0] = 0x12345678;
+  obj0.c[1] = 0xdeadbeef;
+  foo (&obj0);
+  exit (0);
+}


[gcc/devel/sh-lra] LRA: Take scratch as implicit unused output reloads

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:dbd192caf14916d0869c7123e03ad21ef7bfd65f

commit dbd192caf14916d0869c7123e03ad21ef7bfd65f
Author: Alexandre Oliva 
Date:   Fri Sep 20 18:22:12 2024 +0900

LRA: Take scratch as implicit unused output reloads

gcc/ChangeLog:
* lra-constraints.cc (match_reload, process_alt_operands,
curr_insn_transform): Take scratch as implicit unused
output reloads.

Diff:
---
 gcc/lra-constraints.cc | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index fdcc07764a2e..4bc40ef532ba 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -1213,7 +1213,9 @@ match_reload (signed char out, signed char *ins, signed 
char *outs,
 return;
   /* See a comment for the input operand above.  */
   narrow_reload_pseudo_class (out_rtx, goal_class);
-  if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX)
+  if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX
+  && !ira_former_scratch_p (REGNO (SUBREG_P (out_rtx)
+  ? SUBREG_REG (out_rtx) : out_rtx)))
 {
   reg = SUBREG_P (out_rtx) ? SUBREG_REG (out_rtx) : out_rtx;
   start_sequence ();
@@ -2946,7 +2948,8 @@ process_alt_operands (int only_alternative)
 objects with a REG_UNUSED note.  */
  if ((curr_static_id->operand[nop].type != OP_IN
   && no_output_reloads_p
-  && ! find_reg_note (curr_insn, REG_UNUSED, op))
+  && ! find_reg_note (curr_insn, REG_UNUSED, op)
+  && ! ira_former_scratch_p (REGNO (operand_reg[nop])))
  || (curr_static_id->operand[nop].type != OP_OUT
  && no_input_reloads_p && ! const_to_mem)
  || (this_alternative_matches >= 0
@@ -2956,7 +2959,9 @@ process_alt_operands (int only_alternative)
  [this_alternative_matches].type != OP_IN)
  && ! find_reg_note (curr_insn, REG_UNUSED,
  no_subreg_reg_operand
- 
[this_alternative_matches])
+ [this_alternative_matches])
+ && ! (ira_former_scratch_p
+   (REGNO (operand_reg[nop])))
{
  if (lra_dump_file != NULL)
fprintf
@@ -4744,7 +4749,8 @@ curr_insn_transform (bool check_only_p)
  if (type != OP_IN
  && find_reg_note (curr_insn, REG_UNUSED, old) == NULL_RTX
  /* OLD can be an equivalent constant here.  */
- && !CONSTANT_P (old))
+ && !CONSTANT_P (old)
+ && !ira_former_scratch_p (REGNO (old)))
{
  start_sequence ();
  lra_emit_move (type == OP_INOUT ? copy_rtx (old) : old, new_reg);


[gcc/devel/sh-lra] SH: try to work around fp-reg related move insns

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:b924a79f5d3a8d32e4e65e62b63dc4432076af1d

commit b924a79f5d3a8d32e4e65e62b63dc4432076af1d
Author: Kaz Kojima 
Date:   Fri Sep 20 18:17:31 2024 +0900

SH: try to work around fp-reg related move insns

LRA will try to satisfy the constraints in match_scratch for the memory
displacements, and this causes issues on this target.  To mitigate the
issue, split movsf_ie_ra into several new patterns to remove
match_scratch.  Also define a new sub-pattern of movdf for constant
loads.

gcc/ChangeLog:
* gcc/config/sh/predicates.md (pc_relative_load_operand):
New predicate.
* gcc/config/sh/sh-protos.h (sh_movsf_ie_ra_split_p): Remove.
(sh_movsf_ie_y_split_p): New proto.
* gcc/config/sh/sh.cc (sh_movsf_ie_ra_split_p): Remove.
(sh_movsf_ie_y_split_p): New function.
(broken_move): Take movsf_ie_ra into account for fldi cases.
* gcc/config/sh/sh.md (movdf_i4_F_z): New insn pattern.
(movdf): Use it.
(movsf_ie_ra): Use define_insn instead of define_insn_and_split.
(movsf_ie_F_z, movsf_ie_Q_z, movsf_ie_y): New insn pattern.
(movsf): Use new patterns.
(movsf-1):  Don't split when operands[0] or operands[1]
is fpul.
(movdf_i4_F_z+7): New splitter.

Diff:
---
 gcc/config/sh/predicates.md |  11 +
 gcc/config/sh/sh-protos.h   |   2 +-
 gcc/config/sh/sh.cc |  30 -
 gcc/config/sh/sh.md | 103 +++-
 4 files changed, 95 insertions(+), 51 deletions(-)

diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index da32329b4b54..3732cec9608b 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -485,6 +485,17 @@
 && sh_legitimate_index_p (mode, XEXP (plus0_rtx, 1), TARGET_SH2A, 
true);
 })
 
+;; Returns true if OP is a pc relative load operand.
+(define_predicate "pc_relative_load_operand"
+  (match_code "mem")
+{
+  if (GET_MODE (op) != QImode
+  && IS_PC_RELATIVE_LOAD_ADDR_P (XEXP (op, 0)))
+return true;
+
+  return false;
+})
+
 ;; Returns true if OP is a valid source operand for a logical operation.
 (define_predicate "logical_operand"
   (and (match_code "subreg,reg,const_int")
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 5e5bd0aff7e7..ffbe5164f08c 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -103,7 +103,7 @@ extern rtx sh_find_equiv_gbr_addr (rtx_insn* cur_insn, rtx 
mem);
 extern int sh_eval_treg_value (rtx op);
 extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op);
 extern int sh_max_mov_insn_displacement (machine_mode mode, bool 
consider_sh2a);
-extern bool sh_movsf_ie_ra_split_p (rtx, rtx, rtx);
+extern bool sh_movsf_ie_y_split_p (rtx, rtx);
 extern void sh_expand_sym_label2reg (rtx, rtx, rtx, bool);
 
 /* Result value of sh_find_set_of_reg.  */
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index c9222c3e6ac0..b2ba7488c5de 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -4832,6 +4832,7 @@ broken_move (rtx_insn *insn)
   we changed this to do a constant load.  In that case
   we don't have an r0 clobber, hence we must use fldi.  */
&& (TARGET_FMOVD
+   || sh_lra_p ()
|| (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
== SCRATCH))
&& REG_P (SET_DEST (pat))
@@ -11455,30 +11456,17 @@ sh_legitimize_address_displacement (rtx *offset1, rtx 
*offset2,
   return false;
 }
 
-/* Return true if movsf insn should be splited with an additional
-   register.  */
+/* Return true if movsf insn should be splited with fpul register.  */
 bool
-sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
+sh_movsf_ie_y_split_p (rtx op0, rtx op1)
 {
-  /* op0 == op1 */
-  if (rtx_equal_p (op0, op1))
+  /* f, r */
+  if (REG_P (op0)
+  && (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == SImode))
 return true;
-  /* fy, FQ, reg */
-  if (GET_CODE (op1) == CONST_DOUBLE
-  && ! satisfies_constraint_G (op1)
-  && ! satisfies_constraint_H (op1)
-  && REG_P (op0)
-  && REG_P (op2))
-return true;
-  /* f, r, y */
-  if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
-  && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
-  && REG_P (op2) && (REGNO (op2) == FPUL_REG))
-return true;
-  /* r, f, y */
-  if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
-  && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
-  && REG_P (op2) && (REGNO (op2) == FPUL_REG))
+  /* r, f */
+  if (REG_P (op1)
+  && (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == SImode))
 return true;
 
   return false;
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 6d93f5cb816b..c69eda36885f 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -5858,6 +5858,15 @@
   

[gcc/devel/sh-lra] SH: enable LRA by default

2024-09-24 Thread Oleg Endo via Gcc-cvs
https://gcc.gnu.org/g:09210f927265fb4f198e904ba11297ac1e370554

commit 09210f927265fb4f198e904ba11297ac1e370554
Author: Oleg Endo 
Date:   Wed Sep 25 09:47:25 2024 +0900

SH: enable LRA by default

gcc/ChangeLog:

PR target/55212
* config/sh/sh.opt (sh_lra_flag): Init to 1.

Diff:
---
 gcc/config/sh/sh.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/sh/sh.opt b/gcc/config/sh/sh.opt
index c44cfe70cb11..718dfb744ff5 100644
--- a/gcc/config/sh/sh.opt
+++ b/gcc/config/sh/sh.opt
@@ -299,5 +299,5 @@ Target Var(TARGET_FSRRA)
 Enable the use of the fsrra instruction.
 
 mlra
-Target Var(sh_lra_flag) Init(0) Save
+Target Var(sh_lra_flag) Init(1) Save
 Use LRA instead of reload (transitional).