Hello, I initially forgot to implement DImode GBR-based loads/stores. The attached patch adds them and also fixes a problem with the GBR address mode optimization: when GBR is marked as call used, the backwards search for the insn that stored GBR must stop at a call insn, because the call may have changed GBR. Tested on rev 192417 with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
and saw no new failures. OK? Cheers, Oleg gcc/ChangeLog: PR target/54760 * config/sh/sh.c (sh_find_base_reg_disp): Stop searching insns when hitting a call insn if GBR is marked as call used. * config/sh/iterators.md (QIHISIDI): New mode iterator. * config/sh/predicates.md (gbr_address_mem): New predicate. * config/sh/sh.md (*movdi_gbr_load, *movdi_gbr_store): New insn_and_splits. Use QIHISIDI instead of QIHISI in unnamed GBR addressing splits. Use replace_equiv_address instead of change_address in unnamed GBR addressing splits. testsuite/ChangeLog: PR target/54760 * gcc.target/sh/pr54760-2.c: Add long long and unsigned long long test functions. * gcc.target/sh/pr54760-4.c: New.
Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 192417) +++ gcc/config/sh/sh.c (working copy) @@ -13383,6 +13383,10 @@ for (rtx i = prev_nonnote_insn (insn); i != NULL; i = prev_nonnote_insn (i)) { + if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG) + && CALL_P (i)) + break; + if (!NONJUMP_INSN_P (i)) continue; Index: gcc/config/sh/sh.md =================================================================== --- gcc/config/sh/sh.md (revision 192417) +++ gcc/config/sh/sh.md (working copy) @@ -10277,6 +10277,47 @@ "mov.<bwl> %0,@(0,gbr)" [(set_attr "type" "store")]) +;; DImode memory accesses have to be split in two SImode accesses. +;; Split them before reload, so that it gets a better chance to figure out +;; how to deal with the R0 restriction for the individual SImode accesses. +;; Do not match this insn during or after reload because it can't be split +;; afterwards. +(define_insn_and_split "*movdi_gbr_load" + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gbr_address_mem"))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 4) (match_dup 6))] +{ + /* Swap low/high part load order on little endian, so that the result reg + of the second load can be used better. */ + int off = TARGET_LITTLE_ENDIAN ? 
1 : 0; + operands[3 + off] = gen_lowpart (SImode, operands[0]); + operands[5 + off] = gen_lowpart (SImode, operands[1]); + operands[4 - off] = gen_highpart (SImode, operands[0]); + operands[6 - off] = gen_highpart (SImode, operands[1]); +}) + +(define_insn_and_split "*movdi_gbr_store" + [(set (match_operand:DI 0 "gbr_address_mem") + (match_operand:DI 1 "register_operand"))] + "TARGET_SH1 && can_create_pseudo_p ()" + "#" + "&& 1" + [(set (match_dup 3) (match_dup 5)) + (set (match_dup 4) (match_dup 6))] +{ + /* Swap low/high part store order on big endian, so that stores of function + call results can save a reg copy. */ + int off = TARGET_LITTLE_ENDIAN ? 0 : 1; + operands[3 + off] = gen_lowpart (SImode, operands[0]); + operands[5 + off] = gen_lowpart (SImode, operands[1]); + operands[4 - off] = gen_highpart (SImode, operands[0]); + operands[6 - off] = gen_highpart (SImode, operands[1]); +}) + ;; Sometimes memory accesses do not get combined with the store_gbr insn, ;; in particular when the displacements are in the range of the regular move ;; insns. Thus, in the first split pass after the combine pass we search @@ -10287,15 +10328,15 @@ ;; other operand) and there's no point of doing it if the GBR is not ;; referenced in a function at all. 
(define_split - [(set (match_operand:QIHISI 0 "register_operand") - (match_operand:QIHISI 1 "memory_operand"))] + [(set (match_operand:QIHISIDI 0 "register_operand") + (match_operand:QIHISIDI 1 "memory_operand"))] "TARGET_SH1 && !reload_in_progress && !reload_completed && df_regs_ever_live_p (GBR_REG)" [(set (match_dup 0) (match_dup 1))] { rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]); if (gbr_mem != NULL_RTX) - operands[1] = change_address (operands[1], GET_MODE (operands[1]), gbr_mem); + operands[1] = replace_equiv_address (operands[1], gbr_mem); else FAIL; }) @@ -10309,7 +10350,7 @@ { rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[1]); if (gbr_mem != NULL_RTX) - operands[1] = change_address (operands[1], GET_MODE (operands[1]), gbr_mem); + operands[1] = replace_equiv_address (operands[1], gbr_mem); else FAIL; }) @@ -10328,23 +10369,22 @@ if (gbr_mem != NULL_RTX) { operands[2] = gen_reg_rtx (GET_MODE (operands[1])); - operands[1] = change_address (operands[1], GET_MODE (operands[1]), - gbr_mem); + operands[1] = replace_equiv_address (operands[1], gbr_mem); } else FAIL; }) (define_split - [(set (match_operand:QIHISI 0 "memory_operand") - (match_operand:QIHISI 1 "register_operand"))] + [(set (match_operand:QIHISIDI 0 "memory_operand") + (match_operand:QIHISIDI 1 "register_operand"))] "TARGET_SH1 && !reload_in_progress && !reload_completed && df_regs_ever_live_p (GBR_REG)" [(set (match_dup 0) (match_dup 1))] { rtx gbr_mem = sh_find_equiv_gbr_addr (curr_insn, operands[0]); if (gbr_mem != NULL_RTX) - operands[0] = change_address (operands[0], GET_MODE (operands[0]), gbr_mem); + operands[0] = replace_equiv_address (operands[0], gbr_mem); else FAIL; }) Index: gcc/config/sh/iterators.md =================================================================== --- gcc/config/sh/iterators.md (revision 192417) +++ gcc/config/sh/iterators.md (working copy) @@ -18,6 +18,7 @@ ;; along with GCC; see the file COPYING3. 
If not see ;; <http://www.gnu.org/licenses/>. +(define_mode_iterator QIHISIDI [QI HI SI DI]) (define_mode_iterator QIHISI [QI HI SI]) (define_mode_iterator QIHI [QI HI]) (define_mode_iterator HISI [HI SI]) Index: gcc/config/sh/predicates.md =================================================================== --- gcc/config/sh/predicates.md (revision 192417) +++ gcc/config/sh/predicates.md (working copy) @@ -1139,3 +1139,20 @@ return INTVAL (op) >= 0 && INTVAL (op) <= max_disp; }) + +;; A predicate that determines whether OP is a valid GBR addressing mode +;; memory reference. +(define_predicate "gbr_address_mem" + (match_code "mem") +{ + rtx addr = XEXP (op, 0); + + if (REG_P (addr) && REGNO (addr) == GBR_REG) + return true; + if (GET_CODE (addr) == PLUS + && REG_P (XEXP (addr, 0)) && REGNO (XEXP (addr, 0)) == GBR_REG + && gbr_displacement (XEXP (addr, 1), mode)) + return true; + + return false; +}) Index: gcc/testsuite/gcc.target/sh/pr54760-2.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54760-2.c (revision 192417) +++ gcc/testsuite/gcc.target/sh/pr54760-2.c (working copy) @@ -9,107 +9,129 @@ /* --------------------------------------------------------------------------- Simple GBR load. 
*/ -#define func(name, type, disp)\ - int \ +#define func(name, rettype, type, disp)\ + rettype \ name ## _tp_load (void) \ { \ type* tp = (type*)__builtin_thread_pointer (); \ return tp[disp]; \ } -func (test00, int, 0) -func (test01, int, 5) -func (test02, int, 255) +func (test00, int, int, 0) +func (test01, int, int, 5) +func (test02, int, int, 255) -func (test03, short, 0) -func (test04, short, 5) -func (test05, short, 255) +func (test03, int, short, 0) +func (test04, int, short, 5) +func (test05, int, short, 255) -func (test06, char, 0) -func (test07, char, 5) -func (test08, char, 255) +func (test06, int, char, 0) +func (test07, int, char, 5) +func (test08, int, char, 255) -func (test09, unsigned int, 0) -func (test10, unsigned int, 5) -func (test11, unsigned int, 255) +func (test09, int, unsigned int, 0) +func (test10, int, unsigned int, 5) +func (test11, int, unsigned int, 255) -func (test12, unsigned short, 0) -func (test13, unsigned short, 5) -func (test14, unsigned short, 255) +func (test12, int, unsigned short, 0) +func (test13, int, unsigned short, 5) +func (test14, int, unsigned short, 255) -func (test15, unsigned char, 0) -func (test16, unsigned char, 5) -func (test17, unsigned char, 255) +func (test15, int, unsigned char, 0) +func (test16, int, unsigned char, 5) +func (test17, int, unsigned char, 255) +func (test18, long long, long long, 0) +func (test19, long long, long long, 5) +func (test20, long long, long long, 127) + +func (test21, long long, unsigned long long, 0) +func (test22, long long, unsigned long long, 5) +func (test23, long long, unsigned long long, 127) + #undef func /* --------------------------------------------------------------------------- Simple GBR store. 
*/ -#define func(name, type, disp)\ +#define func(name, argtype, type, disp)\ void \ - name ## _tp_store (int a) \ + name ## _tp_store (argtype a) \ { \ type* tp = (type*)__builtin_thread_pointer (); \ tp[disp] = (type)a; \ } -func (test00, int, 0) -func (test01, int, 5) -func (test02, int, 255) +func (test00, int, int, 0) +func (test01, int, int, 5) +func (test02, int, int, 255) -func (test03, short, 0) -func (test04, short, 5) -func (test05, short, 255) +func (test03, int, short, 0) +func (test04, int, short, 5) +func (test05, int, short, 255) -func (test06, char, 0) -func (test07, char, 5) -func (test08, char, 255) +func (test06, int, char, 0) +func (test07, int, char, 5) +func (test08, int, char, 255) -func (test09, unsigned int, 0) -func (test10, unsigned int, 5) -func (test11, unsigned int, 255) +func (test09, int, unsigned int, 0) +func (test10, int, unsigned int, 5) +func (test11, int, unsigned int, 255) -func (test12, unsigned short, 0) -func (test13, unsigned short, 5) -func (test14, unsigned short, 255) +func (test12, int, unsigned short, 0) +func (test13, int, unsigned short, 5) +func (test14, int, unsigned short, 255) -func (test15, unsigned char, 0) -func (test16, unsigned char, 5) -func (test17, unsigned char, 255) +func (test15, int, unsigned char, 0) +func (test16, int, unsigned char, 5) +func (test17, int, unsigned char, 255) +func (test18, long long, long long, 0) +func (test19, long long, long long, 5) +func (test20, long long, long long, 127) + +func (test21, long long, unsigned long long, 0) +func (test22, long long, unsigned long long, 5) +func (test23, long long, unsigned long long, 127) + #undef func /* --------------------------------------------------------------------------- Arithmetic on the result of a GBR load. 
*/ -#define func(name, type, disp, op, opname)\ - int \ - name ## _tp_load_arith_ ##opname (int a) \ +#define func(name, retargtype, type, disp, op, opname)\ + retargtype \ + name ## _tp_load_arith_ ##opname (retargtype a) \ { \ type* tp = (type*)__builtin_thread_pointer (); \ return tp[disp] op a; \ } #define funcs(op, opname) \ - func (test00, int, 0, op, opname) \ - func (test01, int, 5, op, opname) \ - func (test02, int, 255, op, opname) \ - func (test03, short, 0, op, opname) \ - func (test04, short, 5, op, opname) \ - func (test05, short, 255, op, opname) \ - func (test06, char, 0, op, opname) \ - func (test07, char, 5, op, opname) \ - func (test08, char, 255, op, opname) \ - func (test09, unsigned int, 0, op, opname) \ - func (test10, unsigned int, 5, op, opname) \ - func (test11, unsigned int, 255, op, opname) \ - func (test12, unsigned short, 0, op, opname) \ - func (test13, unsigned short, 5, op, opname) \ - func (test14, unsigned short, 255, op, opname) \ - func (test15, unsigned char, 0, op, opname) \ - func (test16, unsigned char, 5, op, opname) \ - func (test17, unsigned char, 255, op, opname) \ + func (test00, int, int, 0, op, opname) \ + func (test01, int, int, 5, op, opname) \ + func (test02, int, int, 255, op, opname) \ + func (test03, int, short, 0, op, opname) \ + func (test04, int, short, 5, op, opname) \ + func (test05, int, short, 255, op, opname) \ + func (test06, int, char, 0, op, opname) \ + func (test07, int, char, 5, op, opname) \ + func (test08, int, char, 255, op, opname) \ + func (test09, int, unsigned int, 0, op, opname) \ + func (test10, int, unsigned int, 5, op, opname) \ + func (test11, int, unsigned int, 255, op, opname) \ + func (test12, int, unsigned short, 0, op, opname) \ + func (test13, int, unsigned short, 5, op, opname) \ + func (test14, int, unsigned short, 255, op, opname) \ + func (test15, int, unsigned char, 0, op, opname) \ + func (test16, int, unsigned char, 5, op, opname) \ + func (test17, int, unsigned char, 255, 
op, opname) \ + func (test18, long long, long long, 0, op, opname) \ + func (test19, long long, long long, 5, op, opname) \ + func (test20, long long, long long, 127, op, opname) \ + func (test21, long long, unsigned long long, 0, op, opname) \ + func (test22, long long, unsigned long long, 5, op, opname) \ + func (test23, long long, unsigned long long, 127, op, opname) \ funcs (+, plus) funcs (-, minus) @@ -124,8 +146,8 @@ /* --------------------------------------------------------------------------- Arithmetic of the result of two GBR loads. */ -#define func(name, type, disp0, disp1, op, opname)\ - int \ +#define func(name, rettype, type, disp0, disp1, op, opname)\ + rettype \ name ## _tp_load_load_arith_ ##opname (void) \ { \ type* tp = (type*)__builtin_thread_pointer (); \ @@ -133,18 +155,22 @@ } #define funcs(op, opname) \ - func (test00, int, 0, 5, op, opname) \ - func (test02, int, 1, 255, op, opname) \ - func (test03, short, 0, 5, op, opname) \ - func (test05, short, 1, 255, op, opname) \ - func (test06, char, 0, 5, op, opname) \ - func (test08, char, 1, 255, op, opname) \ - func (test09, unsigned int, 0, 5, op, opname) \ - func (test11, unsigned int, 1, 255, op, opname) \ - func (test12, unsigned short, 0, 5, op, opname) \ - func (test14, unsigned short, 1, 255, op, opname) \ - func (test15, unsigned char, 0, 5, op, opname) \ - func (test17, unsigned char, 1, 255, op, opname) \ + func (test00, int, int, 0, 5, op, opname) \ + func (test02, int, int, 1, 255, op, opname) \ + func (test03, int, short, 0, 5, op, opname) \ + func (test05, int, short, 1, 255, op, opname) \ + func (test06, int, char, 0, 5, op, opname) \ + func (test08, int, char, 1, 255, op, opname) \ + func (test09, int, unsigned int, 0, 5, op, opname) \ + func (test11, int, unsigned int, 1, 255, op, opname) \ + func (test12, int, unsigned short, 0, 5, op, opname) \ + func (test14, int, unsigned short, 1, 255, op, opname) \ + func (test15, int, unsigned char, 0, 5, op, opname) \ + func (test17, 
int, unsigned char, 1, 255, op, opname) \ + func (test18, long long, long long, 0, 5, op, opname) \ + func (test19, long long, long long, 1, 127, op, opname) \ + func (test20, long long, unsigned long long, 0, 5, op, opname) \ + func (test21, long long, unsigned long long, 1, 127, op, opname) \ funcs (+, plus) funcs (-, minus) @@ -180,6 +206,10 @@ func (test14, unsigned short, 1, 255) func (test15, unsigned char, 0, 5) func (test17, unsigned char, 1, 255) +func (test18, long long, 0, 5) +func (test19, long long, 1, 127) +func (test20, unsigned long long, 0, 5) +func (test21, unsigned long long, 1, 127) #undef func @@ -187,33 +217,39 @@ GBR load, arithmetic, GBR store */ -#define func(name, type, disp, op, opname)\ +#define func(name, argtype, type, disp, op, opname)\ void \ - name ## _tp_load_arith_store_ ##opname (int a) \ + name ## _tp_load_arith_store_ ##opname (argtype a) \ { \ type* tp = (type*)__builtin_thread_pointer (); \ tp[disp] op a; \ } #define funcs(op, opname) \ - func (test00, int, 0, op, opname) \ - func (test01, int, 5, op, opname) \ - func (test02, int, 255, op, opname) \ - func (test03, short, 0, op, opname) \ - func (test04, short, 5, op, opname) \ - func (test05, short, 255, op, opname) \ - func (test06, char, 0, op, opname) \ - func (test07, char, 5, op, opname) \ - func (test08, char, 255, op, opname) \ - func (test09, unsigned int, 0, op, opname) \ - func (test10, unsigned int, 5, op, opname) \ - func (test11, unsigned int, 255, op, opname) \ - func (test12, unsigned short, 0, op, opname) \ - func (test13, unsigned short, 5, op, opname) \ - func (test14, unsigned short, 255, op, opname) \ - func (test15, unsigned char, 0, op, opname) \ - func (test16, unsigned char, 5, op, opname) \ - func (test17, unsigned char, 255, op, opname) \ + func (test00, int, int, 0, op, opname) \ + func (test01, int, int, 5, op, opname) \ + func (test02, int, int, 255, op, opname) \ + func (test03, int, short, 0, op, opname) \ + func (test04, int, short, 5, op, 
opname) \ + func (test05, int, short, 255, op, opname) \ + func (test06, int, char, 0, op, opname) \ + func (test07, int, char, 5, op, opname) \ + func (test08, int, char, 255, op, opname) \ + func (test09, int, unsigned int, 0, op, opname) \ + func (test10, int, unsigned int, 5, op, opname) \ + func (test11, int, unsigned int, 255, op, opname) \ + func (test12, int, unsigned short, 0, op, opname) \ + func (test13, int, unsigned short, 5, op, opname) \ + func (test14, int, unsigned short, 255, op, opname) \ + func (test15, int, unsigned char, 0, op, opname) \ + func (test16, int, unsigned char, 5, op, opname) \ + func (test17, int, unsigned char, 255, op, opname) \ + func (test18, long long, long long, 0, op, opname) \ + func (test19, long long, long long, 5, op, opname) \ + func (test20, long long, long long, 127, op, opname) \ + func (test21, long long, unsigned long long, 0, op, opname) \ + func (test22, long long, unsigned long long, 5, op, opname) \ + func (test23, long long, unsigned long long, 127, op, opname) \ funcs (+=, plus) funcs (-=, minus) Index: gcc/testsuite/gcc.target/sh/pr54760-4.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54760-4.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54760-4.c (revision 0) @@ -0,0 +1,19 @@ +/* Check that the GBR address optimization does not combine a gbr store + and its use when a function call is inbetween, when GBR is a call used + register, i.e. it is invalidated by function calls. */ +/* { dg-do compile { target "sh*-*-*" } } */ +/* { dg-options "-O1 -fcall-used-gbr" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler "stc\tgbr" } } */ + +extern int test00 (void); +int +test01 (int x) +{ + /* We must see a stc gbr,rn before the function call, because + a function call could modify the gbr. In this case the user requests + the old gbr value, before the function call. 
*/ + int* p = (int*)__builtin_thread_pointer (); + p[5] = test00 (); + return 0; +}