Hi, The attached patch fixes PR 59401. Tested with make -k check RUNTESTFLAGS="--target_board=sh-sim\{-m4/-ml,-m4/-mb}" with no new failures. Committed as r216128. I'd like to backport this to 4.8 and 4.9 later.
Cheers, Oleg gcc/ChangeLog PR target/59401 * config/sh/sh-protos.h (sh_find_equiv_gbr_addr): Use rtx_insn* instead of rtx. * config/sh/sh.c (sh_find_base_reg_disp): Use def chains instead of insn walking. (sh_find_equiv_gbr_addr): Do nothing if input mem is already a GBR address. Use def chains to handle GBR clobbering call insns. gcc/testsuite/ChangeLog PR target/59401 PR target/54760 * gcc.target/sh/pr54760-5.c: New. * gcc.target/sh/pr54760-6.c: New. * gcc.target/sh/pr59401-1.c: New.
Index: gcc/testsuite/gcc.target/sh/pr54760-6.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54760-6.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54760-6.c (revision 0) @@ -0,0 +1,19 @@ +/* Check that the GBR address optimization works when the GBR register + definition is not in the same basic block where the GBR memory accesses + are. */ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-not "stc\tgbr" } } */ + +typedef struct +{ + int x, y, z, w; +} tcb_t; + +int +test_00 (int a, tcb_t* b, int c) +{ + tcb_t* tcb = (tcb_t*)__builtin_thread_pointer (); + return (a & 5) ? tcb->x : tcb->w; +} Index: gcc/testsuite/gcc.target/sh/pr59401-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr59401-1.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr59401-1.c (revision 0) @@ -0,0 +1,20 @@ +/* Check that the GBR address optimization does not produce wrong memory + accesses. In this case the GBR value must be stored to a normal register + and a GBR memory access must not be done. */ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler "stc\tgbr" } } */ +/* { dg-final { scan-assembler "bf|bt" } } */ + +typedef struct +{ + int x, y, z, w; +} tcb_t; + +int +test_00 (int a, tcb_t* b) +{ + tcb_t* tcb = (a & 5) ? (tcb_t*)__builtin_thread_pointer () : b; + return tcb->w + tcb->x; +} Index: gcc/testsuite/gcc.target/sh/pr54760-5.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr54760-5.c (revision 0) +++ gcc/testsuite/gcc.target/sh/pr54760-5.c (revision 0) @@ -0,0 +1,26 @@ +/* Check that the GBR address optimization works when there are multiple + GBR register definitions and function calls, if the GBR is marked as a + call saved register. 
*/ +/* { dg-do compile } */ +/* { dg-options "-O1 -fcall-saved-gbr" } */ +/* { dg-skip-if "" { "sh*-*-*" } { "-m5*"} { "" } } */ +/* { dg-final { scan-assembler-not "stc\tgbr" } } */ + +typedef struct +{ + int x, y, z, w; +} tcb_t; + +extern void test_00 (void); + +int +test_01 (int x, volatile int* y, int a) +{ + if (a) + test_00 (); + + y[0] = 1; + + tcb_t* tcb = (tcb_t*)__builtin_thread_pointer (); + return (a & 5) ? tcb->x : tcb->w; +} Index: gcc/config/sh/sh-protos.h =================================================================== --- gcc/config/sh/sh-protos.h (revision 216120) +++ gcc/config/sh/sh-protos.h (working copy) @@ -162,7 +162,7 @@ extern rtx sh_gen_truncate (enum machine_mode, rtx, int); extern bool sh_vector_mode_supported_p (enum machine_mode); extern bool sh_cfun_trap_exit_p (void); -extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem); +extern rtx sh_find_equiv_gbr_addr (rtx_insn* cur_insn, rtx mem); extern int sh_eval_treg_value (rtx op); extern HOST_WIDE_INT sh_disp_addr_displacement (rtx mem_op); extern int sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a); Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 216120) +++ gcc/config/sh/sh.c (working copy) @@ -13421,11 +13421,10 @@ } /* Find the base register and calculate the displacement for a given - address rtx 'x'. - This is done by walking the insn list backwards and following SET insns - that set the value of the specified reg 'x'. */ + address rtx 'x'. */ static base_reg_disp -sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL) +sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0, + rtx base_reg = NULL) { if (REG_P (x)) { @@ -13439,32 +13438,38 @@ if (REGNO (x) < FIRST_PSEUDO_REGISTER) return base_reg_disp (base_reg != NULL ? base_reg : x, disp); - /* Try to find the previous insn that sets the reg. 
*/ - for (rtx i = prev_nonnote_insn (insn); i != NULL; - i = prev_nonnote_insn (i)) + /* Find the def of the reg and trace it. If there are more than one + defs and they are not the same, assume it's not safe to proceed. */ + rtx_insn* last_i = NULL; + rtx last_set = NULL; + for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL; + d = DF_REF_NEXT_REG (d)) { - if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG) - && CALL_P (i)) - break; + rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d))); - if (!NONJUMP_INSN_P (i)) - continue; - - rtx p = PATTERN (i); - if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0)) - && REGNO (XEXP (p, 0)) == REGNO (x)) + /* Accept multiple defs, as long as they are equal. */ + if (last_set == NULL || rtx_equal_p (last_set, set)) { - /* If the recursion can't find out any more details about the - source of the set, then this reg becomes our new base reg. */ - return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0)); + last_i = DF_REF_INSN (d); + last_set = set; } + else + { + last_i = NULL; + last_set = NULL; + break; + } } - /* When here, no previous insn was found that sets the reg. - The input reg is already the base reg. */ - return base_reg_disp (x, disp); - } + if (last_set != NULL && last_i != NULL) + return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp, + XEXP (last_set, 0)); + /* When here, no previous insn was found that sets the reg. + The input reg is already the base reg. */ + return base_reg_disp (x, disp); + } + else if (GET_CODE (x) == PLUS) { base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0)); @@ -13493,19 +13498,47 @@ based memory address and return the corresponding new memory address. Return NULL_RTX if not found. 
*/ rtx -sh_find_equiv_gbr_addr (rtx insn, rtx mem) +sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem) { - if (!MEM_P (mem)) + if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem))) return NULL_RTX; /* Leave post/pre inc/dec or any other side effect addresses alone. */ if (side_effects_p (XEXP (mem, 0))) return NULL_RTX; + /* When not optimizing there might be no dataflow available. */ + if (df == NULL) + return NULL_RTX; + base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0)); if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG) { + /* If GBR is marked as call clobbered we bail out if we see a call. + FIXME: Actually should check if this mem refers to the gbr value + before or after the call. If there is a store_gbr preceding this + mem, it's safe to use GBR for this mem. + + If GBR is not marked as call clobbered, but there is some other + def than a call, it's probably a load_gbr upon which we also + bail out to be on the safe side. + FIXME: Should check if we have a use-after-def case, such as + the call case above. */ + for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL; + d = DF_REF_NEXT_REG (d)) + { + if (CALL_P (DF_REF_INSN (d))) + { + if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)) + return NULL_RTX; + else + continue; + } + else + return NULL_RTX; + } + rtx disp = GEN_INT (gbr_disp.disp ()); if (gbr_displacement (disp, GET_MODE (mem))) return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);