Hi,

The attached patch slightly reduces the code size of the inlined builtin strlen expansion on SH.

Tested on r210083 with
make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
There are no new failures, except for gcc.target/sh/pr53976-1.c on SH2 and SH2A. That test used builtin strlen to check the sett/clrt optimization pass, which was a bit inappropriate in this case, so the affected test case has been removed (see the testsuite ChangeLog below).

Committed as r210187.

Cheers,
Oleg

gcc/ChangeLog:

	PR target/60884
	* config/sh/sh-mem.cc (sh_expand_strlen): Use loop when emitting unrolled byte insns. Emit address increments after move insns.

gcc/testsuite/ChangeLog:

	PR target/60884
	* gcc.target/sh/pr53976-1.c (test_02): Remove inappropriate test case.
	(test_03): Rename to test_02.
Index: gcc/testsuite/gcc.target/sh/pr53976-1.c =================================================================== --- gcc/testsuite/gcc.target/sh/pr53976-1.c (revision 210185) +++ gcc/testsuite/gcc.target/sh/pr53976-1.c (working copy) @@ -24,15 +24,8 @@ } int -test_02 (const char* a) +test_02 (int a, int b, int c, int d) { - /* Must not see a sett after the inlined strlen. */ - return __builtin_strlen (a); -} - -int -test_03 (int a, int b, int c, int d) -{ /* One of the blocks should have a sett and the other one should not. */ if (d > 4) return a + b + 1; Index: gcc/config/sh/sh-mem.cc =================================================================== --- gcc/config/sh/sh-mem.cc (revision 210185) +++ gcc/config/sh/sh-mem.cc (working copy) @@ -568,7 +568,7 @@ addr1 = adjust_automodify_address (addr1, SImode, current_addr, 0); - /*start long loop. */ + /* start long loop. */ emit_label (L_loop_long); /* tmp1 is aligned, OK to load. */ @@ -589,29 +589,15 @@ addr1 = adjust_address (addr1, QImode, 0); /* unroll remaining bytes. 
*/ - emit_insn (gen_extendqisi2 (tmp1, addr1)); - emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); - jump = emit_jump_insn (gen_branch_true (L_return)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); + for (int i = 0; i < 4; ++i) + { + emit_insn (gen_extendqisi2 (tmp1, addr1)); + emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); + emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); + jump = emit_jump_insn (gen_branch_true (L_return)); + add_int_reg_note (jump, REG_BR_PROB, prob_likely); + } - emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); - - emit_insn (gen_extendqisi2 (tmp1, addr1)); - emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); - jump = emit_jump_insn (gen_branch_true (L_return)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); - - emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); - - emit_insn (gen_extendqisi2 (tmp1, addr1)); - emit_insn (gen_cmpeqsi_t (tmp1, const0_rtx)); - jump = emit_jump_insn (gen_branch_true (L_return)); - add_int_reg_note (jump, REG_BR_PROB, prob_likely); - - emit_move_insn (current_addr, plus_constant (Pmode, current_addr, 1)); - - emit_insn (gen_extendqisi2 (tmp1, addr1)); - jump = emit_jump_insn (gen_jump_compact (L_return)); emit_barrier_after (jump); /* start byte loop. */ @@ -626,10 +612,9 @@ /* end loop. */ - emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1))); - emit_label (L_return); + emit_insn (gen_addsi3 (start_addr, start_addr, GEN_INT (1))); emit_insn (gen_subsi3 (operands[0], current_addr, start_addr)); return true;