Hi, On some early SH2E versions such as the SH7055, there is a hardware bug related to conditional branches without delay slots. The recommended workaround for the bug is not to use conditional branches without delay slots. This is what GCC has been doing, but the comments around that code were a bit confusing, since there is no such thing as an annulled branch on SH. Moreover, it seems the original SH7055F is no longer being manufactured and has been replaced by the SH7055SF, in which the bug is fixed.
The attached patch allows using conditional branches without delay slots by default for any target type and adds a new option -mcbranch-force-delay-slot to reinstate the old behavior. Tested with make -k check RUNTESTFLAGS="--target_board=sh-sim \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" make info dvi pdf make -k check-gcc RUNTESTFLAGS="sh.exp=force-cbranch-delay-slot.c --target_board=sh-sim \{-m1/-ml,-m1/-mb,-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}" Committed as r220774. Cheers, Oleg gcc/ChangeLog: * config/sh/sh.opt (mcbranch-force-delay-slot): New option. * doc/invoke.texi (SH options): Document it. * config/sh/sh.c (sh_insn_length_adjustment): Check TARGET_CBRANCH_FORCE_DELAY_SLOT instead of sh_cpu_attr == CPU_SH2E. gcc/testsuite/ChangeLog: * gcc.target/sh/sh.exp (check_effective_target_sh1): New. * gcc.target/sh/force-cbranch-delay-slot.c: New.
Index: gcc/testsuite/gcc.target/sh/force-cbranch-delay-slot.c =================================================================== --- gcc/testsuite/gcc.target/sh/force-cbranch-delay-slot.c (revision 0) +++ gcc/testsuite/gcc.target/sh/force-cbranch-delay-slot.c (revision 0) @@ -0,0 +1,18 @@ +/* Check that the option -mcbranch-force-delay-slot works as expected on + targets other than SH1, and that it compiles on SH1 targets without fuzz. */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mcbranch-force-delay-slot" } */ +/* { dg-final { scan-assembler-times "nop" 2 { target { ! sh1 } } } } */ + +int g (int, int); + +int +f (int a, int b) +{ + /* Expected: 1x bt/s, 1x nop. */ + if (a != 5) + a = 10; + + /* Expected: 1x jmp, 1x nop. */ + return g (a, b); +} Index: gcc/testsuite/gcc.target/sh/sh.exp =================================================================== --- gcc/testsuite/gcc.target/sh/sh.exp (revision 220708) +++ gcc/testsuite/gcc.target/sh/sh.exp (working copy) @@ -33,6 +33,15 @@ } ""] } +# Return 1 if target is SH1 +proc check_effective_target_sh1 { } { + return [check_no_compiler_messages sh1 object { + #ifndef __SH1__ + #error "" + #endif + } ""] +} + # Return 1 if target supports atomic-model=soft-gusa proc check_effective_target_atomic_model_soft_gusa_available { } { return [check_no_compiler_messages atomic_model_soft_gusa_available object { Index: gcc/config/sh/sh.opt =================================================================== --- gcc/config/sh/sh.opt (revision 220708) +++ gcc/config/sh/sh.opt (working copy) @@ -229,7 +229,7 @@ Cost to assume for a branch insn mzdcbranch -Target Var(TARGET_ZDCBRANCH) +Target Report Var(TARGET_ZDCBRANCH) Assume that zero displacement conditional branches are fast mcbranchdi @@ -240,6 +240,10 @@ Target Undocumented Var(TARGET_CMPEQDI_T) Warn(%qs is deprecated and has no effect) Emit cmpeqdi_t pattern even when -mcbranchdi is in effect. 
+mcbranch-force-delay-slot +Target Report RejectNegative Var(TARGET_CBRANCH_FORCE_DELAY_SLOT) Init(0) +Force the usage of delay slots for conditional branches. + mcut2-workaround Target RejectNegative Var(TARGET_SH5_CUT2_WORKAROUND) Enable SH5 cut2 workaround Index: gcc/config/sh/sh.c =================================================================== --- gcc/config/sh/sh.c (revision 220708) +++ gcc/config/sh/sh.c (working copy) @@ -10245,11 +10245,10 @@ && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES) return 2; - /* SH2e has a bug that prevents the use of annulled branches, so if - the delay slot is not filled, we'll have to put a NOP in it. */ - if (sh_cpu_attr == CPU_SH2E - && JUMP_P (insn) - && get_attr_type (insn) == TYPE_CBRANCH + /* Increase the insn length of a cbranch without a delay slot insn to + force a delay slot which will be stuffed with a nop. */ + if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2 + && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH && ! sequence_insn_p (insn)) return 2; Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 220708) +++ gcc/doc/invoke.texi (working copy) @@ -965,6 +965,7 @@ -maccumulate-outgoing-args -minvalid-symbols @gol -matomic-model=@var{atomic-model} @gol -mbranch-cost=@var{num} -mzdcbranch -mno-zdcbranch @gol +-mcbranch-force-delay-slot @gol -mfused-madd -mno-fused-madd -mfsca -mno-fsca -mfsrra -mno-fsrra @gol -mpretend-cmove -mtas} @@ -20584,6 +20585,13 @@ enabled by default when generating code for SH4 and SH4A. It can be explicitly disabled by specifying @option{-mno-zdcbranch}. +@item -mcbranch-force-delay-slot +@opindex mcbranch-force-delay-slot +Force the usage of delay slots for conditional branches, which stuffs the delay +slot with a @code{nop} if a suitable instruction can't be found. By default +this option is disabled. It can be enabled to work around hardware bugs as +found in the original SH7055. 
+ @item -mfused-madd @itemx -mno-fused-madd @opindex mfused-madd