Hello, The attached patch adds support for the SH tas.b instruction by introducing a new target option -menable-tas. The patch was tested against rev 184589 with...
make info dvi pdf

make -k check RUNTESTFLAGS="--target_board=sh-sim\{ -m2/-ml,-m2/-mb, -m2a-single/-mb, -m4-single/-ml,-m4-single/-mb, -m4a-single/-ml,-m4a-single/-mb}"

make -k check RUNTESTFLAGS="--target_board=sh-sim\{ -m2/-ml/-msoft-atomic,-m2/-mb/-msoft-atomic, -m2a-single/-mb/-msoft-atomic, -m4-single/-ml/-msoft-atomic,-m4-single/-mb/-msoft-atomic, -m4a-single/-ml/-msoft-atomic,-m4a-single/-mb/-msoft-atomic}"

make -k check RUNTESTFLAGS="--target_board=sh-sim\{ -m2/-ml/-msoft-atomic/-menable-tas,-m2/-mb/-msoft-atomic/-menable-tas, -m2a-single/-mb/-msoft-atomic/-menable-tas, -m4-single/-ml/-msoft-atomic/-menable-tas, -m4-single/-mb/-msoft-atomic/-menable-tas, -m4a-single/-ml/-msoft-atomic/-menable-tas, -m4a-single/-mb/-msoft-atomic/-menable-tas}"

make -k check RUNTESTFLAGS="--target_board=sh-sim\{ -m2/-ml/-menable-tas,-m2/-mb/-menable-tas, -m2a-single/-mb/-menable-tas, -m4-single/-ml/-menable-tas,-m4-single/-mb/-menable-tas, -m4a-single/-ml/-menable-tas,-m4a-single/-mb/-menable-tas}"

... and no new failures, except for the last test run, which does not enable any atomics. There it fails the test case gcc.dg/atomic-flag.c. This is because TARGET_ATOMIC_TEST_AND_SET_TRUEVAL is defined as 0x80, but the code in optabs.c (expand_atomic_test_and_set) always uses const1_rtx as the atomic flag true-value when expanding "single thread" fake atomics. However, the failing test case checks the written atomic flag value against __GCC_ATOMIC_TEST_AND_SET_TRUEVAL, which the SH target defines as 0x80.

One way to fix this problem is the example patch in my previous message regarding this issue: http://gcc.gnu.org/ml/gcc-patches/2012-02/msg01408.html

Another way would be to allow the TARGET_ATOMIC_TEST_AND_SET_TRUEVAL hook to be evaluated (conditionally) at runtime. Then, for example, the target could set it to 0x80 if atomics are enabled by the user, or leave it at 0x01 if they aren't.

Other than that, OK for 4.8?
Cheers,
Oleg

2012-03-01  Oleg Endo  <olege...@gcc.gnu.org>

	* config/sh/sh.h (TARGET_ATOMIC_TEST_AND_SET_TRUEVAL): New hook.
	* config/sh/sync.md (atomic_test_and_set): New expander.
	(tasb, atomic_test_and_set_soft): New insns.
	* config/sh/sh.opt (menable-tas): New option.
	* doc/invoke.texi (SH Options): Document it.
Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 184669) +++ gcc/doc/invoke.texi (working copy) @@ -887,7 +887,8 @@ -mdivsi3_libfunc=@var{name} -mfixed-range=@var{register-range} @gol -madjust-unroll -mindexed-addressing -mgettrcost=@var{number} -mpt-fixed @gol -maccumulate-outgoing-args -minvalid-symbols -msoft-atomic @gol --mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mpretend-cmove} +-mbranch-cost=@var{num} -mcbranchdi -mcmpeqdi -mfused-madd -mpretend-cmove @gol +-menable-tas} @emph{Solaris 2 Options} @gccoptlist{-mimpure-text -mno-impure-text @gol @@ -17829,6 +17830,15 @@ This option is enabled by default when the target is @code{sh-*-linux*}. For details on the atomic built-in functions see @ref{__atomic Builtins}. +@item -menable-tas +@opindex menable-tas +Generate the @code{tas.b} opcode for @code{__atomic_test_and_set}. +Notice that depending on the particular hardware and software configuration +this can degrade overall performance due to the operand cache line flushes +that are implied by the @code{tas.b} instruction. On multi-core SH4A +processors the @code{tas.b} instruction must be used with caution since it +can result in data corruption for certain cache configurations. + @item -mspace @opindex mspace Optimize for space instead of speed. Implied by @option{-Os}. Index: gcc/config/sh/sh.h =================================================================== --- gcc/config/sh/sh.h (revision 184669) +++ gcc/config/sh/sh.h (working copy) @@ -2475,4 +2475,11 @@ /* FIXME: middle-end support for highpart optimizations is missing. */ #define high_life_started reload_in_progress +/* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. + This value is used by optabs.c atomic op expansion code as well as in + sync.md. It must be defined as signed char here or else the movqi + pattern will refuse to load it as a QImode constant. 
*/ +#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL +#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL ((signed char)0x80) + #endif /* ! GCC_SH_H */ Index: gcc/config/sh/sync.md =================================================================== --- gcc/config/sh/sync.md (revision 184669) +++ gcc/config/sh/sync.md (working copy) @@ -365,3 +365,60 @@ "1: mov r1,r15"; } [(set_attr "length" "18")]) + +(define_expand "atomic_test_and_set" + [(match_operand:SI 0 "register_operand" "") ;; bool result output + (match_operand:QI 1 "memory_operand" "") ;; memory + (match_operand:SI 2 "const_int_operand" "")] ;; model + "(TARGET_SOFT_ATOMIC || TARGET_ENABLE_TAS) && !TARGET_SHMEDIA" +{ + rtx addr = force_reg (Pmode, XEXP (operands[1], 0)); + + if (TARGET_ENABLE_TAS) + emit_insn (gen_tasb (addr)); + else + { + rtx val = force_reg (QImode, + GEN_INT (TARGET_ATOMIC_TEST_AND_SET_TRUEVAL)); + emit_insn (gen_atomic_test_and_set_soft (addr, val)); + } + + /* The result of the test op is the inverse of what we are + supposed to return. Thus invert the T bit. The inversion will be + potentially optimized away and integrated into surrounding code. 
*/ + emit_insn (gen_movnegt (operands[0])); + DONE; +}) + +(define_insn "tasb" + [(set (reg:SI T_REG) + (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "r")) + (const_int 0))) + (set (mem:QI (match_dup 0)) + (unspec:QI [(const_int 128)] UNSPEC_ATOMIC))] + "TARGET_ENABLE_TAS && !TARGET_SHMEDIA" + "tas.b @%0" + [(set_attr "insn_class" "co_group")]) + +(define_insn "atomic_test_and_set_soft" + [(set (reg:SI T_REG) + (eq:SI (mem:QI (match_operand:SI 0 "register_operand" "u")) + (const_int 0))) + (set (mem:QI (match_dup 0)) + (unspec:QI [(match_operand:QI 1 "register_operand" "u")] UNSPEC_ATOMIC)) + (clobber (match_scratch:QI 2 "=&u")) + (clobber (reg:SI R0_REG)) + (clobber (reg:SI R1_REG))] + "TARGET_SOFT_ATOMIC && !TARGET_ENABLE_TAS && !TARGET_SHMEDIA" +{ + return "mova 1f,r0" "\n" + " .align 2" "\n" + " mov r15,r1" "\n" + " mov #(0f-1f),r15" "\n" + "0: mov.b @%0,%2" "\n" + " mov.b %1,@%0" "\n" + "1: mov r1,r15" "\n" + " tst %2,%2"; +} + [(set_attr "length" "16")]) + Index: gcc/config/sh/sh.opt =================================================================== --- gcc/config/sh/sh.opt (revision 184669) +++ gcc/config/sh/sh.opt (working copy) @@ -323,6 +323,10 @@ Target Report Mask(SOFT_ATOMIC) Use software atomic sequences supported by kernel +menable-tas +Target Report RejectNegative Var(TARGET_ENABLE_TAS) +Use tas.b instruction for __atomic_test_and_set + mspace Target RejectNegative Alias(Os) Deprecated. Use -Os instead