Hi, the attached patch introduces a new target hook (targetm.calls.special_function_flags) which can be used by a backend to add specific call properties to builtins.
On S/390 this is used for the *tbegin* and *tabort builtins whose control flow otherwise is not modelled correctly. On S/390 this leads to problems since our TX intructions do not save and restore the floating point registers. On RTL level we deal with this by adding FPR clobbers to the tbegin instruction pattern. However, there are several optimizations taking place on tree level which need to know about the special characteristics of tbegin as well. So e.g. constant copy propagation misoptimizes the following example by propagating f = 77.0f into the f != 88.0f comparison: int foo () { float f = 77.0f; if (__builtin_tbegin (0) == 0) { f = 88.0f; __builtin_tabort (256); return 2; } if (f != 88.0f) return 3; return 4; } Fixed with the attached patch. Another side effect of the patch is that the "return 2" statement is now optimized away due to __builtin_tabort being "noreturn". Bootstrapped and regtested on s390 and s390x with --with-arch=zEC12. htm-nofloat-2.c fails with that patch. The returns-twice flag on tbegin prevents several optimizations on the cfg and basically disables the TX optimization in s390_optimize_nonescaping_tx that way. I'll try to address this with a follow-on patch. Ok for mainline and 4.8? Bye, -Andreas- 2013-10-11 Andreas Krebbel <andreas.kreb...@de.ibm.com> * calls.c (special_function_p): Call targetm.calls.special_function_flags. * target.def (special_function_flags): Add new target hook. * targhooks.c (default_get_reg_raw_mode): New function. * targhooks.h (default_get_reg_raw_mode): Add prototype. * doc/tm.texi.in: Add doc placeholder. * doc/tm.texi: Update. * config/s390/s390.c (s390_special_function_flags): Implement the new target hook for S/390. (TARGET_SPECIAL_FUNCTION_FLAGS): Define new target hook for S/390. 2013-10-11 Andreas Krebbel <andreas.kreb...@de.ibm.com> * gcc.target/s390/htm-1.c: Move __builtin_tabort invokations into separate functions. --- gcc/calls.c | 3 + gcc/config/s390/s390.c | 24 +++++++++++++++ gcc/doc/tm.texi | 4 ++ gcc/doc/tm.texi.in | 2 + gcc/target.def | 10 ++++++ gcc/targhooks.c | 8 +++++ gcc/targhooks.h | 1 gcc/testsuite/gcc.target/s390/htm-1.c | 52 ++++++++++++++++++++++++++!!!!!!!! 8 files changed, 92 insertions(+), 12 modifications(!) Index: gcc/calls.c =================================================================== *** gcc/calls.c.orig --- gcc/calls.c *************** special_function_p (const_tree fndecl, i *** 562,567 **** --- 562,570 ---- else if (tname[0] == 'l' && tname[1] == 'o' && ! strcmp (tname, "longjmp")) flags |= ECF_NORETURN; + + /* Apply target specific flags. */ + flags |= targetm.calls.special_function_flags (name); } return flags; Index: gcc/config/s390/s390.c =================================================================== *** gcc/config/s390/s390.c.orig --- gcc/config/s390/s390.c *************** s390_expand_builtin (tree exp, rtx targe *** 10065,10070 **** --- 10065,10091 ---- return const0_rtx; } + /* Return call flags to be added for target specific functions. */ + + static int + s390_special_function_flags (const char *name) + { + int flags = 0; + + if (!TARGET_HTM) + return 0; + + if (strcmp (name, "__builtin_tbegin") == 0 + || strcmp (name, "__builtin_tbegin_nofloat") == 0 + || strcmp (name, "__builtin_tbegin_retry") == 0 + || strcmp (name, "__builtin_tbegin_retry_nofloat") == 0) + flags |= ECF_RETURNS_TWICE; + + if (strcmp (name, "__builtin_tabort") == 0) + flags |= ECF_NORETURN; + + return flags; + } /* Output assembly code for the trampoline template to stdio stream FILE. *************** s390_loop_unroll_adjust (unsigned nunrol *** 11833,11838 **** --- 11854,11862 ---- #undef TARGET_LIBCALL_VALUE #define TARGET_LIBCALL_VALUE s390_libcall_value + #undef TARGET_SPECIAL_FUNCTION_FLAGS + #define TARGET_SPECIAL_FUNCTION_FLAGS s390_special_function_flags + #undef TARGET_FIXED_CONDITION_CODE_REGS #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs Index: gcc/target.def =================================================================== *** gcc/target.def.orig --- gcc/target.def *************** DEFHOOK *** 4179,4184 **** --- 4179,4194 ---- enum machine_mode, (int regno), default_get_reg_raw_mode) + /* Return a mode wide enough to copy any argument value that might be + passed. */ + DEFHOOK + (special_function_flags, + "This target hook returns the flags to be set for a function with the\ + specified @var{name}. Define this hook e.g. if certain builtins require\ + specific call properties.", + int, (const char *name), + default_special_function_flags) + HOOK_VECTOR_END (calls) /* Return the diagnostic message string if conversion from FROMTYPE Index: gcc/targhooks.c =================================================================== *** gcc/targhooks.c.orig --- gcc/targhooks.c *************** default_get_reg_raw_mode (int regno) *** 1432,1437 **** --- 1432,1445 ---- return reg_raw_mode[regno]; } + /* Return the call flags required for function NAME. */ + + int + default_special_function_flags (const char *name ATTRIBUTE_UNUSED) + { + return 0; + } + /* Return true if the state of option OPTION should be stored in PCH files and checked by default_pch_valid_p. Store the option's current state in STATE if so. */ Index: gcc/targhooks.h =================================================================== *** gcc/targhooks.h.orig --- gcc/targhooks.h *************** extern int default_jump_align_max_skip ( *** 194,199 **** --- 194,200 ---- extern section * default_function_section(tree decl, enum node_frequency freq, bool startup, bool exit); extern enum machine_mode default_get_reg_raw_mode (int); + extern int default_special_function_flags (const char *); extern void *default_get_pch_validity (size_t *); extern const char *default_pch_valid_p (const void *, size_t); Index: gcc/doc/tm.texi.in =================================================================== *** gcc/doc/tm.texi.in.orig --- gcc/doc/tm.texi.in *************** nothing when you use @option{-freg-struc *** 3819,3824 **** --- 3819,3826 ---- @hook TARGET_GET_RAW_ARG_MODE + @hook TARGET_SPECIAL_FUNCTION_FLAGS + @node Caller Saves @subsection Caller-Saves Register Allocation Index: gcc/doc/tm.texi =================================================================== *** gcc/doc/tm.texi.orig --- gcc/doc/tm.texi *************** This target hook returns the mode to be *** 4663,4668 **** --- 4663,4672 ---- This target hook returns the mode to be used when accessing raw argument registers in @code{__builtin_apply_args}. Define this macro if the value in @var{reg_raw_mode} is not correct. @end deftypefn + @deftypefn {Target Hook} int TARGET_SPECIAL_FUNCTION_FLAGS (const char *@var{name}) + This target hook returns the flags to be set for a function with the specified @var{name}. Define this hook e.g. if certain builtins require specific call properties. + @end deftypefn + @node Caller Saves @subsection Caller-Saves Register Allocation Index: gcc/testsuite/gcc.target/s390/htm-1.c =================================================================== *** gcc/testsuite/gcc.target/s390/htm-1.c.orig --- gcc/testsuite/gcc.target/s390/htm-1.c *************** foo (struct __htm_tdb* tdb, int reg, int *** 48,73 **** __builtin_non_tx_store(&g, *mem); __builtin_non_tx_store(&g, global); __builtin_tabort (42 + 255); __builtin_tabort (reg); /* { dg-final { scan-assembler-times "tabort\t255" 1 } } */ __builtin_tabort (reg + 255); __builtin_tabort (*mem); __builtin_tabort (global); /* Here global + 255 gets reloaded into a reg. Better would be to just reload global or *mem and get the +255 for free as address arithmetic. */ __builtin_tabort (*mem + 255); ! __builtin_tabort (global + 255); ! ! __builtin_tend(); ! __builtin_tx_assist (23); ! __builtin_tx_assist (reg); ! __builtin_tx_assist (*mem); ! __builtin_tx_assist (global); } /* Make sure the tdb NULL argument ends up as immediate value in the instruction. */ /* { dg-final { scan-assembler-times "tbegin\t0," 10 } } */ --- 48,111 ---- __builtin_non_tx_store(&g, *mem); __builtin_non_tx_store(&g, global); + __builtin_tend(); + + __builtin_tx_assist (23); + __builtin_tx_assist (reg); + __builtin_tx_assist (*mem); + __builtin_tx_assist (global); + } + + /* The taborts must go into separate function since they are + "noreturn". */ + + void + tabort1 () + { __builtin_tabort (42 + 255); + } + + void + tabort2 (int reg) + { __builtin_tabort (reg); + } + + void + tabort3 (int reg) + { /* { dg-final { scan-assembler-times "tabort\t255" 1 } } */ __builtin_tabort (reg + 255); + } + + void + tabort4 (int *mem) + { __builtin_tabort (*mem); + } + + void + tabort5 () + { __builtin_tabort (global); + } + + void + tabort6 (int *mem) + { /* Here global + 255 gets reloaded into a reg. Better would be to just reload global or *mem and get the +255 for free as address arithmetic. */ __builtin_tabort (*mem + 255); ! } ! void ! tabort7 () ! { ! __builtin_tabort (global + 255); } + /* Make sure the tdb NULL argument ends up as immediate value in the instruction. */ /* { dg-final { scan-assembler-times "tbegin\t0," 10 } } */