This is the common infrastructure part. As explained in the initial message,
nothing is activated unless both the language and the target opt in.
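For reference, opting in on both sides boils down to two one-line definitions.
The sketch below is illustrative only: the patch adds the hooks and their
defaults, and the values shown here are assumptions, not part of the patch.

  /* Hypothetical target opt-in, in the port's <target>.c before the
     TARGET_INITIALIZER: descriptors are tagged by a 1-byte misalignment.  */
  #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
  #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

  /* Hypothetical front-end opt-in, overriding the default 'false' that
     langhooks-def.h sets below.  */
  #undef LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS
  #define LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS true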
2016-07-04 Eric Botcazou <ebotca...@adacore.com>
PR ada/37139
PR ada/67205
* common.opt (-ftrampolines): New option.
* doc/invoke.texi (Code Gen Options): Document it.
* doc/tm.texi.in (Trampolines): Add TARGET_CUSTOM_FUNCTION_DESCRIPTORS.
* doc/tm.texi: Regenerate.
* builtins.def: Add init_descriptor and adjust_descriptor.
* builtins.c (expand_builtin_init_trampoline): Do not issue a warning
on platforms with descriptors.
(expand_builtin_init_descriptor): New function.
(expand_builtin_adjust_descriptor): Likewise.
(expand_builtin) <BUILT_IN_INIT_DESCRIPTOR>: New case.
<BUILT_IN_ADJUST_DESCRIPTOR>: Likewise.
* calls.c (prepare_call_address): Remove SIBCALLP parameter and add
FLAGS parameter. Deal with indirect calls by descriptor and adjust.
Set STATIC_CHAIN_REG_P on the static chain register, if any.
(call_expr_flags): Set ECF_BY_DESCRIPTOR for calls by descriptor.
(expand_call): Likewise. Move the call to prepare_call_address
earlier and pass all flags to it.
* cfgexpand.c (expand_call_stmt): Reinstate CALL_EXPR_BY_DESCRIPTOR.
* gimple.h (enum gf_mask): New GF_CALL_BY_DESCRIPTOR value.
(gimple_call_set_by_descriptor): New setter.
(gimple_call_by_descriptor_p): New getter.
* gimple.c (gimple_build_call_from_tree): Set CALL_EXPR_BY_DESCRIPTOR.
(gimple_call_flags): Deal with GF_CALL_BY_DESCRIPTOR.
* langhooks.h (struct lang_hooks): Add custom_function_descriptors.
* langhooks-def.h (LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS): Define.
(LANG_HOOKS_INITIALIZER): Add LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS.
* rtl.h (STATIC_CHAIN_REG_P): New macro.
* rtlanal.c (find_first_parameter_load): Skip static chain registers.
* target.def (custom_function_descriptors): New POD hook.
* tree.h (FUNC_ADDR_BY_DESCRIPTOR): New flag on ADDR_EXPR.
(CALL_EXPR_BY_DESCRIPTOR): New flag on CALL_EXPR.
* tree-core.h (ECF_BY_DESCRIPTOR): New mask.
Document FUNC_ADDR_BY_DESCRIPTOR and CALL_EXPR_BY_DESCRIPTOR.
* tree.c (make_node_stat) <tcc_declaration>: Use FUNCTION_ALIGNMENT.
(build_common_builtin_nodes): Initialize init_descriptor and
adjust_descriptor.
* tree-nested.c: Include target.h.
(struct nesting_info): Add 'any_descr_created' field.
(get_descriptor_type): New function.
(lookup_element_for_decl): New function extracted from...
(create_field_for_decl): Likewise.
(lookup_tramp_for_decl): ...here. Adjust.
(lookup_descr_for_decl): New function.
(convert_tramp_reference_op): Deal with descriptors.
(build_init_call_stmt): New function extracted from...
(finalize_nesting_tree_1): ...here. Adjust and deal with descriptors.
* defaults.h (FUNCTION_ALIGNMENT): Define.
(TRAMPOLINE_ALIGNMENT): Set to above instead of FUNCTION_BOUNDARY.
--
Eric Botcazou
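To make the patch easier to follow, here is the descriptor layout it
implements, sketched in C. This is only a model: the real objects are
built by the RTL expanders and the tree-nested.c code below, and the
struct name is illustrative.

  /* Layout written by __builtin_init_descriptor: the static chain
     pointer comes first, the code entry pointer second.  */
  struct descriptor
  {
    void *static_chain;
    void *code_entry;
  };

  /* __builtin_adjust_descriptor then adds the target-specific tag
     (targetm.calls.custom_function_descriptors) to the address, so the
     resulting "function pointer" is deliberately misaligned and can be
     recognized at run time at indirect call sites.  */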
Index: builtins.c
===================================================================
--- builtins.c (revision 239944)
+++ builtins.c (working copy)
@@ -4618,8 +4618,9 @@ expand_builtin_init_trampoline (tree exp, bool ons
{
trampolines_created = 1;
- warning_at (DECL_SOURCE_LOCATION (t_func), OPT_Wtrampolines,
- "trampoline generated for nested function %qD", t_func);
+ if (targetm.calls.custom_function_descriptors != 0)
+ warning_at (DECL_SOURCE_LOCATION (t_func), OPT_Wtrampolines,
+ "trampoline generated for nested function %qD", t_func);
}
return const0_rtx;
@@ -4641,6 +4642,58 @@ expand_builtin_adjust_trampoline (tree exp)
return tramp;
}
+/* Expand a call to the builtin descriptor initialization routine.
+ A descriptor is made up of a pair of pointers: one to the static
+ chain and one to the code entry, in this order. */
+
+static rtx
+expand_builtin_init_descriptor (tree exp)
+{
+ tree t_descr, t_func, t_chain;
+ rtx m_descr, r_descr, r_func, r_chain;
+
+ if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, POINTER_TYPE,
+ VOID_TYPE))
+ return NULL_RTX;
+
+ t_descr = CALL_EXPR_ARG (exp, 0);
+ t_func = CALL_EXPR_ARG (exp, 1);
+ t_chain = CALL_EXPR_ARG (exp, 2);
+
+ r_descr = expand_normal (t_descr);
+ m_descr = gen_rtx_MEM (BLKmode, r_descr);
+ MEM_NOTRAP_P (m_descr) = 1;
+
+ r_func = expand_normal (t_func);
+ r_chain = expand_normal (t_chain);
+
+ /* Generate insns to initialize the descriptor. */
+ emit_move_insn (adjust_address_nv (m_descr, ptr_mode, 0), r_chain);
+ emit_move_insn (adjust_address_nv (m_descr, ptr_mode,
+ POINTER_SIZE / BITS_PER_UNIT), r_func);
+
+ return const0_rtx;
+}
+
+/* Expand a call to the builtin descriptor adjustment routine. */
+
+static rtx
+expand_builtin_adjust_descriptor (tree exp)
+{
+ rtx tramp;
+
+ if (!validate_arglist (exp, POINTER_TYPE, VOID_TYPE))
+ return NULL_RTX;
+
+ tramp = expand_normal (CALL_EXPR_ARG (exp, 0));
+
+ /* Unalign the descriptor to allow runtime identification. */
+ tramp = plus_constant (ptr_mode, tramp,
+ targetm.calls.custom_function_descriptors);
+
+ return force_operand (tramp, NULL_RTX);
+}
+
/* Expand the call EXP to the built-in signbit, signbitf or signbitl
function. The function first checks whether the back end provides
an insn to implement signbit for the respective mode. If not, it
@@ -6337,6 +6390,11 @@ expand_builtin (tree exp, rtx target, rtx subtarge
case BUILT_IN_ADJUST_TRAMPOLINE:
return expand_builtin_adjust_trampoline (exp);
+ case BUILT_IN_INIT_DESCRIPTOR:
+ return expand_builtin_init_descriptor (exp);
+ case BUILT_IN_ADJUST_DESCRIPTOR:
+ return expand_builtin_adjust_descriptor (exp);
+
case BUILT_IN_FORK:
case BUILT_IN_EXECL:
case BUILT_IN_EXECV:
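In source terms, the two new expanders above perform the equivalent of the
following C sketch (assuming ptr_mode-sized stores; the variable names
mirror the trees but are otherwise illustrative):

  /* expand_builtin_init_descriptor (descr, func, chain): two
     pointer-sized stores, static chain first.  */
  ((void **) descr)[0] = chain;
  ((void **) descr)[1] = func;

  /* expand_builtin_adjust_descriptor (descr): the returned pointer is
     the address misaligned by the target-specific tag, so call sites
     can identify it at run time.  */
  adjusted = (void *) ((uintptr_t) descr
                       + targetm.calls.custom_function_descriptors);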
Index: builtins.def
===================================================================
--- builtins.def (revision 239944)
+++ builtins.def (working copy)
@@ -883,6 +883,8 @@ DEF_C99_BUILTIN (BUILT_IN__EXIT2, "_Exit",
DEF_BUILTIN_STUB (BUILT_IN_INIT_TRAMPOLINE, "__builtin_init_trampoline")
DEF_BUILTIN_STUB (BUILT_IN_INIT_HEAP_TRAMPOLINE, "__builtin_init_heap_trampoline")
DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
+DEF_BUILTIN_STUB (BUILT_IN_INIT_DESCRIPTOR, "__builtin_init_descriptor")
+DEF_BUILTIN_STUB (BUILT_IN_ADJUST_DESCRIPTOR, "__builtin_adjust_descriptor")
DEF_BUILTIN_STUB (BUILT_IN_NONLOCAL_GOTO, "__builtin_nonlocal_goto")
/* Implementing __builtin_setjmp. */
Index: calls.c
===================================================================
--- calls.c (revision 239944)
+++ calls.c (working copy)
@@ -183,17 +183,76 @@ static void restore_fixed_argument_area (rtx, rtx,
rtx
prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
- rtx *call_fusage, int reg_parm_seen, int sibcallp)
+ rtx *call_fusage, int reg_parm_seen, int flags)
{
/* Make a valid memory address and copy constants through pseudo-regs,
but not for a constant address if -fno-function-cse. */
if (GET_CODE (funexp) != SYMBOL_REF)
- /* If we are using registers for parameters, force the
- function address into a register now. */
- funexp = ((reg_parm_seen
- && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
- ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
- : memory_address (FUNCTION_MODE, funexp));
+ {
+ /* If it's an indirect call by descriptor, generate code to perform
+ runtime identification of the pointer and load the descriptor. */
+ if ((flags & ECF_BY_DESCRIPTOR) && !flag_trampolines)
+ {
+ const int bit_val = targetm.calls.custom_function_descriptors;
+ rtx call_lab = gen_label_rtx ();
+
+ gcc_assert (fndecl_or_type && TYPE_P (fndecl_or_type));
+ fndecl_or_type
+ = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, NULL_TREE,
+ fndecl_or_type);
+ DECL_STATIC_CHAIN (fndecl_or_type) = 1;
+ rtx chain = targetm.calls.static_chain (fndecl_or_type, false);
+
+ /* Avoid long live ranges around function calls. */
+ funexp = copy_to_mode_reg (Pmode, funexp);
+
+ if (REG_P (chain))
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, chain));
+
+ /* Emit the runtime identification pattern. */
+ rtx mask = gen_rtx_AND (Pmode, funexp, GEN_INT (bit_val));
+ emit_cmp_and_jump_insns (mask, const0_rtx, EQ, NULL_RTX, Pmode, 1,
+ call_lab);
+
+ /* Statically predict the branch to very likely taken. */
+ rtx_insn *insn = get_last_insn ();
+ if (JUMP_P (insn))
+ predict_insn_def (insn, PRED_BUILTIN_EXPECT, TAKEN);
+
+ /* Load the descriptor. */
+ rtx mem = gen_rtx_MEM (ptr_mode,
+ plus_constant (Pmode, funexp, - bit_val));
+ MEM_NOTRAP_P (mem) = 1;
+ mem = convert_memory_address (Pmode, mem);
+ emit_move_insn (chain, mem);
+
+ mem = gen_rtx_MEM (ptr_mode,
+ plus_constant (Pmode, funexp,
+ POINTER_SIZE / BITS_PER_UNIT
+ - bit_val));
+ MEM_NOTRAP_P (mem) = 1;
+ mem = convert_memory_address (Pmode, mem);
+ emit_move_insn (funexp, mem);
+
+ emit_label (call_lab);
+
+ if (REG_P (chain))
+ {
+ use_reg (call_fusage, chain);
+ STATIC_CHAIN_REG_P (chain) = 1;
+ }
+
+ /* Make sure we're not going to be overwritten below. */
+ gcc_assert (!static_chain_value);
+ }
+
+ /* If we are using registers for parameters, force the
+ function address into a register now. */
+ funexp = ((reg_parm_seen
+ && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
+ ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
+ : memory_address (FUNCTION_MODE, funexp));
+ }
else
{
/* funexp could be a SYMBOL_REF represents a function pointer which is
@@ -202,7 +261,7 @@ prepare_call_address (tree fndecl_or_type, rtx fun
if (GET_MODE (funexp) != Pmode)
funexp = convert_memory_address (Pmode, funexp);
- if (! sibcallp)
+ if (!(flags & ECF_SIBCALL))
{
if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
funexp = force_reg (Pmode, funexp);
@@ -220,7 +279,10 @@ prepare_call_address (tree fndecl_or_type, rtx fun
emit_move_insn (chain, static_chain_value);
if (REG_P (chain))
- use_reg (call_fusage, chain);
+ {
+ use_reg (call_fusage, chain);
+ STATIC_CHAIN_REG_P (chain) = 1;
+ }
}
return funexp;
@@ -806,11 +868,13 @@ call_expr_flags (const_tree t)
flags = internal_fn_flags (CALL_EXPR_IFN (t));
else
{
- t = TREE_TYPE (CALL_EXPR_FN (t));
- if (t && TREE_CODE (t) == POINTER_TYPE)
- flags = flags_from_decl_or_type (TREE_TYPE (t));
+ tree type = TREE_TYPE (CALL_EXPR_FN (t));
+ if (type && TREE_CODE (type) == POINTER_TYPE)
+ flags = flags_from_decl_or_type (TREE_TYPE (type));
else
flags = 0;
+ if (CALL_EXPR_BY_DESCRIPTOR (t))
+ flags |= ECF_BY_DESCRIPTOR;
}
return flags;
@@ -2647,6 +2711,8 @@ expand_call (tree exp, rtx target, int ignore)
{
fntype = TREE_TYPE (TREE_TYPE (addr));
flags |= flags_from_decl_or_type (fntype);
+ if (CALL_EXPR_BY_DESCRIPTOR (exp))
+ flags |= ECF_BY_DESCRIPTOR;
}
rettype = TREE_TYPE (exp);
@@ -3358,6 +3424,13 @@ expand_call (tree exp, rtx target, int ignore)
if (STRICT_ALIGNMENT)
store_unaligned_arguments_into_pseudos (args, num_actuals);
+ /* Prepare the address of the call. This must be done before any
+ register parameter is loaded for find_first_parameter_load to
+ work properly in the presence of descriptors. */
+ funexp = prepare_call_address (fndecl ? fndecl : fntype, funexp,
+ static_chain_value, &call_fusage,
+ reg_parm_seen, flags);
+
/* Now store any partially-in-registers parm.
This is the last place a block-move can happen. */
if (reg_parm_seen)
@@ -3468,10 +3541,6 @@ expand_call (tree exp, rtx target, int ignore)
}
after_args = get_last_insn ();
- funexp = prepare_call_address (fndecl ? fndecl : fntype, funexp,
- static_chain_value, &call_fusage,
- reg_parm_seen, pass == 0);
-
load_register_parameters (args, num_actuals, &call_fusage, flags,
pass == 0, &sibcall_failure);
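The code that prepare_call_address now emits for an ECF_BY_DESCRIPTOR
indirect call behaves like this C sketch, assuming a tag value of 1 (fn_t
is a hypothetical function pointer type; the untagged path is the one
statically predicted as taken):

  if ((uintptr_t) fp & 1)               /* misaligned: a descriptor */
    {
      void **descr = (void **) ((uintptr_t) fp - 1);
      static_chain = descr[0];          /* load the static chain */
      fp = (fn_t) descr[1];             /* load the real code entry */
    }
  /* otherwise fp is an ordinary, properly aligned code address */
  (*fp) (args);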
Index: cfgexpand.c
===================================================================
--- cfgexpand.c (revision 239944)
+++ cfgexpand.c (working copy)
@@ -2642,6 +2642,7 @@ expand_call_stmt (gcall *stmt)
else
CALL_FROM_THUNK_P (exp) = gimple_call_from_thunk_p (stmt);
CALL_EXPR_VA_ARG_PACK (exp) = gimple_call_va_arg_pack_p (stmt);
+ CALL_EXPR_BY_DESCRIPTOR (exp) = gimple_call_by_descriptor_p (stmt);
SET_EXPR_LOCATION (exp, gimple_location (stmt));
CALL_WITH_BOUNDS_P (exp) = gimple_call_with_bounds_p (stmt);
Index: common.opt
===================================================================
--- common.opt (revision 239944)
+++ common.opt (working copy)
@@ -2331,6 +2331,11 @@ ftracer
Common Report Var(flag_tracer) Optimization
Perform superblock formation via tail duplication.
+ftrampolines
+Common Report Var(flag_trampolines) Init(0)
+For targets that normally need trampolines for nested functions, always
+generate them instead of using descriptors.
+
; Zero means that floating-point math operations cannot generate a
; (user-visible) trap. This is the case, for example, in nonstop
; IEEE 754 arithmetic.
Index: defaults.h
===================================================================
--- defaults.h (revision 239944)
+++ defaults.h (working copy)
@@ -1072,9 +1072,18 @@ see the files COPYING3 and COPYING.RUNTIME respect
#define CASE_VECTOR_PC_RELATIVE 0
#endif
+/* Force minimum alignment to be able to use the least significant bits
+ for distinguishing descriptor addresses from code addresses. */
+#define FUNCTION_ALIGNMENT(ALIGN) \
+ (lang_hooks.custom_function_descriptors \
+ && targetm.calls.custom_function_descriptors > 0 \
+ ? MAX ((ALIGN), \
+ 2 * targetm.calls.custom_function_descriptors * BITS_PER_UNIT)\
+ : (ALIGN))
+
/* Assume that trampolines need function alignment. */
#ifndef TRAMPOLINE_ALIGNMENT
-#define TRAMPOLINE_ALIGNMENT FUNCTION_BOUNDARY
+#define TRAMPOLINE_ALIGNMENT FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY)
#endif
/* Register mappings for target machines without register windows. */
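A worked instance of the FUNCTION_ALIGNMENT arithmetic above, assuming a
hook value of 1 and 8-bit units:

  FUNCTION_ALIGNMENT (8) = MAX (8, 2 * 1 * BITS_PER_UNIT) = 16 bits

i.e. 2-byte alignment, so bit 0 of every code address is zero and stays
free for the descriptor tag. A hook value of 2 would likewise force 4-byte
alignment and free bits 0-1, and so on for higher powers of 2.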
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi (revision 239944)
+++ doc/invoke.texi (working copy)
@@ -499,7 +499,7 @@ Objective-C and Objective-C++ Dialects}.
-fverbose-asm -fpack-struct[=@var{n}] @gol
-fleading-underscore -ftls-model=@var{model} @gol
-fstack-reuse=@var{reuse_level} @gol
--ftrapv -fwrapv @gol
+-ftrampolines -ftrapv -fwrapv @gol
-fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]} @gol
-fstrict-volatile-bitfields -fsync-libcalls}
@@ -11698,6 +11698,28 @@ unit, or if @option{-fpic} is not given on the com
The default without @option{-fpic} is @samp{initial-exec}; with
@option{-fpic} the default is @samp{global-dynamic}.
+@item -ftrampolines
+@opindex ftrampolines
+For targets that normally need trampolines for nested functions, always
+generate them instead of using descriptors. Otherwise, for targets that
+do not need them, like for example HP-PA or IA-64, do nothing.
+
+A trampoline is a small piece of code that is created at run time on the
+stack when the address of a nested function is taken, and is used to call
+the nested function indirectly. Therefore, it requires the stack to be
+made executable in order for the program to work properly.
+
+@option{-fno-trampolines} is enabled by default on a language-by-language
+basis to let the compiler avoid generating them, if it computes that this
+is safe, and replace them with descriptors. Descriptors are made up of data
+only, but the generated code must be prepared to deal with them. As of this
+writing, @option{-fno-trampolines} is enabled by default only for Ada.
+
+Moreover, code compiled with @option{-ftrampolines} and code compiled with
+@option{-fno-trampolines} are not binary compatible if nested functions are
+present. This option must therefore be used on a program-wide basis and be
+manipulated with extreme care.
+
@item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
@opindex fvisibility
Set the default ELF image symbol visibility to the specified option---all
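The construct at stake looks like this in GNU C (purely illustrative, since
only Ada opts in to -fno-trampolines at this point):

  int
  outer (int x)
  {
    int nested (int y) { return x + y; }  /* needs the static chain */
    /* Taking the address creates a trampoline on the stack by default,
       or a data-only descriptor under -fno-trampolines.  */
    int (*fp) (int) = nested;
    return fp (1);
  }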
Index: doc/tm.texi
===================================================================
--- doc/tm.texi (revision 239944)
+++ doc/tm.texi (working copy)
@@ -5199,6 +5199,24 @@ be returned; otherwise @var{addr} should be return
If this hook is not defined, @var{addr} will be used for function calls.
@end deftypefn
+@deftypevr {Target Hook} int TARGET_CUSTOM_FUNCTION_DESCRIPTORS
+This hook should be defined to a power of 2 if the target will benefit
+from the use of custom descriptors for nested functions instead of the
+standard trampolines. Such descriptors are created at run time on the
+stack and made up of data only, but they are non-standard so the generated
+code must be prepared to deal with them. This hook should be defined to 0
+if the target uses function descriptors for its standard calling sequence,
+like for example HP-PA or IA-64. Using descriptors for nested functions
+eliminates the need for trampolines that reside on the stack and require
+it to be made executable.
+
+The value of the macro is used to parameterize the run-time identification
+scheme implemented to distinguish descriptors from function addresses: it
+gives the number of bytes by which their address is misaligned compared
+with function addresses. The value of 1 will generally work, unless it is
+already reserved by the target for another purpose, like for example on ARM.
+@end deftypevr
+
Implementing trampolines is difficult on many machines because they have
separate instruction and data caches. Writing into a stack location
fails to clear the memory in the instruction cache, so when the program
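As a concrete reading of the last paragraph, a port where bit 0 of code
addresses already carries meaning, as on ARM where it selects Thumb mode,
would pick the next power of 2 (hypothetical definition, no port is
actually converted by this patch):

  #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
  #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2  /* tag lives in bit 1 */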
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in (revision 239944)
+++ doc/tm.texi.in (working copy)
@@ -3949,6 +3949,8 @@ is used for aligning trampolines.
@hook TARGET_TRAMPOLINE_ADJUST_ADDRESS
+@hook TARGET_CUSTOM_FUNCTION_DESCRIPTORS
+
Implementing trampolines is difficult on many machines because they have
separate instruction and data caches. Writing into a stack location
fails to clear the memory in the instruction cache, so when the program
Index: gimple.c
===================================================================
--- gimple.c (revision 239944)
+++ gimple.c (working copy)
@@ -373,6 +373,7 @@ gimple_build_call_from_tree (tree t)
gimple_call_set_from_thunk (call, CALL_FROM_THUNK_P (t));
gimple_call_set_va_arg_pack (call, CALL_EXPR_VA_ARG_PACK (t));
gimple_call_set_nothrow (call, TREE_NOTHROW (t));
+ gimple_call_set_by_descriptor (call, CALL_EXPR_BY_DESCRIPTOR (t));
gimple_set_no_warning (call, TREE_NO_WARNING (t));
gimple_call_set_with_bounds (call, CALL_WITH_BOUNDS_P (t));
@@ -1386,6 +1387,9 @@ gimple_call_flags (const gimple *stmt)
if (stmt->subcode & GF_CALL_NOTHROW)
flags |= ECF_NOTHROW;
+ if (stmt->subcode & GF_CALL_BY_DESCRIPTOR)
+ flags |= ECF_BY_DESCRIPTOR;
+
return flags;
}
Index: gimple.h
===================================================================
--- gimple.h (revision 239944)
+++ gimple.h (working copy)
@@ -146,6 +146,7 @@ enum gf_mask {
GF_CALL_CTRL_ALTERING = 1 << 7,
GF_CALL_WITH_BOUNDS = 1 << 8,
GF_CALL_MUST_TAIL_CALL = 1 << 9,
+ GF_CALL_BY_DESCRIPTOR = 1 << 10,
GF_OMP_PARALLEL_COMBINED = 1 << 0,
GF_OMP_PARALLEL_GRID_PHONY = 1 << 1,
GF_OMP_TASK_TASKLOOP = 1 << 0,
@@ -3358,6 +3359,26 @@ gimple_call_alloca_for_var_p (gcall *s)
return (s->subcode & GF_CALL_ALLOCA_FOR_VAR) != 0;
}
+/* If BY_DESCRIPTOR_P is true, GIMPLE_CALL S is an indirect call for which
+ pointers to nested functions are descriptors instead of trampolines. */
+
+static inline void
+gimple_call_set_by_descriptor (gcall *s, bool by_descriptor_p)
+{
+ if (by_descriptor_p)
+ s->subcode |= GF_CALL_BY_DESCRIPTOR;
+ else
+ s->subcode &= ~GF_CALL_BY_DESCRIPTOR;
+}
+
+/* Return true if S is a by-descriptor call. */
+
+static inline bool
+gimple_call_by_descriptor_p (gcall *s)
+{
+ return (s->subcode & GF_CALL_BY_DESCRIPTOR) != 0;
+}
+
/* Copy all the GF_CALL_* flags from ORIG_CALL to DEST_CALL. */
static inline void
Index: langhooks-def.h
===================================================================
--- langhooks-def.h (revision 239944)
+++ langhooks-def.h (working copy)
@@ -120,6 +120,7 @@ extern bool lhd_omp_mappable_type (tree);
#define LANG_HOOKS_BLOCK_MAY_FALLTHRU hook_bool_const_tree_true
#define LANG_HOOKS_EH_USE_CXA_END_CLEANUP false
#define LANG_HOOKS_DEEP_UNSHARING false
+#define LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS false
#define LANG_HOOKS_RUN_LANG_SELFTESTS lhd_do_nothing
/* Attribute hooks. */
@@ -323,6 +324,7 @@ extern void lhd_end_section (void);
LANG_HOOKS_BLOCK_MAY_FALLTHRU, \
LANG_HOOKS_EH_USE_CXA_END_CLEANUP, \
LANG_HOOKS_DEEP_UNSHARING, \
+ LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS, \
LANG_HOOKS_RUN_LANG_SELFTESTS \
}
Index: langhooks.h
===================================================================
--- langhooks.h (revision 239944)
+++ langhooks.h (working copy)
@@ -510,6 +510,10 @@ struct lang_hooks
gimplification. */
bool deep_unsharing;
+ /* True if this language may use custom descriptors for nested functions
+ instead of trampolines. */
+ bool custom_function_descriptors;
+
/* Run all lang-specific selftests. */
void (*run_lang_selftests) (void);
Index: rtl.h
===================================================================
--- rtl.h (revision 239944)
+++ rtl.h (working copy)
@@ -317,6 +317,7 @@ struct GTY((desc("0"), tag("0"),
1 in a CONCAT is VAL_EXPR_IS_COPIED in var-tracking.c.
1 in a VALUE is SP_BASED_VALUE_P in cselib.c.
1 in a SUBREG generated by LRA for reload insns.
+ 1 in a REG if this is a static chain register.
1 in a CALL for calls instrumented by Pointer Bounds Checker. */
unsigned int jump : 1;
/* In a CODE_LABEL, part of the two-bit alternate entry field.
@@ -2264,6 +2265,10 @@ do { \
: (SIGN) == SRP_SIGNED ? SUBREG_PROMOTED_SIGNED_P (RTX) \
: SUBREG_PROMOTED_UNSIGNED_P (RTX))
+/* True if the REG is the static chain register for some CALL_INSN. */
+#define STATIC_CHAIN_REG_P(RTX) \
+ (RTL_FLAG_CHECK1 ("STATIC_CHAIN_REG_P", (RTX), REG)->jump)
+
/* True if the subreg was generated by LRA for reload insns. Such
subregs are valid only during LRA. */
#define LRA_SUBREG_P(RTX) \
Index: rtlanal.c
===================================================================
--- rtlanal.c (revision 239944)
+++ rtlanal.c (working copy)
@@ -3915,7 +3915,8 @@ find_first_parameter_load (rtx_insn *call_insn, rt
parm.nregs = 0;
for (p = CALL_INSN_FUNCTION_USAGE (call_insn); p; p = XEXP (p, 1))
if (GET_CODE (XEXP (p, 0)) == USE
- && REG_P (XEXP (XEXP (p, 0), 0)))
+ && REG_P (XEXP (XEXP (p, 0), 0))
+ && !STATIC_CHAIN_REG_P (XEXP (XEXP (p, 0), 0)))
{
gcc_assert (REGNO (XEXP (XEXP (p, 0), 0)) < FIRST_PSEUDO_REGISTER);
Index: target.def
===================================================================
--- target.def (revision 239944)
+++ target.def (working copy)
@@ -4774,6 +4774,25 @@ be returned; otherwise @var{addr} should be return
If this hook is not defined, @var{addr} will be used for function calls.",
rtx, (rtx addr), NULL)
+DEFHOOKPOD
+(custom_function_descriptors,
+ "This hook should be defined to a power of 2 if the target will benefit\n\
+from the use of custom descriptors for nested functions instead of the\n\
+standard trampolines. Such descriptors are created at run time on the\n\
+stack and made up of data only, but they are non-standard so the generated\n\
+code must be prepared to deal with them. This hook should be defined to 0\n\
+if the target uses function descriptors for its standard calling sequence,\n\
+like for example HP-PA or IA-64. Using descriptors for nested functions\n\
+eliminates the need for trampolines that reside on the stack and require\n\
+it to be made executable.\n\
+\n\
+The value of the macro is used to parameterize the run-time identification\n\
+scheme implemented to distinguish descriptors from function addresses: it\n\
+gives the number of bytes by which their address is misaligned compared\n\
+with function addresses. The value of 1 will generally work, unless it is\n\
+already reserved by the target for another purpose, like for example on ARM.",\
+ int, -1)
+
/* Return the number of bytes of its own arguments that a function
pops on returning, or 0 if the function pops no arguments and the
caller must therefore pop them all after the function returns. */
Index: tree-core.h
===================================================================
--- tree-core.h (revision 239944)
+++ tree-core.h (working copy)
@@ -90,6 +90,9 @@ struct die_struct;
/* Nonzero if this call is into the transaction runtime library. */
#define ECF_TM_BUILTIN (1 << 13)
+/* Nonzero if this is an indirect call by descriptor. */
+#define ECF_BY_DESCRIPTOR (1 << 14)
+
/* Call argument flags. */
/* Nonzero if the argument is not dereferenced recursively, thus only
directly reachable memory is read or written. */
@@ -1247,6 +1250,12 @@ struct GTY(()) tree_base {
REF_REVERSE_STORAGE_ORDER in
BIT_FIELD_REF, MEM_REF
+
+ FUNC_ADDR_BY_DESCRIPTOR in
+ ADDR_EXPR
+
+ CALL_EXPR_BY_DESCRIPTOR in
+ CALL_EXPR
*/
struct GTY(()) tree_typed {
Index: tree-nested.c
===================================================================
--- tree-nested.c (revision 239944)
+++ tree-nested.c (working copy)
@@ -21,6 +21,7 @@
#include "system.h"
#include "coretypes.h"
#include "backend.h"
+#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
@@ -103,6 +104,7 @@ struct nesting_info
bool any_parm_remapped;
bool any_tramp_created;
+ bool any_descr_created;
char static_chain_added;
};
@@ -486,13 +488,39 @@ get_trampoline_type (struct nesting_info *info)
return trampoline_type;
}
-/* Given DECL, a nested function, find or create a field in the non-local
- frame structure for a trampoline for this function. */
+/* Build or return the type used to represent a nested function descriptor. */
+static GTY(()) tree descriptor_type;
+
static tree
-lookup_tramp_for_decl (struct nesting_info *info, tree decl,
- enum insert_option insert)
+get_descriptor_type (struct nesting_info *info)
{
+ tree t;
+
+ if (descriptor_type)
+ return descriptor_type;
+
+ t = build_index_type (integer_one_node);
+ t = build_array_type (ptr_type_node, t);
+ t = build_decl (DECL_SOURCE_LOCATION (info->context),
+ FIELD_DECL, get_identifier ("__data"), t);
+
+ descriptor_type = make_node (RECORD_TYPE);
+ TYPE_NAME (descriptor_type) = get_identifier ("__builtin_descriptor");
+ TYPE_FIELDS (descriptor_type) = t;
+ layout_type (descriptor_type);
+ DECL_CONTEXT (t) = descriptor_type;
+
+ return descriptor_type;
+}
+
+/* Given DECL, a nested function, find or create an element in the
+ var map for this function. */
+
+static tree
+lookup_element_for_decl (struct nesting_info *info, tree decl,
+ enum insert_option insert)
+{
if (insert == NO_INSERT)
{
tree *slot = info->var_map->get (decl);
@@ -501,21 +529,75 @@ static tree
tree *slot = &info->var_map->get_or_insert (decl);
if (!*slot)
- {
- tree field = make_node (FIELD_DECL);
- DECL_NAME (field) = DECL_NAME (decl);
- TREE_TYPE (field) = get_trampoline_type (info);
- TREE_ADDRESSABLE (field) = 1;
+ *slot = build_tree_list (NULL_TREE, NULL_TREE);
- insert_field_into_struct (get_frame_type (info), field);
- *slot = field;
+ return (tree) *slot;
+}
+/* Given DECL, a nested function, create a field in the non-local
+ frame structure for this function. */
+
+static tree
+create_field_for_decl (struct nesting_info *info, tree decl, tree type)
+{
+ tree field = make_node (FIELD_DECL);
+ DECL_NAME (field) = DECL_NAME (decl);
+ TREE_TYPE (field) = type;
+ TREE_ADDRESSABLE (field) = 1;
+ insert_field_into_struct (get_frame_type (info), field);
+ return field;
+}
+
+/* Given DECL, a nested function, find or create a field in the non-local
+ frame structure for a trampoline for this function. */
+
+static tree
+lookup_tramp_for_decl (struct nesting_info *info, tree decl,
+ enum insert_option insert)
+{
+ tree elt, field;
+
+ elt = lookup_element_for_decl (info, decl, insert);
+ if (!elt)
+ return NULL_TREE;
+
+ field = TREE_PURPOSE (elt);
+
+ if (!field && insert == INSERT)
+ {
+ field = create_field_for_decl (info, decl, get_trampoline_type (info));
+ TREE_PURPOSE (elt) = field;
info->any_tramp_created = true;
}
- return *slot;
+ return field;
}
+/* Given DECL, a nested function, find or create a field in the non-local
+ frame structure for a descriptor for this function. */
+
+static tree
+lookup_descr_for_decl (struct nesting_info *info, tree decl,
+ enum insert_option insert)
+{
+ tree elt, field;
+
+ elt = lookup_element_for_decl (info, decl, insert);
+ if (!elt)
+ return NULL_TREE;
+
+ field = TREE_VALUE (elt);
+
+ if (!field && insert == INSERT)
+ {
+ field = create_field_for_decl (info, decl, get_descriptor_type (info));
+ TREE_VALUE (elt) = field;
+ info->any_descr_created = true;
+ }
+
+ return field;
+}
+
/* Build or return the field within the non-local frame state that holds
the non-local goto "jmp_buf". The buffer itself is maintained by the
rtl middle-end as dynamic stack space is allocated. */
@@ -2303,6 +2385,7 @@ convert_tramp_reference_op (tree *tp, int *walk_su
struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
struct nesting_info *const info = (struct nesting_info *) wi->info, *i;
tree t = *tp, decl, target_context, x, builtin;
+ bool descr;
gcall *call;
*walk_subtrees = 0;
@@ -2337,8 +2420,15 @@ convert_tramp_reference_op (tree *tp, int *walk_su
we need to insert the trampoline. */
for (i = info; i->context != target_context; i = i->outer)
continue;
- x = lookup_tramp_for_decl (i, decl, INSERT);
+ /* Decide whether to generate a descriptor or a trampoline. */
+ descr = FUNC_ADDR_BY_DESCRIPTOR (t) && !flag_trampolines;
+
+ if (descr)
+ x = lookup_descr_for_decl (i, decl, INSERT);
+ else
+ x = lookup_tramp_for_decl (i, decl, INSERT);
+
/* Compute the address of the field holding the trampoline. */
x = get_frame_field (info, target_context, x, &wi->gsi);
x = build_addr (x);
@@ -2346,7 +2436,10 @@ convert_tramp_reference_op (tree *tp, int *walk_su
/* Do machine-specific ugliness. Normally this will involve
computing extra alignment, but it can really be anything. */
- builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
+ if (descr)
+ builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+ else
+ builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
call = gimple_build_call (builtin, 1, x);
x = init_tmp_var_with_call (info, &wi->gsi, call);
@@ -2820,6 +2913,27 @@ fold_mem_refs (tree *const &e, void *data ATTRIBUT
return true;
}
+/* Given DECL, a nested function, build an initialization call for FIELD,
+ the trampoline or descriptor for DECL, using FUNC as the function. */
+
+static gcall *
+build_init_call_stmt (struct nesting_info *info, tree decl, tree field,
+ tree func)
+{
+ tree arg1, arg2, arg3, x;
+
+ gcc_assert (DECL_STATIC_CHAIN (decl));
+ arg3 = build_addr (info->frame_decl);
+
+ arg2 = build_addr (decl);
+
+ x = build3 (COMPONENT_REF, TREE_TYPE (field),
+ info->frame_decl, field, NULL_TREE);
+ arg1 = build_addr (x);
+
+ return gimple_build_call (func, 3, arg1, arg2, arg3);
+}
+
/* Do "everything else" to clean up or complete state collected by the various
walking passes -- create a field to hold the frame base address, lay out the
types and decls, generate code to initialize the frame decl, store critical
@@ -2965,23 +3079,32 @@ finalize_nesting_tree_1 (struct nesting_info *root
struct nesting_info *i;
for (i = root->inner; i ; i = i->next)
{
- tree arg1, arg2, arg3, x, field;
+ tree field, x;
field = lookup_tramp_for_decl (root, i->context, NO_INSERT);
if (!field)
continue;
- gcc_assert (DECL_STATIC_CHAIN (i->context));
- arg3 = build_addr (root->frame_decl);
+ x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
+ stmt = build_init_call_stmt (root, i->context, field, x);
+ gimple_seq_add_stmt (&stmt_list, stmt);
+ }
+ }
- arg2 = build_addr (i->context);
+ /* If descriptors were created, then we need to initialize them. */
+ if (root->any_descr_created)
+ {
+ struct nesting_info *i;
+ for (i = root->inner; i ; i = i->next)
+ {
+ tree field, x;
- x = build3 (COMPONENT_REF, TREE_TYPE (field),
- root->frame_decl, field, NULL_TREE);
- arg1 = build_addr (x);
+ field = lookup_descr_for_decl (root, i->context, NO_INSERT);
+ if (!field)
+ continue;
- x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
- stmt = gimple_build_call (x, 3, arg1, arg2, arg3);
+ x = builtin_decl_implicit (BUILT_IN_INIT_DESCRIPTOR);
+ stmt = build_init_call_stmt (root, i->context, field, x);
gimple_seq_add_stmt (&stmt_list, stmt);
}
}
Index: tree.c
===================================================================
--- tree.c (revision 239944)
+++ tree.c (working copy)
@@ -1019,7 +1019,7 @@ make_node_stat (enum tree_code code MEM_STAT_DECL)
{
if (code == FUNCTION_DECL)
{
- SET_DECL_ALIGN (t, FUNCTION_BOUNDARY);
+ SET_DECL_ALIGN (t, FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY));
DECL_MODE (t) = FUNCTION_MODE;
}
else
@@ -10602,6 +10602,9 @@ build_common_builtin_nodes (void)
BUILT_IN_INIT_HEAP_TRAMPOLINE,
"__builtin_init_heap_trampoline",
ECF_NOTHROW | ECF_LEAF);
+ local_define_builtin ("__builtin_init_descriptor", ftype,
+ BUILT_IN_INIT_DESCRIPTOR,
+ "__builtin_init_descriptor", ECF_NOTHROW | ECF_LEAF);
ftype = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
local_define_builtin ("__builtin_adjust_trampoline", ftype,
@@ -10608,6 +10611,10 @@ build_common_builtin_nodes (void)
BUILT_IN_ADJUST_TRAMPOLINE,
"__builtin_adjust_trampoline",
ECF_CONST | ECF_NOTHROW);
+ local_define_builtin ("__builtin_adjust_descriptor", ftype,
+ BUILT_IN_ADJUST_DESCRIPTOR,
+ "__builtin_adjust_descriptor",
+ ECF_CONST | ECF_NOTHROW);
ftype = build_function_type_list (void_type_node,
ptr_type_node, ptr_type_node, NULL_TREE);
Index: tree.h
===================================================================
--- tree.h (revision 239944)
+++ tree.h (working copy)
@@ -962,6 +962,16 @@ extern void omp_clause_range_check_failed (const_t
#define REF_REVERSE_STORAGE_ORDER(NODE) \
(TREE_CHECK2 (NODE, BIT_FIELD_REF, MEM_REF)->base.default_def_flag)
+/* In an ADDR_EXPR, indicates that this is a pointer to a nested function
+ represented by a descriptor instead of a trampoline. */
+#define FUNC_ADDR_BY_DESCRIPTOR(NODE) \
+ (TREE_CHECK (NODE, ADDR_EXPR)->base.default_def_flag)
+
+/* In a CALL_EXPR, indicates that this is an indirect call for which
+ pointers to nested functions are descriptors instead of trampolines. */
+#define CALL_EXPR_BY_DESCRIPTOR(NODE) \
+ (TREE_CHECK (NODE, CALL_EXPR)->base.default_def_flag)
+
/* These flags are available for each language front end to use internally. */
#define TREE_LANG_FLAG_0(NODE) \
(TREE_NOT_CHECK2 (NODE, TREE_VEC, SSA_NAME)->base.u.bits.lang_flag_0)