On Tue, Dec 06, 2016 at 05:02:36PM +0300, Maxim Kuvyrkov wrote: > > On Oct 5, 2016, at 12:45 AM, Maxim Kuvyrkov <[email protected]> > > wrote: > > > > Ideally, I want to improve support for -fprolog-pad=N and > > __attribute__((prolog_pad(N))) to provide functionality to also output pad > > before the function label to address use-cases for s390, sparc, etc (what > > Jose E. Marchesi was referring to). I.e., -fprolog-pad= option would > > accept both -fprolog-pad=N and -fprolog-pad=M,N forms -- issue M nops > > before function label and N nops after function label. Similarly for > > __attribute__((prolog_pad(N,M))). I (or you :-) ) can attempt to implement > > this functionality before stage1 closes, but it should not block this > > initial patch.
Implemented :-] Other changes since v2 are the missing docs added and it is based on the master branch now. I have deliberately not approached the warning topic yet; IMHO the generalisation of my use case is quite valid for any instrumentation: functions part of the instrumentation framework itself might reside in the same source file as some target functions but need to be excluded with an __attribute__((prolog_pad(0))). Who knows what else is legitimate... Suggested by Richard Biener, this feature is no longer language specific; only the __attribute__ syntax is of course limited to C. > >> Changes since the previous version > >> (which in turn was based on Maxim's suggestion): > >> > >> * Document the feature in *.texi > >> > >> * Automatically disable IPA-RA, like normal profiling does. > >> You never know in advance what the code patched in at run time will do. > >> Any optimisation here is potentially wrong. > >> > >> * record a prolog_nop_pad_size value specified on the command line > >> in each function's attributes, so that it survives an LTO pipe. > >> Signed-off-by: Torsten Duwe <[email protected]> diff --git a/gcc/attribs.c b/gcc/attribs.c index e66349a..6ff81a8 100644 --- a/gcc/attribs.c +++ b/gcc/attribs.c @@ -365,6 +365,28 @@ decl_attributes (tree *node, tree attributes, int flags) if (!attributes_initialized) init_attributes (); + /* If we're building NOP pads because of a command line arg, note the size + for LTO builds, unless the attribute has already been overridden. 
*/ + if (TREE_CODE (*node) == FUNCTION_DECL && + prolog_nop_pad_size > 0) + { + tree pp_attr = lookup_attribute ("prolog_pad", attributes); + if (!pp_attr) + { + tree pp_size = build_int_cstu (integer_type_node, prolog_nop_pad_size); + tree pp_entry = build_int_cstu (integer_type_node, prolog_nop_pad_entry); + + attributes = tree_cons (get_identifier ("prolog_pad"), + tree_cons (NULL_TREE, + pp_size, + tree_cons (NULL_TREE, + pp_entry, + NULL_TREE) + ), + attributes); + } + } + /* If this is a function and the user used #pragma GCC optimize, add the options to the attribute((optimize(...))) list. */ if (TREE_CODE (*node) == FUNCTION_DECL && current_optimize_pragma) diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c index f5adade..5881d05 100644 --- a/gcc/c-family/c-attribs.c +++ b/gcc/c-family/c-attribs.c @@ -139,6 +139,7 @@ static tree handle_bnd_variable_size_attribute (tree *, tree, tree, int, bool *) static tree handle_bnd_legacy (tree *, tree, tree, int, bool *); static tree handle_bnd_instrument (tree *, tree, tree, int, bool *); static tree handle_fallthrough_attribute (tree *, tree, tree, int, bool *); +static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *); /* Table of machine-independent attributes common to all C-like languages. 
@@ -345,6 +346,8 @@ const struct attribute_spec c_common_attribute_table[] = handle_bnd_instrument, false }, { "fallthrough", 0, 0, false, false, false, handle_fallthrough_attribute, false }, + { "prolog_pad", 1, 2, true, false, false, + handle_prolog_pad_attribute, false }, { NULL, 0, 0, false, false, false, NULL, false } }; @@ -3173,3 +3176,10 @@ handle_fallthrough_attribute (tree *, tree name, tree, int, *no_add_attrs = true; return NULL_TREE; } + +static tree +handle_prolog_pad_attribute (tree *, tree, tree, int, + bool *) +{ + return NULL_TREE; +} diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c index db293fe..7f3e558 100644 --- a/gcc/c/c-decl.c +++ b/gcc/c/c-decl.c @@ -2322,6 +2322,34 @@ merge_decls (tree newdecl, tree olddecl, tree newtype, tree oldtype) TREE_ASM_WRITTEN (olddecl) = 0; } + /* Prolog pad size may be set wrongly by a forward declaration; + fix it up by pulling the final value in front. + */ + if (TREE_CODE (newdecl) == FUNCTION_DECL && new_is_definition && + lookup_attribute ("prolog_pad", DECL_ATTRIBUTES (newdecl)) && + lookup_attribute ("prolog_pad", DECL_ATTRIBUTES (olddecl))) + { + tree last_pp_attr = NULL_TREE; + tree *it; + + for (it = &DECL_ATTRIBUTES (newdecl); *it; it = &TREE_CHAIN(*it)) + { + if (IDENTIFIER_LENGTH (get_attribute_name (*it)) != + strlen("prolog_pad")) + continue; + if (strcmp ("prolog_pad", + IDENTIFIER_POINTER (get_attribute_name (*it)))) + continue; + + last_pp_attr = *it; + *it = TREE_CHAIN(last_pp_attr); + TREE_CHAIN(last_pp_attr) = NULL_TREE; + break; + } + DECL_ATTRIBUTES (olddecl) = chainon (last_pp_attr, + DECL_ATTRIBUTES (olddecl)); + } + DECL_ATTRIBUTES (newdecl) = targetm.merge_decl_attributes (olddecl, newdecl); diff --git a/gcc/common.opt b/gcc/common.opt index b350b07..2cb8979 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -163,6 +163,13 @@ bool flag_stack_usage_info = false Variable int flag_debug_asm +; If we should generate NOP pads before each function prologue +Variable +HOST_WIDE_INT 
prolog_nop_pad_size + +; And how far the asm entry point is into this pad +Variable +HOST_WIDE_INT prolog_nop_pad_entry ; Balance between GNAT encodings and standard DWARF to emit. Variable @@ -2019,6 +2026,10 @@ fprofile-reorder-functions Common Report Var(flag_profile_reorder_functions) Enable function reordering that improves code placement. +fprolog-pad= +Common Joined +Pad NOPs before each function prolog + frandom-seed Common Var(common_deferred_options) Defer diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index a8402e1..c26ec54 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3072,6 +3072,17 @@ that affect more than one function. This attribute should be used for debugging purposes only. It is not suitable in production code. +@item prolog_pad +@cindex @code{prolog_pad} function attribute +@cindex function entry padded with NOPs +In case the target's text segment can be made writable at run time +by any means, padding the function entry with a number of NOPs can +be used to provide a universal tool for instrumentation. Usually, +prolog padding is enabled globally using the -fprolog-pad= command +line switch, and disabled by the attribute keyword for functions +that are part of the actual instrumentation framework, to easily avoid +an endless recursion. + @item pure @cindex @code{pure} function attribute @cindex functions that have no side effects diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 034ae98..af4f4b0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -11207,6 +11207,21 @@ of the function name, it is considered to be a match. For C99 and C++ extended identifiers, the function name must be given in UTF-8, not using universal character names. +@item -fprolog-pad=N +@opindex fprolog-pad +Generate a pad of N nops right at the beginning +of each function, which can be used to patch in any desired +instrumentation at run time, provided that the code segment +is writeable. 
For run time identification, the starting addresses +of these pads, which correspond to their respective functions, +are additionally collected in the @code{__prolog_pads_loc} section +of the resulting binary. + +Note that the value of @code{__attribute__((prolog_pad(N)))} takes +precedence over the command-line option -fprolog-pad=N. This can be used +to increase the pad size or to remove the pad completely on a single +function. + @end table diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index cdf5f48..65c265c 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -4564,6 +4564,10 @@ will select the smallest suitable mode. This section describes the macros that output function entry (@dfn{prologue}) and exit (@dfn{epilogue}) code. +@deftypefn {Target Hook} void TARGET_ASM_PRINT_PROLOG_PAD (FILE *@var{file}, unsigned HOST_WIDE_INT @var{pad_size}, bool @var{record_p}) +Generate prologue pad +@end deftypefn + @deftypefn {Target Hook} void TARGET_ASM_FUNCTION_PROLOGUE (FILE *@var{file}, HOST_WIDE_INT @var{size}) If defined, a function that outputs the assembler code for entry to a function. The prologue is responsible for setting up the stack frame, diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index bbf53c9..88424ae 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -3648,6 +3648,8 @@ will select the smallest suitable mode. This section describes the macros that output function entry (@dfn{prologue}) and exit (@dfn{epilogue}) code. 
+@hook TARGET_ASM_PRINT_PROLOG_PAD + @hook TARGET_ASM_FUNCTION_PROLOGUE @hook TARGET_ASM_FUNCTION_END_PROLOGUE diff --git a/gcc/lto/lto-lang.c b/gcc/lto/lto-lang.c index 58f6e0c..6723506 100644 --- a/gcc/lto/lto-lang.c +++ b/gcc/lto/lto-lang.c @@ -50,6 +50,7 @@ static tree handle_sentinel_attribute (tree *, tree, tree, int, bool *); static tree handle_type_generic_attribute (tree *, tree, tree, int, bool *); static tree handle_transaction_pure_attribute (tree *, tree, tree, int, bool *); static tree handle_returns_twice_attribute (tree *, tree, tree, int, bool *); +static tree handle_prolog_pad_attribute (tree *, tree, tree, int, bool *); static tree ignore_attribute (tree *, tree, tree, int, bool *); static tree handle_format_attribute (tree *, tree, tree, int, bool *); @@ -78,6 +79,8 @@ const struct attribute_spec lto_attribute_table[] = handle_nonnull_attribute, false }, { "nothrow", 0, 0, true, false, false, handle_nothrow_attribute, false }, + { "prolog_pad", 1, 2, false, true, true, + handle_prolog_pad_attribute, false }, { "returns_twice", 0, 0, true, false, false, handle_returns_twice_attribute, false }, { "sentinel", 0, 1, false, true, true, @@ -475,6 +478,14 @@ handle_returns_twice_attribute (tree *node, tree ARG_UNUSED (name), return NULL_TREE; } +static tree +handle_prolog_pad_attribute (tree *, tree, tree, int, + bool *) +{ + /* Nothing to be done here. */ + return NULL_TREE; +} + /* Ignore the given attribute. Used when this attribute may be usefully overridden by the target, but is not used generically. 
*/ diff --git a/gcc/opts.c b/gcc/opts.c index c61c367..05cd4bb 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -2080,6 +2080,26 @@ common_handle_option (struct gcc_options *opts, opts->x_flag_ipa_reference = false; break; + case OPT_fprolog_pad_: + { + const char *comma = strchr (arg, ','); + if (comma) + { + prolog_nop_pad_size = atoi (arg); + prolog_nop_pad_entry = atoi (comma + 1); + } + else + { + prolog_nop_pad_size = atoi (arg); + prolog_nop_pad_entry = 0; + } + if (prolog_nop_pad_size < 0 || + prolog_nop_pad_entry < 0 || + prolog_nop_pad_size < prolog_nop_pad_entry) + error ("invalid arguments for %<-fprolog_pad%>"); + } + break; + case OPT_ftree_vectorize: if (!opts_set->x_flag_tree_loop_vectorize) opts->x_flag_tree_loop_vectorize = value; diff --git a/gcc/target.def b/gcc/target.def index ac3470e..487b900 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -288,6 +288,12 @@ hidden, protected or internal visibility as specified by @var{visibility}.", void, (tree decl, int visibility), default_assemble_visibility) +DEFHOOK +(print_prolog_pad, + "Generate prologue pad", + void, (FILE *file, unsigned HOST_WIDE_INT pad_size, bool record_p), + default_print_prolog_pad) + /* Output the assembler code for entry to a function. 
*/ DEFHOOK (function_prologue, diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 5d3e91e..501a501 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1617,6 +1617,25 @@ default_compare_by_pieces_branch_ratio (machine_mode) return 1; } +void +default_print_prolog_pad (FILE *file, unsigned HOST_WIDE_INT pad_size, + bool record_p) +{ + if (record_p) + fprintf (file, "1:"); + + unsigned i; + for (i = 0; i < pad_size; ++i) + fprintf (file, "\tnop\n"); + + if (record_p) + { + fprintf (file, "\t.section __prolog_pads_loc, \"a\",@progbits\n"); + fprintf (file, "\t.quad 1b\n"); + fprintf (file, "\t.previous\n"); + } +} + bool default_profile_before_prologue (void) { diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 3a9271f..13c33de 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -203,6 +203,7 @@ extern bool default_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, bool); extern int default_compare_by_pieces_branch_ratio (machine_mode); +extern void default_print_prolog_pad (FILE *, unsigned HOST_WIDE_INT , bool); extern bool default_profile_before_prologue (void); extern reg_class_t default_preferred_reload_class (rtx, reg_class_t); extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t); diff --git a/gcc/toplev.c b/gcc/toplev.c index 5af02ea..d4a46fa 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -1595,8 +1595,10 @@ process_options (void) } /* Do not use IPA optimizations for register allocation if profiler is active + or prolog-pads are inserted for run-time instrumentation or port does not emit prologue and epilogue as RTL. 
*/ - if (profile_flag || !targetm.have_prologue () || !targetm.have_epilogue ()) + if (profile_flag || prolog_nop_pad_size || + !targetm.have_prologue () || !targetm.have_epilogue ()) flag_ipa_ra = 0; /* Enable -Werror=coverage-mismatch when -Werror and -Wno-error diff --git a/gcc/varasm.c b/gcc/varasm.c index f3cd70a..afa0304 100644 --- a/gcc/varasm.c +++ b/gcc/varasm.c @@ -1830,6 +1830,40 @@ assemble_start_function (tree decl, const char *fnname) if (DECL_PRESERVE_P (decl)) targetm.asm_out.mark_decl_preserved (fnname); + unsigned HOST_WIDE_INT pad_size = 0; + unsigned HOST_WIDE_INT pad_entry = 0; + + tree prolog_pad_attr + = lookup_attribute ("prolog_pad", DECL_ATTRIBUTES (decl)); + if (prolog_pad_attr) + { + tree pp_val = TREE_VALUE (prolog_pad_attr); + tree prolog_pad_value1 = TREE_VALUE (pp_val); + + if (tree_fits_uhwi_p (prolog_pad_value1) ) + { + pad_size = tree_to_uhwi (prolog_pad_value1); + } + else + gcc_unreachable (); + + if (list_length (pp_val) > 1) + { + tree prolog_pad_value2 = TREE_VALUE (TREE_CHAIN (pp_val)); + + if (tree_fits_uhwi_p (prolog_pad_value2)) + { + pad_entry = tree_to_uhwi (prolog_pad_value2); + } + else + gcc_unreachable (); + } + } + + /* Emit the prolog padding before the entry label, if any */ + if (pad_entry > 0) + targetm.asm_out.print_prolog_pad (asm_out_file, pad_entry, true); + /* Do any machine/system dependent processing of the function name. */ #ifdef ASM_DECLARE_FUNCTION_NAME ASM_DECLARE_FUNCTION_NAME (asm_out_file, fnname, current_function_decl); @@ -1838,6 +1872,11 @@ assemble_start_function (tree decl, const char *fnname) ASM_OUTPUT_FUNCTION_LABEL (asm_out_file, fnname, current_function_decl); #endif /* ASM_DECLARE_FUNCTION_NAME */ + /* And the padding after the label. Record it if we haven't done so yet */ + if (pad_size > pad_entry) + targetm.asm_out.print_prolog_pad (asm_out_file, pad_size-pad_entry, + (pad_entry == 0)); + if (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (decl))) saw_no_split_stack = true; }
