From: Andi Kleen <a...@linux.intel.com> When instrumenting programs using __fentry__ it is often useful to instrument the function return too. Traditionally this has been done by patching the return address on the stack frame on entry. However this is fairly complicated (trace function has to emulate a stack) and also slow because it causes a branch misprediction on every return.
Add an option to generate call or nop instrumentation for every return instead, including patch sections. This will increase the program size slightly, but can be a lot faster and simpler. This version only instruments true returns, not sibling calls or tail recursion. This matches the semantics of the original stack. gcc/: 2018-11-04 Andi Kleen <a...@linux.intel.com> * config/i386/i386-opts.h (enum instrument_return): Add. * config/i386/i386.c (output_return_instrumentation): Add. (ix86_output_function_return): Call output_return_instrumentation. (ix86_output_call_insn): Call output_return_instrumentation. * config/i386/i386.opt: Add -minstrument-return=. * doc/invoke.texi (-minstrument-return): Document. gcc/testsuite/: 2018-11-04 Andi Kleen <a...@linux.intel.com> * gcc.target/i386/returninst1.c: New test. * gcc.target/i386/returninst2.c: New test. * gcc.target/i386/returninst3.c: New test. --- gcc/config/i386/i386-opts.h | 6 ++++ gcc/config/i386/i386.c | 36 +++++++++++++++++++++ gcc/config/i386/i386.opt | 21 ++++++++++++ gcc/doc/invoke.texi | 14 ++++++++ gcc/testsuite/gcc.target/i386/returninst1.c | 14 ++++++++ gcc/testsuite/gcc.target/i386/returninst2.c | 21 ++++++++++++ gcc/testsuite/gcc.target/i386/returninst3.c | 9 ++++++ 7 files changed, 121 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h index 46366cbfa72..35e9413100e 100644 --- a/gcc/config/i386/i386-opts.h +++ b/gcc/config/i386/i386-opts.h @@ -119,4 +119,10 @@ enum indirect_branch { indirect_branch_thunk_extern }; +enum instrument_return { + instrument_return_none = 0, + instrument_return_call, + instrument_return_nop5 +}; + #endif diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index f9ef0b4445b..f7cd94a8139 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op) return "%!jmp\t%A0"; } +/* Output return instrumentation for current function if needed. */ + +static void +output_return_instrumentation (void) +{ + if (ix86_instrument_return != instrument_return_none + && flag_fentry + && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) + { + if (ix86_flag_record_return) + fprintf (asm_out_file, "1:\n"); + switch (ix86_instrument_return) + { + case instrument_return_call: + fprintf (asm_out_file, "\tcall\t__return__\n"); + break; + case instrument_return_nop5: + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ + fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); + break; + case instrument_return_none: + break; + } + + if (ix86_flag_record_return) + { + fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); + fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); + fprintf (asm_out_file, "\t.previous\n"); + } + } +} + /* Output function return. CALL_OP is the jump target. Add a REP prefix to RET if LONG_P is true and function return is kept. */ const char * ix86_output_function_return (bool long_p) { + output_return_instrumentation (); + if (cfun->machine->function_return_type != indirect_branch_keep) { char thunk_name[32]; @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op) if (SIBLING_CALL_P (insn)) { + output_return_instrumentation (); if (direct_p) { if (ix86_nopic_noplt_attribute_p (call_op)) diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index e7fbf9b6f99..5925b75244f 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code generation. mcldemote Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save Support CLDEMOTE built-in functions and code generation. + +minstrument-return= +Target Report RejectNegative Joined Enum(instrument_return) Var(ix86_instrument_return) Init(instrument_return_none) +Instrument function exit in instrumented functions with __fentry__. + +Enum +Name(instrument_return) Type(enum instrument_return) +Known choices for return instrumentation with -minstrument-return= + +EnumValue +Enum(instrument_return) String(none) Value(instrument_return_none) + +EnumValue +Enum(instrument_return) String(call) Value(instrument_return_call) + +EnumValue +Enum(instrument_return) String(nop5) Value(instrument_return_nop5) + +mrecord-return +Target Report Var(ix86_flag_record_return) Init(0) +Generate a __return_loc section pointing to all return instrumentation code. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1743c64582e..939be3e251b 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1301,6 +1301,7 @@ See RS/6000 and PowerPC Options. -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol -msse2avx -mfentry -mrecord-mcount -mnop-mcount -m8bit-idiv @gol +-minstrument-return=@var{type} @gol -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol -malign-data=@var{type} -mstack-protector-guard=@var{guard} @gol -mstack-protector-guard-reg=@var{reg} @gol @@ -28442,6 +28443,19 @@ the profiling functions as NOPs. This is useful when they should be patched in later dynamically. This is likely only useful together with @option{-mrecord-mcount}. +@item -minstrument-return=@var{type} +@opindex minstrument-return +Instrument function exit in -pg -mfentry instrumented functions with +call to specified function. This only instruments true returns ending +with ret, but not sibling calls ending with jump. Valid types +are @var{none} to not instrument, @var{call} to generate a call to __return__, +or @var{nop5} to generate a 5 byte nop. + +@item -mrecord-return +@itemx -mno-record-return +@opindex mrecord-return +Generate a __return_loc section pointing to all return instrumentation code. + @item -mskip-rax-setup @itemx -mno-skip-rax-setup @opindex mskip-rax-setup diff --git a/gcc/testsuite/gcc.target/i386/returninst1.c b/gcc/testsuite/gcc.target/i386/returninst1.c new file mode 100644 index 00000000000..f970e75a774 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/returninst1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-pg -mfentry -minstrument-return=call -mrecord-return" } */ +/* { dg-final { scan-assembler "call.*__return__" } } */ +/* { dg-final { scan-assembler "section.*return_loc" } } */ + +int func(int a) +{ + return a+1; +} + +int func2(int a) +{ + return a+1; +} diff --git a/gcc/testsuite/gcc.target/i386/returninst2.c b/gcc/testsuite/gcc.target/i386/returninst2.c new file mode 100644 index 00000000000..716b38556dd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/returninst2.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-pg -mfentry -minstrument-return=nop5 -mrecord-return" } */ +/* { dg-final { scan-assembler-times "0x0f, 0x1f, 0x44, 0x00, 0x00" 3 } } */ +/* { dg-final { scan-assembler "section.*return_loc" } } */ + +int func(int a) +{ + return a+1; +} + +int func2(int a) +{ + return a+1; +} + +extern void func4(int); + +int func3(int a) +{ + func4(a + 1); +} diff --git a/gcc/testsuite/gcc.target/i386/returninst3.c b/gcc/testsuite/gcc.target/i386/returninst3.c new file mode 100644 index 00000000000..5bbc60e8bd4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/returninst3.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-pg -mfentry -minstrument-return=call" } */ +/* { dg-final { scan-assembler-not "call.*__return__" } } */ + +__attribute__((no_instrument_function)) +int func(int a) +{ + return a+1; +} -- 2.19.1