https://gcc.gnu.org/g:aaf55e09b3d97164615e783d98cfa842f382559d
commit r16-1664-gaaf55e09b3d97164615e783d98cfa842f382559d Author: Jan Hubicka <hubi...@ucw.cz> Date: Wed Jun 25 03:01:29 2025 +0200 Add -fauto-profile-inlining This patch adds -fauto-profile-inlining which can be used to control the auto-profile-directed inlining. gcc/ChangeLog: * common.opt (fauto-profile-inlining): New. * doc/invoke.texi (-fauto-profile-inlining): Document. * ipa-inline.cc (inline_functions_by_afdo): Check flag_auto_profile_inlining. (early_inliner): Also do inline_functions_by_afdo with !flag_early_inlining. Diff: --- gcc/common.opt | 4 ++++ gcc/doc/invoke.texi | 8 +++++++- gcc/ipa-inline.cc | 21 ++++++++++++++++++++- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index 0e50305dde8e..a76a6920b54c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1187,6 +1187,10 @@ Common Joined RejectNegative Var(auto_profile_file) Use sample profile information for call graph node weights. The profile file is specified in the argument. +fauto-profile-inlining +Common Var(flag_auto_profile_inlining) Init(1) Optimization +Perform inlining using auto-profile. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a0c6d3d082e6..95790f7bd171 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -573,7 +573,7 @@ Objective-C and Objective-C++ Dialects}. -fmin-function-alignment=[@var{n}] -fno-allocation-dce -fallow-store-data-races -fassociative-math -fauto-profile -fauto-profile[=@var{path}] --fauto-inc-dec -fbranch-probabilities +-fauto-profile-inlining -fauto-inc-dec -fbranch-probabilities -fcaller-saves -fcombine-stack-adjustments -fconserve-stack -ffold-mem-offsets @@ -15502,6 +15502,12 @@ E.g.
create_gcov --binary=your_program.unstripped --profile=perf.data \ --gcov=profile.afdo @end smallexample + +@opindex fauto-profile-inlining +@item -fauto-profile-inlining +When auto-profile is available, inline all relevant functions that were +inlined in the training run before reading the profile feedback. This improves +context sensitivity of the profile. Enabled by default. @end table The following options control compiler behavior regarding floating-point diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc index a960d55b661d..ca605b027dcf 100644 --- a/gcc/ipa-inline.cc +++ b/gcc/ipa-inline.cc @@ -3120,7 +3120,7 @@ early_inline_small_functions (struct cgraph_node *node) static bool inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls) { - if (!flag_auto_profile) + if (!flag_auto_profile || !flag_auto_profile_inlining) return false; struct cgraph_edge *e; bool inlined = false; @@ -3320,6 +3320,25 @@ early_inliner (function *fun) fprintf (dump_file, "Iterations: %i\n", iterations); } + /* Do AFDO inlining in case it was not done as part of early inlining. */ + if (optimize + && !flag_no_inline + && !flag_early_inlining + && flag_auto_profile_inlining) + { + bool speculative_calls = false; + inlined |= inline_functions_by_afdo (node, &speculative_calls); + if (speculative_calls) + { + cgraph_edge *next; + for (cgraph_edge *e = node->callees; e; e = next) + { + next = e->next_callee; + cgraph_edge::redirect_call_stmt_to_callee (e); + } + } + } + if (inlined) { timevar_push (TV_INTEGRATION);