https://gcc.gnu.org/g:aaf55e09b3d97164615e783d98cfa842f382559d

commit r16-1664-gaaf55e09b3d97164615e783d98cfa842f382559d
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Wed Jun 25 03:01:29 2025 +0200

    Add -fauto-profile-inlining
    
    This patch adds -fauto-profile-inlining, which can be used to control
    the auto-profile directed inlining.
    
    gcc/ChangeLog:
    
            * common.opt (fauto-profile-inlining): New.
            * doc/invoke.texi (-fauto-profile-inlining): Document.
            * ipa-inline.cc (inline_functions_by_afdo): Check
            flag_auto_profile_inlining.
            (early_inliner): Also do inline_functions_by_afdo with
            !flag_early_inlining.

Diff:
---
 gcc/common.opt      |  4 ++++
 gcc/doc/invoke.texi |  8 +++++++-
 gcc/ipa-inline.cc   | 21 ++++++++++++++++++++-
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index 0e50305dde8e..a76a6920b54c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1187,6 +1187,10 @@ Common Joined RejectNegative Var(auto_profile_file)
 Use sample profile information for call graph node weights. The profile
 file is specified in the argument.
 
+fauto-profile-inlining
+Common Var(flag_auto_profile_inlining) Init(1) Optimization
+Perform inlining using auto-profile.
+
 ; -fcheck-bounds causes gcc to generate array bounds checks.
 ; For C, C++ and ObjC: defaults off.
 ; For Java: defaults to on.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a0c6d3d082e6..95790f7bd171 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -573,7 +573,7 @@ Objective-C and Objective-C++ Dialects}.
 -fmin-function-alignment=[@var{n}]
 -fno-allocation-dce -fallow-store-data-races
 -fassociative-math  -fauto-profile  -fauto-profile[=@var{path}]
--fauto-inc-dec  -fbranch-probabilities
+-fauto-profile-inlining -fauto-inc-dec  -fbranch-probabilities
 -fcaller-saves
 -fcombine-stack-adjustments  -fconserve-stack
 -ffold-mem-offsets
@@ -15502,6 +15502,12 @@ E.g.
 create_gcov --binary=your_program.unstripped --profile=perf.data \
     --gcov=profile.afdo
 @end smallexample
+
+@opindex fauto-profile-inlining
+@item -fauto-profile-inlining
+When auto-profile is available, inline all relevant functions which were
+inlined in the train run before reading the profile feedback.  This improves
+context sensitivity of the profile.  Enabled by default.
 @end table
 
 The following options control compiler behavior regarding floating-point
diff --git a/gcc/ipa-inline.cc b/gcc/ipa-inline.cc
index a960d55b661d..ca605b027dcf 100644
--- a/gcc/ipa-inline.cc
+++ b/gcc/ipa-inline.cc
@@ -3120,7 +3120,7 @@ early_inline_small_functions (struct cgraph_node *node)
 static bool
 inline_functions_by_afdo (struct cgraph_node *node, bool *speculative_calls)
 {
-  if (!flag_auto_profile)
+  if (!flag_auto_profile || !flag_auto_profile_inlining)
     return false;
   struct cgraph_edge *e;
   bool inlined = false;
@@ -3320,6 +3320,25 @@ early_inliner (function *fun)
        fprintf (dump_file, "Iterations: %i\n", iterations);
     }
 
+  /* Do AFDO inlining in case it was not done as part of early inlining.  */
+  if (optimize
+      && !flag_no_inline
+      && !flag_early_inlining
+      && flag_auto_profile_inlining)
+    {
+      bool speculative_calls = false;
+      inlined |= inline_functions_by_afdo (node, &speculative_calls);
+      if (speculative_calls)
+       {
+         cgraph_edge *next;
+         for (cgraph_edge *e = node->callees; e; e = next)
+           {
+             next = e->next_callee;
+             cgraph_edge::redirect_call_stmt_to_callee (e);
+           }
+       }
+    }
+
   if (inlined)
     {
       timevar_push (TV_INTEGRATION);

Reply via email to