Hi,
at the beginning of inline_transform, the count of the cgraph node may not
match the count of the entry block of the CFG.  This is because we may have
inlined the function and some of its execution count has been subtracted.  For
this reason the profile needs to be rescaled, which is done by fixup_cfg.
However, because fixup_cfg runs only after inlining, we end up mixing unscaled
counts (of the original body) with scaled counts (of the inlined functions) and
get profile mismatches.

This patch fixes it by scaling the body profile first.

Bootstrapped/regtested x86_64-linux, committed.

Honza

        * ipa-inline-transform.c: Include function.h, cfg.h and basic-block.h.
        (mark_all_inlined_calls_cdtor): Fix formatting.
        (inline_transform): Rescale profile before inlining.
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c      (revision 249096)
+++ ipa-inline-transform.c      (working copy)
@@ -44,6 +44,9 @@ along with GCC; see the file COPYING3.
 #include "ipa-fnsummary.h"
 #include "ipa-inline.h"
 #include "tree-inline.h"
+#include "function.h"
+#include "cfg.h"
+#include "basic-block.h"
 
 int ncalls_inlined;
 int nfunctions_inlined;
@@ -276,7 +279,7 @@ mark_all_inlined_calls_cdtor (cgraph_nod
     {
       cs->in_polymorphic_cdtor = true;
       if (!cs->inline_failed)
-    mark_all_inlined_calls_cdtor (cs->callee);
+       mark_all_inlined_calls_cdtor (cs->callee);
     }
   for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
     cs->in_polymorphic_cdtor = true;
@@ -661,7 +664,37 @@ inline_transform (struct cgraph_node *no
 
   timevar_push (TV_INTEGRATION);
   if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
-    todo = optimize_inline_calls (current_function_decl);
+    {
+      profile_count num = node->count;
+      profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+      bool scale = num.initialized_p ()
+                  && (den > 0 || num == profile_count::zero ())
+                  && !(num == den);
+      if (scale)
+       {
+         if (dump_file)
+           {
+             fprintf (dump_file, "Applying count scale ");
+             num.dump (dump_file);
+             fprintf (dump_file, "/");
+             den.dump (dump_file);
+             fprintf (dump_file, "\n");
+           }
+
+         basic_block bb;
+         FOR_ALL_BB_FN (bb, cfun)
+           {
+             bb->count = bb->count.apply_scale (num, den);
+       
+             edge e;
+             edge_iterator ei;
+             FOR_EACH_EDGE (e, ei, bb->succs)
+               e->count = e->count.apply_scale (num, den);
+           }
+         ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
+       }
+      todo = optimize_inline_calls (current_function_decl);
+   }
   timevar_pop (TV_INTEGRATION);
 
   cfun->always_inline_functions_inlined = true;

Reply via email to