Hi,
this patch fixes a sanity-checking ICE with FDO bootstrap.
The problem is that when the ENTRY_BLOCK_PTR count is zero and the function
is being inlined, we disabled scaling. This is no longer correct, because
scaling also involves conversion between local and global profiles.
Bootstrapped/regtested x86_64-linux, committed.
Honza
PR bootstrap/82832
* ipa-inline-transform.c (update_noncloned_frequencies): Always
scale.
(inline_transform): Likewise.
* predict.c (counts_to_freqs): Remove useless conditional.
* profile-count.h (profile_count::apply_scale): Move sanity check.
* tree-inline.c (copy_bb): Always scale.
(copy_cfg_body): Likewise.
Index: ipa-inline-transform.c
===================================================================
--- ipa-inline-transform.c (revision 254411)
+++ ipa-inline-transform.c (working copy)
@@ -59,7 +59,18 @@ update_noncloned_frequencies (struct cgr
profile_count den)
{
struct cgraph_edge *e;
- bool scale = (num == profile_count::zero () || den > 0);
+
+ /* We always must scale to be sure counters end up compatible.
+ If den is zero, just force it nonzero and hope for reasonable
+ approximation.
+ When num is forced nonzero, also update den, so we do not scale profile
+ to 0. */
+ if (!(num == den)
+ && !(den.force_nonzero () == den))
+ {
+ den = den.force_nonzero ();
+ num = num.force_nonzero ();
+ }
/* We do not want to ignore high loop nest after freq drops to 0. */
if (!freq_scale)
@@ -71,19 +82,16 @@ update_noncloned_frequencies (struct cgr
e->frequency = CGRAPH_FREQ_MAX;
if (!e->inline_failed)
update_noncloned_frequencies (e->callee, freq_scale, num, den);
- if (scale)
- e->count = e->count.apply_scale (num, den);
+ e->count = e->count.apply_scale (num, den);
}
for (e = node->indirect_calls; e; e = e->next_callee)
{
e->frequency = e->frequency * (gcov_type) freq_scale / CGRAPH_FREQ_BASE;
if (e->frequency > CGRAPH_FREQ_MAX)
e->frequency = CGRAPH_FREQ_MAX;
- if (scale)
- e->count = e->count.apply_scale (num, den);
+ e->count = e->count.apply_scale (num, den);
}
- if (scale)
- node->count = node->count.apply_scale (num, den);
+ node->count = node->count.apply_scale (num, den);
}
/* We removed or are going to remove the last call to NODE.
@@ -692,7 +700,10 @@ inline_transform (struct cgraph_node *no
basic_block bb;
FOR_ALL_BB_FN (bb, cfun)
- bb->count = bb->count.apply_scale (num, den);
+ if (num == profile_count::zero ())
+ bb->count = bb->count.global0 ();
+ else
+ bb->count = bb->count.apply_scale (num, den);
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
}
todo = optimize_inline_calls (current_function_decl);
Index: predict.c
===================================================================
--- predict.c (revision 254411)
+++ predict.c (working copy)
@@ -3324,8 +3324,7 @@ counts_to_freqs (void)
basic_block bb;
FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb)
- if (!(bb->count < true_count_max))
- true_count_max = true_count_max.max (bb->count);
+ true_count_max = true_count_max.max (bb->count);
cfun->cfg->count_max = true_count_max;
Index: profile-count.h
===================================================================
--- profile-count.h (revision 254411)
+++ profile-count.h (working copy)
@@ -949,9 +949,9 @@ public:
return num;
if (!initialized_p () || !num.initialized_p () || !den.initialized_p ())
return profile_count::uninitialized ();
- gcc_checking_assert (den.m_val);
if (num == den)
return *this;
+ gcc_checking_assert (den.m_val);
profile_count ret;
uint64_t val;
Index: tree-inline.c
===================================================================
--- tree-inline.c (revision 254411)
+++ tree-inline.c (working copy)
@@ -1770,8 +1770,18 @@ copy_bb (copy_body_data *id, basic_block
basic_block copy_basic_block;
tree decl;
basic_block prev;
- bool scale = !num.initialized_p ()
- || (den.nonzero_p () || num == profile_count::zero ());
+
+ /* We always must scale to be sure counters end up compatible.
+ If den is zero, just force it nonzero and hope for reasonable
+ approximation.
+ When num is forced nonzero, also update den, so we do not scale profile
+ to 0. */