> diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
> index 7e0e8c66124..8a317d85277 100644
> --- a/gcc/auto-profile.cc
> +++ b/gcc/auto-profile.cc
> @@ -1129,6 +1129,26 @@ afdo_set_bb_count (basic_block bb, const stmt_set 
> &promoted)
>        gimple *stmt = gsi_stmt (gsi);
>        if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
>          continue;
> +      /* If statements are de-duplicated, we will have same stmt executing 
> from
> +      more than one path (by jumping to same statment).  In this case, the
> +      profile we get will be for multiple paths and would make the annotated
> +      profile wrong.  An example of this is:
> +
> +      if (foo () == 4)
> +        {
> +          bar ();
> +        }
> +      else if (foo () == 5)
> +        {
> +          bar ();
> +        }
> +     In this case, we want to skip the profile count of bar () and calculate
> +     the profile from the edge counts.  In case of LBR/BRBE we are
> +     profiling branches and GIMPLE_CALL is the important statement
> +     here.  */
> +
> +      if (gimple_code (stmt) == GIMPLE_CALL)
> +     continue;

I am not quite sure about this.  With this change you will basically
ignore all samples anotated to calls.  We can deduplciate other
statements, not only calls.  Ignoring all samples annotated with calls
seems to be throwing away good part of useful information, since
pre-inline there are very many of them.  

We can mitigate this particular problem in some cases by deduplicating
early. This would also help inliner.

There are many later optimizations that will inavoidably lead to AFDO
disturption.  We may have something like -fautofdo-collection which will
disable passes that disturbs afdo a lot (like ICF or deduplication), but
I am not sure that makes a lot of sense either...

>             location_t phi_loc
>               = gimple_phi_arg_location_from_edge (phi, tmp_e);
>             count_info info;
> -           if (afdo_source_profile->get_count_info (phi_loc, &info)
> -               && info.count != 0)
> +           if (afdo_source_profile->get_count_info (phi_loc, &info))
>               {
>                 if (info.count > max_count)
>                   max_count = info.count;

So the idea is to not mark BB as anotated if it only has zero executed statement
since deduplication effectively makes the other BB to contain such
statmeent even while it is executed?

> @@ -1217,7 +1236,9 @@ afdo_find_equiv_class (bb_set *annotated_bb)
>         && bb1->loop_father == bb->loop_father)
>       {
>         bb1->aux = bb;
> -       if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
> +       if (bb1->count > bb->count
> +           && !is_bb_annotated (bb, *annotated_bb)
> +           && is_bb_annotated (bb1, *annotated_bb))
>           {
>             bb->count = bb1->count;
>             set_bb_annotated (bb, annotated_bb);
> @@ -1229,7 +1250,9 @@ afdo_find_equiv_class (bb_set *annotated_bb)
>         && bb1->loop_father == bb->loop_father)
>       {
>         bb1->aux = bb;
> -       if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
> +       if (bb1->count > bb->count
> +           && !is_bb_annotated (bb, *annotated_bb)
> +           && is_bb_annotated (bb1, *annotated_bb))

Why these two are necessary? The code identifies pairs of BBs that
should execute same number of times (which is visible in CFG) and
attemtps to fixup the counts.  Perhaps the merging should be smarter,
but if we do not make them executed same number of time, we will only
have more inconsistent profiles...
@@ -1269,10 +1293,14 @@ afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
>       else
>         total_known_count += AFDO_EINFO (e)->get_count ();
>       num_edge++;
> +     if (is_bb_annotated (is_succ ? e->dest : e->src, *annotated_bb))
> +       num_annotated++;
> +     else
> +       bb_edge_to_annotate = e;
>        }
>  
>      /* Be careful not to annotate block with no successor in special cases.  
> */
> -    if (num_unknown_edge == 0 && total_known_count > bb->count)
> +    if (num_unknown_edge == 0 && total_known_count >= bb->count)
>        {
>       bb->count = total_known_count;
>       if (!is_bb_annotated (bb, *annotated_bb))
> @@ -1281,26 +1309,52 @@ afdo_propagate_edge (bool is_succ, bb_set 
> *annotated_bb)
>        }
>      else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
>        {
> -     if (bb->count > total_known_count)
> -       {
> -           profile_count new_count = bb->count - total_known_count;
> -           AFDO_EINFO(unknown_edge)->set_count(new_count);
> -           if (num_edge == 1)
> -             {
> -               basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : 
> unknown_edge->src;
> -               if (new_count > succ_or_pred_bb->count)
> -                 {
> -                   succ_or_pred_bb->count = new_count;
> -                   if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb))
> -                     set_bb_annotated (succ_or_pred_bb, annotated_bb);
> -                 }
> -             }
> -        }
> +     profile_count new_count;
> +     if (bb->count >= total_known_count)
> +       new_count = bb->count - total_known_count;
> else
> -       AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ());
> +       new_count = profile_count::zero ().afdo ();
> +
> +     AFDO_EINFO (unknown_edge)->set_count (new_count);
> +     if (num_edge - num_annotated == 1
> +         && bb_edge_to_annotate == unknown_edge)
> +       {
> +         basic_block succ_or_pred_bb = is_succ
> +                ? unknown_edge->dest : unknown_edge->src;
> +         unsigned len = is_succ
> +                             ? succ_or_pred_bb->preds->length ()
> +                             : succ_or_pred_bb->succs->length ();
> +         if (len == 1 && new_count >= succ_or_pred_bb->count)
There is single_succ_edge_p and single_pred_edge_p to test this.
> +          {
> +             if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb))
> +               {
> +                 succ_or_pred_bb->count = new_count;
> +                 set_bb_annotated (succ_or_pred_bb, annotated_bb);
> +               }
> +          }
> +       }

However i do not quite follow the old or new logic here.
So if I have only one unknown edge out (or in) from BB and I know
its count, I can determine count of that edge by Kirhoff law.

But then the old code computes number of edges out of the BB
and if it is only one it updates the count of destinating BB.
I think it should be be testing number of in-edgs of the
destinating bb which seems a bug you are fixing.
(and if it is indeed bug, I think we should fix it first before dealing
with deduplication).

But why you need bb_edge_to_annotate variable? 

Honza

Reply via email to