Hi. As Honza noticed in the PR, we are quite strict about TOP N counter invalidation due to multiple values that can't fit in a counter. We due it in order to have a reproducible builds. I guess we should do a compromise in between reproducibility and possible speed up. That's why I'm suggesting to invalidate a TOP N counter only if param_profile_topn_invalid_threshold percent of profile are missing.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: 2020-01-06 Martin Liska <mli...@suse.cz> PR tree-optimization/92924 * params.opt (param_profile_topn_invalid_threshold): New parameter. * profile.c (sort_hist_values): Mark TOP N counter invalid only if significant amount of samples is missing. gcc/testsuite/ChangeLog: 2020-01-06 Martin Liska <mli...@suse.cz> PR tree-optimization/92924 * gcc.dg/tree-prof/pr92924-2.c: New test. * gcc.dg/tree-prof/pr92924.c: New test. libgcc/ChangeLog: 2020-01-06 Martin Liska <mli...@suse.cz> PR tree-optimization/92924 * libgcov-merge.c (merge_topn_values_set): Replace value with lowest count. --- gcc/params.opt | 4 +++ gcc/profile.c | 17 +++++++++-- gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c | 34 ++++++++++++++++++++++ gcc/testsuite/gcc.dg/tree-prof/pr92924.c | 34 ++++++++++++++++++++++ libgcc/libgcov-merge.c | 12 ++++++-- 5 files changed, 95 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr92924.c
diff --git a/gcc/params.opt b/gcc/params.opt index 6f05b29a929..dd4e1546d93 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -756,6 +756,10 @@ The minimum constant stride beyond which we should use prefetch hints for. Common Joined UInteger Var(param_profile_func_internal_id) IntegerRange(0, 1) Param Use internal function id in profile lookup. +-param=profile-topn-invalid-threshold= +Common Joined UInteger Var(param_profile_topn_invalid_threshold) IntegerRange(0, 100) Init(10) Param Optimization +Threshold of missing samples (in percent) that lead to an invalid (and ignored) TOP N counter. + -param=rpo-vn-max-loop-depth= Common Joined UInteger Var(param_rpo_vn_max_loop_depth) Init(7) IntegerRange(2, 65536) Param Maximum depth of a loop nest to fully value-number optimistically. diff --git a/gcc/profile.c b/gcc/profile.c index e124dc6173a..d3479f0e998 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -765,9 +765,20 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) static void sort_hist_values (histogram_value hist) { - /* counters[2] equal to -1 means that all counters are invalidated. */ - if (hist->hvalue.counters[2] == -1) - return; + /* Calculate what number of samples didn't fit into counters. */ + gcov_type sample_total = 0; + for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + sample_total += hist->hvalue.counters[2 * i + 2]; + + /* Mark the counter invalid if we miss X% of samples or more. */ + gcc_assert (hist->hvalue.counters[2] != -1); + gcov_type threshold + = (100 - param_profile_topn_invalid_threshold) * hist->hvalue.counters[0]; + if (100 * sample_total <= threshold) + { + hist->hvalue.counters[2] = -1; + return; + } gcc_assert (hist->type == HIST_TYPE_TOPN_VALUES || hist->type == HIST_TYPE_INDIR_CALL); diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c b/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c new file mode 100644 index 00000000000..d78d73df04c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c @@ -0,0 +1,34 @@ +/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized --param=profile-topn-invalid-threshold=1" } */ +unsigned int a[1000]; +unsigned int b = 99; +unsigned int c = 10022; +unsigned int d = 10033; +unsigned int e = 100444; +unsigned int f = 1005555; +int +main () +{ + int i; + unsigned int n; + for (i = 0; i < 1000; i++) + { + a[i]=1000+i; + } + for (i = 0; i < 1000; i++) + { + if (i % 100 == 1) + n = b; + else if (i % 100 == 2) + n = c; + else if (i % 100 == 3) + n = d; + else if (i % 100 == 4) + n = e; + else + n = f; + a[i] /= n; + } + return 0; +} +/* autofdo does not do value profiling so far */ +/* { dg-final-use-not-autofdo { scan-ipa-dump-not "Transformation done: div.mod by constant 1005555" "profile"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr92924.c b/gcc/testsuite/gcc.dg/tree-prof/pr92924.c new file mode 100644 index 00000000000..8cee33f3a43 --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-prof/pr92924.c @@ -0,0 +1,34 @@ +/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */ +unsigned int a[1000]; +unsigned int b = 99; +unsigned int c = 10022; +unsigned int d = 10033; +unsigned int e = 100444; +unsigned int f = 1005555; +int +main () +{ + int i; + unsigned int n; + for (i = 0; i < 1000; i++) + { + a[i]=1000+i; + } + for (i = 0; i < 1000; i++) + { + if (i % 100 == 1) + n = b; + else if (i % 100 == 2) + n = c; + else if (i % 100 == 3) + n = d; + else if (i % 100 == 4) + n = e; + else + n = f; + a[i] /= n; + } + return 0; +} +/* autofdo does not do value profiling so far */ +/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: div.mod by constant 1005555" "profile"} } */ diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c index b658aec46c6..2c43bf0ec1d 100644 --- a/libgcc/libgcov-merge.c +++ b/libgcc/libgcov-merge.c @@ -130,11 +130,17 @@ merge_topn_values_set (gcov_type *counters) } } - /* We haven't found a slot, bail out. */ if (j == GCOV_TOPN_VALUES) { - counters[1] = -1; - return; + int min = 0; + for (j = 1; j < GCOV_TOPN_VALUES; j++) + if (counters[2 * j + 1] < counters[2 * min + 1]) + min = j; + if (counters[2 * min + 1] < read_counters[2 * i + 1]) + { + counters[2 * min] = read_counters[2 * i]; + counters[2 * min + 1] = read_counters[2 * i + 1]; + } } } }