Hi.

As Honza noticed in the PR, we are quite strict about TOP N
counter invalidation due to multiple values that can't
fit in a counter. We due it in order to have a reproducible
builds. I guess we should do a compromise in between reproducibility
and possible speed up. That's why I'm suggesting to invalidate
a TOP N counter only if param_profile_topn_invalid_threshold percent
of profile are missing.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2020-01-06  Martin Liska  <mli...@suse.cz>

        PR tree-optimization/92924
        * params.opt (param_profile_topn_invalid_threshold):
        New parameter.
        * profile.c (sort_hist_values): Mark TOP N counter
        invalid only if significant amount of samples
        is missing.

gcc/testsuite/ChangeLog:

2020-01-06  Martin Liska  <mli...@suse.cz>

        PR tree-optimization/92924
        * gcc.dg/tree-prof/pr92924-2.c: New test.
        * gcc.dg/tree-prof/pr92924.c: New test.

libgcc/ChangeLog:

2020-01-06  Martin Liska  <mli...@suse.cz>

        PR tree-optimization/92924
        * libgcov-merge.c (merge_topn_values_set): Replace
        value with lowest count.
---
 gcc/params.opt                             |  4 +++
 gcc/profile.c                              | 17 +++++++++--
 gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c | 34 ++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-prof/pr92924.c   | 34 ++++++++++++++++++++++
 libgcc/libgcov-merge.c                     | 12 ++++++--
 5 files changed, 95 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-prof/pr92924.c


diff --git a/gcc/params.opt b/gcc/params.opt
index 6f05b29a929..dd4e1546d93 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -756,6 +756,10 @@ The minimum constant stride beyond which we should use prefetch hints for.
 Common Joined UInteger Var(param_profile_func_internal_id) IntegerRange(0, 1) Param
 Use internal function id in profile lookup.
 
+-param=profile-topn-invalid-threshold=
+Common Joined UInteger Var(param_profile_topn_invalid_threshold) IntegerRange(0, 100) Init(10) Param Optimization
+Threshold of missing samples (in percent) that lead to an invalid (and ignored) TOP N counter.
+
 -param=rpo-vn-max-loop-depth=
 Common Joined UInteger Var(param_rpo_vn_max_loop_depth) Init(7) IntegerRange(2, 65536) Param
 Maximum depth of a loop nest to fully value-number optimistically.
diff --git a/gcc/profile.c b/gcc/profile.c
index e124dc6173a..d3479f0e998 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -765,9 +765,20 @@ compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum)
 static void
 sort_hist_values (histogram_value hist)
 {
-  /* counters[2] equal to -1 means that all counters are invalidated.  */
-  if (hist->hvalue.counters[2] == -1)
-    return;
+  /* Calculate what number of samples didn't fit into counters.  */
+  gcov_type sample_total = 0;
+  for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+    sample_total += hist->hvalue.counters[2 * i + 2];
+
+  /* Mark the counter invalid if we miss X% of samples or more.  */
+  gcc_assert (hist->hvalue.counters[2] != -1);
+  gcov_type threshold
+    = (100 - param_profile_topn_invalid_threshold) * hist->hvalue.counters[0];
+  if (100 * sample_total <= threshold)
+    {
+      hist->hvalue.counters[2] = -1;
+      return;
+    }
 
   gcc_assert (hist->type == HIST_TYPE_TOPN_VALUES
 	      || hist->type == HIST_TYPE_INDIR_CALL);
diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c b/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c
new file mode 100644
index 00000000000..d78d73df04c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-prof/pr92924-2.c
@@ -0,0 +1,34 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized --param=profile-topn-invalid-threshold=1" } */
+unsigned int a[1000];
+unsigned int b = 99;
+unsigned int c = 10022;
+unsigned int d = 10033;
+unsigned int e = 100444;
+unsigned int f = 1005555;
+int
+main ()
+{
+  int i;
+  unsigned int n;
+  for (i = 0; i < 1000; i++)
+    {
+	    a[i]=1000+i;
+    }
+  for (i = 0; i < 1000; i++)
+    {
+      if (i % 100 == 1)
+	n = b;
+      else if (i % 100 == 2)
+	n = c;
+      else if (i % 100 == 3)
+	n = d;
+      else if (i % 100 == 4)
+	n = e;
+      else
+	n = f;
+      a[i] /= n;
+    }
+  return 0;
+}
+/* autofdo does not do value profiling so far */
+/* { dg-final-use-not-autofdo { scan-ipa-dump-not "Transformation done: div.mod by constant 1005555" "profile"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-prof/pr92924.c b/gcc/testsuite/gcc.dg/tree-prof/pr92924.c
new file mode 100644
index 00000000000..8cee33f3a43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-prof/pr92924.c
@@ -0,0 +1,34 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized" } */
+unsigned int a[1000];
+unsigned int b = 99;
+unsigned int c = 10022;
+unsigned int d = 10033;
+unsigned int e = 100444;
+unsigned int f = 1005555;
+int
+main ()
+{
+  int i;
+  unsigned int n;
+  for (i = 0; i < 1000; i++)
+    {
+	    a[i]=1000+i;
+    }
+  for (i = 0; i < 1000; i++)
+    {
+      if (i % 100 == 1)
+	n = b;
+      else if (i % 100 == 2)
+	n = c;
+      else if (i % 100 == 3)
+	n = d;
+      else if (i % 100 == 4)
+	n = e;
+      else
+	n = f;
+      a[i] /= n;
+    }
+  return 0;
+}
+/* autofdo does not do value profiling so far */
+/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: div.mod by constant 1005555" "profile"} } */
diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index b658aec46c6..2c43bf0ec1d 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -130,11 +130,17 @@ merge_topn_values_set (gcov_type *counters)
 	    }
 	}
 
-      /* We haven't found a slot, bail out.  */
       if (j == GCOV_TOPN_VALUES)
 	{
-	  counters[1] = -1;
-	  return;
+	  int min = 0;
+	  for (j = 1; j < GCOV_TOPN_VALUES; j++)
+	    if (counters[2 * j + 1] < counters[2 * min + 1])
+	      min = j;
+	  if (counters[2 * min + 1] < read_counters[2 * i + 1])
+	    {
+	      counters[2 * min] = read_counters[2 * i];
+	      counters[2 * min + 1] = read_counters[2 * i + 1];
+	    }
 	}
     }
 }

Reply via email to