On 1/22/21 2:51 PM, Jan Hubicka wrote:
diff --git a/Makefile.in b/Makefile.in
index 247cb9c8711..03785200dc7 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -565,7 +565,7 @@ STAGEprofile_TFLAGS = $(STAGE2_TFLAGS)
  STAGEtrain_CFLAGS = $(filter-out -fchecking=1,$(STAGE3_CFLAGS))
  STAGEtrain_TFLAGS = $(filter-out -fchecking=1,$(STAGE3_TFLAGS))
-STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use
+STAGEfeedback_CFLAGS = $(STAGE4_CFLAGS) -fprofile-use -fprofile-reproducible=parallel-runs

I would make this go in separately from the feature itself (it is a build
machinery change).

All right.

Especially since you say it does not achieve reproducibility anyway until we
patch the hash tables?

Yep, I'm testing a patch that should improve build reproducibility.

  STAGEfeedback_TFLAGS = $(STAGE4_TFLAGS)
  STAGEautoprofile_CFLAGS = $(STAGE2_CFLAGS) -g
diff --git a/gcc/common.opt b/gcc/common.opt
index bde1711870d..a8a2b67a99d 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2248,7 +2248,7 @@ Enum(profile_reproducibility) String(parallel-runs) Value(PROFILE_REPRODUCIBILIT
  EnumValue
  Enum(profile_reproducibility) String(multithreaded) Value(PROFILE_REPRODUCIBILITY_MULTITHREADED)
-fprofile-reproducible
+fprofile-reproducible=
  Common Joined RejectNegative Var(flag_profile_reproducible) Enum(profile_reproducibility) Init(PROFILE_REPRODUCIBILITY_SERIAL)
  -fprofile-reproducible=[serial|parallel-runs|multithreaded]   Control level of reproducibility of profile gathered by -fprofile-generate.
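
For reference, since the option is Joined now, it always has to be written
with an explicit argument, exactly as the Makefile.in hunk above does:

  gcc -O2 -fprofile-use -fprofile-reproducible=parallel-runs foo.c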
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 4c916f4994f..8078a9569d7 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -747,8 +747,8 @@ gimple_divmod_fixed_value (gassign *stmt, tree value, profile_probability prob,
    abs (counters[0]) is the number of executions
    for i in 0 ... TOPN-1
-     counters[2 * i + 1] is target
-     abs (counters[2 * i + 2]) is corresponding hitrate counter.
+     counters[2 * i + 2] is target
+     counters[2 * i + 3] is corresponding hitrate counter.
    Value of counters[0] negative when counter became
    full during merging and some values are lost.  */
@@ -766,14 +766,18 @@ get_nth_most_common_value (gimple *stmt, const char *counter_type,
     *value = 0;
   gcov_type read_all = abs_hwi (hist->hvalue.counters[0]);
+  gcov_type covered = 0;
+  for (unsigned i = 0; i < counters; ++i)
+    covered += hist->hvalue.counters[2 * i + 3];
   gcov_type v = hist->hvalue.counters[2 * n + 2];
   gcov_type c = hist->hvalue.counters[2 * n + 3];
   if (hist->hvalue.counters[0] < 0
-      && (flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS
-         || (flag_profile_reproducible
-             == PROFILE_REPRODUCIBILITY_MULTITHREADED)))
+      && flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS)
+    return false;
+  else if (covered != read_all
+          && flag_profile_reproducible == PROFILE_REPRODUCIBILITY_MULTITHREADED)
      return false;

I think it would be useful to add dump messages here for easier
debugging.  Perhaps we could even have some kind of user-visible
missed-optimization warning?

Yes.
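
Something like this sketch, say; the use of the global dump_file stream and
the message wording are illustrative only, not a committed change:

  if (hist->hvalue.counters[0] < 0
      && flag_profile_reproducible == PROFILE_REPRODUCIBILITY_PARALLEL_RUNS)
    {
      if (dump_file)
        fprintf (dump_file,
                 "Ignoring TOPN counter: some values were lost during "
                 "merging and -fprofile-reproducible=parallel-runs is used\n");
      return false;
    }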

   /* Indirect calls can't be verified.  */
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index e474e032b54..55df1bafa79 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -213,6 +213,63 @@ static struct gcov_fn_buffer *fn_buffer;
  /* Including system dependent components. */
  #include "libgcov-driver-system.c"
+/* Prune TOP N value COUNTERS. It's needed in order to preserve
+   reproducibility of builds.  */
+
+static void
+prune_topn_counter (gcov_type *counters)
+{
+  gcov_type total = counters[0];
+  gcov_type threshold = total / GCOV_TOPN_MAXIMUM_TRACKED_VALUES;
+  gcov_type *nodes = &counters[1];
+
+  struct gcov_kvp **prev = (struct gcov_kvp **)(intptr_t)&counters[2];
+
+  for (struct gcov_kvp *node = *prev; node != NULL; node = node->next)
+    {
+      /* Count is small in this run, skip it.  */
+      if (node->count < threshold)
+       {
+         --(*nodes);
+         /* Skip the node from linked list.  */
+         *prev = node->next;
+       }
+      else
+       prev = &node->next;
+    }
+}

I remember we had issues with streaming running in parallel with
threads.  Can't we get corruption here without atomic updates of nodes
and the next pointer?

You are right, it can get into an inconsistent state in the pruning code.
I will likely drop the pruning path for now; it's an optional feature.
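
For the record, making the unlink itself safe against a racing reader would
need something like the sketch below, using the __atomic built-ins
(illustrative only; it does not fix the reader side or the update path):

      if (node->count < threshold)
        {
          /* Unlink the node and update the length with atomic stores so
             a concurrent reader never sees a torn pointer; the memory
             ordering here is illustrative.  */
          __atomic_store_n (prev, node->next, __ATOMIC_RELEASE);
          __atomic_fetch_sub (nodes, 1, __ATOMIC_RELAXED);
        }
      else
        prev = &node->next;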


I also remember that these parallel updates were pretty bad, because if
you have multithreaded concurrent updates of a very frequent indirect
branch, like in Firefox, the likelihood that an update happens between
the pruning and the quite slow stream-out is high.

Yes.


One option would be to do the skipping non-destructively while streaming
out. Another option would be to have a global flag and turn off TOPN profile
collection while streaming is in progress.

We also have a problem when we merge counters with existing ones on disk:
they can also be updated while being merged.
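
To make the global-flag idea concrete, a minimal sketch; the names are
hypothetical, not existing libgcov symbols:

  /* Hypothetical flag, set while stream-out or merging is in progress.  */
  static int gcov_topn_frozen;

  static void
  gcov_topn_freeze (void)
  {
    __atomic_store_n (&gcov_topn_frozen, 1, __ATOMIC_RELEASE);
  }

  static void
  gcov_topn_unfreeze (void)
  {
    __atomic_store_n (&gcov_topn_frozen, 0, __ATOMIC_RELEASE);
  }

  /* Checked on the instrumentation fast path: drop TOPN updates while
     frozen, trading a little profile precision for a consistent
     snapshot.  */
  static int
  gcov_topn_update_allowed (void)
  {
    return !__atomic_load_n (&gcov_topn_frozen, __ATOMIC_ACQUIRE);
  }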

diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index 9306e8d688c..35936a8364b 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -107,7 +107,9 @@ __gcov_merge_topn (gcov_type *counters, unsigned n_counters)
        gcov_type all = gcov_get_counter_ignore_scaling (-1);
        gcov_type n = gcov_get_counter_ignore_scaling (-1);
-      counters[GCOV_TOPN_MEM_COUNTERS * i] += all;
+      unsigned full = all < 0;
+      gcov_type *total = &counters[GCOV_TOPN_MEM_COUNTERS * i];
+      *total += full ? -all : all;

Here I think total will be a negative value if full is true.

No, 'total' comes from the in-memory counters. These can't have total < 0.

       for (unsigned j = 0; j < n; j++)
         {
@@ -115,9 +117,12 @@ __gcov_merge_topn (gcov_type *counters, unsigned n_counters)
          gcov_type count = gcov_get_counter_ignore_scaling (-1);
// TODO: we should use atomic here
-         gcov_topn_add_value (counters + GCOV_TOPN_MEM_COUNTERS * i, value,
-                              count, 0, 0);
+         full |= gcov_topn_add_value (counters + GCOV_TOPN_MEM_COUNTERS * i,
+                                      value, count, 0, 0);
        }
+
+      if (full)
+       *total = -(*total);

While here you flip it again to a positive value if it is still full after
adding new values?

No, as explained in the previous comment.
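
To spell the invariant out (the code is condensed from the hunk above, the
comments are mine):

  gcov_type all = gcov_get_counter_ignore_scaling (-1);
  unsigned full = all < 0;      /* negative total on disk == counter was full */

  *total += full ? -all : all;  /* merge the magnitude only, so the in-memory
                                   total stays >= 0 while values are added */

  full |= gcov_topn_add_value (counters + GCOV_TOPN_MEM_COUNTERS * i,
                               value, count, 0, 0);  /* adding may fill it */

  if (full)
    *total = -(*total);         /* re-apply the marker exactly once */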

Is this safe with respect to concurrent updates of the counters from a
parallel thread?

Unfortunately, starting from this point we may set total to a negative value
(until it's streamed). But the training run can still do total++ for each
instrumented value.
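
A hypothetical interleaving, to illustrate the window:

  /* merging thread                        training run
     --------------                        ------------
     *total += all;
     full |= gcov_topn_add_value (...);
     *total = -(*total);
                                           counters[0]++;

     The increment moves the now-negative total toward zero, corrupting
     both the magnitude and the 'full' marker.  */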

Martin


Honza

