Hi.

So I'm sending first version of the relaxation patch and a demonstration
source file that can demonstrate how me merge profile for indirect
functions provided on input. There are some demonstrations:

1) rm indir-call.gcda -f ; ./a.out ABCD && gcov-dump -l indir-call.gcda | grep 
-A2 'indirect_call 9'
indir-call.gcda:    01a90000:  18:COUNTERS indirect_call 9 counts
indir-call.gcda:                   0: 4 51084318 4 1042717606 4 1451412899 4 
1149064406
indir-call.gcda:                   8: 4

All 4 values will fit in the counter (note that profile read code will divide 
all values by 4).

2) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && ./a.out C && gcov-dump 
-l indir-call.gcda | grep -A2 'indirect_call 9'
indir-call.gcda:    01a90000:  18:COUNTERS indirect_call 9 counts
indir-call.gcda:                   0: 7 51084318 12 1042717606 12 0 0 0
indir-call.gcda:                   8: 0

Here A, B, C have frequency < 5/4.

3) rm indir-call.gcda -f ; ./a.out AABCDE && gcov-dump -l indir-call.gcda | 
grep -A2 'indirect_call 9'
indir-call.gcda:    01a90000:  18:COUNTERS indirect_call 9 counts
indir-call.gcda:                   0: 6 51084318 7 0 0 0 0 0
indir-call.gcda:                   8: 0

Here only A survives (2 * 4 - 1).

4) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && gcov-dump -l 
indir-call.gcda | grep -A2 'indirect_call 9'
indir-call.gcda:    01a90000:  18:COUNTERS indirect_call 9 counts
indir-call.gcda:                   0: 6 51084318 12 1042717606 12 0 0 0
indir-call.gcda:                   8: 0

Merging works in between runs properly

5) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && ./a.out C && gcov-dump 
-l indir-call.gcda | grep -A2 'indirect_call 9'
indir-call.gcda:    01a90000:  18:COUNTERS indirect_call 9 counts
indir-call.gcda:                   0: 7 51084318 12 1042717606 12 0 0 0
indir-call.gcda:                   8: 0

Same here, where C is discarded due to small count.

Can you please Honza take a look and provide a feedback? About the new option:
I would like not to invent a bigger complexity. Even now, the merging is quite
complex.

Martin

>From 0965602cd24d29e6d4442a0e530794e70650328e Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Wed, 15 Jan 2020 15:39:55 +0100
Subject: [PATCH] Smart relaxation of TOP N counter.

---
 gcc/gcov-io.c             | 25 +++++++++++++++++
 gcc/gcov-io.h             |  1 +
 gcc/profile.c             |  9 ++++++-
 libgcc/libgcov-driver.c   | 33 ++++++++++++++++++++++-
 libgcc/libgcov-merge.c    | 57 ++++++++++++++++++++++++---------------
 libgcc/libgcov-profiler.c | 27 +++++++++----------
 6 files changed, 113 insertions(+), 39 deletions(-)

diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c
index 213c413b3a0..0bdc49bc108 100644
--- a/gcc/gcov-io.c
+++ b/gcc/gcov-io.c
@@ -587,6 +587,31 @@ mangle_path (char const *base)
   return buffer;
 }
 
+/* Prune TOP N value COUNTERS.  */
+
+void
+prune_topn_counter (gcov_type *counters, gcov_type all)
+{
+  if (counters[1] == -1)
+    return;
+
+  // TODO
+#if 0
+  fprintf (stderr, "prune_topn_counter for all=%d\n", all);
+  for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+    fprintf (stderr, "%d:%d:%d\n", i, counters[2 * i], counters[2 * i + 1]);
+#endif
+
+  for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+    {
+      if (counters[2 * i + 1] < all)
+	{
+	  counters[2 * i] = 0;
+	  counters[2 * i + 1] = 0;
+	}
+    }
+}
+
 /* We need to expose the below function when compiling for gcov-tool.  */
 
 #if !IN_LIBGCOV || defined (IN_GCOV_TOOL)
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index d21a43c4c31..61bb7def23b 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -338,6 +338,7 @@ GCOV_LINKAGE const char *gcov_read_string (void);
 GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/,
 			     gcov_unsigned_t /*length */);
 char *mangle_path (char const *base);
+void prune_topn_counter (gcov_type *counters, gcov_type all);
 
 #if !IN_GCOV
 /* Available outside gcov */
diff --git a/gcc/profile.c b/gcc/profile.c
index 6a2de21c3bd..1a6caf75a61 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -863,7 +863,14 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum,
 
       if (hist->type == HIST_TYPE_TOPN_VALUES
 	  || hist->type == HIST_TYPE_INDIR_CALL)
-	sort_hist_values (hist);
+	{
+	  /* Each count value is multiplied by GCOV_TOPN_VALUES.  */
+	  if (hist->hvalue.counters[2] != -1)
+	    for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+	      hist->hvalue.counters[2 * i + 2] /= GCOV_TOPN_VALUES;
+
+	  sort_hist_values (hist);
+	}
 
       /* Time profiler counter is not related to any statement,
          so that we have to read the counter and set the value to
diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c
index 1c07761b7f1..de5e2eea29b 100644
--- a/libgcc/libgcov-driver.c
+++ b/libgcc/libgcov-driver.c
@@ -213,6 +213,35 @@ static struct gcov_fn_buffer *fn_buffer;
 /* Including system dependent components. */
 #include "libgcov-driver-system.c"
 
+/* Prune counters so that they are ready to store or merge.  */
+
+static void
+prune_counters (struct gcov_info *gi)
+{
+  for (unsigned i = 0; i < gi->n_functions; i++)
+    {
+      const struct gcov_fn_info *gfi = gi->functions[i];
+      const struct gcov_ctr_info *ci = gfi->ctrs;
+      for (unsigned j = 0; j < GCOV_COUNTERS; j++)
+	{
+	  if (gi->merge[j] == NULL)
+	    continue;
+
+	  if (gi->merge[j] == __gcov_merge_topn)
+	    {
+	      gcc_assert (!(ci->num % GCOV_TOPN_VALUES_COUNTERS));
+	      for (unsigned k = 0; k < (ci->num / GCOV_TOPN_VALUES_COUNTERS); k++)
+		{
+		  gcov_type *counters
+		    = ci->values + (k * GCOV_TOPN_VALUES_COUNTERS);
+		  prune_topn_counter (counters + 1, *counters);
+		}
+	    }
+	  ci++;
+	}
+    }
+}
+
 /* This function merges counters in GI_PTR to an existing gcda file.
    Return 0 on success.
    Return -1 on error. In this case, caller will goto read_fatal.  */
@@ -429,9 +458,11 @@ dump_one_gcov (struct gcov_info *gi_ptr, struct gcov_filename *gf,
   struct gcov_summary summary = {};
   int error;
   gcov_unsigned_t tag;
-
   fn_buffer = 0;
 
+  /* Prune current counters before we merge them.  */
+  prune_counters (gi_ptr);
+
   error = gcov_exit_open_gcda_file (gi_ptr, gf);
   if (error == -1)
     return;
diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index b658aec46c6..d2898f67a5a 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -92,10 +92,11 @@ merge_topn_values_set (gcov_type *counters)
   /* First value is number of total executions of the profiler.  */
   gcov_type all = gcov_get_counter_ignore_scaling (-1);
   counters[0] += all;
+  all = counters[0];
   ++counters;
 
-  /* Read all part values.  */
-  gcov_type read_counters[2 * GCOV_TOPN_VALUES];
+  /* Read all part values.  Make the buffer twice bigger.  */
+  gcov_type read_counters[2 * 2 * GCOV_TOPN_VALUES] = {};
 
   for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
     {
@@ -109,32 +110,44 @@ merge_topn_values_set (gcov_type *counters)
       return;
     }
 
+  /* Merge both couters into read_counter which has sufficient size.  */
   for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+    for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++)
+      {
+	if (read_counters[2 * j] == counters[2 * i])
+	  {
+	    read_counters[2 * j + 1] += counters[2 * i + 1];
+	    break;
+	  }
+	else if (read_counters[2 * j + 1] == 0)
+	  {
+	    read_counters[2 * j] += counters[2 * i];
+	    read_counters[2 * j + 1] += counters[2 * i + 1];
+	    break;
+	  }
+      }
+
+  /* Prune both halves of the read_counters.  */
+  prune_topn_counter (read_counters, all);
+  prune_topn_counter (read_counters + 2 * GCOV_TOPN_VALUES, all);
+
+  /* Copy back merged values.  */
+  memset (counters, 0, GCOV_TOPN_VALUES_COUNTERS * sizeof (counters[0]));
+  unsigned index = 0;
+  for (unsigned i = 0; i < 2 * GCOV_TOPN_VALUES; i++)
     {
-      if (read_counters[2 * i + 1] == 0)
-	return;
-
-      unsigned j;
-      for (j = 0; j < GCOV_TOPN_VALUES; j++)
+      if (read_counters[2 * i + 1] > 0)
 	{
-	  if (counters[2 * j] == read_counters[2 * i])
-	    {
-	      counters[2 * j + 1] += read_counters[2 * i + 1];
-	      break;
-	    }
-	  else if (counters[2 * j + 1] == 0)
+	  /* We haven't found an empty slot, bail out.  */
+	  if (index == GCOV_TOPN_VALUES)
 	    {
-	      counters[2 * j] += read_counters[2 * i];
-	      counters[2 * j + 1] += read_counters[2 * i + 1];
-	      break;
+	      counters[1] = -1;
+	      return;
 	    }
-	}
 
-      /* We haven't found a slot, bail out.  */
-      if (j == GCOV_TOPN_VALUES)
-	{
-	  counters[1] = -1;
-	  return;
+	  counters[2 * index] = read_counters[2 * i];
+	  counters[2 * index + 1] = read_counters[2 * i + 1];
+	  index++;
 	}
     }
 }
diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
index 9417904d462..39e3c523250 100644
--- a/libgcc/libgcov-profiler.c
+++ b/libgcc/libgcov-profiler.c
@@ -119,37 +119,34 @@ __gcov_topn_values_profiler_body (gcov_type *counters, gcov_type value,
 
   ++counters;
 
-  /* We have GCOV_TOPN_VALUES as we can keep multiple values
-     next to each other.  */
-  unsigned sindex = 0;
-
   for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
     {
       if (value == counters[2 * i])
 	{
 	  if (use_atomic)
-	    __atomic_fetch_add (&counters[2 * i + 1], 1, __ATOMIC_RELAXED);
+	    __atomic_fetch_add (&counters[2 * i + 1], GCOV_TOPN_VALUES, __ATOMIC_RELAXED);
 	  else
-	    counters[2 * i + 1]++;
+	    counters[2 * i + 1] += GCOV_TOPN_VALUES;
 	  return;
 	}
       else if (counters[2 * i + 1] == 0)
 	{
 	  /* We found an empty slot.  */
 	  counters[2 * i] = value;
-	  counters[2 * i + 1] = 1;
+	  counters[2 * i + 1] = GCOV_TOPN_VALUES;
 	  return;
 	}
-
-      if (counters[2 * i + 1] < counters[2 * sindex + 1])
-	sindex = i;
     }
 
-  /* We haven't found an empty slot, then decrement the smallest.  */
-  if (use_atomic)
-    __atomic_fetch_sub (&counters[2 * sindex + 1], 1, __ATOMIC_RELAXED);
-  else
-    counters[2 * sindex + 1]--;
+  /* We haven't found an empty slot, then decrement all
+     counter values by one.  */
+  for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++)
+    {
+      if (use_atomic)
+	__atomic_fetch_sub (&counters[2 * i + 1], 1, __ATOMIC_RELAXED);
+      else
+	counters[2 * i + 1]--;
+    }
 }
 
 #ifdef L_gcov_topn_values_profiler
-- 
2.24.1

/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */

static int a (void)
{
    return 10;
}

static int b (void)
{
    return 0;
}

static int c (void)
{
    return 2;
}

static int d (void)
{
    return 22;
}

static int e (void)
{
    return 222;
}

static int f (void)
{
    return 2222;
}


typedef int (*tp) (void);
static tp fns[] = {a, b, c, d, e, f};

void setp (int (**ptr) (void), char c)
{  
  *ptr = fns[c - 'A'];
}

int
main (int argc, char **argv)
{
  if (argc != 2)
    return 1;

  char *str = argv[1];
  int (*p) (void);
  int  i;

  for (int i = 0; i < __builtin_strlen (str); i ++)
  {
      setp (&p, str[i]);
      p ();
    }
  
  return 0;
}

/* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* a1 transformation on insn" "profile"} } */
/* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* a1 transformation on insn" "afdo"} } */
/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */

Reply via email to