Hi. So I'm sending first version of the relaxation patch and a demonstration source file that can demonstrate how me merge profile for indirect functions provided on input. There are some demonstrations:
1) rm indir-call.gcda -f ; ./a.out ABCD && gcov-dump -l indir-call.gcda | grep -A2 'indirect_call 9' indir-call.gcda: 01a90000: 18:COUNTERS indirect_call 9 counts indir-call.gcda: 0: 4 51084318 4 1042717606 4 1451412899 4 1149064406 indir-call.gcda: 8: 4 All 4 values will fit in the counter (note that profile read code will divide all values by 4). 2) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && ./a.out C && gcov-dump -l indir-call.gcda | grep -A2 'indirect_call 9' indir-call.gcda: 01a90000: 18:COUNTERS indirect_call 9 counts indir-call.gcda: 0: 7 51084318 12 1042717606 12 0 0 0 indir-call.gcda: 8: 0 Here A, B, C have frequency < 5/4. 3) rm indir-call.gcda -f ; ./a.out AABCDE && gcov-dump -l indir-call.gcda | grep -A2 'indirect_call 9' indir-call.gcda: 01a90000: 18:COUNTERS indirect_call 9 counts indir-call.gcda: 0: 6 51084318 7 0 0 0 0 0 indir-call.gcda: 8: 0 Here only A survives (2 * 4 - 1). 4) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && gcov-dump -l indir-call.gcda | grep -A2 'indirect_call 9' indir-call.gcda: 01a90000: 18:COUNTERS indirect_call 9 counts indir-call.gcda: 0: 6 51084318 12 1042717606 12 0 0 0 indir-call.gcda: 8: 0 Merging works in between runs properly 5) rm indir-call.gcda -f ; ./a.out AAABB && ./a.out B && ./a.out C && gcov-dump -l indir-call.gcda | grep -A2 'indirect_call 9' indir-call.gcda: 01a90000: 18:COUNTERS indirect_call 9 counts indir-call.gcda: 0: 7 51084318 12 1042717606 12 0 0 0 indir-call.gcda: 8: 0 Same here, where C is discarded due to small count. Can you please Honza take a look and provide a feedback? About the new option: I would like not to invent a bigger complexity. Even now, the merging is quite complex. Martin
>From 0965602cd24d29e6d4442a0e530794e70650328e Mon Sep 17 00:00:00 2001 From: Martin Liska <mli...@suse.cz> Date: Wed, 15 Jan 2020 15:39:55 +0100 Subject: [PATCH] Smart relaxation of TOP N counter. --- gcc/gcov-io.c | 25 +++++++++++++++++ gcc/gcov-io.h | 1 + gcc/profile.c | 9 ++++++- libgcc/libgcov-driver.c | 33 ++++++++++++++++++++++- libgcc/libgcov-merge.c | 57 ++++++++++++++++++++++++--------------- libgcc/libgcov-profiler.c | 27 +++++++++---------- 6 files changed, 113 insertions(+), 39 deletions(-) diff --git a/gcc/gcov-io.c b/gcc/gcov-io.c index 213c413b3a0..0bdc49bc108 100644 --- a/gcc/gcov-io.c +++ b/gcc/gcov-io.c @@ -587,6 +587,31 @@ mangle_path (char const *base) return buffer; } +/* Prune TOP N value COUNTERS. */ + +void +prune_topn_counter (gcov_type *counters, gcov_type all) +{ + if (counters[1] == -1) + return; + + // TODO +#if 0 + fprintf (stderr, "prune_topn_counter for all=%d\n", all); + for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + fprintf (stderr, "%d:%d:%d\n", i, counters[2 * i], counters[2 * i + 1]); +#endif + + for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + { + if (counters[2 * i + 1] < all) + { + counters[2 * i] = 0; + counters[2 * i + 1] = 0; + } + } +} + /* We need to expose the below function when compiling for gcov-tool. */ #if !IN_LIBGCOV || defined (IN_GCOV_TOOL) diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h index d21a43c4c31..61bb7def23b 100644 --- a/gcc/gcov-io.h +++ b/gcc/gcov-io.h @@ -338,6 +338,7 @@ GCOV_LINKAGE const char *gcov_read_string (void); GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/, gcov_unsigned_t /*length */); char *mangle_path (char const *base); +void prune_topn_counter (gcov_type *counters, gcov_type all); #if !IN_GCOV /* Available outside gcov */ diff --git a/gcc/profile.c b/gcc/profile.c index 6a2de21c3bd..1a6caf75a61 100644 --- a/gcc/profile.c +++ b/gcc/profile.c @@ -863,7 +863,14 @@ compute_value_histograms (histogram_values values, unsigned cfg_checksum, if (hist->type == HIST_TYPE_TOPN_VALUES || hist->type == HIST_TYPE_INDIR_CALL) - sort_hist_values (hist); + { + /* Each count value is multiplied by GCOV_TOPN_VALUES. */ + if (hist->hvalue.counters[2] != -1) + for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + hist->hvalue.counters[2 * i + 2] /= GCOV_TOPN_VALUES; + + sort_hist_values (hist); + } /* Time profiler counter is not related to any statement, so that we have to read the counter and set the value to diff --git a/libgcc/libgcov-driver.c b/libgcc/libgcov-driver.c index 1c07761b7f1..de5e2eea29b 100644 --- a/libgcc/libgcov-driver.c +++ b/libgcc/libgcov-driver.c @@ -213,6 +213,35 @@ static struct gcov_fn_buffer *fn_buffer; /* Including system dependent components. */ #include "libgcov-driver-system.c" +/* Prune counters so that they are ready to store or merge. */ + +static void +prune_counters (struct gcov_info *gi) +{ + for (unsigned i = 0; i < gi->n_functions; i++) + { + const struct gcov_fn_info *gfi = gi->functions[i]; + const struct gcov_ctr_info *ci = gfi->ctrs; + for (unsigned j = 0; j < GCOV_COUNTERS; j++) + { + if (gi->merge[j] == NULL) + continue; + + if (gi->merge[j] == __gcov_merge_topn) + { + gcc_assert (!(ci->num % GCOV_TOPN_VALUES_COUNTERS)); + for (unsigned k = 0; k < (ci->num / GCOV_TOPN_VALUES_COUNTERS); k++) + { + gcov_type *counters + = ci->values + (k * GCOV_TOPN_VALUES_COUNTERS); + prune_topn_counter (counters + 1, *counters); + } + } + ci++; + } + } +} + /* This function merges counters in GI_PTR to an existing gcda file. Return 0 on success. Return -1 on error. In this case, caller will goto read_fatal. */ @@ -429,9 +458,11 @@ dump_one_gcov (struct gcov_info *gi_ptr, struct gcov_filename *gf, struct gcov_summary summary = {}; int error; gcov_unsigned_t tag; - fn_buffer = 0; + /* Prune current counters before we merge them. */ + prune_counters (gi_ptr); + error = gcov_exit_open_gcda_file (gi_ptr, gf); if (error == -1) return; diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c index b658aec46c6..d2898f67a5a 100644 --- a/libgcc/libgcov-merge.c +++ b/libgcc/libgcov-merge.c @@ -92,10 +92,11 @@ merge_topn_values_set (gcov_type *counters) /* First value is number of total executions of the profiler. */ gcov_type all = gcov_get_counter_ignore_scaling (-1); counters[0] += all; + all = counters[0]; ++counters; - /* Read all part values. */ - gcov_type read_counters[2 * GCOV_TOPN_VALUES]; + /* Read all part values. Make the buffer twice bigger. */ + gcov_type read_counters[2 * 2 * GCOV_TOPN_VALUES] = {}; for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) { @@ -109,32 +110,44 @@ merge_topn_values_set (gcov_type *counters) return; } + /* Merge both couters into read_counter which has sufficient size. */ for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++) + { + if (read_counters[2 * j] == counters[2 * i]) + { + read_counters[2 * j + 1] += counters[2 * i + 1]; + break; + } + else if (read_counters[2 * j + 1] == 0) + { + read_counters[2 * j] += counters[2 * i]; + read_counters[2 * j + 1] += counters[2 * i + 1]; + break; + } + } + + /* Prune both halves of the read_counters. */ + prune_topn_counter (read_counters, all); + prune_topn_counter (read_counters + 2 * GCOV_TOPN_VALUES, all); + + /* Copy back merged values. */ + memset (counters, 0, GCOV_TOPN_VALUES_COUNTERS * sizeof (counters[0])); + unsigned index = 0; + for (unsigned i = 0; i < 2 * GCOV_TOPN_VALUES; i++) { - if (read_counters[2 * i + 1] == 0) - return; - - unsigned j; - for (j = 0; j < GCOV_TOPN_VALUES; j++) + if (read_counters[2 * i + 1] > 0) { - if (counters[2 * j] == read_counters[2 * i]) - { - counters[2 * j + 1] += read_counters[2 * i + 1]; - break; - } - else if (counters[2 * j + 1] == 0) + /* We haven't found an empty slot, bail out. */ + if (index == GCOV_TOPN_VALUES) { - counters[2 * j] += read_counters[2 * i]; - counters[2 * j + 1] += read_counters[2 * i + 1]; - break; + counters[1] = -1; + return; } - } - /* We haven't found a slot, bail out. */ - if (j == GCOV_TOPN_VALUES) - { - counters[1] = -1; - return; + counters[2 * index] = read_counters[2 * i]; + counters[2 * index + 1] = read_counters[2 * i + 1]; + index++; } } } diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c index 9417904d462..39e3c523250 100644 --- a/libgcc/libgcov-profiler.c +++ b/libgcc/libgcov-profiler.c @@ -119,37 +119,34 @@ __gcov_topn_values_profiler_body (gcov_type *counters, gcov_type value, ++counters; - /* We have GCOV_TOPN_VALUES as we can keep multiple values - next to each other. */ - unsigned sindex = 0; - for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) { if (value == counters[2 * i]) { if (use_atomic) - __atomic_fetch_add (&counters[2 * i + 1], 1, __ATOMIC_RELAXED); + __atomic_fetch_add (&counters[2 * i + 1], GCOV_TOPN_VALUES, __ATOMIC_RELAXED); else - counters[2 * i + 1]++; + counters[2 * i + 1] += GCOV_TOPN_VALUES; return; } else if (counters[2 * i + 1] == 0) { /* We found an empty slot. */ counters[2 * i] = value; - counters[2 * i + 1] = 1; + counters[2 * i + 1] = GCOV_TOPN_VALUES; return; } - - if (counters[2 * i + 1] < counters[2 * sindex + 1]) - sindex = i; } - /* We haven't found an empty slot, then decrement the smallest. */ - if (use_atomic) - __atomic_fetch_sub (&counters[2 * sindex + 1], 1, __ATOMIC_RELAXED); - else - counters[2 * sindex + 1]--; + /* We haven't found an empty slot, then decrement all + counter values by one. */ + for (unsigned i = 0; i < GCOV_TOPN_VALUES; i++) + { + if (use_atomic) + __atomic_fetch_sub (&counters[2 * i + 1], 1, __ATOMIC_RELAXED); + else + counters[2 * i + 1]--; + } } #ifdef L_gcov_topn_values_profiler -- 2.24.1
/* { dg-options "-O2 -fdump-tree-optimized -fdump-ipa-profile-optimized -fdump-ipa-afdo" } */ static int a (void) { return 10; } static int b (void) { return 0; } static int c (void) { return 2; } static int d (void) { return 22; } static int e (void) { return 222; } static int f (void) { return 2222; } typedef int (*tp) (void); static tp fns[] = {a, b, c, d, e, f}; void setp (int (**ptr) (void), char c) { *ptr = fns[c - 'A']; } int main (int argc, char **argv) { if (argc != 2) return 1; char *str = argv[1]; int (*p) (void); int i; for (int i = 0; i < __builtin_strlen (str); i ++) { setp (&p, str[i]); p (); } return 0; } /* { dg-final-use-not-autofdo { scan-ipa-dump "Indirect call -> direct call.* a1 transformation on insn" "profile"} } */ /* { dg-final-use-autofdo { scan-ipa-dump "Indirect call -> direct call.* a1 transformation on insn" "afdo"} } */ /* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */