The attached is the revised patch with a warning suggested for cases when CFG matches, but source locations change.
Ok for trunk? thanks, David On Sun, Apr 17, 2011 at 8:36 AM, Jan Hubicka <hubi...@ucw.cz> wrote: >> Hi, in current FDO implementation, the source file version used in >> profile-generate needs to strictly match the version used in >> profile-use -- simple formating changes will invalidate the profile >> data (use of it will lead to compiler error or ICE). There are two >> main problems that lead to the weakness: >> >> 1) the function checksum is computed based on line number and number >> of basic blocks -- this means any line number change will invalidate >> the profile data. In fact, line number should play very minimal role >> in profile matching decision. Another problem is that number of basic >> blocks is also not a good indicator whether CFG matches or not. >> 2) cgraph's pid is used as the key to find the matching profile data >> for a function. If there is any new function added, or if the order of >> the functions changes, the profile data is invalidated. >> >> The attached is a patch from google that improves this. >> 1) function checksum is split into two parts: lineno checksum and cfg >> checksum >> 2) function assembler name is used in profile hashing. > > Well, the overall idea was to make profile intolerant to pretty much any > source > level change, since you never know if even when the CFG shape match, the > actual > probabiliies did not changed. > I however see that during development it is not bad to make compiler grok > the profile as long as it seems consistent. > > I did not looked in detail into the implementation yet, but it seems > sane at first glance. However what about issuing a warning when line number > information change telling user that profile might no longer be accurate > and that he may want to remove it and train again? > > Honza >
Index: tree.c =================================================================== --- tree.c (revision 172693) +++ tree.c (working copy) @@ -8508,14 +8508,12 @@ dump_tree_statistics (void) #define FILE_FUNCTION_FORMAT "_GLOBAL__%s_%s" -/* Generate a crc32 of a string. */ +/* Generate a crc32 of a byte. */ unsigned -crc32_string (unsigned chksum, const char *string) +crc32_byte (unsigned chksum, char byte) { - do - { - unsigned value = *string << 24; + unsigned value = (unsigned) byte << 24; unsigned ix; for (ix = 8; ix--; value <<= 1) @@ -8526,6 +8524,18 @@ crc32_string (unsigned chksum, const cha chksum <<= 1; chksum ^= feedback; } + return chksum; +} + + +/* Generate a crc32 of a string. */ + +unsigned +crc32_string (unsigned chksum, const char *string) +{ + do + { + chksum = crc32_byte (chksum, *string); } while (*string++); return chksum; @@ -8549,8 +8559,10 @@ clean_symbol_name (char *p) *p = '_'; } -/* Generate a name for a special-purpose function function. +/* Generate a name for a special-purpose function. The generated name may need to be unique across the whole link. + Changes to this function may also require corresponding changes to + xstrdup_mask_random. TYPE is some string to identify the purpose of this function to the linker or collect2; it must start with an uppercase letter, one of: Index: tree.h =================================================================== --- tree.h (revision 172693) +++ tree.h (working copy) @@ -4998,6 +4998,7 @@ inlined_function_outer_scope_p (const_tr /* In tree.c */ extern unsigned crc32_string (unsigned, const char *); +extern unsigned crc32_byte (unsigned, char); extern void clean_symbol_name (char *); extern tree get_file_function_name (const char *); extern tree get_callee_fndecl (const_tree); Index: gcov.c =================================================================== --- gcov.c (revision 172693) +++ gcov.c (working copy) @@ -54,6 +54,13 @@ along with Gcov; see the file COPYING3. some places we make use of the knowledge of how profile.c works to select particular algorithms here. */ +/* The code validates that the profile information read in corresponds + to the code currently being compiled. Rather than checking for + identical files, the code below computes a checksum on the CFG + (based on the order of basic blocks and the arcs in the CFG). If + the CFG checksum in the gcda file match the CFG checksum for the + code currently being compiled, the profile data will be used. */ + /* This is the size of the buffer used to read in source file lines. */ #define STRING_SIZE 200 @@ -161,7 +168,8 @@ typedef struct function_info /* Name of function. */ char *name; unsigned ident; - unsigned checksum; + unsigned lineno_checksum; + unsigned cfg_checksum; /* Array of basic blocks. */ block_t *blocks; @@ -809,12 +817,14 @@ read_graph_file (void) if (tag == GCOV_TAG_FUNCTION) { char *function_name; - unsigned ident, checksum, lineno; + unsigned ident, lineno; + unsigned lineno_checksum, cfg_checksum; source_t *src; function_t *probe, *prev; ident = gcov_read_unsigned (); - checksum = gcov_read_unsigned (); + lineno_checksum = gcov_read_unsigned (); + cfg_checksum = gcov_read_unsigned (); function_name = xstrdup (gcov_read_string ()); src = find_source (gcov_read_string ()); lineno = gcov_read_unsigned (); @@ -822,7 +832,8 @@ read_graph_file (void) fn = XCNEW (function_t); fn->name = function_name; fn->ident = ident; - fn->checksum = checksum; + fn->lineno_checksum = lineno_checksum; + fn->cfg_checksum = cfg_checksum; fn->src = src; fn->line = lineno; @@ -1039,6 +1050,7 @@ read_count_file (void) unsigned tag; function_t *fn = NULL; int error = 0; + const char *fn_name; if (!gcov_open (da_file_name, 1)) { @@ -1109,13 +1121,20 @@ read_count_file (void) if (!fn) ; - else if (gcov_read_unsigned () != fn->checksum) + else if (gcov_read_unsigned () != fn->lineno_checksum + || gcov_read_unsigned () != fn->cfg_checksum) { mismatch:; fnotice (stderr, "%s:profile mismatch for '%s'\n", da_file_name, fn->name); goto cleanup; } + + fn_name = gcov_read_string (); + if (strcmp (fn_name, fn->name) != 0) + fnotice (stderr, "%s:profile mismatch:" + " expected function name '%s' but found '%s'\n", + da_file_name, fn->name, fn_name); } else if (tag == GCOV_TAG_FOR_COUNTER (GCOV_COUNTER_ARCS) && fn) { Index: testsuite/gcc.dg/tree-prof/prof-robust-1.c =================================================================== --- testsuite/gcc.dg/tree-prof/prof-robust-1.c (revision 0) +++ testsuite/gcc.dg/tree-prof/prof-robust-1.c (revision 0) @@ -0,0 +1,21 @@ +/* { dg-options "-O2 -w" } */ + +#include <stdio.h> +#include <stdlib.h> + +#ifdef _PROFILE_USE +int foo(int x) { + return 3 * x; +} +#endif + +int x = 1000; + +int main(int argc, char *argv[]) { + int i; + int sum = 0; + for (i = 0; i < x; i++) + sum += i; + printf("%d\n", sum); + return 0; +} Index: testsuite/g++.dg/prof-robust-1.C =================================================================== --- testsuite/g++.dg/prof-robust-1.C (revision 0) +++ testsuite/g++.dg/prof-robust-1.C (revision 0) @@ -0,0 +1,24 @@ +/* { dg-options "-O2 -fno-weak" } */ + +#include <stdio.h> + +namespace { + namespace { + + class MyClass { + public: + void foo() const; + ~MyClass() { foo(); } + }; + + void MyClass::foo() const { printf("Goodbye World\n"); } + + } + + static MyClass variable; + +} + +int main() { + return 0; +} Index: gcov-io.c =================================================================== --- gcov-io.c (revision 172693) +++ gcov-io.c (working copy) @@ -28,6 +28,12 @@ see the files COPYING3 and COPYING.RUNTI /* Routines declared in gcov-io.h. This file should be #included by another source file, after having #included gcov-io.h. */ +/* Redefine these here, rather than using the ones in system.h since + * including system.h leads to conflicting definitions of other + * symbols and macros. */ +#undef MIN +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) + #if !IN_GCOV static void gcov_write_block (unsigned); static gcov_unsigned_t *gcov_write_words (unsigned); @@ -228,6 +234,27 @@ gcov_write_block (unsigned size) gcov_var.offset -= size; } +#if IN_LIBGCOV + +/* These functions are guarded by #if to avoid compile time warning. */ + +/* Return the number of words STRING would need including the length + field in the output stream itself. This should be identical to + "alloc" calculation in gcov_write_string(). */ + +GCOV_LINKAGE gcov_unsigned_t +gcov_string_length (const char *string) +{ + gcov_unsigned_t len = (string) ? strlen (string) : 0; + /* + 1 because of the length field. */ + gcov_unsigned_t alloc = 1 + ((len + 4) >> 2); + + /* Can not write a bigger than GCOV_BLOCK_SIZE string yet */ + gcc_assert (alloc < GCOV_BLOCK_SIZE); + return alloc; +} +#endif + /* Allocate space to write BYTES bytes to the gcov file. Return a pointer to those bytes, or NULL on failure. */ @@ -238,13 +265,15 @@ gcov_write_words (unsigned words) gcc_assert (gcov_var.mode < 0); #if IN_LIBGCOV - if (gcov_var.offset >= GCOV_BLOCK_SIZE) + if (gcov_var.offset + words >= GCOV_BLOCK_SIZE) { - gcov_write_block (GCOV_BLOCK_SIZE); + gcov_write_block (MIN (gcov_var.offset, GCOV_BLOCK_SIZE)); if (gcov_var.offset) { - gcc_assert (gcov_var.offset == 1); - memcpy (gcov_var.buffer, gcov_var.buffer + GCOV_BLOCK_SIZE, 4); + gcc_assert (gcov_var.offset < GCOV_BLOCK_SIZE); + memcpy (gcov_var.buffer, + gcov_var.buffer + GCOV_BLOCK_SIZE, + gcov_var.offset << 2); } } #else @@ -285,7 +314,6 @@ gcov_write_counter (gcov_type value) } #endif /* IN_LIBGCOV */ -#if !IN_LIBGCOV /* Write STRING to coverage file. Sets error flag on file error, overflow flag on overflow */ @@ -308,7 +336,7 @@ gcov_write_string (const char *string) buffer[alloc] = 0; memcpy (&buffer[1], string, length); } -#endif + #if !IN_LIBGCOV /* Write a tag TAG and reserve space for the record length. Return a @@ -396,14 +424,15 @@ gcov_read_words (unsigned words) unsigned excess = gcov_var.length - gcov_var.offset; gcc_assert (gcov_var.mode > 0); + gcc_assert (words < GCOV_BLOCK_SIZE); if (excess < words) { gcov_var.start += gcov_var.offset; #if IN_LIBGCOV if (excess) { - gcc_assert (excess == 1); - memcpy (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, 4); + gcc_assert (excess < GCOV_BLOCK_SIZE); + memmove (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, excess * 4); } #else memmove (gcov_var.buffer, gcov_var.buffer + gcov_var.offset, excess * 4); @@ -411,8 +440,7 @@ gcov_read_words (unsigned words) gcov_var.offset = 0; gcov_var.length = excess; #if IN_LIBGCOV - gcc_assert (!gcov_var.length || gcov_var.length == 1); - excess = GCOV_BLOCK_SIZE; + excess = (sizeof (gcov_var.buffer) / sizeof (gcov_var.buffer[0])) - gcov_var.length; #else if (gcov_var.length + words > gcov_var.alloc) gcov_allocate (gcov_var.length + words); @@ -472,7 +500,6 @@ gcov_read_counter (void) buffer, or NULL on empty string. You must copy the string before calling another gcov function. */ -#if !IN_LIBGCOV GCOV_LINKAGE const char * gcov_read_string (void) { @@ -483,7 +510,7 @@ gcov_read_string (void) return (const char *) gcov_read_words (length); } -#endif + GCOV_LINKAGE void gcov_read_summary (struct gcov_summary *summary) Index: gcov-io.h =================================================================== --- gcov-io.h (revision 172693) +++ gcov-io.h (working copy) @@ -101,9 +101,10 @@ see the files COPYING3 and COPYING.RUNTI The basic block graph file contains the following records note: unit function-graph* - unit: header int32:checksum string:source + unit: header int32:checksum int32: string:source function-graph: announce_function basic_blocks {arcs | lines}* - announce_function: header int32:ident int32:checksum + announce_function: header int32:ident + int32:lineno_checksum int32:cfg_checksum string:name string:source int32:lineno basic_block: header int32:flags* arcs: header int32:block_no arc* @@ -132,7 +133,9 @@ see the files COPYING3 and COPYING.RUNTI data: {unit function-data* summary:object summary:program*}* unit: header int32:checksum function-data: announce_function arc_counts - announce_function: header int32:ident int32:checksum + announce_function: header int32:ident + int32:lineno_checksum int32:cfg_checksum + string:name arc_counts: header int64:count* summary: int32:checksum {count-summary}GCOV_COUNTERS count-summary: int32:num int32:runs int64:sum @@ -243,13 +246,16 @@ typedef HOST_WIDEST_INT gcov_type; #define gcov_write_unsigned __gcov_write_unsigned #define gcov_write_counter __gcov_write_counter #define gcov_write_summary __gcov_write_summary +#define gcov_write_string __gcov_write_string +#define gcov_string_length __gcov_string_length #define gcov_read_unsigned __gcov_read_unsigned #define gcov_read_counter __gcov_read_counter +#define gcov_read_string __gcov_read_string #define gcov_read_summary __gcov_read_summary /* Poison these, so they don't accidentally slip in. */ -#pragma GCC poison gcov_write_string gcov_write_tag gcov_write_length -#pragma GCC poison gcov_read_string gcov_sync gcov_time gcov_magic +#pragma GCC poison gcov_write_tag gcov_write_length +#pragma GCC poison gcov_sync gcov_time gcov_magic #ifdef HAVE_GAS_HIDDEN #define ATTRIBUTE_HIDDEN __attribute__ ((__visibility__ ("hidden"))) @@ -294,7 +300,7 @@ typedef HOST_WIDEST_INT gcov_type; file marker -- it is not required to be present. */ #define GCOV_TAG_FUNCTION ((gcov_unsigned_t)0x01000000) -#define GCOV_TAG_FUNCTION_LENGTH (2) +#define GCOV_TAG_FUNCTION_LENGTH (3) #define GCOV_TAG_BLOCKS ((gcov_unsigned_t)0x01410000) #define GCOV_TAG_BLOCKS_LENGTH(NUM) (NUM) #define GCOV_TAG_BLOCKS_NUM(LENGTH) (LENGTH) @@ -411,11 +417,20 @@ struct gcov_summary /* Information about a single function. This uses the trailing array idiom. The number of counters is determined from the counter_mask in gcov_info. We hold an array of function info, so have to - explicitly calculate the correct array stride. */ + explicitly calculate the correct array stride. + + "ident" is no longer used in hash computation. Additionally, + "name" is used in hash computation. This makes the profile data + not compatible across function name changes. + Also, the function pointer is stored for later use for indirect + function call name resolution. + */ struct gcov_fn_info { gcov_unsigned_t ident; /* unique ident of function */ - gcov_unsigned_t checksum; /* function checksum */ + gcov_unsigned_t lineno_checksum; /* function lineo_checksum */ + gcov_unsigned_t cfg_checksum; /* function cfg checksum */ + const char *name; /* name of the function */ unsigned n_ctrs[0]; /* instrumented counters */ }; @@ -543,6 +558,7 @@ static int gcov_is_error (void); GCOV_LINKAGE gcov_unsigned_t gcov_read_unsigned (void) ATTRIBUTE_HIDDEN; GCOV_LINKAGE gcov_type gcov_read_counter (void) ATTRIBUTE_HIDDEN; GCOV_LINKAGE void gcov_read_summary (struct gcov_summary *) ATTRIBUTE_HIDDEN; +GCOV_LINKAGE const char *gcov_read_string (void) ATTRIBUTE_HIDDEN; #if IN_LIBGCOV /* Available only in libgcov */ @@ -554,9 +570,9 @@ GCOV_LINKAGE void gcov_write_summary (gc ATTRIBUTE_HIDDEN; static void gcov_rewrite (void); GCOV_LINKAGE void gcov_seek (gcov_position_t /*position*/) ATTRIBUTE_HIDDEN; +GCOV_LINKAGE gcov_unsigned_t gcov_string_length (const char *) ATTRIBUTE_HIDDEN; #else /* Available outside libgcov */ -GCOV_LINKAGE const char *gcov_read_string (void); GCOV_LINKAGE void gcov_sync (gcov_position_t /*base*/, gcov_unsigned_t /*length */); #endif @@ -564,11 +580,11 @@ GCOV_LINKAGE void gcov_sync (gcov_positi #if !IN_GCOV /* Available outside gcov */ GCOV_LINKAGE void gcov_write_unsigned (gcov_unsigned_t) ATTRIBUTE_HIDDEN; +GCOV_LINKAGE void gcov_write_string (const char *) ATTRIBUTE_HIDDEN; #endif #if !IN_GCOV && !IN_LIBGCOV /* Available only in compiler */ -GCOV_LINKAGE void gcov_write_string (const char *); GCOV_LINKAGE gcov_position_t gcov_write_tag (gcov_unsigned_t); GCOV_LINKAGE void gcov_write_length (gcov_position_t /*position*/); #endif Index: profile.c =================================================================== --- profile.c (revision 172693) +++ profile.c (working copy) @@ -102,13 +102,6 @@ static int total_num_branches; /* Forward declarations. */ static void find_spanning_tree (struct edge_list *); -static unsigned instrument_edges (struct edge_list *); -static void instrument_values (histogram_values); -static void compute_branch_probabilities (void); -static void compute_value_histograms (histogram_values); -static gcov_type * get_exec_counts (void); -static basic_block find_group (basic_block); -static void union_groups (basic_block, basic_block); /* Add edge instrumentation code to the entire insn chain. @@ -233,10 +226,12 @@ instrument_values (histogram_values valu } -/* Computes hybrid profile for all matching entries in da_file. */ +/* Computes hybrid profile for all matching entries in da_file. + + CFG_CHECKSUM is the precomputed checksum for the CFG. */ static gcov_type * -get_exec_counts (void) +get_exec_counts (unsigned cfg_checksum, unsigned lineno_checksum) { unsigned num_edges = 0; basic_block bb; @@ -253,7 +248,8 @@ get_exec_counts (void) num_edges++; } - counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, &profile_info); + counts = get_coverage_counts (GCOV_COUNTER_ARCS, num_edges, cfg_checksum, + lineno_checksum, &profile_info); if (!counts) return NULL; @@ -442,10 +438,12 @@ read_profile_edge_counts (gcov_type *exe } /* Compute the branch probabilities for the various branches. - Annotate them accordingly. */ + Annotate them accordingly. + + CFG_CHECKSUM is the precomputed checksum for the CFG. */ static void -compute_branch_probabilities (void) +compute_branch_probabilities (unsigned cfg_checksum, unsigned lineno_checksum) { basic_block bb; int i; @@ -454,7 +452,7 @@ compute_branch_probabilities (void) int passes; int hist_br_prob[20]; int num_branches; - gcov_type *exec_counts = get_exec_counts (); + gcov_type *exec_counts = get_exec_counts (cfg_checksum, lineno_checksum); int inconsistent = 0; /* Very simple sanity checks so we catch bugs in our profiling code. */ @@ -772,10 +770,13 @@ compute_branch_probabilities (void) } /* Load value histograms values whose description is stored in VALUES array - from .gcda file. */ + from .gcda file. + + CFG_CHECKSUM is the precomputed checksum for the CFG. */ static void -compute_value_histograms (histogram_values values) +compute_value_histograms (histogram_values values, unsigned cfg_checksum, + unsigned lineno_checksum) { unsigned i, j, t, any; unsigned n_histogram_counters[GCOV_N_VALUE_COUNTERS]; @@ -803,7 +804,8 @@ compute_value_histograms (histogram_valu histogram_counts[t] = get_coverage_counts (COUNTER_FOR_HIST_TYPE (t), - n_histogram_counters[t], NULL); + n_histogram_counters[t], cfg_checksum, + lineno_checksum, NULL); if (histogram_counts[t]) any = 1; act_count[t] = histogram_counts[t]; @@ -906,6 +908,7 @@ branch_prob (void) unsigned num_instrumented; struct edge_list *el; histogram_values values = NULL; + unsigned cfg_checksum, lineno_checksum; total_num_times_called++; @@ -1059,11 +1062,19 @@ branch_prob (void) if (dump_file) fprintf (dump_file, "%d ignored edges\n", ignored_edges); + + /* Compute two different checksums. Note that we want to compute + the checksum in only once place, since it depends on the shape + of the control flow which can change during + various transformations. */ + cfg_checksum = coverage_compute_cfg_checksum (); + lineno_checksum = coverage_compute_lineno_checksum (); + /* Write the data from which gcov can reconstruct the basic block graph. */ /* Basic block flags */ - if (coverage_begin_output ()) + if (coverage_begin_output (lineno_checksum, cfg_checksum)) { gcov_position_t offset; @@ -1080,7 +1091,7 @@ branch_prob (void) EXIT_BLOCK_PTR->index = last_basic_block; /* Arcs */ - if (coverage_begin_output ()) + if (coverage_begin_output (lineno_checksum, cfg_checksum)) { gcov_position_t offset; @@ -1121,7 +1132,7 @@ branch_prob (void) } /* Line numbers. */ - if (coverage_begin_output ()) + if (coverage_begin_output (lineno_checksum, cfg_checksum)) { /* Initialize the output. */ output_location (NULL, 0, NULL, NULL); @@ -1176,9 +1187,9 @@ branch_prob (void) if (flag_branch_probabilities) { - compute_branch_probabilities (); + compute_branch_probabilities (cfg_checksum, lineno_checksum); if (flag_profile_values) - compute_value_histograms (values); + compute_value_histograms (values, cfg_checksum, lineno_checksum); } remove_fake_edges (); @@ -1206,7 +1217,7 @@ branch_prob (void) VEC_free (histogram_value, heap, values); free_edge_list (el); - coverage_end_function (); + coverage_end_function (lineno_checksum, cfg_checksum); } /* Union find algorithm implementation for the basic blocks using @@ -1373,4 +1384,3 @@ end_branch_prob (void) } } } - Index: coverage.c =================================================================== --- coverage.c (revision 172693) +++ coverage.c (working copy) @@ -51,13 +51,17 @@ along with GCC; see the file COPYING3. #include "intl.h" #include "filenames.h" +#include "gcov-io.h" #include "gcov-io.c" struct function_list { struct function_list *next; /* next function */ unsigned ident; /* function ident */ - unsigned checksum; /* function checksum */ + unsigned lineno_checksum; /* function lineno checksum */ + unsigned cfg_checksum; /* function cfg checksum */ + const char *name; /* function name */ + tree decl; /* function decl */ unsigned n_ctrs[GCOV_COUNTERS];/* number of counters. */ }; @@ -67,9 +71,11 @@ typedef struct counts_entry /* We hash by */ unsigned ident; unsigned ctr; + char *name; /* Store */ - unsigned checksum; + unsigned lineno_checksum; + unsigned cfg_checksum; gcov_type *counts; struct gcov_ctr_summary summary; @@ -114,14 +120,13 @@ static hashval_t htab_counts_entry_hash static int htab_counts_entry_eq (const void *, const void *); static void htab_counts_entry_del (void *); static void read_counts_file (void); -static unsigned compute_checksum (void); -static unsigned coverage_checksum_string (unsigned, const char *); static tree build_fn_info_type (unsigned); static tree build_fn_info_value (const struct function_list *, tree); static tree build_ctr_info_type (void); static tree build_ctr_info_value (unsigned, tree); static tree build_gcov_info (void); static void create_coverage (void); +static char * xstrdup_mask_random (const char *); /* Return the type node for gcov_type. */ @@ -139,12 +144,21 @@ get_gcov_unsigned_t (void) return lang_hooks.types.type_for_size (32, true); } +/* Return the type node for const char *. */ + +static tree +get_const_string_type (void) +{ + return build_pointer_type + (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); +} + static hashval_t htab_counts_entry_hash (const void *of) { const counts_entry_t *const entry = (const counts_entry_t *) of; - return entry->ident * GCOV_COUNTERS + entry->ctr; + return htab_hash_string (entry->name) + entry->ctr; } static int @@ -153,7 +167,8 @@ htab_counts_entry_eq (const void *of1, c const counts_entry_t *const entry1 = (const counts_entry_t *) of1; const counts_entry_t *const entry2 = (const counts_entry_t *) of2; - return entry1->ident == entry2->ident && entry1->ctr == entry2->ctr; + return (entry1->ctr == entry2->ctr + && strcmp (entry1->name, entry2->name) == 0); } static void @@ -161,6 +176,7 @@ htab_counts_entry_del (void *of) { counts_entry_t *const entry = (counts_entry_t *) of; + free (entry->name); free (entry->counts); free (entry); } @@ -171,11 +187,13 @@ static void read_counts_file (void) { gcov_unsigned_t fn_ident = 0; - gcov_unsigned_t checksum = -1; + char *name = NULL; counts_entry_t *summaried = NULL; unsigned seen_summary = 0; gcov_unsigned_t tag; int is_error = 0; + unsigned lineno_checksum = 0; + unsigned cfg_checksum = 0; if (!gcov_open (da_file_name, 1)) return; @@ -214,8 +232,21 @@ read_counts_file (void) offset = gcov_position (); if (tag == GCOV_TAG_FUNCTION) { + const char *nm; fn_ident = gcov_read_unsigned (); - checksum = gcov_read_unsigned (); + lineno_checksum = gcov_read_unsigned (); + cfg_checksum = gcov_read_unsigned (); + nm = gcov_read_string(); + if (!nm) + { + /* Error. Die! */ + fatal_error ("corrupted profile - name of the function" + " (ident = 0x%x lineno_checksum 0x%x " + "ccfg_checksum 0x%x)" + " is NULL.", fn_ident, lineno_checksum, + cfg_checksum); + } + name = xstrdup (nm); if (seen_summary) { /* We have already seen a summary, this means that this @@ -257,6 +288,7 @@ read_counts_file (void) unsigned ix; elt.ident = fn_ident; + elt.name = name; elt.ctr = GCOV_COUNTER_FOR_TAG (tag); slot = (counts_entry_t **) htab_find_slot @@ -265,17 +297,22 @@ read_counts_file (void) if (!entry) { *slot = entry = XCNEW (counts_entry_t); - entry->ident = elt.ident; + entry->ident = fn_ident; + entry->name = name; entry->ctr = elt.ctr; - entry->checksum = checksum; + entry->lineno_checksum = lineno_checksum; + entry->cfg_checksum = cfg_checksum; entry->summary.num = n_counts; entry->counts = XCNEWVEC (gcov_type, n_counts); } - else if (entry->checksum != checksum) + else if (entry->lineno_checksum != lineno_checksum + || entry->cfg_checksum != cfg_checksum) { error ("coverage mismatch for function %u while reading execution counters", fn_ident); - error ("checksum is %x instead of %x", entry->checksum, checksum); + error ("checksum is (%x,%x) instead of (%x,%x)", + entry->lineno_checksum, entry->cfg_checksum, + lineno_checksum, cfg_checksum); htab_delete (counts_hash); break; } @@ -324,10 +361,10 @@ read_counts_file (void) gcov_type * get_coverage_counts (unsigned counter, unsigned expected, + unsigned cfg_checksum, unsigned lineno_checksum, const struct gcov_ctr_summary **summary) { counts_entry_t *entry, elt; - gcov_unsigned_t checksum = -1; /* No hash table, no counts. */ if (!counts_hash) @@ -343,6 +380,8 @@ get_coverage_counts (unsigned counter, u } elt.ident = current_function_funcdef_no + 1; + elt.name = xstrdup_mask_random + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl))); elt.ctr = counter; entry = (counts_entry_t *) htab_find (counts_hash, &elt); if (!entry) @@ -352,23 +391,22 @@ get_coverage_counts (unsigned counter, u return NULL; } - checksum = compute_checksum (); - if (entry->checksum != checksum + if (entry->cfg_checksum != cfg_checksum || entry->summary.num != expected) { static int warned = 0; bool warning_printed = false; tree id = DECL_ASSEMBLER_NAME (current_function_decl); - warning_printed = - warning_at (input_location, OPT_Wcoverage_mismatch, + warning_printed = + warning_at (input_location, OPT_Wcoverage_mismatch, "coverage mismatch for function " "%qE while reading counter %qs", id, ctr_names[counter]); if (warning_printed) { - if (entry->checksum != checksum) - inform (input_location, "checksum is %x instead of %x", - entry->checksum, checksum); + if (entry->cfg_checksum != cfg_checksum) + inform (input_location, "cfg_checksum is %x instead of %x", + entry->cfg_checksum, cfg_checksum); else inform (input_location, "number of counters is %d instead of %d", entry->summary.num, expected); @@ -388,6 +426,12 @@ get_coverage_counts (unsigned counter, u return NULL; } + else if (entry->lineno_checksum != lineno_checksum) + { + warning (0, "Source location for function %qE have changed," + " the profile data may be out of date", + DECL_ASSEMBLER_NAME (current_function_decl)); + } if (summary) *summary = &entry->summary; @@ -467,79 +511,125 @@ tree_coverage_counter_addr (unsigned cou NULL, NULL)); } -/* Generate a checksum for a string. CHKSUM is the current - checksum. */ - -static unsigned -coverage_checksum_string (unsigned chksum, const char *string) +/* Mask out the random part of the identifier name. + Returns a pointer to the newly allocated string + that contains random part masked out to 0. */ + +static char * +xstrdup_mask_random (const char *string) { - int i; - char *dup = NULL; + char *dup = xstrdup (string); + char *ptr = dup; /* Look for everything that looks if it were produced by get_file_function_name and zero out the second part - that may result from flag_random_seed. This is not critical - as the checksums are used only for sanity checking. */ - for (i = 0; string[i]; i++) - { - int offset = 0; - if (!strncmp (string + i, "_GLOBAL__N_", 11)) - offset = 11; - if (!strncmp (string + i, "_GLOBAL__", 9)) - offset = 9; - - /* C++ namespaces do have scheme: - _GLOBAL__N_<filename>_<wrongmagicnumber>_<magicnumber>functionname - since filename might contain extra underscores there seems - to be no better chance then walk all possible offsets looking - for magicnumber. */ - if (offset) - { - for (i = i + offset; string[i]; i++) - if (string[i]=='_') - { - int y; - - for (y = 1; y < 9; y++) - if (!(string[i + y] >= '0' && string[i + y] <= '9') - && !(string[i + y] >= 'A' && string[i + y] <= 'F')) - break; - if (y != 9 || string[i + 9] != '_') - continue; - for (y = 10; y < 18; y++) - if (!(string[i + y] >= '0' && string[i + y] <= '9') - && !(string[i + y] >= 'A' && string[i + y] <= 'F')) - break; - if (y != 18) - continue; - if (!dup) - string = dup = xstrdup (string); - for (y = 10; y < 18; y++) - dup[i + y] = '0'; - } - break; - } + that may result from flag_random_seed. This is critical + since we use the function name as the key for finding + the profile entry. */ +#define GLOBAL_PREFIX "_GLOBAL__" +#define TRAILING_N "N_" +#define ISCAPXDIGIT(a) (((a) >= '0' && (a) <= '9') || ((a) >= 'A' && (a) <= 'F')) + /* Thanks to anonymous namespace, we can have a function name + with multiple GLOBAL_PREFIX. */ + while ((ptr = strstr (ptr, GLOBAL_PREFIX))) + { + char *temp_ptr; + /* Skip _GLOBAL__. */ + ptr += strlen (GLOBAL_PREFIX); + + /* Skip optional N_ (in case __GLOBAL_N__). */ + if (!strncmp (ptr, TRAILING_N, strlen (TRAILING_N))) + ptr += strlen (TRAILING_N); + /* At this point, ptr should point after "_GLOBAL__N_" or "_GLOBAL__". */ + + while ((temp_ptr = strchr (ptr, '_')) != NULL) + { + int y; + + ptr = temp_ptr; + /* For every "_" in the rest of the string, + try the follwing pattern matching */ + + /* Skip over '_'. */ + ptr++; +#define NDIGITS (8) + /* Try matching the pattern: + <8-digit hex>_<8-digit hex> + The second number is randomly generated + so we want to mask it out before computing the checksum. */ + for (y = 0; *ptr != 0 && y < NDIGITS; y++, ptr++) + if (!ISCAPXDIGIT (*ptr)) + break; + if (y != NDIGITS || *ptr != '_') + continue; + /* Skip over '_' again. */ + ptr++; + for (y = 0; *ptr != 0 && y < NDIGITS; y++, ptr++) + if (!ISCAPXDIGIT (*ptr)) + break; + + if (y == NDIGITS) + { + /* We have a match. + Mask out the second 8-digit number. */ + for(y = -NDIGITS - 1 ; y < 0; y++) + ptr[y] = '0'; + break; + } + } } - chksum = crc32_string (chksum, string); - if (dup) - free (dup); - - return chksum; + return dup; } -/* Compute checksum for the current function. We generate a CRC32. */ -static unsigned -compute_checksum (void) +/* Compute checksum for the current function. We generate a CRC32. + TODO: This checksum can be made more stringent by computing + crc32 over the filename/lineno output in gcno. */ + +unsigned +coverage_compute_lineno_checksum (void) { expanded_location xloc = expand_location (DECL_SOURCE_LOCATION (current_function_decl)); + char *name = xstrdup_mask_random + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl))); unsigned chksum = xloc.line; - chksum = coverage_checksum_string (chksum, xloc.file); - chksum = coverage_checksum_string - (chksum, IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl))); + chksum = crc32_string (chksum, xloc.file); + chksum = crc32_string (chksum, name); + + free (name); + + return chksum; +} + + +/* Compute cfg checksum for the current function. + The checksum is calculated carefully so that + source code changes that doesn't affect the control flow graph + won't change the checksum. + This is to make the profile data useable across source code change. + The downside of this is that the compiler may use potentially + wrong profile data - that the source code change has non-trivial impact + on the validity of profile data (e.g. the reversed condition) + but the compiler won't detect the change and use the wrong profile data. */ + +unsigned +coverage_compute_cfg_checksum (void) +{ + basic_block bb; + unsigned chksum = n_basic_blocks; + + FOR_EACH_BB (bb) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->succs) + { + chksum = crc32_byte (chksum, e->dest->index); + } + } return chksum; } @@ -550,7 +640,7 @@ compute_checksum (void) should be output. */ int -coverage_begin_output (void) +coverage_begin_output (unsigned lineno_checksum, unsigned cfg_checksum) { /* We don't need to output .gcno file unless we're under -ftest-coverage (e.g. -fprofile-arcs/generate/use don't need .gcno to work). */ @@ -562,6 +652,7 @@ coverage_begin_output (void) expanded_location xloc = expand_location (DECL_SOURCE_LOCATION (current_function_decl)); unsigned long offset; + char *name; if (!bbg_file_opened) { @@ -576,17 +667,22 @@ coverage_begin_output (void) bbg_file_opened = 1; } + name = xstrdup_mask_random + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl))); + /* Announce function */ offset = gcov_write_tag (GCOV_TAG_FUNCTION); gcov_write_unsigned (current_function_funcdef_no + 1); - gcov_write_unsigned (compute_checksum ()); - gcov_write_string (IDENTIFIER_POINTER - (DECL_ASSEMBLER_NAME (current_function_decl))); + gcov_write_unsigned (lineno_checksum); + gcov_write_unsigned (cfg_checksum); + gcov_write_string (name); gcov_write_string (xloc.file); gcov_write_unsigned (xloc.line); gcov_write_length (offset); bbg_function_announced = 1; + + free (name); } return !gcov_is_error (); } @@ -595,7 +691,7 @@ coverage_begin_output (void) error has occurred. Save function coverage counts. */ void -coverage_end_function (void) +coverage_end_function (unsigned lineno_checksum, unsigned cfg_checksum) { unsigned i; @@ -608,15 +704,22 @@ coverage_end_function (void) if (fn_ctr_mask) { struct function_list *item; + const char *name; item = XNEW (struct function_list); *functions_tail = item; functions_tail = &item->next; + name = xstrdup_mask_random + (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl))); + item->next = 0; item->ident = current_function_funcdef_no + 1; - item->checksum = compute_checksum (); + item->decl = current_function_decl; + item->name = name; + item->lineno_checksum = lineno_checksum; + item->cfg_checksum = cfg_checksum; for (i = 0; i != GCOV_COUNTERS; i++) { item->n_ctrs[i] = fn_n_ctrs[i]; @@ -641,13 +744,24 @@ build_fn_info_type (unsigned int counter /* ident */ fields = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ()); - - /* checksum */ + /* lineno_checksum */ field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ()); DECL_CHAIN (field) = fields; fields = field; + /* cfg checksum */ + field = build_decl (BUILTINS_LOCATION, + FIELD_DECL, NULL_TREE, get_gcov_unsigned_t ()); + DECL_CHAIN (field) = fields; + fields = field; + + /* name */ + field = build_decl (BUILTINS_LOCATION, + FIELD_DECL, NULL_TREE, get_const_string_type ()); + DECL_CHAIN (field) = fields; + fields = field; + array_type = build_int_cst (NULL_TREE, counters - 1); array_type = build_index_type (array_type); array_type = build_array_type (get_gcov_unsigned_t (), array_type); @@ -681,10 +795,22 @@ build_fn_info_value (const struct functi function->ident)); fields = DECL_CHAIN (fields); - /* checksum */ + /* lineno_checksum */ + CONSTRUCTOR_APPEND_ELT (v1, fields, + build_int_cstu (get_gcov_unsigned_t (), + function->lineno_checksum)); + fields = DECL_CHAIN (fields); + + /* cfg_checksum */ CONSTRUCTOR_APPEND_ELT (v1, fields, build_int_cstu (get_gcov_unsigned_t (), - function->checksum)); + function->cfg_checksum)); + fields = DECL_CHAIN (fields); + + /* name */ + CONSTRUCTOR_APPEND_ELT (v1, fields, + build_string_literal (strlen (function->name) + 1, + function->name)); fields = DECL_CHAIN (fields); /* counters */ Index: coverage.h =================================================================== --- coverage.h (revision 172693) +++ coverage.h (working copy) @@ -28,11 +28,17 @@ extern void coverage_finish (void); /* Complete the coverage information for the current function. Once per function. */ -extern void coverage_end_function (void); +extern void coverage_end_function (unsigned, unsigned); /* Start outputting coverage information for the current function. Repeatable per function. */ -extern int coverage_begin_output (void); +extern int coverage_begin_output (unsigned, unsigned); + +/* Compute the control flow checksum for the current function. */ +extern unsigned coverage_compute_cfg_checksum (void); + +/* Compute the line number checksum for the current function. */ +extern unsigned coverage_compute_lineno_checksum (void); /* Allocate some counters. Repeatable per function. */ extern int coverage_counter_alloc (unsigned /*counter*/, unsigned/*num*/); @@ -44,6 +50,8 @@ extern tree tree_coverage_counter_addr ( /* Get all the counters for the current function. */ extern gcov_type *get_coverage_counts (unsigned /*counter*/, unsigned /*expected*/, + unsigned /*cfg_checksum*/, + unsigned /*lineno_checksum*/, const struct gcov_ctr_summary **); extern tree get_gcov_type (void); Index: libgcov.c =================================================================== --- libgcov.c (revision 172693) +++ libgcov.c (working copy) @@ -372,9 +372,10 @@ gcov_exit (void) /* Check function. */ if (tag != GCOV_TAG_FUNCTION - || length != GCOV_TAG_FUNCTION_LENGTH || gcov_read_unsigned () != fi_ptr->ident - || gcov_read_unsigned () != fi_ptr->checksum) + || gcov_read_unsigned () != fi_ptr->lineno_checksum + || gcov_read_unsigned () != fi_ptr->cfg_checksum + || strcmp (gcov_read_string (), fi_ptr->name)) { read_mismatch:; fprintf (stderr, "profiling:%s:Merge mismatch for %s\n", @@ -515,9 +516,13 @@ gcov_exit (void) ((const char *) gi_ptr->functions + f_ix * fi_stride); /* Announce function. */ - gcov_write_tag_length (GCOV_TAG_FUNCTION, GCOV_TAG_FUNCTION_LENGTH); + gcov_write_tag_length + (GCOV_TAG_FUNCTION, + GCOV_TAG_FUNCTION_LENGTH + gcov_string_length (fi_ptr->name)); gcov_write_unsigned (fi_ptr->ident); - gcov_write_unsigned (fi_ptr->checksum); + gcov_write_unsigned (fi_ptr->lineno_checksum); + gcov_write_unsigned (fi_ptr->cfg_checksum); + gcov_write_string (fi_ptr->name); c_ix = 0; for (t_ix = 0; t_ix < GCOV_COUNTERS; t_ix++) Index: gcov-dump.c =================================================================== --- gcov-dump.c (revision 172693) +++ gcov-dump.c (working copy) @@ -265,21 +265,18 @@ tag_function (const char *filename ATTRI unsigned tag ATTRIBUTE_UNUSED, unsigned length ATTRIBUTE_UNUSED) { unsigned long pos = gcov_position (); + const char *name; printf (" ident=%u", gcov_read_unsigned ()); - printf (", checksum=0x%08x", gcov_read_unsigned ()); + printf (", lineno_checksum=0x%08x", gcov_read_unsigned ()); + printf (", cfg_checksum=0x%08x", gcov_read_unsigned ()); - if (gcov_position () - pos < length) - { - const char *name; - - name = gcov_read_string (); - printf (", `%s'", name ? name : "NULL"); name = gcov_read_string (); printf (" %s", name ? name : "NULL"); + + if (gcov_position () - pos < length) printf (":%u", gcov_read_unsigned ()); } -} static void tag_blocks (const char *filename ATTRIBUTE_UNUSED,