Hi, This patch injects a condition into the instrumented code for edge counter update. The counter value will not be updated after reaching value 1.
The feature is under a new parameter --param=coverage-exec_once. Default is disabled and setting to 1 to enable. This extra check usually slows the program down. For SPEC 2006 benchmarks (all single thread programs), we usually see around 20%-35% slow down in -O2 coverage build. This feature, however, is expected to improve the coverage run speed for multi-threaded programs, because there virtually no data race and false sharing in updating counters. The improvement can be significant for highly threaded programs -- we are seeing 7x speedup in coverage test run for some non-trivial google applications. Tested with bootstrap. Thanks, -Rong
2013-11-21 Rong Xu <x...@google.com> * gcc/doc/invoke.texi (coverage-exec_once): Add document. * gcc/params.def (coverage-exec_once): New param. * gcc/profile.h (gcov_type sum_edge_counts): Add extern decl. * gcc/profile.c (branch_prob): Add support for coverage-exec_once. * gcc/tree-profile.c (gimple_gen_edge_profiler): Ditto. (gimple_gen_ior_profiler): Ditto. (insert_if_then): Ditto. (add_execonce_wrapper): Ditto. (is_pred_instrumentation_candidate): Ditto. (add_predicate_to_edge_counters): Ditto. (cleanup_pred_instrumentation): Ditto. (init_pred_instrumentation): Ditto. (tree_profiling): Ditto. Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 205226) +++ gcc/doc/invoke.texi (working copy) @@ -9937,6 +9937,10 @@ The default choice depends on the target. Set the maximum number of existing candidates that will be considered when seeking a basis for a new straight-line strength reduction candidate. +@item coverage-exec_once +Set to 1 to update each arc counter only once. This avoids false sharing +and speeds up profile-generate run for multi-threaded programs. + @end table @end table Index: gcc/params.def =================================================================== --- gcc/params.def (revision 205226) +++ gcc/params.def (working copy) @@ -441,6 +441,14 @@ DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY, "Stop forward growth if the probability of best edge is less than this threshold (in percent). Used when profile feedback is not available", 50, 0, 100) +/* This parameter enables fast coverage test. Once the counter flips from 0 + to 1, we no longer updating the value . This avoids false sharing and speeds + up profile-generate run for multi-threaded programs. */ +DEFPARAM (PARAM_COVERAGE_EXEC_ONCE, + "coverage-exec_once", + "Stop incrementing edge counts once they become 1.", + 0, 0, 1) + /* The maximum number of incoming edges to consider for crossjumping. */ DEFPARAM(PARAM_MAX_CROSSJUMP_EDGES, "max-crossjump-edges", Index: gcc/profile.c =================================================================== --- gcc/profile.c (revision 205226) +++ gcc/profile.c (working copy) @@ -67,6 +67,7 @@ along with GCC; see the file COPYING3. If not see #include "cfgloop.h" #include "dumpfile.h" #include "cgraph.h" +#include "params.h" #include "profile.h" @@ -1327,6 +1328,9 @@ branch_prob (void) /* Commit changes done by instrumentation. */ gsi_commit_edge_inserts (); + + if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE)) + add_predicate_to_edge_counters (); } free_aux_for_edges (); Index: gcc/profile.h =================================================================== --- gcc/profile.h (revision 205226) +++ gcc/profile.h (working copy) @@ -46,6 +46,9 @@ extern gcov_type sum_edge_counts (vec<edge, va_gc> extern void init_node_map (bool); extern void del_node_map (void); +/* Insert a predicate to edge counter update stmt. */ +extern void add_predicate_to_edge_counters (void); + extern void get_working_sets (void); /* In predict.c. */ Index: gcc/tree-profile.c =================================================================== --- gcc/tree-profile.c (revision 205226) +++ gcc/tree-profile.c (working copy) @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "target.h" #include "tree-cfgcleanup.h" #include "tree-nested.h" +#include "params.h" static GTY(()) tree gcov_type_node; static GTY(()) tree tree_interval_profiler_fn; @@ -67,6 +68,11 @@ static GTY(()) tree ic_void_ptr_var; static GTY(()) tree ic_gcov_type_ptr_var; static GTY(()) tree ptr_void; +/* A pointer-set of the last statement in each block of statements that need to + be applied a predicate . */ +static struct pointer_set_t *pred_instrumentation_candidate = NULL; + + /* Do initialization work for the edge profiler. */ /* Add code: @@ -295,6 +301,10 @@ gimple_gen_edge_profiler (int edgeno, edge e) stmt2 = gimple_build_assign_with_ops (PLUS_EXPR, gcov_type_tmp_var, gimple_assign_lhs (stmt1), one); stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2)); + + if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE)) + pointer_set_insert (pred_instrumentation_candidate, stmt3); + gsi_insert_on_edge (e, stmt1); gsi_insert_on_edge (e, stmt2); gsi_insert_on_edge (e, stmt3); @@ -554,6 +564,121 @@ gimple_gen_ior_profiler (histogram_value value, un gsi_insert_before (&gsi, call, GSI_NEW_STMT); } +/* Insert STMT_IF around given sequence of consecutive statements in the + same basic block starting with STMT_START, ending with STMT_END. + PROB is the probability of the taken branch. */ + +static void +insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if, int prob) +{ + gimple_stmt_iterator gsi; + basic_block bb_original, bb_before_if, bb_after_if; + edge e_if_taken, e_then_join, e_else; + int orig_frequency; + + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT); + bb_original = gsi_bb (gsi); + e_if_taken = split_block (bb_original, stmt_if); + e_if_taken->flags &= ~EDGE_FALLTHRU; + e_if_taken->flags |= EDGE_TRUE_VALUE; + e_then_join = split_block (e_if_taken->dest, stmt_end); + bb_before_if = e_if_taken->src; + bb_after_if = e_then_join->dest; + e_else = make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE); + orig_frequency = bb_original->frequency; + e_if_taken->probability = prob; + e_else->probability = REG_BR_PROB_BASE - prob; + e_if_taken->dest->frequency = orig_frequency * (prob / REG_BR_PROB_BASE); +} + +/* Add a conditional stmt so that counter update will only exec one time. */ + +static void +add_execonce_wrapper (gimple stmt_start, gimple stmt_end) +{ + tree zero, tmp1; + gimple stmt_if, stmt_assign; + gimple_stmt_iterator gsi; + + tmp1 = make_temp_ssa_name (gcov_type_node, NULL, "PROF_temp"); + stmt_assign = gimple_build_assign (tmp1, unshare_expr (gimple_assign_lhs (stmt_end))); + + zero = build_int_cst (get_gcov_type (), 0); + stmt_if = gimple_build_cond (EQ_EXPR, tmp1, zero, NULL_TREE, NULL_TREE); + + gsi = gsi_for_stmt (stmt_start); + gsi_insert_before (&gsi, stmt_assign, GSI_SAME_STMT); + + /* Insert IF block. */ + insert_if_then (stmt_start, stmt_end, stmt_if, 1); +} + +/* Return whether STMT is an instrumentation block. */ + +static bool +is_pred_instrumentation_candidate (gimple stmt) +{ + return pointer_set_contains (pred_instrumentation_candidate, stmt); +} + +/* Number of statements inserted for each edge counter increment. */ +#define EDGE_COUNTER_STMT_COUNT 3 + +/* Add a predicate wrapper around edge counter code in current function. */ + +void +add_predicate_to_edge_counters (void) +{ + gimple_stmt_iterator gsi; + basic_block bb; + + if (!PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE)) + return; + + FOR_EACH_BB_REVERSE (bb) + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi)) + { + gimple stmt_end = gsi_stmt (gsi); + if (is_pred_instrumentation_candidate (stmt_end)) + { + gimple stmt_beg; + int i; + int edge_counter_stmt_count = EDGE_COUNTER_STMT_COUNT; + + for (i = 0; i < edge_counter_stmt_count - 1; i++) + gsi_prev (&gsi); + stmt_beg = gsi_stmt (gsi); + gcc_assert (stmt_beg); + + add_execonce_wrapper (stmt_beg, stmt_end); + + /* reset the iterator and continue. */ + gsi = gsi_last_bb (bb); + } + } +} + +static void +cleanup_pred_instrumentation (void) +{ + /* Free the hashmap. */ + if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE)) + { + pointer_set_destroy (pred_instrumentation_candidate); + pred_instrumentation_candidate = NULL; + } +} + +static void +init_pred_instrumentation (void) +{ + if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE) + && pred_instrumentation_candidate == 0) + pred_instrumentation_candidate = pointer_set_create (); + +} + /* Profile all functions in the callgraph. */ static unsigned int @@ -567,6 +692,8 @@ tree_profiling (void) init_node_map (true); + init_pred_instrumentation (); + FOR_EACH_DEFINED_FUNCTION (node) { if (!gimple_has_body_p (node->decl)) @@ -656,6 +783,7 @@ tree_profiling (void) handle_missing_profiles (); del_node_map (); + cleanup_pred_instrumentation (); return 0; }