Hi,

This patch injects a condition into the instrumented code for edge
counter update. The counter value will not be updated after reaching
value 1.

The feature is under a new parameter --param=coverage-exec_once.
Default is disabled and setting to 1 to enable.

This extra check usually slows the program down. For SPEC 2006
benchmarks (all single thread programs), we usually see around 20%-35%
slow down in -O2 coverage build. This feature, however, is expected to
improve the coverage run speed for multi-threaded programs, because
there virtually no data race and false sharing in updating counters.
The improvement can be significant for highly threaded programs -- we
are seeing 7x speedup in coverage test run for some non-trivial google
applications.

Tested with bootstrap.

Thanks,

-Rong
2013-11-21  Rong Xu  <x...@google.com>

        * gcc/doc/invoke.texi (coverage-exec_once): Add document.
        * gcc/params.def (coverage-exec_once): New param.
        * gcc/profile.h (gcov_type sum_edge_counts): Add extern decl.
        * gcc/profile.c (branch_prob): Add support for coverage-exec_once.
        * gcc/tree-profile.c (gimple_gen_edge_profiler): Ditto.
        (gimple_gen_ior_profiler): Ditto.
        (insert_if_then): Ditto.
        (add_execonce_wrapper): Ditto.
        (is_pred_instrumentation_candidate): Ditto.
        (add_predicate_to_edge_counters): Ditto.
        (cleanup_pred_instrumentation): Ditto.
        (init_pred_instrumentation): Ditto.
        (tree_profiling): Ditto.

Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi (revision 205226)
+++ gcc/doc/invoke.texi (working copy)
@@ -9937,6 +9937,10 @@ The default choice depends on the target.
 Set the maximum number of existing candidates that will be considered when
 seeking a basis for a new straight-line strength reduction candidate.
 
+@item coverage-exec_once
+Set to 1 to update each arc counter only once. This avoids false sharing
+and speeds up profile-generate run for multi-threaded programs.
+
 @end table
 @end table
 
Index: gcc/params.def
===================================================================
--- gcc/params.def      (revision 205226)
+++ gcc/params.def      (working copy)
@@ -441,6 +441,14 @@ DEFPARAM(TRACER_MIN_BRANCH_PROBABILITY,
         "Stop forward growth if the probability of best edge is less than this 
threshold (in percent). Used when profile feedback is not available",
         50, 0, 100)
 
+/* This parameter enables fast coverage test. Once the counter flips from 0
+   to 1, we no longer updating the value . This avoids false sharing and speeds
+   up profile-generate run for multi-threaded programs.  */
+DEFPARAM (PARAM_COVERAGE_EXEC_ONCE,
+         "coverage-exec_once",
+         "Stop incrementing edge counts once they become 1.",
+         0, 0, 1)
+
 /* The maximum number of incoming edges to consider for crossjumping.  */
 DEFPARAM(PARAM_MAX_CROSSJUMP_EDGES,
         "max-crossjump-edges",
Index: gcc/profile.c
===================================================================
--- gcc/profile.c       (revision 205226)
+++ gcc/profile.c       (working copy)
@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "dumpfile.h"
 #include "cgraph.h"
+#include "params.h"
 
 #include "profile.h"
 
@@ -1327,6 +1328,9 @@ branch_prob (void)
 
       /* Commit changes done by instrumentation.  */
       gsi_commit_edge_inserts ();
+
+      if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE))
+        add_predicate_to_edge_counters ();
     }
 
   free_aux_for_edges ();
Index: gcc/profile.h
===================================================================
--- gcc/profile.h       (revision 205226)
+++ gcc/profile.h       (working copy)
@@ -46,6 +46,9 @@ extern gcov_type sum_edge_counts (vec<edge, va_gc>
 extern void init_node_map (bool);
 extern void del_node_map (void);
 
+/* Insert a predicate to edge counter update stmt.  */
+extern void add_predicate_to_edge_counters (void);
+
 extern void get_working_sets (void);
 
 /* In predict.c.  */
Index: gcc/tree-profile.c
===================================================================
--- gcc/tree-profile.c  (revision 205226)
+++ gcc/tree-profile.c  (working copy)
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "target.h"
 #include "tree-cfgcleanup.h"
 #include "tree-nested.h"
+#include "params.h"
 
 static GTY(()) tree gcov_type_node;
 static GTY(()) tree tree_interval_profiler_fn;
@@ -67,6 +68,11 @@ static GTY(()) tree ic_void_ptr_var;
 static GTY(()) tree ic_gcov_type_ptr_var;
 static GTY(()) tree ptr_void;
 
+/* A pointer-set of the last statement in each block of statements that need to
+   be applied a predicate .  */
+static struct pointer_set_t *pred_instrumentation_candidate = NULL;
+
+
 /* Do initialization work for the edge profiler.  */
 
 /* Add code:
@@ -295,6 +301,10 @@ gimple_gen_edge_profiler (int edgeno, edge e)
   stmt2 = gimple_build_assign_with_ops (PLUS_EXPR, gcov_type_tmp_var,
                                        gimple_assign_lhs (stmt1), one);
   stmt3 = gimple_build_assign (unshare_expr (ref), gimple_assign_lhs (stmt2));
+
+  if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE))
+    pointer_set_insert (pred_instrumentation_candidate, stmt3);
+
   gsi_insert_on_edge (e, stmt1);
   gsi_insert_on_edge (e, stmt2);
   gsi_insert_on_edge (e, stmt3);
@@ -554,6 +564,121 @@ gimple_gen_ior_profiler (histogram_value value, un
   gsi_insert_before (&gsi, call, GSI_NEW_STMT);
 }
 
+/* Insert STMT_IF around given sequence of consecutive statements in the
+   same basic block starting with STMT_START, ending with STMT_END.
+   PROB is the probability of the taken branch.  */
+
+static void
+insert_if_then (gimple stmt_start, gimple stmt_end, gimple stmt_if, int prob)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb_original, bb_before_if, bb_after_if;
+  edge e_if_taken, e_then_join, e_else;
+  int orig_frequency;
+
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_if, GSI_SAME_STMT);
+  bb_original = gsi_bb (gsi);
+  e_if_taken = split_block (bb_original, stmt_if);
+  e_if_taken->flags &= ~EDGE_FALLTHRU;
+  e_if_taken->flags |= EDGE_TRUE_VALUE;
+  e_then_join = split_block (e_if_taken->dest, stmt_end);
+  bb_before_if = e_if_taken->src;
+  bb_after_if = e_then_join->dest;
+  e_else = make_edge (bb_before_if, bb_after_if, EDGE_FALSE_VALUE);
+  orig_frequency = bb_original->frequency;
+  e_if_taken->probability = prob;
+  e_else->probability = REG_BR_PROB_BASE - prob;
+  e_if_taken->dest->frequency = orig_frequency * (prob / REG_BR_PROB_BASE);
+}
+
+/* Add a conditional stmt so that counter update will only exec one time.  */
+
+static void
+add_execonce_wrapper (gimple stmt_start, gimple stmt_end)
+{
+  tree zero, tmp1;
+  gimple stmt_if, stmt_assign;
+  gimple_stmt_iterator gsi;
+
+  tmp1 = make_temp_ssa_name (gcov_type_node, NULL, "PROF_temp");
+  stmt_assign = gimple_build_assign (tmp1, unshare_expr (gimple_assign_lhs 
(stmt_end)));
+
+  zero = build_int_cst (get_gcov_type (), 0);
+  stmt_if = gimple_build_cond (EQ_EXPR, tmp1, zero, NULL_TREE, NULL_TREE);
+
+  gsi = gsi_for_stmt (stmt_start);
+  gsi_insert_before (&gsi, stmt_assign, GSI_SAME_STMT);
+
+  /* Insert IF block.  */
+  insert_if_then (stmt_start, stmt_end, stmt_if, 1);
+}
+
+/* Return whether STMT is an instrumentation block.  */
+
+static bool
+is_pred_instrumentation_candidate (gimple stmt)
+{
+  return pointer_set_contains (pred_instrumentation_candidate, stmt);
+}
+
+/* Number of statements inserted for each edge counter increment.  */
+#define EDGE_COUNTER_STMT_COUNT 3
+
+/* Add a predicate wrapper around edge counter code in current function.  */
+
+void
+add_predicate_to_edge_counters (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+
+  if (!PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE))
+    return;
+
+  FOR_EACH_BB_REVERSE (bb)
+    for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+      {
+        gimple stmt_end = gsi_stmt (gsi);
+        if (is_pred_instrumentation_candidate (stmt_end))
+          {
+            gimple stmt_beg;
+            int i;
+            int edge_counter_stmt_count = EDGE_COUNTER_STMT_COUNT;
+
+            for (i = 0; i < edge_counter_stmt_count - 1; i++)
+              gsi_prev (&gsi);
+            stmt_beg = gsi_stmt (gsi);
+            gcc_assert (stmt_beg);
+
+            add_execonce_wrapper (stmt_beg, stmt_end);
+
+            /* reset the iterator and continue.  */
+            gsi = gsi_last_bb (bb);
+          }
+      }
+}
+
+static void
+cleanup_pred_instrumentation (void)
+{
+  /* Free the hashmap.  */
+  if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE))
+    {
+      pointer_set_destroy (pred_instrumentation_candidate);
+      pred_instrumentation_candidate = NULL;
+    }
+}
+
+static void
+init_pred_instrumentation (void)
+{
+  if (PARAM_VALUE (PARAM_COVERAGE_EXEC_ONCE)
+      && pred_instrumentation_candidate == 0)
+    pred_instrumentation_candidate = pointer_set_create ();
+
+}
+
 /* Profile all functions in the callgraph.  */
 
 static unsigned int
@@ -567,6 +692,8 @@ tree_profiling (void)
 
   init_node_map (true);
 
+  init_pred_instrumentation ();
+
   FOR_EACH_DEFINED_FUNCTION (node)
     {
       if (!gimple_has_body_p (node->decl))
@@ -656,6 +783,7 @@ tree_profiling (void)
   handle_missing_profiles ();
 
   del_node_map ();
+  cleanup_pred_instrumentation ();
   return 0;
 }
 

Reply via email to