Hi,

I saw a bug happened in fdo-gen phase when a gcov counter related ssa
name was used as induction variable and used to calculate loop
boundary after loop cond was replaced by an expr with the ssa name. We
knew that there was data race in gcov counter in multithread program,
so the values in gcov counter may be changed unexpectedly. Normally
compiler optimizations assume global variables have no data race, so
it was compiler's responsiblity to prevent gcov counter related
variables from affecting program's correctness. However, using gcov
counter related ssa tmp as induction variable and doing iv
replacements was a break to this rule.

The following testcase shows the problem in concept.

void *ptr;
int N;

void foo(void *t) {
  int i;
  for (i = 0; i < N; i++) {
    t = *(void **)t;
  }
  ptr = t;
}

The compile command:
gcc-r206603/build/install/bin/gcc -O2 -fprofile-generate=./fdoprof
-fno-tree-loop-im -fdump-tree-ivopts-details-blocks -c 1.c

IR for kernel loop before IVOPT:

;;   basic block 3, loop depth 0, count 0, freq 819, maybe hot
;;    prev block 2, next block 4, flags: (NEW)
;;    pred:       2 [91.0%]  (TRUE_VALUE,EXECUTABLE)
  pretmp_1 = __gcov0.foo[0];
;;    succ:       4 [100.0%]  (FALLTHRU,EXECUTABLE)

;;   basic block 4, loop depth 1, count 0, freq 9100, maybe hot
;;    prev block 3, next block 5, flags: (NEW, REACHABLE)
;;    pred:       5 [100.0%]  (FALLTHRU,EXECUTABLE)
;;                3 [100.0%]  (FALLTHRU,EXECUTABLE)
  # t_25 = PHI <t_6(5), t_3(D)(3)>
  # i_26 = PHI <i_7(5), 0(3)>
  # prephitmp_23 = PHI <PROF_edge_counter_10(5), pretmp_1(3)>
  PROF_edge_counter_10 = prephitmp_23 + 1;
  __gcov0.foo[0] = PROF_edge_counter_10;
  t_6 = MEM[(void * *)t_25];
  i_7 = i_26 + 1;
  if (i_7 < N.0_24)
    goto <bb 5>;
  else
    goto <bb 6>;
;;    succ:       5 [91.0%]  (TRUE_VALUE,EXECUTABLE)
;;                6 [9.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 5, loop depth 1, count 0, freq 8281, maybe hot
;;    prev block 4, next block 6, flags: (NEW)
;;    pred:       4 [91.0%]  (TRUE_VALUE,EXECUTABLE)
  goto <bb 4>;

Induction variable dumps:

In IVOPT dump, I can see that some gcov counter related ssa names are
used as induction variables.
Induction variables:
ssa name PROF_edge_counter_10
  type long int
  base pretmp_1 + 1
  step 1
  is a biv
ssa name prephitmp_23
  type long int
  base pretmp_1
  step 1
  is a biv

After IVOPT:
pretmp_1 is a gcov counter related tmp var, and it is used to
calculate _33 which is the loop boundary. Sometimes register
allocation may replace the use of pretmp_1 with __gcov0.foo[0], so
there may be muliple __gcov0.foo[0] accesses in loop boundary
calculation. But the values of those __gcov0.foo[0] accesses may or
may not be the same because of data race. That caused the bug.

;;   basic block 3, loop depth 0, count 0, freq 819, maybe hot
;;    prev block 2, next block 4, flags: (NEW)
;;    pred:       2 [91.0%]  (TRUE_VALUE,EXECUTABLE)
  pretmp_1 = __gcov0.foo[0];
  _22 = pretmp_1 + 1;
  ivtmp.8_2 = (unsigned long) _22;
  _21 = (unsigned int) N.0_24;
  _29 = _21 + 4294967295;
  _30 = (unsigned long) _29;
  _31 = (unsigned long) pretmp_1;          // may be replaced by
__gcov0.foo[0] in register allocation.
  _32 = _30 + _31;
  _33 = _32 + 2;
;;    succ:       4 [100.0%]  (FALLTHRU,EXECUTABLE)

;;   basic block 4, loop depth 1, count 0, freq 9100, maybe hot
;;    prev block 3, next block 5, flags: (NEW, REACHABLE)
;;    pred:       5 [100.0%]  (FALLTHRU,EXECUTABLE)
;;                3 [100.0%]  (FALLTHRU,EXECUTABLE)
  # t_25 = PHI <t_6(5), t_3(D)(3)>
  # ivtmp.8_9 = PHI <ivtmp.8_5(5), ivtmp.8_2(3)>
  PROF_edge_counter_10 = (long int) ivtmp.8_9;
  __gcov0.foo[0] = PROF_edge_counter_10;
  t_6 = MEM[(void * *)t_25];
  ivtmp.8_5 = ivtmp.8_9 + 1;
  if (ivtmp.8_5 != _33)
    goto <bb 5>;
  else
    goto <bb 6>;
;;    succ:       5 [91.0%]  (TRUE_VALUE,EXECUTABLE)
;;                6 [9.0%]  (FALSE_VALUE,EXECUTABLE)

;;   basic block 5, loop depth 1, count 0, freq 8281, maybe hot
;;    prev block 4, next block 6, flags: (NEW)
;;    pred:       4 [91.0%]  (TRUE_VALUE,EXECUTABLE)
  goto <bb 4>;


The patch is to mark ssa name generated in profile-gen as
PROFILE_GENERATED. In IVOPT, for ssa name marked as PROFILE_GENERATED,
or ssa name defined by PHI with other PROFILE_GENERATED ssa name as
PHI operand, they will not be identified as induction variables.

Testing is going on. Is it ok if tests pass?

2014-02-10  Wei Mi  <w...@google.com>

        * tree-flow-inline.h (make_prof_ssa_name): New.
        (make_temp_prof_ssa_name): Ditto.
        * tree.h (struct tree_base): Add PROFILE_GENERATED flag for ssa name.
        * tree-inline.c (remap_ssa_name): Set PROFILE_GENERATED flag for
        ssa name.
        * tree-profile.c (add_sampling_wrapper): Ditto.
        (add_execonce_wrapper): Ditto.
        (gimple_gen_edge_profiler): Ditto.
        (gimple_gen_ic_profiler): Ditto.
        (gimple_gen_dc_profiler): Ditto.
        * value-prof.c (gimple_divmod_fixed_value): Ditto.
        (gimple_mod_pow2): Ditto.
        (gimple_mod_subtract): Ditto.
        (gimple_ic): Ditto.
        (gimple_stringop_fixed_value): Ditto.
        * tree-ssa-loop-ivopts.c (contains_abnormal_ssa_name_p): Don't use
        PROFILE_GENERATED ssa name as induction variable.
        (find_bivs): Ditto.
        (find_givs_in_stmt_scev): Ditto.

        * testsuite/gcc.dg/tree-prof/ivopt-1.c: New test.

Index: tree-flow-inline.h
===================================================================
--- tree-flow-inline.h  (revision 207019)
+++ tree-flow-inline.h  (working copy)
@@ -1192,6 +1192,17 @@ make_ssa_name (tree var, gimple stmt)
   return make_ssa_name_fn (cfun, var, stmt);
 }

+/* Call make_ssa_name and mark the ssa name as generated by
+   profiling.  */
+
+static inline tree
+make_prof_ssa_name (tree var, gimple stmt)
+{
+  tree t = make_ssa_name (var, stmt);
+  PROFILE_GENERATED (t) = 1;
+  return t;
+}
+
 /* Return an SSA_NAME node using the template SSA name NAME defined in
    statement STMT in function cfun.  */

@@ -1223,6 +1234,17 @@ make_temp_ssa_name (tree type, gimple st
   return ssa_name;
 }

+/* Call make_temp_ssa_name and mark the ssa name as generated by
+   profiling.  */
+
+static inline tree
+make_temp_prof_ssa_name (tree type, gimple stmt, const char *name)
+{
+  tree t = make_temp_ssa_name (type, stmt, name);
+  PROFILE_GENERATED (t) = 1;
+  return t;
+}
+
 /* Returns the base object and a constant BITS_PER_UNIT offset in *POFFSET that
    denotes the starting address of the memory access EXP.
    Returns NULL_TREE if the offset is not constant or any component
Index: tree-inline.c
===================================================================
--- tree-inline.c       (revision 207019)
+++ tree-inline.c       (working copy)
@@ -236,6 +236,7 @@ remap_ssa_name (tree name, copy_body_dat
       insert_decl_map (id, name, new_tree);
       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree)
        = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name);
+      PROFILE_GENERATED (new_tree) = PROFILE_GENERATED (name);
       /* At least IPA points-to info can be directly transferred.  */
       if (id->src_cfun->gimple_df
          && id->src_cfun->gimple_df->ipa_pta
@@ -268,6 +269,7 @@ remap_ssa_name (tree name, copy_body_dat
       insert_decl_map (id, name, new_tree);
       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_tree)
        = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name);
+      PROFILE_GENERATED (new_tree) = PROFILE_GENERATED (name);
       /* At least IPA points-to info can be directly transferred.  */
       if (id->src_cfun->gimple_df
          && id->src_cfun->gimple_df->ipa_pta
Index: tree-profile.c
===================================================================
--- tree-profile.c      (revision 207019)
+++ tree-profile.c      (working copy)
@@ -250,8 +250,8 @@ add_sampling_wrapper (gimple stmt_start,
   gimple_stmt_iterator gsi;

   tmp_var = create_tmp_reg (get_gcov_unsigned_t (), "PROF_sample");
-  tmp1 = make_ssa_name (tmp_var, NULL);
-  tmp2 = make_ssa_name (tmp_var, NULL);
+  tmp1 = make_prof_ssa_name (tmp_var, NULL);
+  tmp2 = make_prof_ssa_name (tmp_var, NULL);

   /* Create all the new statements needed.  */
   stmt_inc_counter1 = gimple_build_assign (tmp1, gcov_sample_counter_decl);
@@ -261,7 +261,7 @@ add_sampling_wrapper (gimple stmt_start,
   stmt_inc_counter3 = gimple_build_assign (gcov_sample_counter_decl, tmp2);
   zero = build_int_cst (get_gcov_unsigned_t (), 0);
   stmt_reset_counter = gimple_build_assign (gcov_sample_counter_decl, zero);
-  tmp3 = make_ssa_name (tmp_var, NULL);
+  tmp3 = make_prof_ssa_name (tmp_var, NULL);
   stmt_assign_period = gimple_build_assign (tmp3, gcov_sampling_period_decl);
   stmt_if = gimple_build_cond (GE_EXPR, tmp2, tmp3, NULL_TREE, NULL_TREE);

@@ -290,7 +290,7 @@ add_execonce_wrapper (gimple stmt_start,

   /* Create all the new statements needed.  */
   tmp_var = create_tmp_reg (get_gcov_type (), "PROF_temp");
-  tmp1 = make_ssa_name (tmp_var, NULL);
+  tmp1 = make_prof_ssa_name (tmp_var, NULL);
   stmt_assign = gimple_build_assign (tmp1, gimple_assign_lhs (stmt_end));

   zero = build_int_cst (get_gcov_type (), 0);
@@ -757,10 +757,10 @@ gimple_gen_edge_profiler (int edgeno, ed
     }
   else
     {
-      gcov_type_tmp_var = make_temp_ssa_name (gcov_type_node,
+      gcov_type_tmp_var = make_temp_prof_ssa_name (gcov_type_node,
                                          NULL, "PROF_edge_counter");
       stmt1 = gimple_build_assign (gcov_type_tmp_var, ref);
-      gcov_type_tmp_var = make_temp_ssa_name (gcov_type_node,
+      gcov_type_tmp_var = make_temp_prof_ssa_name (gcov_type_node,
                                          NULL, "PROF_edge_counter");
       stmt2 = gimple_build_assign_with_ops (PLUS_EXPR, gcov_type_tmp_var,
                                        gimple_assign_lhs (stmt1), one);
@@ -890,7 +890,7 @@ gimple_gen_ic_profiler (histogram_value
    */

   stmt1 = gimple_build_assign (ic_gcov_type_ptr_var, ref_ptr);
-  tmp1 = make_temp_ssa_name (ptr_void, NULL, "PROF");
+  tmp1 = make_temp_prof_ssa_name (ptr_void, NULL, "PROF");
   stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
   stmt3 = gimple_build_assign (ic_void_ptr_var, gimple_assign_lhs (stmt2));

@@ -1008,7 +1008,7 @@ gimple_gen_dc_profiler (unsigned base, g
   stmt1 = gimple_build_assign (dc_gcov_type_ptr_var, tmp1);
   tmp2 = create_tmp_var (ptr_void, "PROF_dc");
   stmt2 = gimple_build_assign (tmp2, unshare_expr (callee));
-  tmp3 = make_ssa_name (tmp2, stmt2);
+  tmp3 = make_prof_ssa_name (tmp2, stmt2);
   gimple_assign_set_lhs (stmt2, tmp3);
   stmt3 = gimple_build_assign (dc_void_ptr_var, tmp3);
   gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
Index: tree.h
===================================================================
--- tree.h      (revision 207019)
+++ tree.h      (working copy)
@@ -530,6 +530,9 @@ struct GTY(()) tree_base {
        TRANSACTION_EXPR_OUTER in
           TRANSACTION_EXPR

+       PROFILE_GENERATED in
+          SSA_NAME
+
    public_flag:

        TREE_OVERFLOW in
@@ -1151,6 +1154,12 @@ extern void omp_clause_range_check_faile
 /* Used to mark scoped enums.  */
 #define ENUM_IS_SCOPED(NODE) (ENUMERAL_TYPE_CHECK (NODE)->base.static_flag)

+/* Used to mark profile generated ssa name. This is let other
+   optimizations disallow profile generated ssa names to affect
+   original program correctness, because profile counters may
+   have data race in multithread program.  */
+#define PROFILE_GENERATED(NODE) ((NODE)->base.static_flag)
+
 /* Determines whether an ENUMERAL_TYPE has defined the list of constants. */
 #define ENUM_IS_OPAQUE(NODE) (ENUMERAL_TYPE_CHECK (NODE)->base.private_flag)

Index: value-prof.c
===================================================================
--- value-prof.c        (revision 207019)
+++ value-prof.c        (working copy)
@@ -681,8 +681,8 @@ gimple_divmod_fixed_value (gimple stmt,
   bb = gimple_bb (stmt);
   gsi = gsi_for_stmt (stmt);

-  tmp0 = make_temp_ssa_name (optype, NULL, "PROF");
-  tmp1 = make_temp_ssa_name (optype, NULL, "PROF");
+  tmp0 = make_temp_prof_ssa_name (optype, NULL, "PROF");
+  tmp1 = make_temp_prof_ssa_name (optype, NULL, "PROF");
   stmt1 = gimple_build_assign (tmp0, fold_convert (optype, value));
   stmt2 = gimple_build_assign (tmp1, op2);
   stmt3 = gimple_build_cond (NE_EXPR, tmp1, tmp0, NULL_TREE, NULL_TREE);
@@ -836,8 +836,8 @@ gimple_mod_pow2 (gimple stmt, int prob,
   gsi = gsi_for_stmt (stmt);

   result = create_tmp_reg (optype, "PROF");
-  tmp2 = make_temp_ssa_name (optype, NULL, "PROF");
-  tmp3 = make_temp_ssa_name (optype, NULL, "PROF");
+  tmp2 = make_temp_prof_ssa_name (optype, NULL, "PROF");
+  tmp3 = make_temp_prof_ssa_name (optype, NULL, "PROF");
   stmt2 = gimple_build_assign_with_ops (PLUS_EXPR, tmp2, op2,
                                        build_int_cst (optype, -1));
   stmt3 = gimple_build_assign_with_ops (BIT_AND_EXPR, tmp3, tmp2, op2);
@@ -989,7 +989,7 @@ gimple_mod_subtract (gimple stmt, int pr
   gsi = gsi_for_stmt (stmt);

   result = create_tmp_reg (optype, "PROF");
-  tmp1 = make_temp_ssa_name (optype, NULL, "PROF");
+  tmp1 = make_temp_prof_ssa_name (optype, NULL, "PROF");
   stmt1 = gimple_build_assign (result, op1);
   stmt2 = gimple_build_assign (tmp1, op2);
   stmt3 = gimple_build_cond (LT_EXPR, result, tmp1, NULL_TREE, NULL_TREE);
@@ -1365,8 +1365,8 @@ gimple_ic (gimple icall_stmt, struct cgr
   cond_bb = gimple_bb (icall_stmt);
   gsi = gsi_for_stmt (icall_stmt);

-  tmp0 = make_temp_ssa_name (optype, NULL, "PROF");
-  tmp1 = make_temp_ssa_name (optype, NULL, "PROF");
+  tmp0 = make_temp_prof_ssa_name (optype, NULL, "PROF");
+  tmp1 = make_temp_prof_ssa_name (optype, NULL, "PROF");
   tmp = unshare_expr (gimple_call_fn (icall_stmt));
   load_stmt = gimple_build_assign (tmp0, tmp);
   gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
@@ -1829,8 +1829,8 @@ gimple_stringop_fixed_value (gimple vcal
   vcall_size = gimple_call_arg (vcall_stmt, size_arg);
   optype = TREE_TYPE (vcall_size);

-  tmp0 = make_temp_ssa_name (optype, NULL, "PROF");
-  tmp1 = make_temp_ssa_name (optype, NULL, "PROF");
+  tmp0 = make_temp_prof_ssa_name (optype, NULL, "PROF");
+  tmp1 = make_temp_prof_ssa_name (optype, NULL, "PROF");
   tmp_stmt = gimple_build_assign (tmp0, fold_convert (optype, icall_size));
   gsi_insert_before (&gsi, tmp_stmt, GSI_SAME_STMT);

Index: tree-ssa-loop-ivopts.c
===================================================================
--- tree-ssa-loop-ivopts.c      (revision 207019)
+++ tree-ssa-loop-ivopts.c      (working copy)
@@ -721,7 +721,8 @@ contains_abnormal_ssa_name_p (tree expr)
   codeclass = TREE_CODE_CLASS (code);

   if (code == SSA_NAME)
-    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
+    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0
+          || PROFILE_GENERATED (expr);

   if (code == INTEGER_CST
       || is_gimple_min_invariant (expr))
@@ -1007,7 +1008,7 @@ static bool
 find_bivs (struct ivopts_data *data)
 {
   gimple phi;
-  tree step, type, base;
+  tree step, type, base, arg;
   bool found = false;
   struct loop *loop = data->current_loop;
   gimple_stmt_iterator psi;
@@ -1029,6 +1030,10 @@ find_bivs (struct ivopts_data *data)
          || contains_abnormal_ssa_name_p (step))
        continue;

+      arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+      if (PROFILE_GENERATED (arg))
+       continue;
+
       type = TREE_TYPE (PHI_RESULT (phi));
       base = fold_convert (type, base);
       if (step)
@@ -1115,6 +1120,12 @@ find_givs_in_stmt_scev (struct ivopts_da
   if (stmt_could_throw_p (stmt))
     return false;

+  /* If lhs is profile generated ssa name, never use it as IV. Because
+     gcov counter may have data-race for multithread program, it could
+     involve tricky bug to use such ssa var in IVOPT.  */
+  if (PROFILE_GENERATED (lhs))
+    return false;
+
   return true;
 }
Index: testsuite/gcc.dg/tree-prof/ivopt-1.c
===================================================================
--- testsuite/gcc.dg/tree-prof/ivopt-1.c        (revision 0)
+++ testsuite/gcc.dg/tree-prof/ivopt-1.c        (revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-options "-O2 -fprofile-generate -fno-tree-loop-im
-fdump-tree-ivopts" } */
+
+/* Because gcov counter related var has data race for multithread program,
+ *    compiler should prevent them from affecting program correctness. So
+ *    PROF_edge_counter variable should not be used as induction variable, or
+ *    else IVOPT may use such variable to compute loop boundary.  */
+
+void *ptr;
+int N;
+
+void foo(void *t) {
+  int i;
+  for (i = 0; i < N; i++) {
+    t = *(void **)t;
+  }
+  ptr = t;
+}
+
+/* { dg-final { scan-tree-dump-times "ssa name PROF_edge_counter" 0
"ivopts"} } */
+/* { dg-final { cleanup-tree-dump "ivopts" } } */

Reply via email to