Hi,

I.

In openmp expansion of loops, we do some effort to try to create matching loops in the loop state of the child function, f.i.in expand_omp_for_generic:
...
      struct loop *outer_loop;
      if (seq_loop)
        outer_loop = l0_bb->loop_father;
      else
        {
          outer_loop = alloc_loop ();
          outer_loop->header = l0_bb;
          outer_loop->latch = l2_bb;
          add_loop (outer_loop, l0_bb->loop_father);
        }

      if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          struct loop *loop = alloc_loop ();
          loop->header = l1_bb;
          /* The loop may have multiple latches.  */
          add_loop (loop, outer_loop);
        }
...

And if that doesn't work out, we try to mark the loop state for fixup, in expand_omp_taskreg and expand_omp_target:
...
      /* When the OMP expansion process cannot guarantee an up-to-date
         loop tree arrange for the child function to fixup loops.  */
      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
        child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
...

and expand_omp_for:
...
  else
    /* If there isn't a continue then this is a degerate case where
       the introduction of abnormal edges during lowering will prevent
       original loops from being detected.  Fix that up.  */
    loops_state_set (LOOPS_NEED_FIXUP);
...

However, loops are fixed up anyway, because the first pass we execute with the new child function is pass_fixup_cfg.

The new child function contains a function call to __builtin_omp_get_num_threads, which is marked with ECF_CONST, so execute_fixup_cfg marks the function for TODO_cleanup_cfg, and subsequently the loops with LOOPS_NEED_FIXUP.


II.

This patch adds a verification that at the end of the omp-expand processing of the child function, either the loop structure is ok, or marked for fixup.

This verfication triggered a failure in parloops. When an outer loop is being parallelized, both the outer and inner loop are cancelled. Then during omp-expansion, we create a loop in the loop state for the outer loop (the one that is transformed), but not for the inner, which causes the verification failure:
...
outer-1.c:11:3: error: loop with header 5 not in loop tree
...

[ I ran into this verification failure with an openacc kernels testcase on the gomp-4_0-branch, where parloops is called additionally from a different location, and pass_fixup_cfg is not the first pass that the child function is processed by. ]

The patch contains a bit that makes sure that the loop state of the child function is marked for fixup in parloops. The bit is non-trival since it create a loop state and sets the fixup flag on the loop state, but postpones the init_loops_structure call till move_sese_region_to_fn, where it can succeed.


III.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Mark inner loop for fixup in parloops

2015-07-13  Tom de Vries  <t...@codesourcery.com>

	PR tree-optimization/66846
	* omp-low.c (expand_omp_taskreg) [ENABLE_CHECKING]: If
	!LOOPS_NEED_FIXUP, verify_loop_structure in child_fn.
	(expand_omp_target) [ENABLE_CHECKING]: Same.
	* tree-cfg.c (move_sese_region_to_fn): Only allocate struct loops if
	necessary.  Preserve dest_cfun->x_current_loops->state while calling
	init_loops_structure.
	* tree-parloops.c (gen_parallel_loop): If inner loop is cancelled,
	allocate struct loop and mark child_fn for loop fixup.
---
 gcc/omp-low.c       |  8 ++++++++
 gcc/tree-cfg.c      | 26 +++++++++++++++++++++-----
 gcc/tree-parloops.c | 10 +++++++++-
 3 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 0e69bc2..64d9742 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -5603,6 +5603,10 @@ expand_omp_taskreg (struct omp_region *region)
 	}
       if (gimple_in_ssa_p (cfun))
 	update_ssa (TODO_update_ssa);
+#ifdef ENABLE_CHECKING
+      if (!loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+	verify_loop_structure ();
+#endif
       pop_cfun ();
     }
 
@@ -8983,6 +8987,10 @@ expand_omp_target (struct omp_region *region)
 	  if (changed)
 	    cleanup_tree_cfg ();
 	}
+#ifdef ENABLE_CHECKING
+      if (!loops_state_satisfies_p (LOOPS_NEED_FIXUP))
+	verify_loop_structure ();
+#endif
       pop_cfun ();
     }
 
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d97b824..6b415fe 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -7126,11 +7126,27 @@ move_sese_region_to_fn (struct function *dest_cfun, basic_block entry_bb,
 	}
     }
 
-  /* Initialize an empty loop tree.  */
-  struct loops *loops = ggc_cleared_alloc<struct loops> ();
-  init_loops_structure (dest_cfun, loops, 1);
-  loops->state = LOOPS_MAY_HAVE_MULTIPLE_LATCHES;
-  set_loops_for_fn (dest_cfun, loops);
+  struct loops *loops;
+  int loop_state_flags = 0;
+  if (dest_cfun->x_current_loops == NULL)
+    {
+      /* Initialize an empty loop tree.  */
+      loops = ggc_cleared_alloc<struct loops> ();
+      set_loops_for_fn (dest_cfun, loops);
+    }
+  else
+    {
+      loops = dest_cfun->x_current_loops;
+      loop_state_flags = loops->state;
+    }
+
+  if (loops->tree_root == NULL)
+    {
+      init_loops_structure (dest_cfun, loops, 1);
+      loops->state |= loop_state_flags;
+    }
+
+  loops->state |= LOOPS_MAY_HAVE_MULTIPLE_LATCHES;
 
   /* Move the outlined loop tree part.  */
   num_nodes = bbs.length ();
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index 036677b..23b134f 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2267,7 +2267,8 @@ gen_parallel_loop (struct loop *loop,
   cond_stmt = last_stmt (loop->header);
   if (cond_stmt)
     loc = gimple_location (cond_stmt);
-  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
+  tree child_fn = create_loop_fn (loc);
+  create_parallel_loop (loop, child_fn, arg_struct,
 			new_arg_struct, n_threads, loc);
   if (reduction_list->elements () > 0)
     create_call_for_reduction (loop, reduction_list, &clsn_data);
@@ -2276,6 +2277,13 @@ gen_parallel_loop (struct loop *loop,
 
   /* Cancel the loop (it is simpler to do it here rather than to teach the
      expander to do it).  */
+  if (loop->inner)
+    {
+      struct function *child_cfun = DECL_STRUCT_FUNCTION (child_fn);
+      struct loops *loops = ggc_cleared_alloc<struct loops> ();
+      set_loops_for_fn (child_cfun, loops);
+      child_cfun->x_current_loops->state = LOOPS_NEED_FIXUP;
+    }
   cancel_loop_tree (loop);
 
   /* Free loop bound estimations that could contain references to
-- 
1.9.1

Reply via email to