Hello world,
this is what I hope is the final round of the OMP front-end optimization
patch. This one ignores outer workshares when doing function
elimination within omp do and similar blocks.
Regression-tested. OK for trunk?
Thomas
2011-12-02 Thomas Koenig <[email protected]>
PR fortran/50690
* frontend-passes.c (omp_level): New variable.
(omp_size): New variable.
(omp_block): New variable.
(gfc_run_passes): Allocate and deallocate omp_block, set
omp_size.
(cfe_expr_0): Don't eliminate common function if it would put
the variable immediately into a WORKSHARE construct.
(optimize_namespace): Set omp_level.
(gfc_code_walker): Keep track of OMP PARALLEL and OMP WORKSHARE
constructs.
2011-12-02 Thomas Koenig <[email protected]>
PR fortran/50690
* gfortran.dg/gomp/workshare2.f90: New test.
* gfortran.dg/gomp/workshare3.f90: New test.
Index: frontend-passes.c
===================================================================
--- frontend-passes.c (Revision 181809)
+++ frontend-passes.c (Arbeitskopie)
@@ -66,6 +66,13 @@ static gfc_namespace *current_ns;
static int forall_level;
+/* Keep track of the OMP blocks, so we can mark variables introduced
+ by optimizations as private. */
+
+static int omp_level;
+static int omp_size;
+static gfc_code **omp_block;
+
/* Entry point - run all passes for a namespace. So far, only an
optimization pass is run. */
@@ -76,12 +83,15 @@ gfc_run_passes (gfc_namespace *ns)
{
expr_size = 20;
expr_array = XNEWVEC(gfc_expr **, expr_size);
+ omp_size = 20;
+ omp_block = XCNEWVEC(gfc_code *, omp_size);
optimize_namespace (ns);
if (gfc_option.dump_fortran_optimized)
gfc_dump_parse_tree (ns, stdout);
XDELETEVEC (expr_array);
+ XDELETEVEC (omp_block);
}
}
@@ -367,6 +377,23 @@ cfe_expr_0 (gfc_expr **e, int *walk_subtrees,
int i,j;
gfc_expr *newvar;
+ /* If we are within an OMP WORKSHARE or OMP PARALLEL WORKSHARE
+ construct, don't do this optimization. Only look at the
+ innermost level because an EXEC_OMP_PARALLEL{,_DO,_SECTIONS}
+ nested in an EXEC_OMP_WORKSHARE/EXEC_OMP_PARALLEL_WORKSHARE
+ is OK. */
+ if (omp_level > 0)
+ {
+ gfc_exec_op op;
+ op = omp_block[omp_level - 1]->op;
+
+ if (op == EXEC_OMP_WORKSHARE || op == EXEC_OMP_PARALLEL_WORKSHARE)
+ {
+ *walk_subtrees = 0;
+ return 0;
+ }
+ }
+
expr_count = 0;
gfc_expr_walker (e, cfe_register_funcs, NULL);
@@ -505,6 +532,7 @@ optimize_namespace (gfc_namespace *ns)
current_ns = ns;
forall_level = 0;
+ omp_level = 0;
gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
@@ -1150,11 +1178,13 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
gfc_actual_arglist *a;
gfc_code *co;
gfc_association_list *alist;
+ bool in_omp;
/* There might be statement insertions before the current code,
which must not affect the expression walker. */
co = *c;
+ in_omp = false;
switch (co->op)
{
@@ -1330,14 +1360,32 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
WALK_SUBEXPR (co->ext.dt->extra_comma);
break;
- case EXEC_OMP_DO:
case EXEC_OMP_PARALLEL:
case EXEC_OMP_PARALLEL_DO:
case EXEC_OMP_PARALLEL_SECTIONS:
case EXEC_OMP_PARALLEL_WORKSHARE:
+ case EXEC_OMP_WORKSHARE:
+
+ /* Register all OMP PARALLEL and WORKSHARE constructs
+ on a stack so they can be handled separately for
+ common function elimination. */
+
+ in_omp = true;
+
+ if (omp_level >= omp_size)
+ {
+ omp_size += omp_size;
+ omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size);
+ }
+
+ omp_block[omp_level] = co;
+ omp_level ++;
+
+ /* Fall through. */
+
case EXEC_OMP_SECTIONS:
+ case EXEC_OMP_DO:
case EXEC_OMP_SINGLE:
- case EXEC_OMP_WORKSHARE:
case EXEC_OMP_END_SINGLE:
case EXEC_OMP_TASK:
if (co->ext.omp_clauses)
@@ -1366,6 +1414,9 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
if (co->op == EXEC_FORALL)
forall_level --;
+ if (in_omp)
+ omp_level --;
+
}
}
return 0;
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! Test that common function elimination is done within the OMP parallel
! blocks even if there is a workshare around it.
! Checks that front-end common function elimination is still performed
! inside an OMP PARALLEL / OMP DO region even when that region is
! nested inside an outer OMP PARALLEL WORKSHARE construct.
! The dg-final directive below expects exactly two __builtin_cosf
! calls to survive in the dump.
program foo
implicit none
! n is sized for a meaningful by-hand execution run; the dejagnu test
! itself only compiles this file (dg-do compile).
integer, parameter :: n = 100000000
real, parameter :: eps = 3e-7
integer :: i,j
real :: A(n), B(5), C(n)
real :: tmp
B(1) = 3.344
tmp = B(1)
do i=1,10
call random_number(a)
! Keep an untransformed copy of the random data for the check below.
c = a
!$omp parallel workshare
!$omp parallel default(shared)
!$omp do
do j=1,n
! Deliberately written with two identical cos calls so the front end
! can eliminate the common function call inside the parallel do.
A(j) = A(j)*cos(B(1))+A(j)*cos(B(1))
end do
!$omp end do
!$omp end parallel
!$omp end parallel workshare
end do
! Serial reference computation; only the last iteration's data is
! still in A and C, so a single serial transform of C must match A.
c = c*cos(b(1))+ c*cos(b(1))
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,1,j,a(j), c(j)
call abort
end if
end do
end program foo
! { dg-final { scan-tree-dump-times "__builtin_cosf" 2 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! PR 50690 - this used to ICE because workshare could not handle
! BLOCKs.
! To test for correct execution, run this program (but don't forget
! to unset the stack limit).
! PR fortran/50690: temporaries created by front-end optimization
! inside a WORKSHARE used to ICE.  The dg-final directive below
! requires that zero "__var" temporaries appear in the dump, i.e.
! common function elimination must be suppressed inside WORKSHARE.
program foo
implicit none
! n is sized for a meaningful by-hand execution run; the dejagnu test
! itself only compiles this file (dg-do compile).
integer, parameter :: n = 100000000
real, parameter :: eps = 3e-7
integer :: i,j
real :: A(n), B(5), C(n)
real :: tmp
B(1) = 3.344
tmp = B(1)
do i=1,10
call random_number(a)
! Keep an untransformed copy of the random data for the check below.
c = a
!$omp parallel default(shared)
!$omp workshare
A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end workshare nowait
!$omp end parallel ! sync is implied here
end do
! NOTE(review): A is transformed with cos(B(1)) above, but this check
! multiplies c by tmp = B(1) without cos, so if the program were
! actually run the comparison would fail.  Presumably tmp is used so
! the serial check contains no repeated function call that CFE could
! turn into a __var temporary (the dump must contain none) -- confirm.
c = c*tmp + c*tmp
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,1,j,a(j), c(j)
call abort
end if
end do
do i=1,10
call random_number(a)
c = a
!$omp parallel workshare default(shared)
A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end parallel workshare
end do
! NOTE(review): same tmp-vs-cos mismatch as the first check above.
c = c*tmp + c*tmp
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,2,j,a(j), c(j)
call abort
end if
end do
end program foo
! { dg-final { scan-tree-dump-times "__var" 0 "original" } }
! { dg-final { cleanup-tree-dump "original" } }