Hello world,
this is what I hope is the final round of the OMP front-end optimization
patch. This one ignores outer workshares when doing function
elimination within omp do and similar blocks.
Regression-tested. OK for trunk?
Thomas
2011-12-02 Thomas Koenig <[email protected]>
PR fortran/50690
* frontend-passes.c (omp_level): New variable.
(omp_size): New variable.
(omp_block): New variable.
(gfc_run_passes): Allocate and deallocate omp_block, set
omp_size.
(cfe_expr_0): Don't eliminate common function if it would put
the variable immediately into a WORKSHARE construct.
(optimize_namespace): Set omp_level.
(gfc_code_walker): Keep track of OMP PARALLEL and OMP WORKSHARE
constructs.
2011-12-02 Thomas Koenig <[email protected]>
PR fortran/50690
* gfortran.dg/gomp/workshare2.f90: New test.
* gfortran.dg/gomp/workshare3.f90: New test.
Index: frontend-passes.c
===================================================================
--- frontend-passes.c (Revision 181809)
+++ frontend-passes.c (Arbeitskopie)
@@ -66,6 +66,13 @@ static gfc_namespace *current_ns;
static int forall_level;
+/* Keep track of the OMP blocks, so we can mark variables introduced
+ by optimizations as private. */
+
+static int omp_level;
+static int omp_size;
+static gfc_code **omp_block;
+
/* Entry point - run all passes for a namespace. So far, only an
optimization pass is run. */
@@ -76,12 +83,15 @@ gfc_run_passes (gfc_namespace *ns)
{
expr_size = 20;
expr_array = XNEWVEC(gfc_expr **, expr_size);
+ omp_size = 20;
+ omp_block = XCNEWVEC(gfc_code *, omp_size);
optimize_namespace (ns);
if (gfc_option.dump_fortran_optimized)
gfc_dump_parse_tree (ns, stdout);
XDELETEVEC (expr_array);
+ XDELETEVEC (omp_block);
}
}
@@ -367,6 +377,23 @@ cfe_expr_0 (gfc_expr **e, int *walk_subtrees,
int i,j;
gfc_expr *newvar;
+ /* If we are within an OMP WORKSHARE or OMP PARALLEL WORKSHARE
+ construct, don't do this optimization. Only look at the
+ innermost level because an EXEC_OMP_PARALLEL{,_DO,_SECTIONS}
+ nested in an EXEC_OMP_WORKSHARE/EXEC_OMP_PARALLEL_WORKSHARE
+ is OK. */
+ if (omp_level > 0)
+ {
+ gfc_exec_op op;
+ op = omp_block[omp_level - 1]->op;
+
+ if (op == EXEC_OMP_WORKSHARE || op == EXEC_OMP_PARALLEL_WORKSHARE)
+ {
+ *walk_subtrees = 0;
+ return 0;
+ }
+ }
+
expr_count = 0;
gfc_expr_walker (e, cfe_register_funcs, NULL);
@@ -505,6 +532,7 @@ optimize_namespace (gfc_namespace *ns)
current_ns = ns;
forall_level = 0;
+ omp_level = 0;
gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
@@ -1150,11 +1178,13 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
gfc_actual_arglist *a;
gfc_code *co;
gfc_association_list *alist;
+ bool in_omp;
/* There might be statement insertions before the current code,
which must not affect the expression walker. */
co = *c;
+ in_omp = false;
switch (co->op)
{
@@ -1330,14 +1360,32 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
WALK_SUBEXPR (co->ext.dt->extra_comma);
break;
- case EXEC_OMP_DO:
case EXEC_OMP_PARALLEL:
case EXEC_OMP_PARALLEL_DO:
case EXEC_OMP_PARALLEL_SECTIONS:
case EXEC_OMP_PARALLEL_WORKSHARE:
+ case EXEC_OMP_WORKSHARE:
+
+ /* Register all OMP PARALLEL and WORKSHARE constructs
+ on a stack so they can be handled separately for
+ common function elimination. */
+
+ in_omp = true;
+
+ if (omp_level >= omp_size)
+ {
+ omp_size += omp_size;
+ omp_block = XRESIZEVEC(gfc_code *, omp_block, omp_size);
+ }
+
+ omp_block[omp_level] = co;
+ omp_level ++;
+
+ /* Fall through. */
+
case EXEC_OMP_SECTIONS:
+ case EXEC_OMP_DO:
case EXEC_OMP_SINGLE:
- case EXEC_OMP_WORKSHARE:
case EXEC_OMP_END_SINGLE:
case EXEC_OMP_TASK:
if (co->ext.omp_clauses)
@@ -1366,6 +1414,9 @@ gfc_code_walker (gfc_code **c, walk_code_fn_t code
if (co->op == EXEC_FORALL)
forall_level --;
+ if (in_omp)
+ omp_level --;
+
}
}
return 0;
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! Test that common function elimination is done within the OMP parallel
! blocks even if there is a workshare around it.
! Checks that front-end common function elimination is still performed
! inside an OMP PARALLEL / OMP DO region even when that region is
! nested inside an outer OMP PARALLEL WORKSHARE construct.
! The dg-final directive below expects exactly two __builtin_cosf
! calls to survive in the dump.
program foo
implicit none
! n is sized for a meaningful by-hand execution run; the dejagnu test
! itself only compiles this file (dg-do compile).
integer, parameter :: n = 100000000
real, parameter :: eps = 3e-7
integer :: i,j
real :: A(n), B(5), C(n)
real :: tmp
B(1) = 3.344
tmp = B(1)
do i=1,10
call random_number(a)
! Keep an untransformed copy of the random data for the check below.
c = a
!$omp parallel workshare
!$omp parallel default(shared)
!$omp do
do j=1,n
! Deliberately written with two identical cos calls so the front end
! can eliminate the common function call inside the parallel do.
A(j) = A(j)*cos(B(1))+A(j)*cos(B(1))
end do
!$omp end do
!$omp end parallel
!$omp end parallel workshare
end do
! Serial reference computation; only the last iteration's data is
! still in A and C, so a single serial transform of C must match A.
c = c*cos(b(1))+ c*cos(b(1))
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,1,j,a(j), c(j)
call abort
end if
end do
end program foo
! { dg-final { scan-tree-dump-times "__builtin_cosf" 2 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-do compile }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! PR 50690 - this used to ICE because workshare could not handle
! BLOCKs.
! To test for correct execution, run this program (but don't forget
! to unset the stack limit).
! PR fortran/50690: temporaries created by front-end optimization
! inside a WORKSHARE used to ICE.  The dg-final directive below
! requires that zero "__var" temporaries appear in the dump, i.e.
! common function elimination must be suppressed inside WORKSHARE.
program foo
implicit none
! n is sized for a meaningful by-hand execution run; the dejagnu test
! itself only compiles this file (dg-do compile).
integer, parameter :: n = 100000000
real, parameter :: eps = 3e-7
integer :: i,j
real :: A(n), B(5), C(n)
real :: tmp
B(1) = 3.344
tmp = B(1)
do i=1,10
call random_number(a)
! Keep an untransformed copy of the random data for the check below.
c = a
!$omp parallel default(shared)
!$omp workshare
A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end workshare nowait
!$omp end parallel ! sync is implied here
end do
! NOTE(review): A is transformed with cos(B(1)) above, but this check
! multiplies c by tmp = B(1) without cos, so if the program were
! actually run the comparison would fail.  Presumably tmp is used so
! the serial check contains no repeated function call that CFE could
! turn into a __var temporary (the dump must contain none) -- confirm.
c = c*tmp + c*tmp
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,1,j,a(j), c(j)
call abort
end if
end do
do i=1,10
call random_number(a)
c = a
!$omp parallel workshare default(shared)
A(:) = A(:)*cos(B(1))+A(:)*cos(B(1))
!$omp end parallel workshare
end do
! NOTE(review): same tmp-vs-cos mismatch as the first check above.
c = c*tmp + c*tmp
do j=1,n
if (abs(a(j)-c(j)) > eps) then
print *,2,j,a(j), c(j)
call abort
end if
end do
end program foo
! { dg-final { scan-tree-dump-times "__var" 0 "original" } }
! { dg-final { cleanup-tree-dump "original" } }