The OpenACC 2.5 spec updated the behavior of acc loops inside acc parallel regions such that loops with seq or auto clauses are no longer implicitly independent. Back in OpenACC 2.0, all loops inside acc parallel regions were implicitly independent. Oddly enough, if the user just places an acc loop without any clauses, it is still implicitly independent. E.g.

  #pragma acc loop

implies

  #pragma acc loop independent

which is not equivalent to

  #pragma acc loop auto

I suppose the auto clause is used to explicitly have the compiler "automatically" detect loop dependencies and partition the loop accordingly. This patch, which I've applied to gomp-4_0-branch, makes GCC comply with this new behavior.

Cesar

2017-05-03 Cesar Philippidis <ce...@codesourcery.com> gcc/ * omp-low.c (lower_oacc_head_mark): Don't mark OpenACC auto loops as independent inside acc parallel regions. gcc/testsuite/ * c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. * c-c++-common/goacc/loop-auto-2.c: Likewise. * gcc.dg/goacc/loop-processing-1.c: Likewise. * c-c++-common/goacc/loop-auto-3.c: New test. * gfortran.dg/goacc/loop-auto-1.f90: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. diff --git a/gcc/omp-low.c b/gcc/omp-low.c index cf299c12..9e9a363 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -6638,9 +6638,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses, tag |= OLF_GANG_STATIC; } - /* In a parallel region, loops are implicitly INDEPENDENT. */ + /* In a parallel region, loops without auto and seq clauses are + implicitly INDEPENDENT. 
*/ omp_context *tgt = enclosing_target_ctx (ctx); - if (!tgt || is_oacc_parallel (tgt)) + if ((!tgt || is_oacc_parallel (tgt)) && !(tag & (OLF_SEQ | OLF_AUTO))) tag |= OLF_INDEPENDENT; if (tag & OLF_TILE) diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c index 124befc..dcad07f 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c @@ -10,7 +10,7 @@ void Foo () #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -20,7 +20,7 @@ void Foo () #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -51,7 +51,7 @@ void Foo () #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -64,27 +64,27 @@ void Foo () } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int kx = 0; kx < 10; kx++) {} } } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) { 
-#pragma acc loop auto +#pragma acc loop auto independent for (int lx = 0; lx < 10; lx++) {} } } @@ -101,7 +101,7 @@ void Gang (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -111,7 +111,7 @@ void Gang (void) #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -142,7 +142,7 @@ void Gang (void) #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -176,7 +176,7 @@ void Worker (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -186,7 +186,7 @@ void Worker (void) #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -194,20 +194,20 @@ void Worker (void) } } -#pragma acc loop auto +#pragma acc loop for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop for (int jx = 0; jx < 10; jx++) {} } -#pragma acc loop auto +#pragma acc loop for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop /* { dg-warning "insufficient partitioning" } */ for (int 
jx = 0; jx < 10; jx++) { -#pragma acc loop auto +#pragma acc loop for (int kx = 0; kx < 10; kx++) {} } } @@ -222,17 +222,17 @@ void Vector (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) {} } } @@ -240,6 +240,6 @@ void Vector (void) #pragma acc routine seq void Seq (void) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) {} } diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c index af3f0bd..5aa36e9 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c @@ -72,12 +72,12 @@ void Bad () #pragma acc loop tile(*) gang vector for (int ix = 0; ix < 10; ix++) { - #pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + #pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) ; } -#pragma acc loop tile(*) auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop tile(*) auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) { #pragma acc loop worker diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c new file mode 100644 index 0000000..42f8759 --- /dev/null +++ 
b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c @@ -0,0 +1,81 @@ +/* Ensure that the auto clause falls back to seq parallelism when the + OpenACC loop is not explicitly independent. */ + +/* { dg-compile } */ +/* { dg-additional-options "-fopt-info-note-omp" } */ + +void +test () +{ + int i, j, k, l, n = 100; + +#pragma acc parallel loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (i = 0; i < n; i++) +#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop gang>" } */ + for (j = 0; j < n; j++) +#pragma acc loop worker vector /* { dg-message "Detected parallelism <acc loop worker vector>" } */ + for (k = 0; k < n; k++) + ; + +#pragma acc parallel loop auto independent /* { dg-message "Detected parallelism <acc loop gang worker>" } */ + for (i = 0; i < n; i++) +#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (j = 0; j < n; j++) +#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (k = 0; k < n; k++) +#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (l = 0; l < n; l++) + ; + +#pragma acc parallel loop gang /* { dg-message "Detected parallelism <acc loop gang>" } */ + for (i = 0; i < n; i++) +#pragma acc loop worker /* { dg-message "Detected parallelism <acc loop worker>" } */ + for (j = 0; j < n; j++) +#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (k = 0; k < n; k++) + { +#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (l = 0; l < n; l++) + ; +#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (l = 0; l < n; l++) + ; + } + +#pragma acc parallel loop /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (i = 0; i < n; i++) + { +#pragma acc loop gang worker /* { dg-message "Detected parallelism <acc loop gang worker>" } */ + for (j = 0; j < n; j++) 
+#pragma acc loop auto /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (k = 0; k < n; k++) + { +#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (l = 0; l < n; l++) + ; +#pragma acc loop auto independent /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (l = 0; l < n; l++) + ; + } +#pragma acc loop worker /* { dg-message "Detected parallelism <acc loop worker>" } */ + for (j = 0; j < n; j++) +#pragma acc loop vector /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (k = 0; k < n; k++) + ; + } + +#pragma acc parallel loop /* { dg-message "Detected parallelism <acc loop gang>" } */ + for (i = 0; i < n; i++) +#pragma acc loop /* { dg-message "Detected parallelism <acc loop worker>" } */ + for (j = 0; j < n; j++) +#pragma acc loop /* { dg-message "Detected parallelism <acc loop seq>" } */ + for (k = 0; k < n; k++) +#pragma acc loop /* { dg-message "Detected parallelism <acc loop vector>" } */ + for (l = 0; l < n; l++) + ; +} + +/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 37 } */ +/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 45 } */ +/* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 71 } */ + diff --git a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c index 85e73b1..805bbb9 100644 --- a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c +++ b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c @@ -9,7 +9,7 @@ void vector_1 (int *ary, int size) { #pragma acc loop gang for (int jx = 0; jx < 1; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < size; ix++) ary[ix] = place (); } diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 new file mode 100644 index 0000000..354d6fc7 --- /dev/null +++ 
b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 @@ -0,0 +1,91 @@ +! Ensure that the auto clause falls back to seq parallelism when the +! OpenACC loop is not explicitly independent. + +! { dg-compile } +! { dg-additional-options "-fopt-info-note-omp" } + +program test + implicit none + integer, parameter :: n = 100 + integer i, j, k, l + + !$acc parallel loop auto ! { dg-message "Detected parallelism <acc loop seq>" } + do i = 1, n + !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop gang>" } + do j = 1, n + !$acc loop worker vector ! { dg-message "Detected parallelism <acc loop worker vector>" } + do k = 1, n + end do + end do + end do + + !$acc parallel loop auto independent ! { dg-message "Detected parallelism <acc loop gang worker>" } + do i = 1, n + !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" } + do j = 1, n + !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" } + do k = 1, n + !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop vector>" } + do l = 1, n + end do + end do + end do + end do + + !$acc parallel loop gang ! { dg-message "Detected parallelism <acc loop gang>" } + do i = 1, n + !$acc loop worker ! { dg-message "Detected parallelism <acc loop worker>" } + do j = 1, n + !$acc loop vector ! { dg-message "Detected parallelism <acc loop vector>" } + do k = 1, n + !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop seq>" } + do l = 1, n + end do + !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" } + do l = 1, n + end do + end do + end do + end do + + + !$acc parallel loop ! { dg-message "Detected parallelism <acc loop seq>" } + do i = 1, n + !$acc loop gang worker ! { dg-message "Detected parallelism <acc loop gang worker>" } + do j = 1, n + !$acc loop auto ! { dg-message "Detected parallelism <acc loop seq>" } + do k = 1, n + !$acc loop vector ! 
{ dg-message "Detected parallelism <acc loop vector>" } + do l = 1, n + end do + end do + !$acc loop auto independent ! { dg-message "Detected parallelism <acc loop vector>" } + do l = 1, n + end do + end do + !$acc loop worker ! { dg-message "Detected parallelism <acc loop worker>" } + do j = 1, n + !$acc loop vector ! { dg-message "Detected parallelism <acc loop vector>" } + do k = 1, n + end do + end do + end do + + !$acc parallel loop ! { dg-message "Detected parallelism <acc loop gang>" } + do i = 1, n + !$acc loop ! { dg-message "Detected parallelism <acc loop worker>" } + do j = 1, n + !$acc loop ! { dg-message "Detected parallelism <acc loop seq>" } + do k = 1, n + !$acc loop ! { dg-message "Detected parallelism <acc loop vector>" } + do l = 1, n + end do + end do + end do + end do +end program test + +! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 41 } +! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 52 } +! 
{ dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } 78 } + diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 87ac1b1..4c1c091 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -107,7 +107,7 @@ int vector_1 (int *ary, int size) { #pragma acc loop gang for (int jx = 0; jx < 1; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < size; ix++) ary[ix] = place (); } @@ -123,7 +123,7 @@ int vector_2 (int *ary, int size) { #pragma acc loop worker for (int jx = 0; jx < size / 64; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 64; ix++) ary[ix + jx * 64] = place (); } @@ -139,7 +139,7 @@ int worker_1 (int *ary, int size) { #pragma acc loop gang for (int kx = 0; kx < 1; kx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) #pragma acc loop vector for (int ix = 0; ix < 64; ix++) @@ -155,7 +155,7 @@ int gang_1 (int *ary, int size) #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } } */ { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) #pragma acc loop worker for (int ix = 0; ix < 64; ix++) @@ -171,11 +171,11 @@ int gang_2 (int *ary, int size) #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int kx = 0; kx < size / (32 * 32); kx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 32; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 32; ix++) 
ary[ix + jx * 32 + kx * 32 * 32] = place (); } @@ -189,9 +189,9 @@ int gang_3 (int *ary, int size) #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 64; ix++) ary[ix + jx * 64] = place (); } @@ -205,7 +205,7 @@ int gang_4 (int *ary, int size) #pragma acc parallel vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size; jx++) ary[jx] = place (); }