This patch "notices" the index variable of an acc loop (internally an OMP_FOR) inside an OpenACC construct, and completes the implicit firstprivate behavior as described in the spec. The firstprivate clauses and the FIXME comment in libgomp.oacc-c-c++-common/parallel-loop-2.h have also been removed in this patch.
A typo bug in the testcase libgomp.oacc-c-c++-common/reduction-4.c is also corrected, where the wrong variable names were used in the '&&' and '||' reduction tests. Tested without regressions, and applied to gomp-4_0-branch. Chung-Lin 2015-07-01 Chung-Lin Tang <clt...@codesourcery.com> gcc/ * gimplify.c (gimplify_omp_for): For acc loops inside OpenACC constructs, notice the use of the index variable in the surrounding gimplify_omp_ctx. libgomp/ * testsuite/libgomp.oacc-c-c++-common/reduction-4.c (main): Correct the names of reduction variables in '&&' and '||' tests. * testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Remove uses of the firstprivate clause, remove FIXME comment.
Index: gcc/gimplify.c =================================================================== --- gcc/gimplify.c (revision 225248) +++ gcc/gimplify.c (working copy) @@ -7348,7 +7348,11 @@ gimplify_omp_for (tree *expr_p, gimple_seq *pre_p) else if (omp_is_private (gimplify_omp_ctxp, decl, 0)) omp_notice_variable (gimplify_omp_ctxp, decl, true); else - omp_add_variable (gimplify_omp_ctxp, decl, GOVD_PRIVATE | GOVD_SEEN); + { + if (ork == ORK_OACC && gimplify_omp_ctxp->outer_context) + omp_notice_variable (gimplify_omp_ctxp->outer_context, decl, true); + omp_add_variable (gimplify_omp_ctxp, decl, GOVD_PRIVATE | GOVD_SEEN); + } /* If DECL is not a gimple register, create a temporary variable to act as an iteration counter. This is valid, since DECL cannot be Index: libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c (revision 225248) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/reduction-4.c (working copy) @@ -59,14 +59,14 @@ main(void) lvresult = false; /* '&&' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) +#pragma acc parallel num_gangs (ng) copy (lresult) #pragma acc loop reduction (&&:lresult) gang for (i = 0; i < n; i++) lresult = lresult && (creal(result) > creal(array[i])); /* Verify the reduction. */ for (i = 0; i < n; i++) - lvresult = lresult && (creal(result) > creal(array[i])); + lvresult = lvresult && (creal(result) > creal(array[i])); if (lresult != lvresult) abort (); @@ -78,14 +78,14 @@ main(void) lvresult = false; /* '||' reductions. */ -#pragma acc parallel num_gangs (ng) copy (result) +#pragma acc parallel num_gangs (ng) copy (lresult) #pragma acc loop reduction (||:lresult) gang for (i = 0; i < n; i++) lresult = lresult || (creal(result) > creal(array[i])); /* Verify the reduction. 
*/ for (i = 0; i < n; i++) - lvresult = lresult || (creal(result) > creal(array[i])); + lvresult = lvresult || (creal(result) > creal(array[i])); if (lresult != lvresult) abort (); Index: libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h (revision 225248) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h (working copy) @@ -1,5 +1,3 @@ -/* FIXME: Remove the firstprivate clauses from the paralle regions. */ - #ifndef VARS #define VARS int a[1500]; @@ -19,7 +17,7 @@ __attribute__((noinline, noclone)) void N(f0) (void) { int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 0; i < 1500; i++) a[i] += 2; } @@ -36,7 +34,7 @@ __attribute__((noinline, noclone)) void N(f2) (void) { unsigned long long i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = __LONG_LONG_MAX__ + 4500ULL - 27; i > __LONG_LONG_MAX__ - 27ULL; i -= 3) a[(i + 26LL - __LONG_LONG_MAX__) / 3] -= 4; @@ -54,7 +52,7 @@ __attribute__((noinline, noclone)) void N(f4) (void) { unsigned int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 30; i < 20; i += 2) a[i] += 10; } @@ -64,7 +62,7 @@ N(f5) (int n11, int n12, int n21, int n22, int n31 int s1, int s2, int s3) { SC int v1, v2, v3; -#pragma acc parallel loop L F firstprivate (v1, v2, v3) +#pragma acc parallel loop L F for (v1 = n11; v1 < n12; v1 += s1) #pragma acc loop S for (v2 = n21; v2 < n22; v2 += s2) @@ -78,7 +76,7 @@ N(f6) (int n11, int n12, int n21, int n22, long lo { SC int v1, v2; SC long long v3; -#pragma acc parallel loop L F firstprivate (v1, v2, v3) +#pragma acc parallel loop L F for (v1 = n11; v1 > n12; v1 += s1) #pragma acc loop S for (v2 = n21; v2 > n22; v2 += s2) @@ -91,7 +89,7 @@ N(f7) (void) { SC unsigned int v1, v3; SC unsigned long long v2; -#pragma acc parallel 
loop L F firstprivate (v1, v2, v3) +#pragma acc parallel loop L F for (v1 = 0; v1 < 20; v1 += 2) #pragma acc loop S for (v2 = __LONG_LONG_MAX__ + 16ULL; @@ -104,7 +102,7 @@ __attribute__((noinline, noclone)) void N(f8) (void) { SC long long v1, v2, v3; -#pragma acc parallel loop L F firstprivate (v1, v2, v3) +#pragma acc parallel loop L F for (v1 = 0; v1 < 20; v1 += 2) #pragma acc loop S for (v2 = 30; v2 < 20; v2++) @@ -116,7 +114,7 @@ __attribute__((noinline, noclone)) void N(f9) (void) { int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 20; i < 10; i++) { a[i] += 2; @@ -129,7 +127,7 @@ __attribute__((noinline, noclone)) void N(f10) (void) { SC int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 0; i < 10; i++) #pragma acc loop S for (int j = 10; j < 8; j++) @@ -145,7 +143,7 @@ __attribute__((noinline, noclone)) void N(f11) (int n) { int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 20; i < n; i++) { a[i] += 8; @@ -158,7 +156,7 @@ __attribute__((noinline, noclone)) void N(f12) (int n) { SC int i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = 0; i < 10; i++) #pragma acc loop S for (int j = n; j < 8; j++) @@ -174,7 +172,7 @@ __attribute__((noinline, noclone)) void N(f13) (void) { int *i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = a; i < &a[1500]; i++) i[0] += 2; } @@ -183,7 +181,7 @@ __attribute__((noinline, noclone)) void N(f14) (void) { SC float *i; -#pragma acc parallel loop L F firstprivate (i) +#pragma acc parallel loop L F for (i = &b[0][0][0]; i < &b[0][0][10]; i++) #pragma acc loop S for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)