This patch adds some more runtime test coverage for all of the clauses
supported by the kernels, parallel and loop directives. Specifically,
acc loops inside parallel regions were missing coverage for the tile
clause. Kernels loops were missing coverage for gang, worker, vector,
seq, auto, collapse, independent, private, reduction and tile. The
kernels directive lacked coverage for pcopy, pcopyin, pcopyout, pcreate
and deviceptr (but Jim is working on deviceptr).

C/C++ and Fortran have different levels of coverage, with C and C++
possessing slightly more tests than Fortran. I didn't want to go too
crazy with these tests, as a lot of these clauses are already covered
in the compile-only tests. Most of the new kernels tests are more or
less ports of their c counterparts, with some subtle differences
(replace num_gangs with gang(num), etc.).

I've applied this patch to gomp-4_0-branch. Some of the parallel tests
could probably go into trunk, but that'll wait for some other time.

Cesar


2015-12-04  Cesar Philippidis  <ce...@codesourcery.com>

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/{kernels-loop-4.c,
	kernels-private-vars-local-worker-1.c,
	kernels-private-vars-local-worker-2.c,
	kernels-private-vars-local-worker-3.c,
	kernels-private-vars-local-worker-4.c,
	kernels-private-vars-local-worker-5.c,
	kernels-private-vars-loop-gang-1.c,
	kernels-private-vars-loop-gang-2.c,
	kernels-private-vars-loop-gang-3.c,
	kernels-private-vars-loop-gang-4.c,
	kernels-private-vars-loop-gang-5.c,
	kernels-private-vars-loop-gang-6.c,
	kernels-private-vars-loop-vector-1.c,
	kernels-private-vars-loop-vector-2.c,
	kernels-private-vars-loop-worker-1.c,
	kernels-private-vars-loop-worker-2.c,
	kernels-private-vars-loop-worker-3.c,
	kernels-private-vars-loop-worker-4.c,
	kernels-private-vars-loop-worker-5.c,
	kernels-private-vars-loop-worker-6.c,
	kernels-private-vars-loop-worker-7.c,
	kernels-reduction-1.c}: New tests.
	* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c: Remove xfail,
	add tile clause test, define PK as parallel.
	* testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h: Add tile
	clause test.
	* testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h: Replace
	the parallel construct text with a PK macro.
	* testsuite/libgomp.oacc-fortran/{kernels-collapse-3.f90,
	kernels-collapse-4.f90, kernels-loop-1.f90, kernels-map-1.f90,
	kernels-private-vars-loop-gang-1.f90,
	kernels-private-vars-loop-gang-2.f90,
	kernels-private-vars-loop-gang-3.f90,
	kernels-private-vars-loop-gang-6.f90,
	kernels-private-vars-loop-vector-1.f90,
	kernels-private-vars-loop-vector-2.f90,
	kernels-private-vars-loop-worker-1.f90,
	kernels-private-vars-loop-worker-2.f90,
	kernels-private-vars-loop-worker-3.f90,
	kernels-private-vars-loop-worker-4.f90,
	kernels-private-vars-loop-worker-5.f90,
	kernels-private-vars-loop-worker-6.f90,
	kernels-private-vars-loop-worker-7.f90,
	kernels-reduction-1.f90, parallel-loop-1.f90,
	private-vars-loop-gang-1.f90, private-vars-loop-gang-2.f90,
	private-vars-loop-gang-3.f90, private-vars-loop-gang-6.f90,
	private-vars-loop-vector-1.f90, private-vars-loop-vector-2.f90,
	private-vars-loop-worker-1.f90, private-vars-loop-worker-2.f90,
	private-vars-loop-worker-3.f90, private-vars-loop-worker-4.f90,
	private-vars-loop-worker-5.f90, private-vars-loop-worker-6.f90,
	private-vars-loop-worker-7.f90}: New tests.

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-4.c
new file mode 100644
index 0000000..314292a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-4.c
@@ -0,0 +1,66 @@
+/* Exercise the auto, independent and seq loop clauses inside
+   kernels regions.  */
+
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+/* { dg-prune-output "insufficient partitioning available to parallelize loop" } */
+
+#include <assert.h>
+
+#define N 100
+
+void
+check (int *a, int *b)
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    assert (a[i] == b[i]);
+}
+
+int
+main ()
+{
+  int i, a[N], b[N];
+
+#pragma acc kernels copy(a)
+  {
+#pragma acc loop auto
+    for (i = 0; i < N; i++)
+      a[i] = i;
+  }
+
+  for (i = 0; i < N; i++)
+    b[i] = i;
+
+  check (a, b);
+
+#pragma acc kernels copyout(a)
+  {
+#pragma acc loop independent
+    for (i = 0; i < N; i++)
+      a[i] = i;
+  }
+
+  check (a, b);
+
+#pragma acc kernels present_or_copy(a)
+  {
+#pragma acc loop seq
+    for (i = 0; i < N; i++)
+      a[i] = i;
+  }
+
+  check (a, b);
+
+#pragma acc kernels pcopyout(a) present_or_copyin(b)
+  {
+#pragma acc loop seq
+    for (i = 0; i < N; i++)
+      a[i] = b[i];
+  }
+
+  check (a, b);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c
new file mode 100644
index 0000000..2e920cd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-1.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Back-to-back worker loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+	#pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+
+	#pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c
new file mode 100644
index 0000000..72249cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-2.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Successive vector loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c
new file mode 100644
index 0000000..1b0a7cc
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-3.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Aggregate worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    vec2 pt;
+	    
+	    pt.x = i ^ j * 3;
+	    pt.y = i | j * 5;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.x * k;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c
new file mode 100644
index 0000000..bbe6b3c
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-4.c
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Addressable worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    vec2 pt, *ptp;
+	    
+	    ptp = &pt;
+	    
+	    pt.x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += ptp->x * k;
+
+	    ptp->y = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c
new file mode 100644
index 0000000..18e5676
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-local-worker-5.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared in a local scope, broadcasting
+   to vector-partitioned mode.  Array worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int pt[2];
+	    
+	    pt[0] = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+	    pt[1] = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c
new file mode 100644
index 0000000..e424739
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-1.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32];
+
+  for (i = 0; i < 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang(num:32) private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+	arr[i] += x;
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    assert (arr[i] == i * 3);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c
new file mode 100644
index 0000000..a12e36e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-2.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang(num:32) private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+
+	#pragma acc loop worker(num:32)
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c
new file mode 100644
index 0000000..f8ec543
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-3.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of gang-private variables declared on loop directive, with broadcasting
+   to partitioned vectors.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang(num:32) private(x)
+    for (i = 0; i < 32; i++)
+      {
+	x = i * 2;
+
+	#pragma acc loop vector(length:32)
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c
new file mode 100644
index 0000000..73561b3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-4.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of gang-private addressable variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang(num:32) private(x)
+    for (i = 0; i < 32; i++)
+      {
+        int *p = &x;
+
+	x = i * 2;
+
+	#pragma acc loop worker(num:32)
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x;
+
+	(*p)--;
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c
new file mode 100644
index 0000000..3334830
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-5.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of gang-private array variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x[8], i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang(num:32) private(x)
+    for (i = 0; i < 32; i++)
+      {
+        for (int j = 0; j < 8; j++)
+	  x[j] = j * 2;
+
+	#pragma acc loop worker(num:32)
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += x[j % 8];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i % 8) * 2);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c
new file mode 100644
index 0000000..88ab245
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-gang-6.c
@@ -0,0 +1,40 @@
+#include <assert.h>
+
+/* Test of gang-private aggregate variable declared on loop directive, with
+   broadcasting to partitioned workers.  */
+
+typedef struct {
+  int x, y, z;
+  int attr[13];
+} vec3;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32];
+  vec3 pt;
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    #pragma acc loop gang private(pt)
+    for (i = 0; i < 32; i++)
+      {
+        pt.x = i;
+	pt.y = i * 2;
+	pt.z = i * 4;
+	pt.attr[5] = i * 6;
+
+	#pragma acc loop worker
+	for (int j = 0; j < 32; j++)
+	  arr[i * 32 + j] += pt.x + pt.y + pt.z + pt.attr[5];
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + (i / 32) * 13);
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c
new file mode 100644
index 0000000..3f7062d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+
+	    #pragma acc loop vector(length:32) private(x)
+	    for (k = 0; k < 32; k++)
+	      {
+		x = i ^ j * 3;
+		arr[i * 1024 + j * 32 + k] += x * k;
+	      }
+
+	    #pragma acc loop vector(length:32) private(x)
+	    for (k = 0; k < 32; k++)
+	      {
+		x = i | j * 5;
+		arr[i * 1024 + j * 32 + k] += x * k;
+	      }
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c
new file mode 100644
index 0000000..dada424
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-vector-2.c
@@ -0,0 +1,49 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of vector-private variables declared on loop directive. Array type.  */
+
+int
+main (int argc, char* argv[])
+{
+  int pt[2], i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+
+	    #pragma acc loop vector(length:32) private(pt)
+	    for (k = 0; k < 32; k++)
+	      {
+	        pt[0] = i ^ j * 3;
+		pt[1] = i | j * 5;
+		arr[i * 1024 + j * 32 + k] += pt[0] * k;
+		arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	      }
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c
new file mode 100644
index 0000000..8d649d1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32];
+
+  for (i = 0; i < 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    x = i ^ j * 3;
+	    /* Try to ensure 'x' accesses don't get optimized into a
+	       temporary.  */
+	    __asm__ __volatile__ ("");
+	    arr[i * 32 + j] += x;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32 * 32; i++)
+    assert (arr[i] == i + ((i / 32) ^ (i % 32) * 3));
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c
new file mode 100644
index 0000000..a67f90e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-2.c
@@ -0,0 +1,46 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c
new file mode 100644
index 0000000..465a800
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-3.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Back-to-back worker loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+
+	#pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c
new file mode 100644
index 0000000..a08ba69
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-4.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Successive vector loops.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    x = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c
new file mode 100644
index 0000000..1f76345
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-5.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Addressable worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int x = 5, i, arr[32 * 32 * 32];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(x)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    int *p = &x;
+	    
+	    x = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	    
+	    *p = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += x * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c
new file mode 100644
index 0000000..fe2e23a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-6.c
@@ -0,0 +1,58 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on a loop directive, broadcasting
+   to vector-partitioned mode.  Aggregate worker variable.  */
+
+typedef struct
+{
+  int x, y;
+} vec2;
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+  vec2 pt;
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        #pragma acc loop worker(num:32) private(pt)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    
+	    pt.x = i ^ j * 3;
+	    pt.y = i | j * 5;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.x * k;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt.y * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c
new file mode 100644
index 0000000..12c17e4
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-private-vars-loop-worker-7.c
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <assert.h>
+
+/* Test of worker-private variables declared on loop directive, broadcasting
+   to vector-partitioned mode.  Array worker variable.  */
+
+int
+main (int argc, char* argv[])
+{
+  int i, arr[32 * 32 * 32];
+  int pt[2];
+
+  for (i = 0; i < 32 * 32 * 32; i++)
+    arr[i] = i;
+
+  /* "pt" is treated as "present_or_copy" on the kernels directive because it
+     is an array variable.  */
+  #pragma acc kernels copy(arr)
+  {
+    int j;
+
+    #pragma acc loop gang(num:32)
+    for (i = 0; i < 32; i++)
+      {
+        /* But here, it is made private per-worker.  */
+        #pragma acc loop worker(num:32) private(pt)
+	for (j = 0; j < 32; j++)
+	  {
+	    int k;
+	    
+	    pt[0] = i ^ j * 3;
+
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[0] * k;
+
+	    pt[1] = i | j * 5;
+	    
+	    #pragma acc loop vector(length:32)
+	    for (k = 0; k < 32; k++)
+	      arr[i * 1024 + j * 32 + k] += pt[1] * k;
+	  }
+      }
+  }
+
+  for (i = 0; i < 32; i++)
+    for (int j = 0; j < 32; j++)
+      for (int k = 0; k < 32; k++)
+        {
+	  int idx = i * 1024 + j * 32 + k;
+          assert (arr[idx] == idx + (i ^ j * 3) * k + (i | j * 5) * k);
+	}
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
new file mode 100644
index 0000000..3a2a5b5
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-reduction-1.c
@@ -0,0 +1,27 @@
+/* Verify that a simple, explicit acc loop reduction works inside
+ a kernels region.  */
+
+/* { dg-do run } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
+
+#include <stdlib.h>
+
+#define N 100
+
+int
+main ()
+{
+  int i, red = 0;
+
+#pragma acc kernels
+  {
+#pragma acc loop reduction (+:red)
+  for (i = 0; i < N; i++)
+    red++;
+  }
+
+  if (red != N)
+    abort ();
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c
index 23a9b23..62af0f7 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.c
@@ -1,8 +1,8 @@
 /* { dg-do run } */
-/* { dg-xfail-run-if "cuStreamSynchronize error: unknown result code:   716" { openacc_nvidia_accel_selected } } */
 
 #include <stdlib.h>
 
+#define PK parallel
 #define M(x, y, z) O(x, y, z)
 #define O(x, y, z) x ## _ ## y ## _ ## z
 
@@ -29,10 +29,12 @@ main ()
       || test_none_auto ()
       || test_none_independent ()
       || test_none_seq ()
+      || test_none_tile ()
       || test_gangs_none ()
       || test_gangs_auto ()
       || test_gangs_independent ()
-      || test_gangs_seq ())
+      || test_gangs_seq ()
+      || test_gangs_tile ())
     abort ();
   return 0;
 }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h
index fd83dd4..971f9c9 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-1.h
@@ -18,3 +18,8 @@
 #include "parallel-loop-2.h"
 #undef S
 #undef N
+#define S tile(*)
+#define N(x) M(x, G, tile)
+#include "parallel-loop-2.h"
+#undef S
+#undef N
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
index c2c9df1..5691b7e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-loop-2.h
@@ -17,7 +17,7 @@ __attribute__((noinline, noclone)) void
 N(f0) (void)
 {
   int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 0; i < 1500; i++)
     a[i] += 2;
 }
@@ -25,7 +25,7 @@ N(f0) (void)
 __attribute__((noinline, noclone)) void
 N(f1) (void)
 {
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (unsigned int i = __INT_MAX__; i < 3000U + __INT_MAX__; i += 2)
     a[(i - __INT_MAX__) >> 1] -= 2;
 }
@@ -34,7 +34,7 @@ __attribute__((noinline, noclone)) void
 N(f2) (void)
 {
   unsigned long long i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = __LONG_LONG_MAX__ + 4500ULL - 27;
        i > __LONG_LONG_MAX__ - 27ULL; i -= 3)
     a[(i + 26LL - __LONG_LONG_MAX__) / 3] -= 4;
@@ -43,7 +43,7 @@ N(f2) (void)
 __attribute__((noinline, noclone)) void
 N(f3) (long long n1, long long n2, long long s3)
 {
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (long long i = n1 + 23; i > n2 - 25; i -= s3)
     a[i + 48] += 7;
 }
@@ -52,7 +52,7 @@ __attribute__((noinline, noclone)) void
 N(f4) (void)
 {
   unsigned int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 30; i < 20; i += 2)
     a[i] += 10;
 }
@@ -62,7 +62,7 @@ N(f5) (int n11, int n12, int n21, int n22, int n31, int n32,
        int s1, int s2, int s3)
 {
   SC int v1, v2, v3;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (v1 = n11; v1 < n12; v1 += s1)
 #pragma acc loop S
     for (v2 = n21; v2 < n22; v2 += s2)
@@ -76,7 +76,7 @@ N(f6) (int n11, int n12, int n21, int n22, long long n31, long long n32,
 {
   SC int v1, v2;
   SC long long v3;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (v1 = n11; v1 > n12; v1 += s1)
 #pragma acc loop S
     for (v2 = n21; v2 > n22; v2 += s2)
@@ -89,7 +89,7 @@ N(f7) (void)
 {
   SC unsigned int v1, v3;
   SC unsigned long long v2;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (v1 = 0; v1 < 20; v1 += 2)
 #pragma acc loop S
     for (v2 = __LONG_LONG_MAX__ + 16ULL;
@@ -102,7 +102,7 @@ __attribute__((noinline, noclone)) void
 N(f8) (void)
 {
   SC long long v1, v2, v3;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (v1 = 0; v1 < 20; v1 += 2)
 #pragma acc loop S
     for (v2 = 30; v2 < 20; v2++)
@@ -114,7 +114,7 @@ __attribute__((noinline, noclone)) void
 N(f9) (void)
 {
   int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 20; i < 10; i++)
     {
       a[i] += 2;
@@ -127,7 +127,7 @@ __attribute__((noinline, noclone)) void
 N(f10) (void)
 {
   SC int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 0; i < 10; i++)
 #pragma acc loop S
     for (int j = 10; j < 8; j++)
@@ -143,7 +143,7 @@ __attribute__((noinline, noclone)) void
 N(f11) (int n)
 {
   int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 20; i < n; i++)
     {
       a[i] += 8;
@@ -156,7 +156,7 @@ __attribute__((noinline, noclone)) void
 N(f12) (int n)
 {
   SC int i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = 0; i < 10; i++)
 #pragma acc loop S
     for (int j = n; j < 8; j++)
@@ -172,7 +172,7 @@ __attribute__((noinline, noclone)) void
 N(f13) (void)
 {
   int *i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = a; i < &a[1500]; i++)
     i[0] += 2;
 }
@@ -181,7 +181,7 @@ __attribute__((noinline, noclone)) void
 N(f14) (void)
 {
   SC float *i;
-#pragma acc parallel loop L F
+#pragma acc PK loop L F
   for (i = &b[0][0][0]; i < &b[0][0][10]; i++)
 #pragma acc loop S
     for (float *j = &b[0][15][0]; j > &b[0][0][0]; j -= 10)
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90
new file mode 100644
index 0000000..9378b12
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-3.f90
@@ -0,0 +1,31 @@
+! Test the collapse clause inside a kernels region.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program collapse3
+  integer :: a(3,3,3), k, kk, kkk, l, ll, lll
+  !$acc kernels
+  !$acc loop collapse(3)
+    do 115 k=1,3
+dokk: do kk=1,3
+        do kkk=1,3
+          a(k,kk,kkk) = 1
+        enddo
+      enddo dokk
+115   continue
+  !$acc end kernels
+  if (any(a(1:3,1:3,1:3).ne.1)) call abort
+
+  !$acc kernels
+  !$acc loop collapse(3)
+dol: do 120 l=1,3
+doll: do ll=1,3
+        do lll=1,3
+          a(l,ll,lll) = 2
+        enddo
+      enddo doll
+120 end do dol
+  !$acc end kernels
+  if (any(a(1:3,1:3,1:3).ne.2)) call abort
+end program collapse3
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90
new file mode 100644
index 0000000..dfd9cd2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-collapse-4.f90
@@ -0,0 +1,42 @@
+! Test the collapse and reduction loop clauses inside a kernels region.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program collapse4
+  integer :: i, j, k, a(1:7, -3:5, 12:19), b(1:7, -3:5, 12:19)
+  logical :: l, r  ! l: reduction result from the kernels region, r: host reference
+  l = .false.
+  r = .false.
+  a(:, :, :) = 0
+  b(:, :, :) = 0
+  !$acc kernels
+  !$acc loop collapse (3) reduction (.or.:l)
+    do i = 2, 6
+      do j = -2, 4
+        do k = 13, 18
+          l = l.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+          l = l.or.k.lt.13.or.k.gt.18
+          if (.not.l) a(i, j, k) = a(i, j, k) + 1
+        end do
+      end do
+    end do
+  !$acc end kernels
+  do i = 2, 6  ! same iteration space on the host, filling the reference array b
+    do j = -2, 4
+      do k = 13, 18
+        r = r.or.i.lt.2.or.i.gt.6.or.j.lt.-2.or.j.gt.4
+        r = r.or.k.lt.13.or.k.gt.18
+        if (.not.r) b(i, j, k) = b(i, j, k) + 1  ! use r so the reference does not depend on l
+      end do
+    end do
+  end do
+  if (l .neqv. r) call abort
+  do i = 2, 6
+    do j = -2, 4
+      do k = 13, 18
+         if (a(i, j, k) .ne. b(i, j, k)) call abort
+      end do
+    end do
+  end do
+end program collapse4
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
new file mode 100644
index 0000000..c30b3a3
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-1.f90
@@ -0,0 +1,78 @@
+! Exercise the auto, independent, seq and tile loop clauses inside
+! kernels regions. 
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program loops
+  integer, parameter     :: n = 20
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY
+
+  !$acc kernels copy (a)
+  !$acc loop auto
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc kernels copyout (a)
+  !$acc loop independent
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+  call check (a, b, n)
+
+  ! COPYIN
+
+  a(:) = 0
+
+  !$acc kernels copyout (a) copyin (b)
+  !$acc loop seq
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  !$acc kernels pcopy (a)
+  !$acc loop tile (*)
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+end program loops
+
+subroutine check (a, b, n)
+  integer :: n, a(n), b(n)
+  integer :: i
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
new file mode 100644
index 0000000..df6e62d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-map-1.f90
@@ -0,0 +1,100 @@
+! Test the copy, copyin, copyout, pcopy, pcopyin, and pcopyout
+! clauses on kernels constructs.
+
+program map
+  integer, parameter     :: n = 20
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY: a is filled inside the kernels region; b is then filled on the host as the reference.
+
+  !$acc kernels copy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc kernels copyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+  call check (a, b, n)
+
+  ! COPYIN: a is produced from b inside the region, so the copyin data is actually read.
+
+  a(:) = 0
+
+  !$acc kernels copyout (a) copyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  !$acc kernels pcopy (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYOUT
+
+  a(:) = 0
+
+  !$acc kernels pcopyout (a)
+  !$acc loop
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPYIN: as for COPYIN, read b inside the region so pcopyin is exercised.
+
+  a(:) = 0
+
+  !$acc kernels pcopyout (a) pcopyin (b)
+  !$acc loop
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end kernels
+
+  call check (a, b, n)
+end program map
+
+subroutine check (a, b, n)
+  integer :: n, a(n), b(n)
+  integer :: i
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90
new file mode 100644
index 0000000..3a70ade
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-1.f90
@@ -0,0 +1,24 @@
+! Test of gang-private variables declared on loop directive.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, arr(32)
+
+  do i = 1, 32
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 1, 32
+     x = i * 2;
+     arr(i) = arr(i) + x;
+  end do
+  !$acc end kernels
+
+  do i = 1, 32
+     if (arr(i) .ne. i * 3) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90
new file mode 100644
index 0000000..43a1988
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-2.f90
@@ -0,0 +1,29 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned workers.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+
+  do i = 0, 32*32 -1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+     x = i * 2;
+
+     !$acc loop worker(num:32)
+     do j = 0, 31
+        arr(i * 32 + j) = arr(i * 32 + j) + x;
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 * 32 - 1
+     if (arr(i) .ne. i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90
new file mode 100644
index 0000000..e5806ee
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-3.f90
@@ -0,0 +1,29 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned vectors.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+
+  do i = 0, 32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+     x = i * 2;
+
+     !$acc loop vector(length:32)
+     do j = 0, 31
+        arr(i * 32 + j) = arr(i * 32 + j) + x;
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 * 32 - 1
+     if (arr(i) .ne. i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90
new file mode 100644
index 0000000..7d19bba
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-gang-6.f90
@@ -0,0 +1,37 @@
+! Test of gang-private aggregate variable declared on loop directive, with
+! broadcasting to partitioned vectors.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  type vec3
+     integer x, y, z, attr(13)
+  end type vec3
+
+  integer x, i, j, arr(0:32*32)
+  type(vec3) pt
+  
+  do i = 0, 32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(pt)
+  do i = 0, 31
+     pt%x = i
+     pt%y = i * 2
+     pt%z = i * 4
+     pt%attr(5) = i * 6
+
+     !$acc loop vector(length:32)
+     do j = 0, 31
+        arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5);
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 * 32 - 1
+     if (arr(i) .ne. i + (i / 32) * 13) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90
new file mode 100644
index 0000000..379bb3a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-1.f90
@@ -0,0 +1,42 @@
+! Test of vector-private variables declared on loop directive.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8)
+     do j = 0, 31
+        !$acc loop vector(length:32) private(x)
+        do k = 0, 31
+           x = ieor(i, j * 3)
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+        !$acc loop vector(length:32) private(x)
+        do k = 0, 31
+           x = ior(i, j * 5)
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90
new file mode 100644
index 0000000..8873efe
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-vector-2.f90
@@ -0,0 +1,39 @@
+! Test of vector-private variables declared on loop directive. Array type.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8)
+     do j = 0, 31
+        !$acc loop vector(length:32) private(pt)
+        do k = 0, 31
+           pt(1) = ieor(i, j * 3)  ! pt is the private array under test; it is written before each use
+           pt(2) = ior(i, j * 5)
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90
new file mode 100644
index 0000000..f513ec2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-1.f90
@@ -0,0 +1,28 @@
+! Test of worker-private variables declared on a loop directive.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+  common x
+
+  do i = 0, 32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32) private(x)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)
+        arr(i * 32 + j) = arr(i * 32 + j) + x
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 * 32 - 1
+     if (arr(i) .ne. i + ieor(i / 32, mod(i, 32) * 3)) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90
new file mode 100644
index 0000000..e7652d9
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-2.f90
@@ -0,0 +1,37 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k) call abort
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90
new file mode 100644
index 0000000..c82ced7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-3.f90
@@ -0,0 +1,49 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Back-to-back worker loops.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+
+     !$acc loop worker(num:8) private(x)
+     do j = 0, 31
+        x = ior(i, j * 5)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90
new file mode 100644
index 0000000..e30de70
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-4.f90
@@ -0,0 +1,46 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Successive vector loops.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+
+        x = ior(i, j * 5)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90
new file mode 100644
index 0000000..20f8579
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-5.f90
@@ -0,0 +1,49 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Addressable worker variable.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32)
+  integer, target :: x
+  integer, pointer :: p
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(x, p)
+     do j = 0, 31
+        p => x
+        x = ieor(i, j * 3)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+
+        p = ior(i, j * 5)
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90
new file mode 100644
index 0000000..48c3bfd
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-6.f90
@@ -0,0 +1,50 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Aggregate worker variable.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  type vec2
+     integer x, y
+  end type vec2
+  
+  integer :: i, j, k, idx, arr(0:32*32*32)
+  type(vec2) :: pt
+  
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(pt)
+     do j = 0, 31
+        pt%x = ieor(i, j * 3)
+        pt%y = ior(i, j * 5)
+        
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
+        end do
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90
new file mode 100644
index 0000000..ca63796
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-private-vars-loop-worker-7.f90
@@ -0,0 +1,45 @@
+! Test of worker-private variables declared on loop directive, broadcasting
+! to vector-partitioned mode.  Array worker variable.
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32), pt(2)
+
+  do i = 0, 32*32*32-1
+     arr(i) = i
+  end do
+
+  !$acc kernels copy(arr)
+  !$acc loop gang(num:32)
+  do i = 0, 31
+     !$acc loop worker(num:8) private(pt)
+     do j = 0, 31
+        pt(1) = ieor(i, j * 3)
+        pt(2) = ior(i, j * 5)
+        
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+        end do
+
+        !$acc loop vector(length:32)
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+        end do
+     end do
+  end do
+  !$acc end kernels
+
+  do i = 0, 32 - 1
+     do j = 0, 32 -1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
new file mode 100644
index 0000000..e894b6d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/kernels-reduction-1.f90
@@ -0,0 +1,20 @@
+! Test a simple acc loop reduction inside a kernels region. 
+
+! { dg-do run }
+! { dg-additional-options "-ftree-parallelize-loops=32" }
+
+program reduction
+  integer, parameter     :: n = 20
+  integer                :: i, red
+
+  red = 0
+
+  !$acc kernels
+  !$acc loop reduction (+:red)
+  do i = 1, n
+     red = red + 1
+  end do
+  !$acc end kernels
+
+  if (red .ne. n) call abort
+end program reduction
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
new file mode 100644
index 0000000..4c86ada
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/parallel-loop-1.f90
@@ -0,0 +1,75 @@
+! Exercise the auto, independent, seq and tile loop clauses inside
+! parallel regions. 
+
+program loops
+  integer, parameter     :: n = 20
+  integer                :: i, a(n), b(n)
+
+  a(:) = 0
+  b(:) = 0
+
+  ! COPY: a is filled inside the parallel region; b is then filled on the host as the reference.
+
+  !$acc parallel copy (a)
+  !$acc loop auto
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+     b(i) = i
+  end do
+
+  call check (a, b, n)
+
+  ! COPYOUT
+
+  a(:) = 0
+
+  !$acc parallel copyout (a)
+  !$acc loop independent
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+  call check (a, b, n)
+
+  ! COPYIN: a is produced from b inside the region, so the copyin data is actually read.
+
+  a(:) = 0
+
+  !$acc parallel copyout (a) copyin (b)
+  !$acc loop seq
+  do i = 1, n
+     a(i) = b(i)
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+  ! PRESENT_OR_COPY
+
+  !$acc parallel pcopy (a)
+  !$acc loop tile (*)
+  do i = 1, n
+     a(i) = i
+  end do
+  !$acc end parallel
+
+  call check (a, b, n)
+
+end program loops
+
+subroutine check (a, b, n)
+  integer :: n, a(n), b(n)
+  integer :: i
+
+  do i = 1, n
+     if (a(i) .ne. b(i)) call abort
+  end do
+end subroutine check
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-1.f90
new file mode 100644
index 0000000..b8dbfb8
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-1.f90
@@ -0,0 +1,21 @@
+! Test of gang-private variables declared on loop directive.
+
+program main
+  integer :: x, i, arr(32)
+
+  do i = 1, 32
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang private(x)
+  do i = 1, 32
+     x = i * 2;
+     arr(i) = arr(i) + x
+  end do
+  !$acc end parallel
+
+  do i = 1, 32
+     if (arr(i) .ne. i * 3) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-2.f90
new file mode 100644
index 0000000..d6bd147
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-2.f90
@@ -0,0 +1,26 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned workers.
+
+program main
+  integer :: x, i, j, arr(0:32*32)
+
+  do i = 0, 32*32-1
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang private(x)
+  do i = 0, 31
+     x = i * 2;
+
+     !$acc loop worker
+     do j = 0, 31
+        arr(i * 32 + j) = arr(i * 32 + j) + x
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 * 32 - 1
+     if (arr(i) .ne. i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-3.f90
new file mode 100644
index 0000000..38d5a2d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-3.f90
@@ -0,0 +1,26 @@
+! Test of gang-private variables declared on loop directive, with broadcasting
+! to partitioned vectors.
+
+program main
+  integer :: x, i, j, arr(0:32*32-1)   ! one element per (i, j) pair: 0..1023
+
+  do i = 0, 32*32-1             ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang private(x)
+  do i = 0, 31
+     x = i * 2                  ! written once per gang-loop iteration ...
+
+     !$acc loop vector
+     do j = 0, 31               ! ... and read by every vector-loop iteration
+        arr(i * 32 + j) = arr(i * 32 + j) + x
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 * 32 - 1         ! i / 32 recovers the outer index, so x was (i/32)*2
+     if (arr(i) .ne. i + (i / 32) * 2) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-6.f90
new file mode 100644
index 0000000..b43844b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-gang-6.f90
@@ -0,0 +1,34 @@
+! Test of gang-private aggregate (derived-type) variable declared on loop
+! directive, with broadcasting to partitioned vectors.
+
+program main
+  type vec3
+     integer x, y, z, attr(13)
+  end type vec3
+
+  integer i, j, arr(0:32*32-1)  ! one element per (i, j) pair: 0..1023
+  type(vec3) pt
+
+  do i = 0, 32*32-1             ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang private(pt)
+  do i = 0, 31
+     pt%x = i                   ! the four fields written here sum to
+     pt%y = i * 2               ! (1 + 2 + 4 + 6) * i = 13 * i
+     pt%z = i * 4
+     pt%attr(5) = i * 6
+
+     !$acc loop vector
+     do j = 0, 31               ! every inner iteration reads the private pt
+        arr(i * 32 + j) = arr(i * 32 + j) + pt%x + pt%y + pt%z + pt%attr(5)
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 * 32 - 1         ! i / 32 recovers the outer index
+     if (arr(i) .ne. i + (i / 32) * 13) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-1.f90
new file mode 100644
index 0000000..7fa900a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-1.f90
@@ -0,0 +1,39 @@
+! Test of vector-private variables declared on loop directive.
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker
+     do j = 0, 31
+        !$acc loop vector private(x)
+        do k = 0, 31
+           x = ieor(i, j * 3)   ! first vector loop adds ieor(i, 3*j) * k
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+        !$acc loop vector private(x)
+        do k = 0, 31
+           x = ior(i, j * 5)    ! second vector loop adds ior(i, 5*j) * k
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-2.f90
new file mode 100644
index 0000000..5456c38
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-vector-2.f90
@@ -0,0 +1,36 @@
+! Test of vector-private variables declared on loop directive. Array type.
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32-1), pt(2)   ! arr: one element per (i, j, k)
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker
+     do j = 0, 31
+        !$acc loop vector private(pt)
+        do k = 0, 31
+           pt(1) = ieor(i, j * 3)   ! both elements of the private array are
+           pt(2) = ior(i, j * 5)    ! written before they are read below
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-1.f90
new file mode 100644
index 0000000..297a6c2
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-1.f90
@@ -0,0 +1,25 @@
+! Test of worker-private variables declared on a loop directive.
+
+program main
+  integer :: x, i, j, arr(0:32*32-1)   ! one element per (i, j) pair: 0..1023
+  common x                      ! NOTE(review): blank COMMON looks deliberate (privatizing a COMMON variable) - confirm
+
+  do i = 0, 32*32-1             ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang private(x)
+  do i = 0, 31
+     !$acc loop worker private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)      ! written and read within the same worker iteration
+        arr(i * 32 + j) = arr(i * 32 + j) + x
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 * 32 - 1         ! i / 32 and mod(i, 32) recover the loop indices (i, j)
+     if (arr(i) .ne. i + ieor(i / 32, mod(i, 32) * 3)) call abort
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-2.f90
new file mode 100644
index 0000000..725f175
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-2.f90
@@ -0,0 +1,34 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)      ! written once per worker-loop iteration ...
+
+        !$acc loop vector
+        do k = 0, 31            ! ... and read by every vector-loop iteration
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: recompute the expected value per element
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k) call abort
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-3.f90
new file mode 100644
index 0000000..29239ec
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-3.f90
@@ -0,0 +1,46 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Back-to-back worker loops.
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(x)
+     do j = 0, 31               ! first worker loop adds ieor(i, 3*j) * k
+        x = ieor(i, j * 3)
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+
+     !$acc loop worker private(x)
+     do j = 0, 31               ! second worker loop adds ior(i, 5*j) * k
+        x = ior(i, j * 5)
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-4.f90
new file mode 100644
index 0000000..9f621ef
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-4.f90
@@ -0,0 +1,43 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Successive vector loops.
+
+program main
+  integer :: x, i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(x)
+     do j = 0, 31
+        x = ieor(i, j * 3)      ! value read by the first vector loop
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+
+        x = ior(i, j * 5)       ! x is overwritten between the two vector loops
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-5.f90
new file mode 100644
index 0000000..fa65f5e
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-5.f90
@@ -0,0 +1,46 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Addressable worker variable.
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+  integer, target :: x
+  integer, pointer :: p
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(x, p)
+     do j = 0, 31
+        p => x                  ! p is associated with x inside the worker loop
+        x = ieor(i, j * 3)
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+
+        p = ior(i, j * 5)       ! stores through p; the loop below reads it back as x
+
+        !$acc loop vector
+        do k = 0, 31
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + x * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-6.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-6.f90
new file mode 100644
index 0000000..45bc414
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-6.f90
@@ -0,0 +1,47 @@
+! Test of worker-private variables declared on a loop directive, broadcasting
+! to vector-partitioned mode.  Aggregate worker variable.
+
+program main
+  type vec2
+     integer x, y
+  end type vec2
+
+  integer :: i, j, k, idx, arr(0:32*32*32-1)   ! one element per (i, j, k): 0..32767
+  type(vec2) :: pt
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(pt)
+     do j = 0, 31
+        pt%x = ieor(i, j * 3)   ! both fields are written once per worker iteration
+        pt%y = ior(i, j * 5)
+
+        !$acc loop vector
+        do k = 0, 31            ! first vector loop reads pt%x
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%x * k
+        end do
+
+        !$acc loop vector
+        do k = 0, 31            ! second vector loop reads pt%y
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt%y * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-7.f90 b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-7.f90
new file mode 100644
index 0000000..a046e77
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/private-vars-loop-worker-7.f90
@@ -0,0 +1,42 @@
+! Test of worker-private variables declared on loop directive, broadcasting
+! to vector-partitioned mode.  Array worker variable.
+
+program main
+  integer :: i, j, k, idx, arr(0:32*32*32-1), pt(2)   ! arr: one element per (i, j, k)
+
+  do i = 0, 32*32*32-1          ! host: seed arr(i) = i
+     arr(i) = i
+  end do
+
+  !$acc parallel copy(arr) num_gangs(32) num_workers(8) vector_length(32)
+  !$acc loop gang
+  do i = 0, 31
+     !$acc loop worker private(pt)
+     do j = 0, 31
+        pt(1) = ieor(i, j * 3)  ! both elements are written once per worker iteration
+        pt(2) = ior(i, j * 5)
+
+        !$acc loop vector
+        do k = 0, 31            ! first vector loop reads pt(1)
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(1) * k
+        end do
+
+        !$acc loop vector
+        do k = 0, 31            ! second vector loop reads pt(2)
+           arr(i * 1024 + j * 32 + k) = arr(i * 1024 + j * 32 + k) + pt(2) * k
+        end do
+     end do
+  end do
+  !$acc end parallel
+
+  do i = 0, 32 - 1              ! host check: both contributions must be present
+     do j = 0, 32 - 1
+        do k = 0, 32 - 1
+           idx = i * 1024 + j * 32 + k
+           if (arr(idx) .ne. idx + ieor(i, j * 3) * k + ior(i, j * 5) * k) then
+              call abort
+           end if
+        end do
+     end do
+  end do
+end program main

Reply via email to