Allow vector_length clauses to accept values larger than warp size.  Note that
this does not enable setting vector_length to values larger than warp size using
-fopenacc-dim.

2018-12-17  Tom de Vries  <tdevr...@suse.de>

        * config/nvptx/nvptx.c (PTX_MAX_VECTOR_LENGTH): Define to PTX_CTA_SIZE
        instead of PTX_WARP_SIZE, to take larger vector lengths into account.

        * testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect
        vector length to be 128.
        * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector
        length 2097152 to be reduced to 1024 instead of 32.
---
 gcc/config/nvptx/nvptx.c                                          | 2 +-
 libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c       | 4 ++--
 libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 1d9704543d9..8d2740cd50f 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -96,7 +96,7 @@
 #define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS)
 
 #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE
-#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE
+#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE
 #define PTX_WORKER_LENGTH 32
 #define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime.  */
 
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
index 4a9854662cc..d7cd0461b53 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -350,7 +350,7 @@ int main ()
     int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max;
     gangs_min = workers_min = vectors_min = INT_MAX;
     gangs_max = workers_max = vectors_max = INT_MIN;
-#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
+#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \
   vector_length (VECTORS)
     {
       if (acc_on_device (acc_device_host))
@@ -361,7 +361,7 @@ int main ()
       else if (acc_on_device (acc_device_nvidia))
        {
          /* The GCC nvptx back end enforces vector_length (32).  */
-         vectors_actual = 32;
+         vectors_actual = 1024;
        }
       else
        __builtin_abort ();
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
index fab5b0d25d1..18d77cc5ecb 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c
@@ -33,7 +33,6 @@ main (void)
 
   return 0;
 }
-/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */
 
-/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */
-/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */
+/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */
+/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */
-- 
2.16.4

Reply via email to