Allow vector_length clauses to accept values larger than the warp size. Note that this does not enable setting vector_length to values larger than the warp size via -fopenacc-dim.
2018-12-17 Tom de Vries <tdevr...@suse.de> * config/nvptx/nvptx.c (nvptx_goacc_validate_dims): Take larger vector lengths into account. * testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c: Expect vector length to be 128. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Expect vector length 2097152 to be reduced to 1024 instead of 32. --- gcc/config/nvptx/nvptx.c | 2 +- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c | 4 ++-- libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c | 5 ++--- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index 1d9704543d9..8d2740cd50f 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -96,7 +96,7 @@ #define PTX_NUM_PER_WORKER_BARRIERS (PTX_CTA_NUM_BARRIERS - PTX_NUM_PER_CTA_BARRIERS) #define PTX_DEFAULT_VECTOR_LENGTH PTX_WARP_SIZE -#define PTX_MAX_VECTOR_LENGTH PTX_WARP_SIZE +#define PTX_MAX_VECTOR_LENGTH PTX_CTA_SIZE #define PTX_WORKER_LENGTH 32 #define PTX_DEFAULT_RUNTIME_DIM 0 /* Defer to runtime. 
*/ diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 4a9854662cc..d7cd0461b53 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -350,7 +350,7 @@ int main () int gangs_min, gangs_max, workers_min, workers_max, vectors_min, vectors_max; gangs_min = workers_min = vectors_min = INT_MAX; gangs_max = workers_max = vectors_max = INT_MIN; -#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(32\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \ +#pragma acc parallel copy (vectors_actual) /* { dg-warning "using vector_length \\(1024\\), ignoring 2097152" "" { target openacc_nvidia_accel_configured } } */ \ vector_length (VECTORS) { if (acc_on_device (acc_device_host)) @@ -361,7 +361,7 @@ int main () else if (acc_on_device (acc_device_nvidia)) { /* The GCC nvptx back end enforces vector_length (32). */ - vectors_actual = 32; + vectors_actual = 1024; } else __builtin_abort (); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c index fab5b0d25d1..18d77cc5ecb 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/vector-length-128-1.c @@ -33,7 +33,6 @@ main (void) return 0; } -/* { dg-prune-output "using vector_length \\(32\\), ignoring 128" } */ -/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 32\\)" "oaccdevlow" } } */ -/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=32" } */ +/* { dg-final { scan-offload-tree-dump "__attribute__\\(\\(oacc function \\(1, 1, 128\\)" "oaccdevlow" } } */ +/* { dg-output "nvptx_exec: kernel main\\\$_omp_fn\\\$0: launch gangs=1, workers=1, vectors=128" } */ -- 2.16.4