I've committed this patch which contains all of the remaining goacc_parlevel bug fixes present in trunk to og8.
The goal of the goacc parlevel changes is replace the use of inline ptx code with builtin functions so that the certain OpenACC execution tests that exercise the execution model can be target independent. For the most part, these patches applied cleanly to og8, however, as I noted in PR86757, there were a couple of og8-specific regressions involving tests that started to fail when built -O0. I believe that problem is caused by the ganglocal memory changes. Chung-Lin, we'll need to fix PR86757 before we push the gangprivate changes upstream. Julian, I'm not sure if the GCN port supports gangprivate memory. If it does, you might be hit by this failure at -O0. But those tests have already been xfailed, so you should be OK. Cesar
[og8] More goacc_parlevel enhancements 2018-07-31 Cesar Philippidis <ce...@codesourcery.com> libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Adjust test. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise. Backport from mainline: 2018-05-02 Tom de Vries <t...@codesourcery.com> PR libgomp/85411 libgomp/ * plugin/plugin-nvptx.c (nvptx_exec): Move parsing of GOMP_OPENACC_DIM ... * env.c (parse_gomp_openacc_dim): ... here. New function. (initialize_env): Call parse_gomp_openacc_dim. (goacc_default_dims): Define. * libgomp.h (goacc_default_dims): Declare. * oacc-plugin.c (GOMP_PLUGIN_acc_default_dim): New function. * oacc-plugin.h (GOMP_PLUGIN_acc_default_dim): Declare. * libgomp.map: New version "GOMP_PLUGIN_1.2". Add GOMP_PLUGIN_acc_default_dim. * testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c: New test. * testsuite/libgomp.oacc-c-c++-common/loop-default.h: New test. 2018-05-04 Tom de Vries <t...@codesourcery.com> PR libgomp/85639 gcc/ * builtins.c (expand_builtin_goacc_parlevel_id_size): Handle null target if ignore == 0. 2018-05-07 Tom de Vries <t...@codesourcery.com> PR testsuite/85677 libgomp/ * testsuite/lib/libgomp.exp (libgomp_init): Move inclusion of top-level include directory in ALWAYS_CFLAGS out of $blddir != "" condition. [openacc] Move GOMP_OPENACC_DIM parsing out of nvptx plugin git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259852 138bc75d-0d04-0410-961f-82ee72b054a4 [expand] Handle null target in expand_builtin_goacc_parlevel_id_size git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259927 138bc75d-0d04-0410-961f-82ee72b054a4 [openacc, testsuite] Allow installed testing of libgomp to find gomp-constants.h git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259992 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/builtins.c b/gcc/builtins.c index 300e13c..0097d5b 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -6682,6 +6682,9 @@ expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore) if (ignore) return target; + if (target == NULL_RTX) + target = gen_reg_rtx (TYPE_MODE (TREE_TYPE (exp))); + if (!targetm.have_oacc_dim_size ()) { emit_move_insn (target, fallback_retval); diff --git a/libgomp/env.c b/libgomp/env.c index c99ba85..fab35b7 100644 --- a/libgomp/env.c +++ b/libgomp/env.c @@ -90,6 +90,7 @@ int gomp_debug_var; unsigned int gomp_num_teams_var; char *goacc_device_type; int goacc_device_num; +int goacc_default_dims[GOMP_DIM_MAX]; #ifndef LIBGOMP_OFFLOADED_ONLY @@ -1066,6 +1067,36 @@ parse_acc_device_type (void) } static void +parse_gomp_openacc_dim (void) +{ + /* The syntax is the same as for the -fopenacc-dim compilation option. */ + const char *var_name = "GOMP_OPENACC_DIM"; + const char *env_var = getenv (var_name); + if (!env_var) + return; + + const char *pos = env_var; + int i; + for (i = 0; *pos && i != GOMP_DIM_MAX; i++) + { + if (i && *pos++ != ':') + break; + + if (*pos == ':') + continue; + + const char *eptr; + errno = 0; + long val = strtol (pos, (char **)&eptr, 10); + if (errno || val < 0 || (unsigned)val != val) + break; + + goacc_default_dims[i] = (int)val; + pos = eptr; + } +} + +static void handle_omp_display_env (unsigned long stacksize, int wait_policy) { const char *env; @@ -1336,6 +1367,7 @@ initialize_env (void) goacc_device_num = 0; parse_acc_device_type (); + parse_gomp_openacc_dim (); goacc_runtime_initialize (); diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index a9aca74..607f4c2 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -44,6 +44,7 @@ #include "config.h" #include "gstdint.h" #include "libgomp-plugin.h" +#include "gomp-constants.h" #ifdef HAVE_PTHREAD_H #include <pthread.h> @@ -367,6 +368,7 @@ extern unsigned int gomp_num_teams_var; extern int gomp_debug_var; extern int goacc_device_num; extern char *goacc_device_type; +extern int goacc_default_dims[GOMP_DIM_MAX]; enum gomp_task_kind { diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index 7a49acc..595b988 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -485,6 +485,7 @@ GOMP_PLUGIN_1.1 { GOMP_PLUGIN_1.2 { global: GOMP_PLUGIN_acc_thread_default_async; + GOMP_PLUGIN_acc_default_dim; } GOMP_PLUGIN_1.1; # TODO diff --git a/libgomp/oacc-plugin.c b/libgomp/oacc-plugin.c index 10a1169..01fe354 100644 --- a/libgomp/oacc-plugin.c +++ b/libgomp/oacc-plugin.c @@ -60,3 +60,14 @@ GOMP_PLUGIN_acc_thread_default_async (void) struct goacc_thread *thr = goacc_thread (); return thr ? thr->default_async : acc_async_default; } + +int +GOMP_PLUGIN_acc_default_dim (unsigned int i) +{ + if (i >= GOMP_DIM_MAX) + { + gomp_fatal ("invalid dimension argument: %d", i); + return -1; + } + return goacc_default_dims[i]; +} diff --git a/libgomp/oacc-plugin.h b/libgomp/oacc-plugin.h index 52949ca..dc60530 100644 --- a/libgomp/oacc-plugin.h +++ b/libgomp/oacc-plugin.h @@ -31,6 +31,7 @@ extern void GOMP_PLUGIN_async_unmap_vars (void *, int); extern void *GOMP_PLUGIN_acc_thread (void); +extern int GOMP_PLUGIN_acc_default_dim (unsigned int); extern struct goacc_thread *GOMP_PLUGIN_goacc_thread (void); extern int GOMP_PLUGIN_acc_thread_default_async (void); diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 43be4cf..a1c12bf 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -757,26 +757,8 @@ nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, const char *env_var = getenv (var_name); notify_var (var_name, env_var); if (env_var) - { - const char *pos = env_var; - - for (i = 0; *pos && i != GOMP_DIM_MAX; i++) - { - if (i && *pos++ != ':') - break; - if (*pos != ':') - { - const char *eptr; - - errno = 0; - long val = strtol (pos, (char **)&eptr, 10); - if (errno || val < 0 || (unsigned)val != val) - break; - default_dims[i] = (int)val; - pos = eptr; - } - } - } + for (int i = 0; i < GOMP_DIM_MAX; ++i) + default_dims[i] = GOMP_PLUGIN_acc_default_dim (i); /* 32 is the default for known hardware. */ int gang = 0, worker = 32, vector = 32; diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 88af438..e5b5308 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -184,9 +184,9 @@ proc libgomp_init { args } { lappend ALWAYS_CFLAGS "additional_flags=-B${blddir}/.libs" lappend ALWAYS_CFLAGS "additional_flags=-I${blddir}" lappend ALWAYS_CFLAGS "ldflags=-L${blddir}/.libs" - # The top-level include directory, for gomp-constants.h. - lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include" } + # The top-level include directory, for gomp-constants.h. + lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/../../include" lappend ALWAYS_CFLAGS "additional_flags=-I${srcdir}/.." # For build-tree testing, also consider the library paths used for builing. diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c index eb00d32..c6110a1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default-runtime.c @@ -1,12 +1,13 @@ -/* { dg-additional-options "-fopenacc-dim=-:-" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ -/* { dg-set-target-env-var "GOMP_OPENACC_DIM" "8:16" } */ +/* { dg-set-target-env-var GOMP_OPENACC_DIM "8::" } */ #include "loop-default.h" +#include <stdlib.h> -int main () +int +main () { - return test_1 (8, 16, 32); + if (check_gang (8) != 0) + abort (); + + return 0; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h index 162c1d9..a9e2693 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h @@ -1,5 +1,3 @@ -/* { dg-additional-options "-fopenacc-dim=16:16" } */ - #include <openacc.h> #include <alloca.h> #include <string.h> @@ -7,24 +5,26 @@ #include <gomp-constants.h> #pragma acc routine seq -static int __attribute__ ((noinline)) coord () +static int __attribute__ ((noinline)) +coord (void) { int res = 0; - if (acc_on_device (acc_device_not_host)) + if (acc_on_device (acc_device_nvidia)) { - int g, w, v; - + int g = 0, w = 0, v = 0; g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + res = (1 << 24) | (g << 16) | (w << 8) | v; } + return res; } - -int check (const int *ary, int size, int gp, int wp, int vp) +static int +check (const int *ary, int size, int gp, int wp, int vp) { int exit = 0; int ix; @@ -32,11 +32,11 @@ int check (const int *ary, int size, int gp, int wp, int vp) int *workers = (int *)alloca (wp * sizeof (int)); int *vectors = (int *)alloca (vp * sizeof (int)); int offloaded = 0; - + memset (gangs, 0, gp * sizeof (int)); memset (workers, 0, wp * sizeof (int)); memset (vectors, 0, vp * sizeof (int)); - + for (ix = 0; ix < size; ix++) { int g = (ary[ix] >> 16) & 0xff; @@ -72,31 +72,30 @@ int check (const int *ary, int size, int gp, int wp, int vp) printf ("gang %d not used %d times\n", ix, gangs[0]); exit = 1; } - + for (ix = 0; ix < wp; ix++) if (workers[ix] != workers[0]) { printf ("worker %d not used %d times\n", ix, workers[0]); exit = 1; } - + for (ix = 0; ix < vp; ix++) if (vectors[ix] != vectors[0]) { printf ("vector %d not used %d times\n", ix, vectors[0]); exit = 1; } - + return exit; } -#define N (32 *32*32) +#define N (32 * 32 * 32) +int ary[N]; -int test_1 (int gp, int wp, int vp) +static int +check_gang (int gp) { - int ary[N]; - int exit = 0; - #pragma acc parallel copyout (ary) { #pragma acc loop gang (static:1) @@ -104,8 +103,12 @@ int test_1 (int gp, int wp, int vp) ary[ix] = coord (); } - exit |= check (ary, N, gp, 1, 1); + return check (ary, N, gp, 1, 1); +} +static int +check_worker (int wp) +{ #pragma acc parallel copyout (ary) { #pragma acc loop worker @@ -113,8 +116,12 @@ int test_1 (int gp, int wp, int vp) ary[ix] = coord (); } - exit |= check (ary, N, 1, wp, 1); + return check (ary, N, 1, wp, 1); +} +static int +check_vector (int vp) +{ #pragma acc parallel copyout (ary) { #pragma acc loop vector @@ -122,7 +129,17 @@ int test_1 (int gp, int wp, int vp) ary[ix] = coord (); } - exit |= check (ary, N, 1, 1, vp); + return check (ary, N, 1, 1, vp); +} + +static int +test_1 (int gp, int wp, int vp) +{ + int exit = 0; + + exit |= check_gang (gp); + exit |= check_worker (wp); + exit |= check_vector (vp); return exit; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 766e578..2fecac0 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -1,3 +1,5 @@ +/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 0bec6e1..384f2ac 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -1,3 +1,5 @@ +/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 15e2bc2..f919117 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -1,3 +1,5 @@ +/* { dg-xfail-run-if "n/a" { openacc_nvidia_accel_selected } { "-O0" "-O1" } { "" } } */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index 6bbd04f..fcfa7ab 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -1,3 +1,5 @@ +/* { dg-additional-options "-w" } */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index c63a5d4..23d288c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -1,3 +1,6 @@ +/* { dg-additional-options "-w" } */ +/* { dg-xfail-run-if "gangprivate failure" { openacc_nvidia_accel_selected } { "-O0" } { "" } } */ + #include <stdio.h> #include <openacc.h> #include <gomp-constants.h> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index 05e5d67..10b80f1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -14,7 +14,7 @@ int main () ary[ix] = -1; #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) - /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 18 } */ + /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 16 } */ { #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index f223afa..26bb9fe 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -157,7 +157,7 @@ int main () gangs_min = workers_min = vectors_min = INT_MAX; gangs_max = workers_max = vectors_max = INT_MIN; #pragma acc parallel copy (vectors_actual) /* { dg-warning "region contains vector partitoned code but is not vector partitioned" } */ \ - /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } 170 } */ \ + /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } 159 } */ \ vector_length (VECTORS) /* { dg-warning "'vector_length' value must be positive" "" { target c++ } } */ { /* We're actually executing with vector_length (1), just the GCC nvptx diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c index d211782..8c3b938 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c @@ -6,8 +6,8 @@ #pragma acc routine gang void __attribute__ ((noinline)) gang (int ary[N]) -/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "worker" { target *-*-* } 10 } */ -/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 10 } */ +/* { dg-warning "region is worker partitioned but does not contain worker partitioned code" "worker" { target *-*-* } 8 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 8 } */ { #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index 77d1d00..e14947c 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -6,7 +6,7 @@ #pragma acc routine worker void __attribute__ ((noinline)) worker (int ary[N]) -/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 10 } */ +/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } 8 } */ { #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) -- 2.7.4