I've committed this patch to og8 which backports the first of Tom's goacc_parlevel patches from mainline. I'll post of a followup patch which contains various bug fixes. I believe that this patch was originally introduced in PR82428, or at least it resolves that PR.
Cesar
[og8] Add __builtin_goacc_parlevel_{id,size} 2018-07-31 Cesar Philippidis <ce...@codesourcery.com> Backport from mainline: 2018-05-02 Tom de Vries <t...@codesourcery.com> PR libgomp/82428 gcc/ * builtins.def (DEF_GOACC_BUILTIN_ONLY): Define. * omp-builtins.def (BUILT_IN_GOACC_PARLEVEL_ID) (BUILT_IN_GOACC_PARLEVEL_SIZE): New builtin. * builtins.c (expand_builtin_goacc_parlevel_id_size): New function. (expand_builtin): Call expand_builtin_goacc_parlevel_id_size. * doc/extend.texi (Other Builtins): Add __builtin_goacc_parlevel_id and __builtin_goacc_parlevel_size. gcc/fortran/ * f95-lang.c (DEF_GOACC_BUILTIN_ONLY): Define. gcc/testsuite/ * c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c: New test. * c-c++-common/goacc/builtin-goacc-parlevel-id-size.c: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use __builtin_goacc_parlevel_{id,size}. * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-g-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Same. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Same. * testsuite/libgomp.oacc-c-c++-common/tile-1.c: Same. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@259850 138bc75d-0d04-0410-961f-82ee72b054a4 diff --git a/gcc/builtins.c b/gcc/builtins.c index a71555e..300e13c 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -71,6 +71,8 @@ along with GCC; see the file COPYING3. If not see #include "gimple-fold.h" #include "intl.h" #include "file-prefix-map.h" /* remap_macro_filename() */ +#include "gomp-constants.h" +#include "omp-general.h" struct target_builtins default_target_builtins; #if SWITCHABLE_TARGET @@ -6628,6 +6630,71 @@ expand_stack_save (void) return ret; } +/* Emit code to get the openacc gang, worker or vector id or size. */ + +static rtx +expand_builtin_goacc_parlevel_id_size (tree exp, rtx target, int ignore) +{ + const char *name; + rtx fallback_retval; + rtx_insn *(*gen_fn) (rtx, rtx); + switch (DECL_FUNCTION_CODE (get_callee_fndecl (exp))) + { + case BUILT_IN_GOACC_PARLEVEL_ID: + name = "__builtin_goacc_parlevel_id"; + fallback_retval = const0_rtx; + gen_fn = targetm.gen_oacc_dim_pos; + break; + case BUILT_IN_GOACC_PARLEVEL_SIZE: + name = "__builtin_goacc_parlevel_size"; + fallback_retval = const1_rtx; + gen_fn = targetm.gen_oacc_dim_size; + break; + default: + gcc_unreachable (); + } + + if (oacc_get_fn_attrib (current_function_decl) == NULL_TREE) + { + error ("%qs only supported in OpenACC code", name); + return const0_rtx; + } + + tree arg = CALL_EXPR_ARG (exp, 0); + if (TREE_CODE (arg) != INTEGER_CST) + { + error ("non-constant argument 0 to %qs", name); + return const0_rtx; + } + + int dim = TREE_INT_CST_LOW (arg); + switch (dim) + { + case GOMP_DIM_GANG: + case GOMP_DIM_WORKER: + case GOMP_DIM_VECTOR: + break; + default: + error ("illegal argument 0 to %qs", name); + return const0_rtx; + } + + if (ignore) + return target; + + if (!targetm.have_oacc_dim_size ()) + { + emit_move_insn (target, fallback_retval); + return target; + } + + rtx reg = MEM_P (target) ? gen_reg_rtx (GET_MODE (target)) : target; + emit_insn (gen_fn (reg, GEN_INT (dim))); + if (reg != target) + emit_move_insn (target, reg); + + return target; +} /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient @@ -7758,6 +7825,10 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, folding. */ break; + case BUILT_IN_GOACC_PARLEVEL_ID: + case BUILT_IN_GOACC_PARLEVEL_SIZE: + return expand_builtin_goacc_parlevel_id_size (exp, target, ignore); + default: /* just do library call, if unknown builtin */ break; } diff --git a/gcc/builtins.def b/gcc/builtins.def index 17f825d..449d08d 100644 --- a/gcc/builtins.def +++ b/gcc/builtins.def @@ -214,6 +214,10 @@ along with GCC; see the file COPYING3. If not see #define DEF_GOACC_BUILTIN_COMPILER(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ flag_openacc, true, true, ATTRS, false, true) +#undef DEF_GOACC_BUILTIN_ONLY +#define DEF_GOACC_BUILTIN_ONLY(ENUM, NAME, TYPE, ATTRS) \ + DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, BT_LAST, \ + false, false, true, ATTRS, false, flag_openacc) #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5571d05..f751b08 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -12437,6 +12437,16 @@ Aarch64. This function is mainly useful when writing inline assembly code. @end deftypefn +@deftypefn {Built-in Function} int __builtin_goacc_parlevel_id (int x) +Returns the openacc gang, worker or vector id depending on whether @var{x} is +0, 1 or 2. +@end deftypefn + +@deftypefn {Built-in Function} int __builtin_goacc_parlevel_size (int x) +Returns the openacc gang, worker or vector size depending on whether @var{x} is +0, 1 or 2. +@end deftypefn + @node Target Builtins @section Built-in Functions Specific to Particular Target Machines diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c index 5fe34b2..0f39f0c 100644 --- a/gcc/fortran/f95-lang.c +++ b/gcc/fortran/f95-lang.c @@ -1202,6 +1202,10 @@ gfc_init_builtin_functions (void) #undef DEF_GOACC_BUILTIN_COMPILER #define DEF_GOACC_BUILTIN_COMPILER(code, name, type, attr) \ gfc_define_builtin (name, builtin_types[type], code, name, attr); +#undef DEF_GOACC_BUILTIN_ONLY +#define DEF_GOACC_BUILTIN_ONLY(code, name, type, attr) \ + gfc_define_builtin ("__builtin_" name, builtin_types[type], code, NULL, \ + attr); #undef DEF_GOMP_BUILTIN #define DEF_GOMP_BUILTIN(code, name, type, attr) /* ignore */ #include "../omp-builtins.def" diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def index 554d021..3df4b5e 100644 --- a/gcc/omp-builtins.def +++ b/gcc/omp-builtins.def @@ -51,6 +51,11 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", DEF_GOACC_BUILTIN_COMPILER (BUILT_IN_ACC_ON_DEVICE, "acc_on_device", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_ID, "goacc_parlevel_id", + BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST) +DEF_GOACC_BUILTIN_ONLY (BUILT_IN_GOACC_PARLEVEL_SIZE, "goacc_parlevel_size", + BT_FN_INT_INT, ATTR_NOTHROW_LEAF_LIST) + DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_THREAD_NUM, "omp_get_thread_num", BT_FN_INT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOMP_BUILTIN (BUILT_IN_OMP_GET_NUM_THREADS, "omp_get_num_threads", diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c new file mode 100644 index 0000000..16c7b34 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size-2.c @@ -0,0 +1,37 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ + +#include "../../../../include/gomp-constants.h" + +void +foo (void) +{ + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + /* { dg-error "'__builtin_goacc_parlevel_id' only supported in OpenACC code" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + /* { dg-error "'__builtin_goacc_parlevel_size' only supported in OpenACC code" "" { target *-*-* } .-1 } */ +} + +#pragma acc routine +void +foo2 (int arg) +{ + __builtin_goacc_parlevel_id (arg); + /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (arg); + /* { dg-error "non-constant argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_id (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_id (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_id'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (-1); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ + + __builtin_goacc_parlevel_size (3); + /* { dg-error "illegal argument 0 to '__builtin_goacc_parlevel_size'" "" { target *-*-* } .-1 } */ +} diff --git a/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c new file mode 100644 index 0000000..5cda818 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/builtin-goacc-parlevel-id-size.c @@ -0,0 +1,79 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2" } */ + +#include "../../../../include/gomp-constants.h" + +#pragma acc routine +int +foo (void) +{ + int res; + + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + return res; +} + +void +foo2 (void) +{ + int res; + +#pragma acc parallel + { + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } +} + +void +foo3 (void) +{ + int res; + +#pragma acc kernels + { + __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + + res += __builtin_goacc_parlevel_size (GOMP_DIM_GANG); + res += __builtin_goacc_parlevel_size (GOMP_DIM_WORKER); + res += __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR); + } +} diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c index 6de739a..e273a79 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c @@ -1,25 +1,23 @@ -/* { dg-do run { target openacc_nvidia_accel_selected } } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <assert.h> #include <openacc.h> +#include <gomp-constants.h> #define N 100 #define GANG_ID(I) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \ - __r; }) : (I)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_GANG) \ + : (I)) void test_static(int *a, int num_gangs, int sarg) { int i, j; - if (sarg == 0) + if (acc_on_device (acc_device_host)) + return; + + if (sarg == 0) sarg = 1; for (i = 0; i < N / sarg; i++) @@ -32,6 +30,9 @@ test_nonstatic(int *a, int gangs) { int i, j; + if (acc_on_device (acc_device_host)) + return; + for (i = 0; i < N; i+=gangs) for (j = 0; j < gangs; j++) assert (a[i+j] == i/gangs); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index 4c1c091..9642b39 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -1,11 +1,8 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - /* { dg-additional-options "-fopenacc-dim=32" } */ #include <stdio.h> #include <openacc.h> +#include <gomp-constants.h> int check (const int *ary, int size, int gp, int wp, int vp) { @@ -79,15 +76,12 @@ static int __attribute__((noinline)) place () { int r = 0; - if (acc_on_device (acc_device_nvidia)) - { - int g = 0, w = 0, v = 0; + int g = 0, w = 0, v = 0; + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); + r = (g << 16) | (w << 8) | v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); - r = (g << 16) | (w << 8) | v; - } return r; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h index 36e8497..162c1d9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-default.h @@ -1,20 +1,23 @@ +/* { dg-additional-options "-fopenacc-dim=16:16" } */ + #include <openacc.h> #include <alloca.h> #include <string.h> #include <stdio.h> +#include <gomp-constants.h> #pragma acc routine seq static int __attribute__ ((noinline)) coord () { int res = 0; - if (acc_on_device (acc_device_nvidia)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); res = (1 << 24) | (g << 16) | (w << 8) | v; } return res; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c index af0eef4..98f02e9 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c @@ -1,9 +1,6 @@ -/* { dg-additional-options "-w" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -21,13 +18,12 @@ int main () #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; - - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + int g, w, v; + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c index ea9f987..4152a4e 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c @@ -1,9 +1,6 @@ -/* { dg-additional-options "-w" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -21,13 +18,13 @@ int main () #pragma acc loop gang (static:1) for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c index 42b612a..766e578 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -20,13 +18,14 @@ int main () #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; + + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c index dad02ea..7107502 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c @@ -1,9 +1,6 @@ -/* { dg-additional-options "-w" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -19,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c index 4ae4b7c..0bec6e1 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -18,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c index 0556455..da4921d 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -19,13 +17,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c index 16d8f9f..15e2bc2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -21,13 +19,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c index efda662..6bbd04f 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c @@ -1,9 +1,6 @@ -/* { dg-additional-options "-w" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -19,13 +16,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c index 174a3ff..c63a5d4 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c @@ -1,9 +1,6 @@ -/* { dg-additional-options "-w" } */ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -21,13 +18,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c index fad20a0..d0e1255 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c @@ -3,6 +3,8 @@ { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -18,13 +20,13 @@ int main () { int val = ix; - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); val = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c index 2974807..6010cd2 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c index 30e8e78..05e5d67 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -21,13 +19,13 @@ int main () #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c index 578cfad..cd4cc99 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) int main () @@ -20,13 +18,13 @@ int main () #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; ondev = 1; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c index 1498fb4..f223afa 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c @@ -5,6 +5,7 @@ #include <limits.h> #include <openacc.h> +#include <gomp-constants.h> /* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper not behaving as expected for -O0. */ @@ -14,11 +15,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_GANG); else __builtin_abort (); } @@ -29,11 +26,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); else __builtin_abort (); } @@ -44,11 +37,7 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector () if (acc_on_device ((int) acc_device_host)) return 0; else if (acc_on_device ((int) acc_device_nvidia)) - { - unsigned int r; - asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r)); - return r; - } + return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); else __builtin_abort (); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c index a9fa338..d211782 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-g-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -14,13 +12,13 @@ void __attribute__ ((noinline)) gang (int ary[N]) #pragma acc loop gang for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -40,7 +38,7 @@ int main () #pragma acc parallel num_gangs(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); gang (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c index ace2f49..a97e046 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) gang (int ary[N]) #pragma acc loop gang worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); gang (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c index 2503e8d..b1e3e3a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) vector (int ary[N]) #pragma acc loop vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); vector (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c index 0b03a01..77d1d00 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -13,13 +11,13 @@ void __attribute__ ((noinline)) worker (int ary[N]) #pragma acc loop worker for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -39,7 +37,7 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); worker (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c index 5e45fad..23dbc1a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c @@ -1,8 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> +#include <openacc.h> +#include <gomp-constants.h> #define N (32*32*32+17) @@ -12,13 +10,13 @@ void __attribute__ ((noinline)) worker (int ary[N]) #pragma acc loop worker vector for (unsigned ix = 0; ix < N; ix++) { - if (__builtin_acc_on_device (5)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); ary[ix] = (g << 16) | (w << 8) | v; } else @@ -38,7 +36,7 @@ int main () #pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) { - ondev = __builtin_acc_on_device (5); + ondev = acc_on_device (acc_device_not_host); worker (ary); } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c index b5cbc90..8862148 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c @@ -1,9 +1,6 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - #include <stdio.h> #include <openacc.h> +#include <gomp-constants.h> #define NUM_WORKERS 16 #define NUM_VECTORS 32 @@ -11,15 +8,13 @@ #define HEIGHT 32 #define WORK_ID(I,N) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (__r)); \ - __r; }) : (I % N)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_WORKER) \ + : (I % N)) #define VEC_ID(I,N) \ - (acc_on_device (acc_device_nvidia) \ - ? ({unsigned __r; \ - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (__r)); \ - __r; }) : (I % N)) + (acc_on_device (acc_device_not_host) \ + ? __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR) \ + : (I % N)) #pragma acc routine worker void __attribute__ ((noinline)) diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c index 8dcb956..5130591 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/tile-1.c @@ -1,11 +1,8 @@ -/* This code uses nvptx inline assembly guarded with acc_on_device, which is - not optimized away at -O0, and then confuses the target assembler. - { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ - /* { dg-additional-options "-fopenacc-dim=32" } */ #include <stdio.h> #include <openacc.h> +#include <gomp-constants.h> static int check (const int *ary, int size, int gp, int wp, int vp) { @@ -79,13 +76,13 @@ static int __attribute__((noinline)) place () { int r = 0; - if (acc_on_device (acc_device_nvidia)) + if (acc_on_device (acc_device_not_host)) { - int g = 0, w = 0, v = 0; + int g, w, v; - __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g)); - __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w)); - __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v)); + g = __builtin_goacc_parlevel_id (GOMP_DIM_GANG); + w = __builtin_goacc_parlevel_id (GOMP_DIM_WORKER); + v = __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR); r = (g << 16) | (w << 8) | v; } return r; -- 2.7.4