Hi Chung-Lin! ;-) It's been a while:
On 2018-09-10T23:04:18+0800, Chung-Lin Tang <chunglin_t...@mentor.com> wrote: > * testsuite/libgomp.oacc-c-c++-common/lib-94.c: New test. > * testsuite/libgomp.oacc-c-c++-common/lib-95.c: New test. > * testsuite/libgomp.oacc-fortran/lib-16.f90: New test. Do you happen to remember why in these testcases you're using the following pattern: > --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (nonexistent) > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-94.c (working copy) > @@ -0,0 +1,42 @@ > +/* { dg-do run } */ > +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ > + > +#include <string.h> > +#include <stdlib.h> > +#include <openacc.h> > + > +int > +main (int argc, char **argv) > +{ > + const int N = 256; > + int i; > + int async = 8; > + unsigned char *h; > + > + h = (unsigned char *) malloc (N); > + > + for (i = 0; i < N; i++) > + { > + h[i] = i; > + } > + > + acc_copyin_async (h, N, async); > + > + memset (h, 0, N); > + > + acc_wait (async); You first issue 'acc_copyin_async', then (while potentially that's still accessing 'h') already 'memset' 'h' (potentially overwriting data that 'acc_copyin_async' is still working on), and only then 'acc_wait'? My understanding of OpenACC would swap 'memset' and 'acc_wait', but maybe you have a specific reason to do it in this way? In particular, the GCC nvptx offloading implementation "doesn't seem to care" (as discussed elsewhere; 'OpenACC "ephemeral" asynchronous host-to-device copies', etc.) -- but I suppose if you meant to test such implementation traits here, you'd have commented that? > + > + acc_copyout_async (h, N, async + 1); > + > + acc_wait (async + 1); > + > + for (i = 0; i < N; i++) > + { > + if (h[i] != i) > + abort (); > + } > + > + free (h); > + > + return 0; > +} > --- libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (nonexistent) > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/lib-95.c (working copy) > @@ -0,0 +1,45 @@ > +/* { dg-do run } */ > +/* { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } */ > + > +#include <string.h> > +#include <stdlib.h> > +#include <openacc.h> > + > +int > +main (int argc, char **argv) > +{ > + const int N = 256; > + int i, q = 5; > + unsigned char *h, *g; > + void *d; > + > + h = (unsigned char *) malloc (N); > + g = (unsigned char *) malloc (N); > + for (i = 0; i < N; i++) > + { > + g[i] = i; > + } > + > + acc_create_async (h, N, q); > + > + acc_memcpy_to_device_async (acc_deviceptr (h), g, N, q); > + memset (&h[0], 0, N); > + > + acc_wait (q); Similar here. > + acc_update_self_async (h, N, q + 1); > + acc_delete_async (h, N, q + 1); > + > + acc_wait (q + 1); > + > + for (i = 0; i < N; i++) > + { > + if (h[i] != i) > + abort (); > + } > + > + free (h); > + free (g); > + > + return 0; > +} > --- libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (nonexistent) > +++ libgomp/testsuite/libgomp.oacc-fortran/lib-16.f90 (working copy) (Later also similarly copied into 'libgomp.oacc-fortran/lib-16-2.f90'.) Similar: > @@ -0,0 +1,57 @@ > +! { dg-do run } > +! { dg-skip-if "" { *-*-* } { "*" } { "-DACC_MEM_SHARED=0" } } > + > +program main > + use openacc > + implicit none > + > + integer, parameter :: N = 256 > + integer, allocatable :: h(:) > + integer :: i > + integer :: async = 5 > + > + allocate (h(N)) > + > + do i = 1, N > + h(i) = i > + end do > + > + call acc_copyin (h) > + > + do i = 1, N > + h(i) = i + i > + end do > + > + call acc_update_device_async (h, sizeof (h), async) > + > + if (acc_is_present (h) .neqv. .TRUE.) call abort Don't we need 'acc_wait' here (while 'acc_update_device_async' may still be reading from 'h'), before overwriting 'h' here: > + > + h(:) = 0 > + > + call acc_copyout_async (h, sizeof (h), async) > + > + call acc_wait (async) > + > + do i = 1, N > + if (h(i) /= i + i) call abort > + end do > + > + call acc_copyin (h, sizeof (h)) > + > + h(:) = 0 > + > + call acc_update_self_async (h, sizeof (h), async) > + > + if (acc_is_present (h) .neqv. .TRUE.) call abort Don't we need 'acc_wait' here (to make sure we finish device to host copy of 'h'), before evaluating 'h' here: > + > + do i = 1, N > + if (h(i) /= i + i) call abort > + end do > + > + call acc_delete_async (h, async) > + > + call acc_wait (async) > + > + if (acc_is_present (h) .neqv. .FALSE.) call abort > + > +end program Julian has patches for most of these (as part of other commits). Grüße Thomas ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstrasse 201, 80634 München Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Frank Thürauf