This patch adds runtime support for the OpenACC update if_present clause. It turned out to require significantly less work to implement if_present in the runtime. Instead of creating a new API for GOACC_updated, I exploited the fact that OpenACC no longer uses GOMP_MAP_FORCE_* data mappings. This allowed me to encode the if_present update data mappings as GOMP_MAP_{TO,FROM} for device, and host/self, respectively, during gimplification. The actual runtime changes themselves were minor; the runtime only needs to call acc_is_present prior to actually updating the device/host data.
I've applied this patch to gomp-4_0-branch. Cesar
2017-05-09 Cesar Philippidis <ce...@codesourcery.com> gcc/ * gimplify.c (gimplify_omp_target_update): Relax OpenACC update data mappings to GOMP_MAP_{TO,FROM} when the user specifies if_present. gcc/testsuite/ * c-c++-common/goacc/update-if_present-1.c: Update test case. libgomp/ * oacc-parallel.c (GOACC_update): Handle GOMP_MAP_{TO,FROM} for the if_present data clauses. * testsuite/libgomp.oacc-c-c++-common/update-2.c: New test. * testsuite/libgomp.oacc-fortran/update-3.f90: New test. diff --git a/gcc/gimplify.c b/gcc/gimplify.c index af908f4..47fe9ee 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -10034,6 +10034,25 @@ gimplify_omp_target_update (tree *expr_p, gimple_seq *pre_p) ort, TREE_CODE (expr)); gimplify_adjust_omp_clauses (pre_p, NULL, &OMP_STANDALONE_CLAUSES (expr), TREE_CODE (expr)); + if (TREE_CODE (expr) == OACC_UPDATE + && find_omp_clause (OMP_STANDALONE_CLAUSES (expr), OMP_CLAUSE_IF_PRESENT)) + { + /* The runtime uses GOMP_MAP_{TO,FROM} to denote the if_present + clause. */ + for (tree c = OMP_STANDALONE_CLAUSES (expr); c; c = OMP_CLAUSE_CHAIN (c)) + if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_MAP) + switch (OMP_CLAUSE_MAP_KIND (c)) + { + case GOMP_MAP_FORCE_TO: + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_TO); + break; + case GOMP_MAP_FORCE_FROM: + OMP_CLAUSE_SET_MAP_KIND (c, GOMP_MAP_FROM); + break; + default: + break; + } + } stmt = gimple_build_omp_target (NULL, kind, OMP_STANDALONE_CLAUSES (expr)); gimplify_seq_add_stmt (pre_p, stmt); diff --git a/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c b/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c index 519393cf..5a19ee1 100644 --- a/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c +++ b/gcc/testsuite/c-c++-common/goacc/update-if_present-1.c @@ -12,6 +12,18 @@ t () #pragma acc update device(b) async if_present #pragma acc update host(c[1:3]) wait(4) if_present #pragma acc update self(c) device(b) host (a) async(10) if (a == 5) if_present + +#pragma acc update self(a) +#pragma acc update device(b) async +#pragma acc update host(c[1:3]) wait(4) +#pragma acc update self(c) device(b) host (a) async(10) if (a == 5) } -/* { dg-final { scan-tree-dump-times "pragma omp target oacc_update if_present" 4 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present map.from:a .len: 4.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present async.-1. map.to:b .len: 4.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present wait.4. map.from:c.1. .len: 12.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update if_present if.D...... async.10. map.from:a .len: 4.. map.to:b .len: 4.. map.from:c .len: 40.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update map.force_from:a .len: 4.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update async.-1. map.force_to:b .len: 4.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update wait.4. map.force_from:c.1. .len: 12.." 1 "omplower" } } */ +/* { dg-final { scan-tree-dump-times "omp target oacc_update if.D...... async.10. map.force_from:a .len: 4.. map.force_to:b .len: 4.. map.force_from:c .len: 40.." 1 "omplower" } } */ diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 66acdf6..de70ac0 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -683,14 +683,29 @@ GOACC_update (int device, size_t mapnum, /* Restore the host pointer. */ *(uintptr_t *) hostaddrs[i] = t; + update_device = false; } break; + case GOMP_MAP_TO: + if (!acc_is_present (hostaddrs[i], sizes[i])) + { + update_device = false; + break; + } + /* Fallthru */ case GOMP_MAP_FORCE_TO: update_device = true; acc_update_device (hostaddrs[i], sizes[i]); break; + case GOMP_MAP_FROM: + if (!acc_is_present (hostaddrs[i], sizes[i])) + { + update_device = false; + break; + } + /* Fallthru */ case GOMP_MAP_FORCE_FROM: update_device = false; acc_update_self (hostaddrs[i], sizes[i]); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c new file mode 100644 index 0000000..95bc16d --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/update-2.c @@ -0,0 +1,100 @@ +/* Test OpenACC update if_present clause. Note that part of this test + depends on a target that requires discrete memory. See the early + exit at acc_get_device_type. */ + +/* { dg-compile } */ + +#include <assert.h> +#include <openacc.h> + +void +t (int n, int a, int *c, int *d) +{ + int b, i; + +#pragma acc parallel loop present(c[0:n]) copyout(d[0:n]) + for (i = 0; i < n; i++) + d[i] = c[i]; + + for (i = 0; i < n; i++) + { + assert (d[i] == a); + c[i] = i; + } + + a = 0; + +#pragma acc update device(c[0:n], a) if_present + +#pragma acc parallel loop reduction(+:a) present(c[0:n]) + for (i = 0; i < n; i++) + a += c[i]; + +#pragma acc update host(a) if_present + + b = 0; + for (i = 0; i < n; i++) + b += i; + + assert (a == b); + +#pragma acc enter data copyin (b) +#pragma acc parallel loop reduction (+:b) + for (i = 0; i < n; i++) + b += i; + +#pragma acc update self(b) + assert (2*a == b); +#pragma acc exit data delete(b) + +#pragma acc parallel loop present(c[0:n]) + for (i = 0; i < n; i++) + c[i] = i; + +#pragma acc update host (c[0:n]) + +#pragma acc parallel loop present(c[0:n]) + for (i = 0; i < n; i++) + c[i] = 1000 + i; +} + +int +main () +{ + int n = 100; + int a, i, x[n], y[n]; + +#pragma acc update self(a) if_present +#pragma acc update if_present device(x[20:10]) + + a = -1; + for (i = 0; i < n; i++) + x[i] = a; + + if (acc_get_device_type () == acc_device_host) + return 0; + +#pragma acc enter data copyin(x) + t (n, a, x, y); + +#pragma acc update host(x[10:20]) + + for (i = 0; i < n; i++) + if (i < 10 || i >= 30) + assert (x[i] == i); + else + assert (x[i] == 1000 + i); + +#pragma acc exit data delete(x) + + a = -1; + for (i = 0; i < n; i++) + x[i] = a; + +#pragma acc data copy (x) + { + t (n, a, x, y); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90 new file mode 100644 index 0000000..a39358c --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/update-3.f90 @@ -0,0 +1,97 @@ +! Test OpenACC update if_present clause. Note that part of this test +! depends on a target that requires discrete memory. See the early +! exit at acc_get_device_type. + +! { dg-compile } + +program main + use openacc + integer, parameter :: n = 100 + integer :: a, i, x(n), y(n) + + !$acc update self(a) if_present + !$acc update if_present device(x(20:30)) + + a = -1 + do i = 1, n + x(i) = a + end do + + if (acc_get_device_type () == acc_device_host) call exit (0) + + !$acc enter data copyin(x) + call t (n, a, x, y) + + !$acc update host(x(10:30)) + + do i = 1, n + if ((i < 10 .or. i > 30) .and. (x(i) /= i)) call abort + if ((i >= 10 .and. i <= 30) .and. (x(i) /= 1000 + i)) call abort + end do + + !$acc exit data delete(x) + + a = -1; + do i = 1, n + x(i) = a + end do + + !$acc data copy (x) + call t (n, a, x, y) + !$acc end data + +contains + subroutine t (n, a, c, d) + integer :: n, a, b, i, c(n), d(n) + +!$acc parallel loop present(c(1:n)) copyout(d(1:n)) + do i = 1, n + d(i) = c(i) + end do + + do i = 1, n + if (d(i) /= a) call abort + c(i) = i + end do + + a = 0 + + !$acc update device(c(1:n), a) if_present + + !$acc parallel loop reduction(+:a) present(c(1:n)) + do i = 1, n + a = a + c(i) + end do + + !$acc update host(a) if_present + + b = 0 + do i = 1, n + b = b + i + end do + + if (a /= b) call abort + + !$acc enter data copyin (b) + !$acc parallel loop reduction (+:b) + do i = 1, n + b = b + i; + end do + + !$acc update self(b) + if (2*a /= b) call abort + !$acc exit data delete(b) + + !$acc parallel loop present(c(1:n)) + do i = 1, n + c(i) = i + end do + + !$acc update host (c(1:n)) + + !$acc parallel loop present(c(1:n)) + do i = 1, n + c(i) = 1000 + i + end do + end subroutine t +end program main