Hi Thomas,
this is the updated Fortran deviceptr patche, originated from Cesar, and one of
the tests was from James Norris:
https://gcc.gnu.org/ml/gcc-patches/2018-05/msg00286.html
https://gcc.gnu.org/ml/gcc-patches/2018-08/msg00532.html
There were a few style cleanups, but the goal of modification is the same:
to use only one clause to represent Fortran deviceptr, and to preserve it
during gimplification.
Because of this modification, and as we discussed earlier, the
handle_ftn_pointers()
code in libgomp/oacc-parallel.c appeared to be no longer needed.
I have remove them in this patch, and tested libgomp without regressions.
Also, I've added a new libgomp.oacc-fortran/deviceptr-2.f90 testcase that
actually copies out and verifies the deviceptr computation.
Is this okay for trunk now?
Thanks,
Chung-Lin
2019-10-18 Cesar Philippidis <[email protected]>
Chung-Lin Tang <[email protected]>
gcc/fortran/
* trans-openmp.c (gfc_omp_finish_clause): Don't create pointer data
mappings for deviceptr clauses.
(gfc_trans_omp_clauses): Likewise.
gcc/
* gimplify.c (enum gimplify_omp_var_data): Add GOVD_DEVICETPR.
(omp_notice_variable): Add GOVD_DEVICEPTR attribute when appropriate.
(gimplify_scan_omp_clauses): Likewise.
(gimplify_adjust_omp_clauses_1): Set GOMP_MAP_FORCE_DEVICEPTR for
implicit deviceptr mappings.
gcc/testsuite/
* c-c++-common/goacc/deviceptr-4.c: Update expected data mapping.
2019-10-18 Chung-Lin Tang <[email protected]>
James Norris <[email protected]>
libgomp/
* oacc-parallel.c (handle_ftn_pointers): Delete function.
(GOACC_parallel_keyed): Remove call to handle_ftn_pointers.
* testsuite/libgomp.oacc-fortran/deviceptr-1.f90: New test.
* testsuite/libgomp.oacc-fortran/deviceptr-2.f90: New test.
Index: gcc/fortran/trans-openmp.c
===================================================================
--- gcc/fortran/trans-openmp.c (revision 277155)
+++ gcc/fortran/trans-openmp.c (working copy)
@@ -1099,7 +1099,8 @@ gfc_omp_clause_dtor (tree clause, tree decl)
void
gfc_omp_finish_clause (tree c, gimple_seq *pre_p)
{
- if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP)
+ if (OMP_CLAUSE_CODE (c) != OMP_CLAUSE_MAP
+ || OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR)
return;
tree decl = OMP_CLAUSE_DECL (c);
@@ -2173,6 +2174,12 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp
if (n->expr == NULL || n->expr->ref->u.ar.type == AR_FULL)
{
if (POINTER_TYPE_P (TREE_TYPE (decl))
+ && n->u.map_op == OMP_MAP_FORCE_DEVICEPTR)
+ {
+ OMP_CLAUSE_DECL (node) = decl;
+ goto finalize_map_clause;
+ }
+ else if (POINTER_TYPE_P (TREE_TYPE (decl))
&& (gfc_omp_privatize_by_reference (decl)
|| GFC_DECL_GET_SCALAR_POINTER (decl)
|| GFC_DECL_GET_SCALAR_ALLOCATABLE (decl)
@@ -2346,6 +2353,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp
OMP_CLAUSE_SIZE (node3)
= fold_build2 (MINUS_EXPR, sizetype, ptr, ptr2);
}
+ finalize_map_clause:
switch (n->u.map_op)
{
case OMP_MAP_ALLOC:
Index: gcc/gimplify.c
===================================================================
--- gcc/gimplify.c (revision 277155)
+++ gcc/gimplify.c (working copy)
@@ -123,6 +123,9 @@ enum gimplify_omp_var_data
/* Flag for GOVD_REDUCTION: inscan seen in {in,ex}clusive clause. */
GOVD_REDUCTION_INSCAN = 0x2000000,
+ /* Flag for OpenACC deviceptrs. */
+ GOVD_DEVICEPTR = 0x4000000,
+
GOVD_DATA_SHARE_CLASS = (GOVD_SHARED | GOVD_PRIVATE | GOVD_FIRSTPRIVATE
| GOVD_LASTPRIVATE | GOVD_REDUCTION | GOVD_LINEAR
| GOVD_LOCAL)
@@ -7426,6 +7429,7 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx,
error ("variable %qE declared in enclosing "
"%<host_data%> region", DECL_NAME (decl));
nflags |= GOVD_MAP;
+ nflags |= (n2->value & GOVD_DEVICEPTR);
if (octx->region_type == ORT_ACC_DATA
&& (n2->value & GOVD_MAP_0LEN_ARRAY))
nflags |= GOVD_MAP_0LEN_ARRAY;
@@ -8943,6 +8947,8 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_se
if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TO
|| OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_ALWAYS_TOFROM)
flags |= GOVD_MAP_ALWAYS_TO;
+ else if (OMP_CLAUSE_MAP_KIND (c) == GOMP_MAP_FORCE_DEVICEPTR)
+ flags |= GOVD_DEVICEPTR;
goto do_add;
case OMP_CLAUSE_DEPEND:
@@ -9727,7 +9733,8 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n,
| GOVD_MAP_FORCE
| GOVD_MAP_FORCE_PRESENT
| GOVD_MAP_ALLOC_ONLY
- | GOVD_MAP_FROM_ONLY))
+ | GOVD_MAP_FROM_ONLY
+ | GOVD_DEVICEPTR))
{
case 0:
kind = GOMP_MAP_TOFROM;
@@ -9750,6 +9757,9 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n,
case GOVD_MAP_FORCE_PRESENT:
kind = GOMP_MAP_FORCE_PRESENT;
break;
+ case GOVD_DEVICEPTR:
+ kind = GOMP_MAP_FORCE_DEVICEPTR;
+ break;
default:
gcc_unreachable ();
}
Index: gcc/testsuite/c-c++-common/goacc/deviceptr-4.c
===================================================================
--- gcc/testsuite/c-c++-common/goacc/deviceptr-4.c (revision 277155)
+++ gcc/testsuite/c-c++-common/goacc/deviceptr-4.c (working copy)
@@ -8,4 +8,4 @@ subr (int *a)
a[0] += 1.0;
}
-/* { dg-final { scan-tree-dump-times "#pragma omp target
oacc_parallel.*map\\(tofrom:a" 1 "gimple" } } */
+/* { dg-final { scan-tree-dump-times "#pragma omp target
oacc_parallel.*map\\(force_deviceptr:a" 1 "gimple" } } */
Index: libgomp/oacc-parallel.c
===================================================================
--- libgomp/oacc-parallel.c (revision 277155)
+++ libgomp/oacc-parallel.c (working copy)
@@ -66,51 +66,6 @@ find_pointer (int pos, size_t mapnum, unsigned sho
return 0;
}
-/* Handle the mapping pair that are presented when a
- deviceptr clause is used with Fortran. */
-
-static void
-handle_ftn_pointers (size_t mapnum, void **hostaddrs, size_t *sizes,
- unsigned short *kinds)
-{
- int i;
-
- for (i = 0; i < mapnum; i++)
- {
- unsigned short kind1 = kinds[i] & 0xff;
-
- /* Handle Fortran deviceptr clause. */
- if (kind1 == GOMP_MAP_FORCE_DEVICEPTR)
- {
- unsigned short kind2;
-
- if (i < (signed)mapnum - 1)
- kind2 = kinds[i + 1] & 0xff;
- else
- kind2 = 0xffff;
-
- if (sizes[i] == sizeof (void *))
- continue;
-
- /* At this point, we're dealing with a Fortran deviceptr.
- If the next element is not what we're expecting, then
- this is an instance of where the deviceptr variable was
- not used within the region and the pointer was removed
- by the gimplifier. */
- if (kind2 == GOMP_MAP_POINTER
- && sizes[i + 1] == 0
- && hostaddrs[i] == *(void **)hostaddrs[i + 1])
- {
- kinds[i+1] = kinds[i];
- sizes[i+1] = sizeof (void *);
- }
-
- /* Invalidate the entry. */
- hostaddrs[i] = NULL;
- }
- }
-}
-
static void goacc_wait (int async, int num_waits, va_list *ap);
@@ -203,8 +158,6 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (voi
goacc_profiling_dispatch (&prof_info, &compute_construct_event_info,
&api_info);
- handle_ftn_pointers (mapnum, hostaddrs, sizes, kinds);
-
/* Host fallback if "if" clause is false or if the current device is set to
the host. */
if (flags & GOACC_FLAG_HOST_FALLBACK)
Index: libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 (nonexistent)
+++ libgomp/testsuite/libgomp.oacc-fortran/deviceptr-1.f90 (working copy)
@@ -0,0 +1,197 @@
+! { dg-do run }
+
+! Test the deviceptr clause with various directives
+! and in combination with other directives where
+! the deviceptr variable is implied.
+
+subroutine subr1 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc data deviceptr (a)
+
+ !$acc parallel copy (b)
+ do i = 1, N
+ a(i) = i * 2
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc end data
+
+end subroutine
+
+subroutine subr2 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ !$acc declare deviceptr (a)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc parallel copy (b)
+ do i = 1, N
+ a(i) = i * 4
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr3 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ !$acc declare deviceptr (a)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc kernels copy (b)
+ do i = 1, N
+ a(i) = i * 8
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+end subroutine
+
+subroutine subr4 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc parallel deviceptr (a) copy (b)
+ do i = 1, N
+ a(i) = i * 16
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr5 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc kernels deviceptr (a) copy (b)
+ do i = 1, N
+ a(i) = i * 32
+ b(i) = a(i)
+ end do
+ !$acc end kernels
+
+end subroutine
+
+subroutine subr6 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc parallel deviceptr (a) copy (b)
+ do i = 1, N
+ b(i) = i
+ end do
+ !$acc end parallel
+
+end subroutine
+
+subroutine subr7 (a, b)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: b(N)
+ integer :: i = 0
+
+ !$acc data deviceptr (a)
+
+ !$acc parallel copy (b)
+ do i = 1, N
+ a(i) = i * 2
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc parallel copy (b)
+ do i = 1, N
+ a(i) = b(i) * 2
+ b(i) = a(i)
+ end do
+ !$acc end parallel
+
+ !$acc end data
+
+end subroutine
+
+program main
+ use iso_c_binding, only: c_ptr, c_f_pointer
+ implicit none
+ type (c_ptr) :: cp
+ integer, parameter :: N = 8
+ integer, pointer :: fp(:)
+ integer :: i = 0
+ integer :: b(N)
+
+ interface
+ function acc_malloc (s) bind (C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ integer (c_size_t), value :: s
+ type (c_ptr) :: acc_malloc
+ end function
+ end interface
+
+ cp = acc_malloc (N * sizeof (fp(N)))
+ call c_f_pointer (cp, fp, [N])
+
+ call subr1 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 2) call abort
+ end do
+
+ call subr2 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 4) call abort
+ end do
+
+ call subr3 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 8) call abort
+ end do
+
+ call subr4 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 16) call abort
+ end do
+
+ call subr5 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 32) call abort
+ end do
+
+ call subr6 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i) call abort
+ end do
+
+ call subr7 (fp, b)
+
+ do i = 1, N
+ if (b(i) .ne. i * 4) call abort
+ end do
+
+end program main
Index: libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90 (nonexistent)
+++ libgomp/testsuite/libgomp.oacc-fortran/deviceptr-2.f90 (working copy)
@@ -0,0 +1,54 @@
+! { dg-do run }
+
+! Test deviceptr clause to see if computation on device memory array
+! and copy back to host memory works.
+
+subroutine process_by_openacc (a, c)
+ implicit none
+ integer, parameter :: N = 8
+ integer :: a(N)
+ integer :: i = 0
+ integer :: c
+
+ !$acc parallel deviceptr (a)
+ do i = 1, N
+ a(i) = i * c
+ end do
+ !$acc end parallel
+
+end subroutine
+
+program main
+ use iso_c_binding, only: c_ptr, c_f_pointer, c_loc
+ implicit none
+ type (c_ptr) :: cp
+ integer, parameter :: N = 8
+ integer, pointer :: fp(:)
+ integer, target :: res(N)
+ integer :: i
+
+ interface
+ function acc_malloc (s) bind (C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ integer (c_size_t), value :: s
+ type (c_ptr) :: acc_malloc
+ end function acc_malloc
+
+ subroutine acc_memcpy_from_device (d, s, sz) bind (C)
+ use iso_c_binding, only: c_ptr, c_size_t
+ type (c_ptr), value :: d, s
+ integer (c_size_t), value :: sz
+ end subroutine acc_memcpy_from_device
+ end interface
+
+ cp = acc_malloc (N * sizeof (fp(N)))
+ call c_f_pointer (cp, fp, [N])
+
+ call process_by_openacc (fp, 1234)
+ call acc_memcpy_from_device (c_loc (res), cp, N * sizeof (fp(N)))
+
+ do i = 1, N
+ if (res(i) .ne. i * 1234) call abort
+ end do
+
+end program main