This patch adds the OpenACC routines acc_attach and acc_detach.
However, in order to avoid the generation of a temporary, which breaks this feature, a special case had to be added to gfc_trans_call. Otherwise, I think it completes the Fortran additions of existing C/C++ functions, by adding this OpenACC 3.3 feature, which is used by ICON. Any comments, suggestions, remarks before I commit this patch? Tobias
Fortran/OpenACC: Add Fortran support for acc_attach/acc_detach While C/++ support the routines acc_attach{,_async} and acc_detach{,_finalize}{,_async} routines since a long time, the Fortran API routines where only added in OpenACC 3.3. Unfortunately, they cannot directly be implemented in the library as GCC will introduce a temporary array descriptor in some cases, which causes the attempted attachment to the this temporary variable instead of to the original one. Therefore, those API routines are handled in a special way in the compiler. gcc/fortran/ChangeLog: * trans-stmt.cc (gfc_trans_call_acc_attach_detach): New. (gfc_trans_call): Call it. libgomp/ChangeLog: * libgomp.texi (acc_attach, acc_detach): Update for Fortran version. * openacc.f90 acc_attach{,_async}, acc_detach{,_finalize}{,_async}: Add. * openacc_lib.h: Likewise. * testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90: New test. * testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90: New test. gcc/fortran/trans-stmt.cc | 74 +++++++++++++++++++++- libgomp/libgomp.texi | 40 ++++++------ libgomp/openacc.f90 | 44 +++++++++++++ libgomp/openacc_lib.h | 42 ++++++++++++ .../libgomp.oacc-fortran/acc-attach-detach-1.f90 | 25 ++++++++ .../libgomp.oacc-fortran/acc-attach-detach-2.f90 | 62 ++++++++++++++++++ 6 files changed, 265 insertions(+), 22 deletions(-) diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc index 487b7687ef1..f1054015862 100644 --- a/gcc/fortran/trans-stmt.cc +++ b/gcc/fortran/trans-stmt.cc @@ -377,6 +377,57 @@ get_intrinsic_for_code (gfc_code *code) } +/* Handle the OpenACC routines acc_attach{,_async} and + acc_detach{,_finalize}{,_async} explicitly. This is required as the + the corresponding device pointee is attached to the corresponding device + pointer, but if a temporary array descriptor is created for the call, + that one is used as pointer instead of the original pointer. */ + +tree +gfc_trans_call_acc_attach_detach (gfc_code *code) +{ + stmtblock_t block; + gfc_se ptr_addr_se, async_se; + tree fn; + + fn = code->resolved_sym->backend_decl; + if (fn == NULL) + { + fn = gfc_get_symbol_decl (code->resolved_sym); + code->resolved_sym->backend_decl = fn; + } + + gfc_start_block (&block); + + gfc_init_se (&ptr_addr_se, NULL); + ptr_addr_se.descriptor_only = 1; + ptr_addr_se.want_pointer = 1; + gfc_conv_expr (&ptr_addr_se, code->ext.actual->expr); + gfc_add_block_to_block (&block, &ptr_addr_se.pre); + if (GFC_DESCRIPTOR_TYPE_P (TREE_TYPE (ptr_addr_se.expr))) + ptr_addr_se.expr = gfc_conv_descriptor_data_get (ptr_addr_se.expr); + ptr_addr_se.expr = build_fold_addr_expr (ptr_addr_se.expr); + + bool async = code->ext.actual->next != NULL; + if (async) + { + gfc_init_se (&async_se, NULL); + gfc_conv_expr (&async_se, code->ext.actual->next->expr); + fn = build_call_expr_loc (gfc_get_location (&code->loc), fn, 2, + ptr_addr_se.expr, async_se.expr); + } + else + fn = build_call_expr_loc (gfc_get_location (&code->loc), + fn, 1, ptr_addr_se.expr); + gfc_add_expr_to_block (&block, fn); + gfc_add_block_to_block (&block, &ptr_addr_se.post); + if (async) + gfc_add_block_to_block (&block, &async_se.post); + + return gfc_finish_block (&block); +} + + /* Translate the CALL statement. Builds a call to an F95 subroutine. */ tree @@ -392,13 +443,32 @@ gfc_trans_call (gfc_code * code, bool dependency_check, tree tmp; bool is_intrinsic_mvbits; + gcc_assert (code->resolved_sym); + + /* Unfortunately, acc_attach* and acc_detach* need some special treatment for + attaching the the pointee to a pointer as GCC might introduce a temporary + array descriptor, whose data component is then used as to be attached to + pointer. */ + if (flag_openacc + && code->resolved_sym->attr.subroutine + && code->resolved_sym->formal + && code->resolved_sym->formal->sym->ts.type == BT_ASSUMED + && code->resolved_sym->formal->sym->attr.dimension + && code->resolved_sym->formal->sym->as->type == AS_ASSUMED_RANK + && startswith (code->resolved_sym->name, "acc_") + && (!strcmp (code->resolved_sym->name + 4, "attach") + || !strcmp (code->resolved_sym->name + 4, "attach_async") + || !strcmp (code->resolved_sym->name + 4, "detach") + || !strcmp (code->resolved_sym->name + 4, "detach_async") + || !strcmp (code->resolved_sym->name + 4, "detach_finalize") + || !strcmp (code->resolved_sym->name + 4, "detach_finalize_async"))) + return gfc_trans_call_acc_attach_detach (code); + /* A CALL starts a new block because the actual arguments may have to be evaluated first. */ gfc_init_se (&se, NULL); gfc_start_block (&se.pre); - gcc_assert (code->resolved_sym); - ss = gfc_ss_terminator; if (code->resolved_sym->attr.elemental) ss = gfc_walk_elemental_function_args (ss, code->ext.actual, diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi index 9f53f167e06..5518033f1f3 100644 --- a/libgomp/libgomp.texi +++ b/libgomp/libgomp.texi @@ -5967,19 +5967,19 @@ address to pointing to the corresponding device data. @item @emph{Prototype}: @tab @code{void acc_attach_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_attach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_attach_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.34. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. + @uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table @@ -5999,21 +5999,21 @@ address to pointing to the corresponding host data. @item @emph{Prototype}: @tab @code{void acc_detach_finalize_async(h_void **ptr_addr, int async);} @end multitable -@c @item @emph{Fortran}: -@c @multitable @columnfractions .20 .80 -@c @item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} -@c @item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} -@c @item @tab @code{type(*), dimension(..) :: ptr_addr} -@c @item @tab @code{integer(acc_handle_kind), value :: async_arg} -@c @end multitable +@item @emph{Fortran}: +@multitable @columnfractions .20 .80 +@item @emph{Interface}: @tab @code{subroutine acc_detach(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_async(ptr_addr, async_arg)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize(ptr_addr)} +@item @emph{Interface}: @tab @code{subroutine acc_detach_finalize_async(ptr_addr, async_arg)} +@item @tab @code{type(*), dimension(..) :: ptr_addr} +@item @tab @code{integer(acc_handle_kind), value :: async_arg} +@end multitable @item @emph{Reference}: @uref{https://www.openacc.org, OpenACC specification v2.6}, section 3.2.35. -@c @uref{https://www.openacc.org, OpenACC specification v3.3}, section -@c 3.2.29. +@uref{https://www.openacc.org, OpenACC specification v3.3}, section +3.2.29. @end table diff --git a/libgomp/openacc.f90 b/libgomp/openacc.f90 index 9d51f017985..3f2db45617b 100644 --- a/libgomp/openacc.f90 +++ b/libgomp/openacc.f90 @@ -798,6 +798,8 @@ module openacc public :: acc_memcpy_to_device, acc_memcpy_to_device_async public :: acc_memcpy_from_device, acc_memcpy_from_device_async public :: acc_memcpy_device, acc_memcpy_device_async + public :: acc_attach, acc_attach_async, acc_detach, acc_detach_async + public :: acc_detach_finalize, acc_detach_finalize_async integer, parameter :: openacc_version = 201711 @@ -1068,6 +1070,48 @@ module openacc end subroutine end interface + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + interface acc_copyin_async procedure :: acc_copyin_async_32_h procedure :: acc_copyin_async_64_h diff --git a/libgomp/openacc_lib.h b/libgomp/openacc_lib.h index 9333c481502..dbdc4d7bc40 100644 --- a/libgomp/openacc_lib.h +++ b/libgomp/openacc_lib.h @@ -707,3 +707,45 @@ integer (acc_handle_kind) async_ end subroutine end interface + + interface + subroutine acc_attach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_attach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_async (ptr_addr, async_arg) bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface + + interface + subroutine acc_detach_finalize (ptr_addr) bind(C) + type(*), dimension(..) :: ptr_addr + end subroutine + end interface + + interface + subroutine acc_detach_finalize_async(ptr_addr, async_arg)bind(C) + import :: acc_handle_kind + type(*), dimension(..) :: ptr_addr + integer(acc_handle_kind), value :: async_arg + end subroutine + end interface diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 new file mode 100644 index 00000000000..15393b456c8 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-1.f90 @@ -0,0 +1,25 @@ +! { dg-do compile } +! { dg-additional-options "-fdump-tree-original" } + +use openacc +implicit none (type, external) +integer,pointer :: a, b(:) +integer,allocatable :: c, d(:) + +call acc_attach(a) ! ICE +call acc_attach_async(b, 4) +call acc_attach(c) + +call acc_detach(a) +call acc_detach_async(b, 4) +call acc_detach_finalize(c) +call acc_detach_finalize_async(d,7) +end + +! { dg-final { scan-tree-dump-times "acc_attach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_attach \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach \\(&a\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\*\\) b.data, 4\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize \\(&c\\);" 1 "original" } } +! { dg-final { scan-tree-dump-times "acc_detach_finalize_async \\(&\\(integer\\(kind=4\\)\\\[0:\\\] \\* restrict\\) d.data, 7\\);" 1 "original" } } diff --git a/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 new file mode 100644 index 00000000000..b2204ac4467 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/acc-attach-detach-2.f90 @@ -0,0 +1,62 @@ +! { dg-do run } + +use openacc +implicit none (type, external) +integer, target :: tgt_a, tgt_b(5) + +integer, pointer :: p1, p2(:) + +type t + integer,pointer :: a => null () + integer,pointer :: b(:) => null () + integer,allocatable :: c, d(:) +end type t + +type(t), target :: var + +tgt_a = 51 +tgt_b = [11,22,33,44,55] + +var%b => tgt_b +!$acc enter data copyin(var, tgt_a, tgt_b) +var%a => tgt_a + +call acc_attach(var%a) +call acc_attach(var%b) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%a /= 51) stop 1 + if (any (var%b /= [11,22,33,44,55])) stop 2 +!$acc end serial + +call acc_detach(var%a) +call acc_detach(var%b) + +!$acc exit data delete(var, tgt_a, tgt_b) + +var%c = 9 +var%d = [1,2,3] + +p1 => var%c +p2 => var%d + +!$acc enter data copyin(p1, p2) +!$acc enter data copyin(var) +call acc_attach(var%c) +call acc_attach(var%d) + +!$acc serial +! { dg-warning "using .vector_length \\(32\\)., ignoring 1" "" { target openacc_nvidia_accel_selected } .-1 } + if (var%c /= 9) stop 3 + if (any (var%d /= [1,2,3])) stop 4 +!$acc end serial + +call acc_detach(var%c) +call acc_detach(var%d) + +!$acc exit data delete(var, p1, p2) + +deallocate(var%d) + +end