[PATCH] libgfortran: Use __builtin_issignaling in libgfortran

2022-08-15 Thread Jakub Jelinek via Fortran
Hi!

The following patch makes use of the new __builtin_issignaling,
so it no longer needs the fallback implementation and can use
the builtin even where glibc provides the macro.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux, ok for trunk?

2022-08-15  Jakub Jelinek  

* ieee/ieee_helper.c: Don't include issignaling_fallback.h.
(CLASSMACRO): Use __builtin_issignaling instead of issignaling.
* ieee/issignaling_fallback.h: Removed.

--- libgfortran/ieee/ieee_helper.c.jj   2022-06-27 15:34:47.111928150 +0200
+++ libgfortran/ieee/ieee_helper.c  2022-08-12 13:21:00.922306862 +0200
@@ -26,13 +26,6 @@ see the files COPYING3 and COPYING.RUNTI
 #include "libgfortran.h"
 
 
-/* Check support for issignaling macro.  If not, we include our own
-   fallback implementation.  */
-#ifndef issignaling
-# include "issignaling_fallback.h"
-#endif
-
-
 /* Prototypes.  */
 
 extern int ieee_class_helper_4 (GFC_REAL_4 *);
@@ -94,7 +87,7 @@ enum {
  \
 if (res == IEEE_QUIET_NAN) \
 { \
-  if (issignaling (*value)) \
+  if (__builtin_issignaling (*value)) \
return IEEE_SIGNALING_NAN; \
   else \
return IEEE_QUIET_NAN; \
--- libgfortran/ieee/issignaling_fallback.h.jj  2022-06-28 13:14:45.332799201 +0200
+++ libgfortran/ieee/issignaling_fallback.h 2022-08-12 13:20:17.784877531 +0200
@@ -1,251 +0,0 @@
-/* Fallback implementation of issignaling macro.
-   Copyright (C) 2022 Free Software Foundation, Inc.
-   Contributed by Francois-Xavier Coudert 
-
-This file is part of the GNU Fortran runtime library (libgfortran).
-
-Libgfortran is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public
-License as published by the Free Software Foundation; either
-version 3 of the License, or (at your option) any later version.
-
-Libgfortran is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-Under Section 7 of GPL version 3, you are granted additional
-permissions described in the GCC Runtime Library Exception, version
-3.1, as published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License and
-a copy of the GCC Runtime Library Exception along with this program;
-see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
-.  */
-
-#include "libgfortran.h"
-
-/* This header provides an implementation of the type-generic issignaling macro.
-   Some points of note:
-
- - This header is only included if the issignaling macro is not defined.
- - All targets for which Fortran IEEE modules are supported currently have
-   the high-order bit of the NaN mantissa clear for signaling (and set
-   for quiet), as recommended by IEEE.
- - We use the __*_IS_IEC_60559__ macros to make sure we only deal with formats
-   we know. For other floating-point formats, we consider all NaNs as quiet.
-
- */
-
-typedef union
-{
-  float value;
-  uint32_t word;
-} ieee_float_shape_type;
-
-static inline int
-__issignalingf (float x)
-{
-#if __FLT_IS_IEC_60559__
-  uint32_t xi;
-  ieee_float_shape_type u;
-
-  u.value = x;
-  xi = u.word;
-
-  xi ^= 0x00400000;
-  return (xi & 0x7fffffff) > 0x7fc00000;
-#else
-  return 0;
-#endif
-}
-
-
-typedef union
-{
-  double value;
-  uint64_t word;
-} ieee_double_shape_type;
-
-static inline int
-__issignaling (double x)
-{
-#if __DBL_IS_IEC_60559__
-  ieee_double_shape_type u;
-  uint64_t xi;
-
-  u.value = x;
-  xi = u.word;
-
-  xi ^= UINT64_C (0x0008000000000000);
-  return (xi & UINT64_C (0x7fffffffffffffff)) > UINT64_C (0x7ff8000000000000);
-#else
-  return 0;
-#endif
-}
-
-
-#if __LDBL_DIG__ == __DBL_DIG__
-
-/* Long double is the same as double.  */
-static inline int
-__issignalingl (long double x)
-{
-  return __issignaling (x);
-}
-
-#elif (__LDBL_DIG__ == 18) && __LDBL_IS_IEC_60559__
-
-/* Long double is x86 extended type.  */
-
-typedef union
-{
-  long double value;
-  struct
-  {
-#if __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__
-int sign_exponent:16;
-unsigned int empty:16;
-uint32_t msw;
-uint32_t lsw;
-#elif __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
-uint32_t lsw;
-uint32_t msw;
-int sign_exponent:16;
-unsigned int empty:16;
-#endif
-  } parts;
-} ieee_long_double_shape_type;
-
-static inline int
-__issignalingl (long double x)
-{
-  int ret;
-  uint32_t exi, hxi, lxi;
-  ieee_long_double_shape_type u;
-
-  u.value = x;
-  exi = u.parts.sign_exponent;
-  hxi = u.parts.msw;
-  lxi = u.parts.lsw;
-
-  /* Pseudo numbers on x86 are always signaling.  */
-  ret = (exi & 0x7fff) && ((hxi & 0x80000000) == 0);
-
-  hxi ^= 0x40000000;
-  hxi |= (lxi | -lxi) >> 31;
-  return ret || (((exi & 0x7fff) == 0x7fff) && (hxi > 0xc0000000));
-}
-
-#e

[PATCH] fortran: Expand ieee_arithmetic module's ieee_class inline [PR106579]

2022-08-15 Thread Jakub Jelinek via Fortran
Hi!

The following patch expands IEEE_CLASS inline in the FE, using the
__builtin_fpclassify, __builtin_signbit and the new __builtin_issignaling
builtins.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux, ok for trunk?

2022-08-15  Jakub Jelinek  

PR fortran/106579
gcc/fortran/
* f95-lang.cc (gfc_init_builtin_functions): Initialize
BUILT_IN_FPCLASSIFY.
* libgfortran.h (IEEE_OTHER_VALUE, IEEE_SIGNALING_NAN,
IEEE_QUIET_NAN, IEEE_NEGATIVE_INF, IEEE_NEGATIVE_NORMAL,
IEEE_NEGATIVE_DENORMAL, IEEE_NEGATIVE_SUBNORMAL,
IEEE_NEGATIVE_ZERO, IEEE_POSITIVE_ZERO, IEEE_POSITIVE_DENORMAL,
IEEE_POSITIVE_SUBNORMAL, IEEE_POSITIVE_NORMAL, IEEE_POSITIVE_INF):
New enum.
* trans-intrinsic.cc (conv_intrinsic_ieee_class): New function.
(gfc_conv_ieee_arithmetic_function): Handle ieee_class.
libgfortran/
* ieee/ieee_helper.c (IEEE_OTHER_VALUE, IEEE_SIGNALING_NAN,
IEEE_QUIET_NAN, IEEE_NEGATIVE_INF, IEEE_NEGATIVE_NORMAL,
IEEE_NEGATIVE_DENORMAL, IEEE_NEGATIVE_SUBNORMAL,
IEEE_NEGATIVE_ZERO, IEEE_POSITIVE_ZERO, IEEE_POSITIVE_DENORMAL,
IEEE_POSITIVE_SUBNORMAL, IEEE_POSITIVE_NORMAL, IEEE_POSITIVE_INF):
Move to gcc/fortran/libgfortran.h.

--- gcc/fortran/f95-lang.cc.jj  2022-08-12 17:06:33.906598328 +0200
+++ gcc/fortran/f95-lang.cc 2022-08-12 18:39:47.727073699 +0200
@@ -1017,8 +1017,9 @@ gfc_init_builtin_functions (void)
  "__builtin_issignaling", ATTR_CONST_NOTHROW_LEAF_LIST);
   gfc_define_builtin ("__builtin_signbit", ftype, BUILT_IN_SIGNBIT,
  "__builtin_signbit", ATTR_CONST_NOTHROW_LEAF_LIST);
+  gfc_define_builtin ("__builtin_fpclassify", ftype, BUILT_IN_FPCLASSIFY,
+ "__builtin_fpclassify", ATTR_CONST_NOTHROW_LEAF_LIST);
 
-  ftype = build_function_type (integer_type_node, NULL_TREE);
   gfc_define_builtin ("__builtin_isless", ftype, BUILT_IN_ISLESS,
  "__builtin_isless", ATTR_CONST_NOTHROW_LEAF_LIST);
   gfc_define_builtin ("__builtin_islessequal", ftype, BUILT_IN_ISLESSEQUAL,
--- gcc/fortran/libgfortran.h.jj  2022-05-31 11:33:51.550250610 +0200
+++ gcc/fortran/libgfortran.h   2022-08-12 17:22:33.210947170 +0200
@@ -187,3 +187,23 @@ typedef enum
   BT_ASSUMED, BT_UNION, BT_BOZ
 }
 bt;
+
+/* Enumeration of the possible floating-point types. These values
+   correspond to the hidden arguments of the IEEE_CLASS_TYPE
+   derived-type of IEEE_ARITHMETIC.  */
+
+enum {
+  IEEE_OTHER_VALUE = 0,
+  IEEE_SIGNALING_NAN,
+  IEEE_QUIET_NAN,
+  IEEE_NEGATIVE_INF,
+  IEEE_NEGATIVE_NORMAL,
+  IEEE_NEGATIVE_DENORMAL,
+  IEEE_NEGATIVE_SUBNORMAL = IEEE_NEGATIVE_DENORMAL,
+  IEEE_NEGATIVE_ZERO,
+  IEEE_POSITIVE_ZERO,
+  IEEE_POSITIVE_DENORMAL,
+  IEEE_POSITIVE_SUBNORMAL = IEEE_POSITIVE_DENORMAL,
+  IEEE_POSITIVE_NORMAL,
+  IEEE_POSITIVE_INF
+};
--- gcc/fortran/trans-intrinsic.cc.jj   2022-06-28 13:14:45.322799333 +0200
+++ gcc/fortran/trans-intrinsic.cc  2022-08-12 18:51:28.095927643 +0200
@@ -10013,6 +10013,78 @@ conv_intrinsic_ieee_copy_sign (gfc_se *
 }
 
 
+/* Generate code for IEEE_CLASS.  */
+
+static void
+conv_intrinsic_ieee_class (gfc_se *se, gfc_expr *expr)
+{
+  tree arg, c, t1, t2, t3, t4;
+
+  /* Convert arg, evaluate it only once.  */
+  conv_ieee_function_args (se, expr, &arg, 1);
+  arg = gfc_evaluate_now (arg, &se->pre);
+
+  c = build_call_expr_loc (input_location,
+  builtin_decl_explicit (BUILT_IN_FPCLASSIFY), 6,
+  build_int_cst (integer_type_node, IEEE_QUIET_NAN),
+  build_int_cst (integer_type_node,
+ IEEE_POSITIVE_INF),
+  build_int_cst (integer_type_node,
+ IEEE_POSITIVE_NORMAL),
+  build_int_cst (integer_type_node,
+ IEEE_POSITIVE_DENORMAL),
+  build_int_cst (integer_type_node,
+ IEEE_POSITIVE_ZERO),
+  arg);
+  c = gfc_evaluate_now (c, &se->pre);
+  t1 = fold_build2_loc (input_location, EQ_EXPR, logical_type_node,
+   c, build_int_cst (integer_type_node,
+ IEEE_QUIET_NAN));
+  t2 = build_call_expr_loc (input_location,
+   builtin_decl_explicit (BUILT_IN_ISSIGNALING), 1,
+   arg);
+  t2 = fold_build2_loc (input_location, NE_EXPR, logical_type_node,
+   t2, build_zero_cst (TREE_TYPE (t2)));
+  t1 = fold_build2_loc (input_location, TRUTH_AND_EXPR,
+   logical_type_node, t1, t2);
+  t3 = fold_build2_loc (input_location, GE_EXPR, logical_type_node,
+   c, build_int_cst (integer_type_node,
+ IEEE_POSITIVE_ZERO));
+  t4 = build_call_e

[PATCH] fortran: Expand ieee_arithmetic module's ieee_value inline [PR106579]

2022-08-15 Thread Jakub Jelinek via Fortran
Hi!

The following patch expands IEEE_VALUE function inline in the FE.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux, ok for trunk?

2022-08-15  Jakub Jelinek  

PR fortran/106579
* trans-intrinsic.cc: Include realmpfr.h.
(conv_intrinsic_ieee_value): New function.
(gfc_conv_ieee_arithmetic_function): Handle ieee_value.

--- gcc/fortran/trans-intrinsic.cc.jj   2022-08-12 18:51:28.095927643 +0200
+++ gcc/fortran/trans-intrinsic.cc  2022-08-13 13:24:37.446768877 +0200
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3.
 #include "trans-array.h"
 #include "dependency.h" /* For CAF array alias analysis.  */
 #include "attribs.h"
+#include "realmpfr.h"
 
 /* Only for gfc_trans_assign and gfc_trans_pointer_assign.  */
 
@@ -10085,6 +10086,115 @@ conv_intrinsic_ieee_class (gfc_se *se, g
 }
 
 
+/* Generate code for IEEE_VALUE.  */
+
+static void
+conv_intrinsic_ieee_value (gfc_se *se, gfc_expr *expr)
+{
+  tree args[2], arg, ret, tmp;
+  stmtblock_t body;
+
+  /* Convert args, evaluate the second one only once.  */
+  conv_ieee_function_args (se, expr, args, 2);
+  arg = gfc_evaluate_now (args[1], &se->pre);
+
+  tree type = TREE_TYPE (arg);
+  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
+  tree field = NULL_TREE;
+  for (tree f = TYPE_FIELDS (type); f != NULL_TREE; f = DECL_CHAIN (f))
+if (TREE_CODE (f) == FIELD_DECL)
+  {
+   gcc_assert (field == NULL_TREE);
+   field = f;
+  }
+  gcc_assert (field);
+  arg = fold_build3_loc (input_location, COMPONENT_REF, TREE_TYPE (field),
+arg, field, NULL_TREE);
+  arg = gfc_evaluate_now (arg, &se->pre);
+
+  type = gfc_typenode_for_spec (&expr->ts);
+  gcc_assert (TREE_CODE (type) == REAL_TYPE);
+  ret = gfc_create_var (type, NULL);
+
+  gfc_init_block (&body);
+
+  tree end_label = gfc_build_label_decl (NULL_TREE);
+  for (int c = IEEE_SIGNALING_NAN; c <= IEEE_POSITIVE_INF; ++c)
+{
+  tree label = gfc_build_label_decl (NULL_TREE);
+  tree low = build_int_cst (TREE_TYPE (arg), c);
+  tmp = build_case_label (low, low, label);
+  gfc_add_expr_to_block (&body, tmp);
+
+  REAL_VALUE_TYPE real;
+  int k;
+  switch (c)
+   {
+   case IEEE_SIGNALING_NAN:
+ real_nan (&real, "", 0, TYPE_MODE (type));
+ break;
+   case IEEE_QUIET_NAN:
+ real_nan (&real, "", 1, TYPE_MODE (type));
+ break;
+   case IEEE_NEGATIVE_INF:
+ real_inf (&real);
+ real = real_value_negate (&real);
+ break;
+   case IEEE_NEGATIVE_NORMAL:
+ real_from_integer (&real, TYPE_MODE (type), -42, SIGNED);
+ break;
+   case IEEE_NEGATIVE_DENORMAL:
+ k = gfc_validate_kind (BT_REAL, expr->ts.kind, false);
+ real_from_mpfr (&real, gfc_real_kinds[k].tiny,
+ type, GFC_RND_MODE);
+ real_arithmetic (&real, RDIV_EXPR, &real, &dconst2);
+ real = real_value_negate (&real);
+ break;
+   case IEEE_NEGATIVE_ZERO:
+ real_from_integer (&real, TYPE_MODE (type), 0, SIGNED);
+ real = real_value_negate (&real);
+ break;
+   case IEEE_POSITIVE_ZERO:
+ /* Make this also the default: label.  */
+ label = gfc_build_label_decl (NULL_TREE);
+ tmp = build_case_label (NULL_TREE, NULL_TREE, label);
+ gfc_add_expr_to_block (&body, tmp);
+ real_from_integer (&real, TYPE_MODE (type), 0, SIGNED);
+ break;
+   case IEEE_POSITIVE_DENORMAL:
+ k = gfc_validate_kind (BT_REAL, expr->ts.kind, false);
+ real_from_mpfr (&real, gfc_real_kinds[k].tiny,
+ type, GFC_RND_MODE);
+ real_arithmetic (&real, RDIV_EXPR, &real, &dconst2);
+ break;
+   case IEEE_POSITIVE_NORMAL:
+ real_from_integer (&real, TYPE_MODE (type), 42, SIGNED);
+ break;
+   case IEEE_POSITIVE_INF:
+ real_inf (&real);
+ break;
+   default:
+ gcc_unreachable ();
+   }
+
+  tree val = build_real (type, real);
+  gfc_add_modify (&body, ret, val);
+
+  tmp = build1_v (GOTO_EXPR, end_label);
+  gfc_add_expr_to_block (&body, tmp);
+}
+
+  tmp = gfc_finish_block (&body);
+  tmp = fold_build2_loc (input_location, SWITCH_EXPR, NULL_TREE, arg, tmp);
+  gfc_add_expr_to_block (&se->pre, tmp);
+
+  tmp = build1_v (LABEL_EXPR, end_label);
+  gfc_add_expr_to_block (&se->pre, tmp);
+
+  se->expr = ret;
+}
+
+
 /* Generate code for an intrinsic function from the IEEE_ARITHMETIC
    module.  */
 
@@ -10117,6 +10227,8 @@ gfc_conv_ieee_arithmetic_function (gfc_s
 conv_intrinsic_ieee_logb_rint (se, expr, BUILT_IN_RINT);
   else if (startswith (name, "ieee_class_") && ISDIGIT (name[11]))
 conv_intrinsic_ieee_class (se, expr);
+  else if (startswith (name, "ieee_value_") && ISDIGIT (name[11]))
+conv_intrinsic_ieee_value (se, expr);
   else
 /* It is not among the functio

[PATCH, OpenMP, Fortran] requires unified_shared_memory 1/2: adjust libgfortran memory allocators

2022-08-15 Thread Chung-Lin Tang

Hi, this patch is to fix the case where 'requires unified_shared_memory' doesn't
work due to memory allocator mismatch. Currently this is only for OG12 
(devel/omp/gcc-12),
but will apply to mainline as well once those requires patches get in.

Basically, 'requires unified_shared_memory' enables the usm_transform pass,
which transforms some of the expanded Fortran intrinsic code that uses
__builtin_free() into 'omp_free (..., ompx_unified_shared_mem_alloc)'.

The intention is to make all dynamic memory allocation use the OpenMP
unified_shared_memory allocator, but there is a big gap in this, namely
libgfortran. What happens in some tests is that libgfortran allocates stuff
using normal malloc(), and the usm_transform generates code that frees the
stuff using omp_free(), and chaos ensues.

So the proper fix we believe is: to make it possible to move the entire 
libgfortran on to
unified_shared_memory.

This first patch is a mostly mechanical patch to change all references of 
malloc/free/calloc/realloc
in libgfortran into xmalloc/xfree/xcalloc/xrealloc in 
libgfortran/runtime/memory.c,
as well as strdup uses into a new internal xstrdup.

All of libgfortran is adjusted this way, except libgfortran/caf, which is an 
independent library
outside of libgfortran.so.

The second patch of this series will present a way to switch the references of 
allocators
in libgfortran/runtime/memory.c from the normal glibc malloc/free/etc. to 
omp_alloc/omp_free/etc.
when 'requires unified_shared_memory' is detected.

Tested on devel/omp/gcc-12. The plan is to commit there soon, but I am also
seeking approval for mainline once the requires stuff goes in.

Thanks,
Chung-Lin

2022-08-15  Chung-Lin Tang  

libgfortran/ChangeLog:

* m4/matmul_internal.m4: Adjust malloc/free to xmalloc/xfree.
* generated/matmul_c10.c: Regenerate.
* generated/matmul_c16.c: Likewise.
* generated/matmul_c17.c: Likewise.
* generated/matmul_c4.c: Likewise.
* generated/matmul_c8.c: Likewise.
* generated/matmul_i1.c: Likewise.
* generated/matmul_i16.c: Likewise.
* generated/matmul_i2.c: Likewise.
* generated/matmul_i4.c: Likewise.
* generated/matmul_i8.c: Likewise.
* generated/matmul_r10.c: Likewise.
* generated/matmul_r16.c: Likewise.
* generated/matmul_r17.c: Likewise.
* generated/matmul_r4.c: Likewise.
* generated/matmul_r8.c: Likewise.
* generated/matmulavx128_c10.c: Likewise.
* generated/matmulavx128_c16.c: Likewise.
* generated/matmulavx128_c17.c: Likewise.
* generated/matmulavx128_c4.c: Likewise.
* generated/matmulavx128_c8.c: Likewise.
* generated/matmulavx128_i1.c: Likewise.
* generated/matmulavx128_i16.c: Likewise.
* generated/matmulavx128_i2.c: Likewise.
* generated/matmulavx128_i4.c: Likewise.
* generated/matmulavx128_i8.c: Likewise.
* generated/matmulavx128_r10.c: Likewise.
* generated/matmulavx128_r16.c: Likewise.
* generated/matmulavx128_r17.c: Likewise.
* generated/matmulavx128_r4.c: Likewise.
* generated/matmulavx128_r8.c: Likewise.
* intrinsics/access.c (access_func): Adjust free to xfree.
* intrinsics/chdir.c (chdir_i4_sub): Likewise.
(chdir_i8_sub): Likewise.
* intrinsics/chmod.c (chmod_func): Likewise.
* intrinsics/date_and_time.c (secnds): Likewise.
* intrinsics/env.c (PREFIX(getenv)): Likewise.
(get_environment_variable_i4): Likewise.
* intrinsics/execute_command_line.c (execute_command_line): Likewise.
* intrinsics/getcwd.c (getcwd_i4_sub): Likewise.
* intrinsics/getlog.c (PREFIX(getlog)): Likewise.
* intrinsics/link.c (link_internal): Likewise.
* intrinsics/move_alloc.c (move_alloc): Likewise.
* intrinsics/perror.c (perror_sub): Likewise.
* intrinsics/random.c (constructor_random): Likewise.
* intrinsics/rename.c (rename_internal): Likewise.
* intrinsics/stat.c (stat_i4_sub_0): Likewise.
(stat_i8_sub_0): Likewise.
* intrinsics/symlnk.c (symlnk_internal): Likewise.
* intrinsics/system.c (system_sub): Likewise.
* intrinsics/unlink.c (unlink_i4_sub): Likewise.
* io/async.c (update_pdt): Likewise.
(async_io): Likewise.
(free_async_unit): Likewise.
(init_async_unit): Adjust calloc to xcalloc.
(enqueue_done_id): Likewise.
(enqueue_done): Likewise.
(enqueue_close): Likewise.
* io/async.h (MUTEX_DEBUG_ADD): Adjust malloc/free to xmalloc/xfree.
* io/close.c (st_close): Adjust strdup/free to xstrdup/xfree.
* io/fbuf.c (fbuf_destroy): Adjust free to xfree.
* io/format.c (free_format_hash_table): Likewise.
(save_parsed_format): Likewise.
(free_format): Likewise.
(free_format_data): Likewise.
* io/intrinsics.c (ttynam):

[PATCH, OpenMP, Fortran] requires unified_shared_memory 2/2: insert USM allocators into libgfortran

2022-08-15 Thread Chung-Lin Tang

After the first libgfortran memory allocator preparation patch, this is the
actual patch that organizes unified_shared_memory allocation into libgfortran.

In the current OpenMP requires implementation, the requires_mask is collected
through offload LTO processing, and presented to libgomp when registering
offload images through GOMP_offload_register_ver() (called by the mkoffload 
generated
constructor linked into the program binary)

This means that the only reliable place to access omp_requires_mask is in
GOMP_offload_register_ver, however since it is called through an ELF constructor
in the *main program*, this runs later than libgfortran/runtime/main.c:init() 
constructor,
and because some libgfortran init actions there start allocating memory, this 
can cause
more deallocation errors later.

Another issue is that CUDA appears to be registering some cleanup actions using 
atexit(),
which forces libgomp to register gomp_target_fini() using atexit as well (to 
properly run
before the underlying CUDA stuff disappears). This happens to us here as well.

So to summarize we need to: (1) order libgfortran init actions after 
omp_requires_mask
processing is done, and (2) order libgfortran cleanup actions before 
gomp_target_fini,
to properly deallocate stuff without crashing.

The above explanation is for why there's a little new set of definitions, as 
well as
callback registering functions exported from libgomp to libgfortran, basically 
to register
libgfortran init/fini actions into libgomp to run.

Inside GOMP_offload_register_ver, after omp_requires_mask processing is done, 
we call into
libgfortran through a new _gfortran_mem_allocators_init function to insert the 
omp_free/alloc/etc.
based allocators into the Fortran runtime, when 
GOMP_REQUIRES_UNIFIED_SHARED_MEMORY is set.

All symbol references between libgfortran/libgomp are defined with weak
symbols. Tests of the weak symbols are also used to determine if the other
library exists in this program.

A final issue is: the case where we have an OpenMP program that does NOT have 
offloading.
We cannot passively determine in libgomp/libgfortran whether offloading exists 
or not, only the
main program itself can, by seeing if the hidden __OFFLOAD_TABLE__ exists.

When we do init/fini libgomp callback registering for OpenMP programs, those
with no offloading will not have those callbacks properly run (because of no
offload image loading). Therefore the solution here is a constructor added
into the crtoffloadend.o fragment that does a "null" call of
GOMP_offload_register_ver, solely for triggering the post-offload_register
callbacks when __OFFLOAD_TABLE__ is NULL (and because of this, the
crtoffloadend.o Makefile rule is adjusted to compile with PIC).

I know this is a big pile of yarn wrt how the main program/libgomp/libgfortran 
interacts, but it's
finally working. Again tested without regressions. Preparing to commit to 
devel/omp/gcc-12, and seeking
approval for mainline when the requires patches are in.

Thanks,
Chung-Lin

2022-08-15  Chung-Lin Tang  

libgcc/
* Makefile.in (crtoffloadend$(objext)): Add $(PICFLAG) to compile rule.
* offloadstuff.c (GOMP_offload_register_ver): Add declaration of weak
symbol.
(__OFFLOAD_TABLE__): Likewise.
(init_non_offload): New function.

libgfortran/

* gfortran.map (GFORTRAN_13): New namespace.
(_gfortran_mem_allocators_init): New name inside GFORTRAN_13.
* libgfortran.h (mem_allocators_init): New exported declaration.
* runtime/main.c (do_init): Rename from init, add run-once guard code.
(cleanup): Add run-once guard code.
(GOMP_post_offload_register_callback): Declare weak symbol.
(GOMP_pre_gomp_target_fini_callback): Likewise.
(init): New constructor to register offload callbacks, or call do_init
when not OpenMP.
* runtime/memory.c (gfortran_malloc): New pointer variable.
(gfortran_calloc): Likewise.
(gfortran_realloc): Likewise.
(gfortran_free): Likewise.
(mem_allocators_init): New function.
(xmalloc): Use gfortran_malloc.
(xmallocarray): Use gfortran_malloc.
(xcalloc): Use gfortran_calloc.
(xrealloc): Use gfortran_realloc.
(xfree): Use gfortran_free.

libgomp/

* libgomp.map (GOMP_5.1.2): New version namespace.
(GOMP_post_offload_register_callback): New name inside GOMP_5.1.2.
(GOMP_pre_gomp_target_fini_callback): Likewise.
(GOMP_DEFINE_CALLBACK_SET): Macro to define callback set.
(post_offload_register): Define callback set for after offload image
register.
(pre_gomp_target_fini): Define callback set for before gomp_target_fini
is called.
(libgfortran_malloc_usm): New function.
(libgfortran_calloc_usm): Likewise.
(libgfortran_realloc_usm): Likewise.
(libgfortran_free_usm): Likewise.
(_gfortran_mem_allocators_init): De

Re: [PATCH, OpenMP, Fortran] requires unified_shared_memory 2/2: insert USM allocators into libgfortran

2022-08-15 Thread Chung-Lin Tang

On 2022/8/15 7:06 PM, Chung-Lin Tang wrote:


I know this is a big pile of yarn wrt how the main program/libgomp/libgfortran 
interacts, but it's
finally working. Again tested without regressions. Preparing to commit to 
devel/omp/gcc-12, and seeking
approval for mainline when the requires patches are in.


Just realized that I don't have the new testcases added in this patch.
Will supplement them later :P

Thanks,
Chung-Lin


Re: [PATCH] libgfortran: Use __builtin_issignaling in libgfortran

2022-08-15 Thread Thomas Koenig via Fortran



Hi Jakub,


The following patch makes use of the new __builtin_issignaling,
so it no longer needs the fallback implementation and can use
the builtin even where glibc provides the macro.

Bootstrapped/regtested on x86_64-linux, i686-linux, powerpc64le-linux
and powerpc64-linux, ok for trunk?


OK. Can you mention PR 105105 in the ChangeLog when you commit?

Thanks for the patch!

Best regards

Thomas


Re: [PATCH] fortran: Expand ieee_arithmetic module's ieee_class inline [PR106579]

2022-08-15 Thread FX via Fortran
Question to the Fortran maintainers:

Do you know if the standard allows IEEE_CLASS and IEEE_VALUE to be used as 
procedure pointers? I think not, because they do not follow (in F2008) the 
standard constraint C729 / R740.

If so, we need to keep these functions implementations in libgfortran for now 
(for ABI compatibility) but can remove them at the next breakage. Is one 
planned? Where is this tracked, is it still at 
https://gcc.gnu.org/wiki/LibgfortranAbiCleanup or do we have another place 
(e.g. in bugzilla)?

Thanks,
FX

Re: [PATCH] fortran: Expand ieee_arithmetic module's ieee_value inline [PR106579]

2022-08-15 Thread FX via Fortran
Hi Jakub,

I have two questions, on this and the ieee_class patch:


> +  tree type = TREE_TYPE (arg);
> +  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
> +  tree field = NULL_TREE;
> +  for (tree f = TYPE_FIELDS (type); f != NULL_TREE; f = DECL_CHAIN (f))
> +if (TREE_CODE (f) == FIELD_DECL)
> +  {
> + gcc_assert (field == NULL_TREE);
> + field = f;
> +  }
> +  gcc_assert (field);

Why looping over fields? The class type is a simple type with only one member 
(and it should be an integer, we can assert that).


> + case IEEE_POSITIVE_ZERO:
> +   /* Make this also the default: label.  */
> +   label = gfc_build_label_decl (NULL_TREE);
> +   tmp = build_case_label (NULL_TREE, NULL_TREE, label);
> +   gfc_add_expr_to_block (&body, tmp);
> +   real_from_integer (&real, TYPE_MODE (type), 0, SIGNED);
> +   break;

Do we need a default label? It’s not like this is a more likely case than 
anything else…


Thanks,
FX

Re: [PATCH] fortran: Expand ieee_arithmetic module's ieee_class inline [PR106579]

2022-08-15 Thread Jakub Jelinek via Fortran
On Mon, Aug 15, 2022 at 09:47:45PM +0200, FX wrote:
> Question to the Fortran maintainers:
> 
> Do you know if the standard allows IEEE_CLASS and IEEE_VALUE to be used as 
> procedure pointers? I think not, because they do not follow (in F2008) the 
> standard constraint C729 / R740.
> 
> If so, we need to keep these functions implementations in libgfortran for now 
> (for ABI compatibility) but can remove them at the next breakage. Is one 
> planned? Where is this tracked, is it still at 
> https://gcc.gnu.org/wiki/LibgfortranAbiCleanup or do we have another place 
> (e.g. in bugzilla)?

Both are elemental generic procedures, and we have
Procedure pointer %qs at %L shall not be elemental
and
Interface %qs at %L may not be generic
errors for these 2 cases (trying to create procedure
pointer to elemental and trying to create procedure
pointer to generic procedure).

Jakub



Re: [PATCH] fortran: Expand ieee_arithmetic module's ieee_value inline [PR106579]

2022-08-15 Thread Jakub Jelinek via Fortran
On Mon, Aug 15, 2022 at 10:00:02PM +0200, FX wrote:
> I have two questions, on this and the ieee_class patch:
> 
> 
> > +  tree type = TREE_TYPE (arg);
> > +  gcc_assert (TREE_CODE (type) == RECORD_TYPE);
> > +  tree field = NULL_TREE;
> > +  for (tree f = TYPE_FIELDS (type); f != NULL_TREE; f = DECL_CHAIN (f))
> > +if (TREE_CODE (f) == FIELD_DECL)
> > +  {
> > +   gcc_assert (field == NULL_TREE);
> > +   field = f;
> > +  }
> > +  gcc_assert (field);
> 
> Why looping over fields? The class type is a simple type with only one member 
> (and it should be an integer, we can assert that).

I wanted to make sure it has exactly one field.
The ieee_arithmetic.F90 module in libgfortran surely does that, but I've
been worrying about some user overriding that module with something
different.  At least in the C/C++ FEs we had in the past tons of bugs filed
for when some builtin made some assumptions about some headers and data
types in those and then somebody running a testcase that violated that.
Even failed gcc_assertion isn't the best answer to that, ideally one would
verify that upfront and then either error, sorry or ignore the call (leave
it as is).  In that last case, it might be better to do the check on the
gfortran FE types instead of trees (verify the return type or second
argument type is ieee_class_type derived type with a single integral
(hidden) field).

> > +   case IEEE_POSITIVE_ZERO:
> > + /* Make this also the default: label.  */
> > + label = gfc_build_label_decl (NULL_TREE);
> > + tmp = build_case_label (NULL_TREE, NULL_TREE, label);
> > + gfc_add_expr_to_block (&body, tmp);
> > + real_from_integer (&real, TYPE_MODE (type), 0, SIGNED);
> > + break;
> 
> Do we need a default label? It’s not like this is a more likely case than 
> anything else…

The libgfortran version had default: label:
switch (type) \
{ \
  case IEEE_SIGNALING_NAN: \
return __builtin_nans ## SUFFIX (""); \
  case IEEE_QUIET_NAN: \
return __builtin_nan ## SUFFIX (""); \
  case IEEE_NEGATIVE_INF: \
return - __builtin_inf ## SUFFIX (); \
  case IEEE_NEGATIVE_NORMAL: \
return -42; \
  case IEEE_NEGATIVE_DENORMAL: \
return -(GFC_REAL_ ## TYPE ## _TINY) / 2; \
  case IEEE_NEGATIVE_ZERO: \
return -(GFC_REAL_ ## TYPE) 0; \
  case IEEE_POSITIVE_ZERO: \
return 0; \
  case IEEE_POSITIVE_DENORMAL: \
return (GFC_REAL_ ## TYPE ## _TINY) / 2; \
  case IEEE_POSITIVE_NORMAL: \
return 42; \
  case IEEE_POSITIVE_INF: \
return __builtin_inf ## SUFFIX (); \
  default: \
return 0; \
} \
and I've tried to translate that into what it generates.
There is at least the IEEE_OTHER_VALUE (aka 0) value
that isn't covered in the switch, but it is just an integer
under the hood, so it could have any other value.

Jakub