Re: AArch64 bfloat16 mangling

Jakub Jelinek via Gcc-patches Fri, 10 Mar 2023 03:30:54 -0800

On Fri, Mar 10, 2023 at 08:43:02AM +0000, Richard Sandiford wrote:
> > So, either __bf16 should be also extended floating-point type
> > like decltype (0.0bf16) and std::bfloat16_t and in that case
> > it is fine if it mangles u6__bf16, or __bf16 will be a distinct
> > type from the latter two,
> 
> Yeah, the former is what I meant.  The intention is that __bf16 and
> std::bfloat16_t are the same type, not distinct types.


Ok, in that case here is a totally untested patch on top of
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/606398.html
which is also needed (for aarch64, of course, the already-acked i386
parts of that patch don't matter, but its 2 new libgcc files and its
optabs change are needed).

The reason why __floatdibf and __floatundibf are needed on aarch64
and not on x86 is that the latter has optabs for DI -> XF conversions
and so for DI -> BF uses DI -> XF -> BF, where the first conversion
doesn't round/truncate anything.  On aarch64, by contrast, the DI -> TF
conversion (TF being the narrowest mode which can hold all DI values
exactly) is done using a libcall, and so GCC emits direct DI -> BF
conversions.

Will test it momentarily (including the patch it depends on):

2023-03-10  Jakub Jelinek  <ja...@redhat.com>

gcc/
        * config/aarch64/aarch64.h (aarch64_bf16_type_node): Remove.
        (aarch64_bf16_ptr_type_node): Adjust comment.
        * config/aarch64/aarch64.cc (aarch64_gimplify_va_arg_expr): Use
        bfloat16_type_node rather than aarch64_bf16_type_node.
        (aarch64_libgcc_floating_mode_supported_p,
        aarch64_scalar_mode_supported_p): Also support BFmode.
        (aarch64_invalid_conversion, aarch64_invalid_unary_op): Remove.
        (aarch64_invalid_binary_op): Remove BFmode related rejections.
        (TARGET_INVALID_CONVERSION, TARGET_INVALID_UNARY_OP): Don't redefine.
        * config/aarch64/aarch64-builtins.cc (aarch64_bf16_type_node): Remove.
        (aarch64_int_or_fp_type): Use bfloat16_type_node rather than
        aarch64_bf16_type_node.
        (aarch64_init_simd_builtin_types,
        aarch64_init_simd_builtin_scalar_types): Likewise.
        (aarch64_init_bf16_types): Likewise.  Don't create bfloat16_type_node,
        which is created in tree.cc already.
        * config/aarch64/aarch64-sve-builtins.def (svbfloat16_t): Use
        bfloat16_type_node rather than aarch64_bf16_type_node.
gcc/testsuite/
        * gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c:
        Don't expect one __bf16 related error.
libgcc/
        * config/aarch64/t-softfp (softfp_extensions): Add bfsf.
        (softfp_truncations): Add tfbf dfbf sfbf hfbf.
        (softfp_extras): Add floatdibf floatundibf floattibf floatuntibf.
        * config/aarch64/libgcc-softfp.ver (GCC_13.0.0): Export
        __extendbfsf2, __trunc{s,d,t,h}fbf2 and __float{,un}{d,t}ibf.
        * config/aarch64/sfp-machine.h (_FP_NANFRAC_B, _FP_NANSIGN_B): Define.
        * soft-fp/floatundibf.c: New file.
        * soft-fp/floatdibf.c: New file.
libstdc++-v3/
        * config/abi/pre/gnu.ver (CXXABI_1.3.14): Also export __bf16 tinfos
        if it isn't mangled as DF16b but u6__bf16.

--- gcc/config/aarch64/aarch64.h.jj     2023-01-16 11:52:15.923736422 +0100
+++ gcc/config/aarch64/aarch64.h        2023-03-10 11:49:35.941436327 +0100
@@ -1237,9 +1237,8 @@ extern const char *aarch64_rewrite_mcpu
 extern GTY(()) tree aarch64_fp16_type_node;
 extern GTY(()) tree aarch64_fp16_ptr_type_node;
 
-/* This type is the user-visible __bf16, and a pointer to that type.  Defined
-   in aarch64-builtins.cc.  */
-extern GTY(()) tree aarch64_bf16_type_node;
+/* Pointer to the user-visible __bf16 type.  __bf16 itself is generic
+   bfloat16_type_node.  Defined in aarch64-builtins.cc.  */
 extern GTY(()) tree aarch64_bf16_ptr_type_node;
 
 /* The generic unwind code in libgcc does not initialize the frame pointer.
--- gcc/config/aarch64/aarch64-builtins.cc.jj   2023-01-16 11:52:15.913736570 +0100
+++ gcc/config/aarch64/aarch64-builtins.cc      2023-03-10 11:49:35.942436313 +0100
@@ -918,7 +918,6 @@ tree aarch64_fp16_type_node = NULL_TREE;
 tree aarch64_fp16_ptr_type_node = NULL_TREE;
 
 /* Back-end node type for brain float (bfloat) types.  */
-tree aarch64_bf16_type_node = NULL_TREE;
 tree aarch64_bf16_ptr_type_node = NULL_TREE;
 
 /* Wrapper around add_builtin_function.  NAME is the name of the built-in
@@ -1010,7 +1009,7 @@ aarch64_int_or_fp_type (machine_mode mod
     case E_DFmode:
       return double_type_node;
     case E_BFmode:
-      return aarch64_bf16_type_node;
+      return bfloat16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -1124,8 +1123,8 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
 
   /* Init Bfloat vector types with underlying __bf16 type.  */
-  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
-  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+  aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
+  aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
 
   for (i = 0; i < nelts; i++)
     {
@@ -1197,7 +1196,7 @@ aarch64_init_simd_builtin_scalar_types (
                                             "__builtin_aarch64_simd_poly128");
   (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_aarch64_simd_ti");
-  (*lang_hooks.types.register_builtin_type) (aarch64_bf16_type_node,
+  (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
                                             "__builtin_aarch64_simd_bf");
   /* Unsigned integer types for various mode sizes.  */
   (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
@@ -1682,13 +1681,8 @@ aarch64_init_fp16_types (void)
 static void
 aarch64_init_bf16_types (void)
 {
-  aarch64_bf16_type_node = make_node (REAL_TYPE);
-  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
-  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
-  layout_type (aarch64_bf16_type_node);
-
-  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
-  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+  lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
+  aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
 }
 
 /* Pointer authentication builtins that will become NOP on legacy platform.
--- gcc/config/aarch64/aarch64.cc.jj    2023-02-08 18:40:20.779327223 +0100
+++ gcc/config/aarch64/aarch64.cc       2023-03-10 11:49:35.946436254 +0100
@@ -19858,7 +19858,7 @@ aarch64_gimplify_va_arg_expr (tree valis
          field_ptr_t = aarch64_fp16_ptr_type_node;
          break;
        case E_BFmode:
-         field_t = aarch64_bf16_type_node;
+         field_t = bfloat16_type_node;
          field_ptr_t = aarch64_bf16_ptr_type_node;
          break;
        case E_V2SImode:
@@ -26588,18 +26588,18 @@ aarch64_dwarf_poly_indeterminate_value (
 }
 
 /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
-   if MODE is HFmode, and punt to the generic implementation otherwise.  */
+   if MODE is [BH]Fmode, and punt to the generic implementation otherwise.  */
 
 static bool
 aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
 {
-  return (mode == HFmode
+  return ((mode == HFmode || mode == BFmode)
          ? true
          : default_libgcc_floating_mode_supported_p (mode));
 }
 
 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
-   if MODE is HFmode, and punt to the generic implementation otherwise.  */
+   if MODE is [BH]Fmode, and punt to the generic implementation otherwise.  */
 
 static bool
 aarch64_scalar_mode_supported_p (scalar_mode mode)
@@ -26607,7 +26607,7 @@ aarch64_scalar_mode_supported_p (scalar_
   if (DECIMAL_FLOAT_MODE_P (mode))
     return default_decimal_float_supported_p ();
 
-  return (mode == HFmode
+  return ((mode == HFmode || mode == BFmode)
          ? true
          : default_scalar_mode_supported_p (mode));
 }
@@ -27075,39 +27075,6 @@ aarch64_stack_protect_guard (void)
   return NULL_TREE;
 }
 
-/* Return the diagnostic message string if conversion from FROMTYPE to
-   TOTYPE is not allowed, NULL otherwise.  */
-
-static const char *
-aarch64_invalid_conversion (const_tree fromtype, const_tree totype)
-{
-  if (element_mode (fromtype) != element_mode (totype))
-    {
-      /* Do no allow conversions to/from BFmode scalar types.  */
-      if (TYPE_MODE (fromtype) == BFmode)
-       return N_("invalid conversion from type %<bfloat16_t%>");
-      if (TYPE_MODE (totype) == BFmode)
-       return N_("invalid conversion to type %<bfloat16_t%>");
-    }
-
-  /* Conversion allowed.  */
-  return NULL;
-}
-
-/* Return the diagnostic message string if the unary operation OP is
-   not permitted on TYPE, NULL otherwise.  */
-
-static const char *
-aarch64_invalid_unary_op (int op, const_tree type)
-{
-  /* Reject all single-operand operations on BFmode except for &.  */
-  if (element_mode (type) == BFmode && op != ADDR_EXPR)
-    return N_("operation not permitted on type %<bfloat16_t%>");
-
-  /* Operation allowed.  */
-  return NULL;
-}
-
 /* Return the diagnostic message string if the binary operation OP is
    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
 
@@ -27115,11 +27082,6 @@ static const char *
 aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
                           const_tree type2)
 {
-  /* Reject all 2-operand operations on BFmode.  */
-  if (element_mode (type1) == BFmode
-      || element_mode (type2) == BFmode)
-    return N_("operation not permitted on type %<bfloat16_t%>");
-
   if (VECTOR_TYPE_P (type1)
       && VECTOR_TYPE_P (type2)
       && !TYPE_INDIVISIBLE_P (type1)
@@ -27716,12 +27678,6 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_MANGLE_TYPE
 #define TARGET_MANGLE_TYPE aarch64_mangle_type
 
-#undef TARGET_INVALID_CONVERSION
-#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion
-
-#undef TARGET_INVALID_UNARY_OP
-#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op
-
 #undef TARGET_INVALID_BINARY_OP
 #define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op
 
--- gcc/config/aarch64/aarch64-sve-builtins.def.jj      2023-01-16 11:52:15.918736496 +0100
+++ gcc/config/aarch64/aarch64-sve-builtins.def 2023-03-10 11:49:35.970435904 +0100
@@ -61,7 +61,7 @@ DEF_SVE_MODE (u64offset, none, svuint64_
 DEF_SVE_MODE (vnum, none, none, vectors)
 
 DEF_SVE_TYPE (svbool_t, 10, __SVBool_t, boolean_type_node)
-DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, aarch64_bf16_type_node)
+DEF_SVE_TYPE (svbfloat16_t, 14, __SVBfloat16_t, bfloat16_type_node)
 DEF_SVE_TYPE (svfloat16_t, 13, __SVFloat16_t, aarch64_fp16_type_node)
 DEF_SVE_TYPE (svfloat32_t, 13, __SVFloat32_t, float_type_node)
 DEF_SVE_TYPE (svfloat64_t, 13, __SVFloat64_t, double_type_node)
--- gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c.jj   2020-01-31 19:18:02.603901390 +0100
+++ gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_bfloat16_opt_n_1.c      2023-03-10 12:13:46.754296831 +0100
@@ -18,7 +18,7 @@ f1 (svbool_t pg, svuint8_t u8, svuint16_
   svbfdot (f32, bf16, bf16);
   svbfdot (f32, 0, bf16); /* { dg-error {passing 'int' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
   svbfdot (f32, f32, bf16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svbfdot', which expects 'svbfloat16_t'} } */
-  svbfdot (f32, bf16, 0); /* { dg-error {invalid conversion to type 'bfloat16_t'} } */
+  svbfdot (f32, bf16, 0);
   svbfdot (f32, bf16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svbfdot', which expects 'svbfloat16_t'} } */
   svbfdot (f32, bf16, bf);
 }
--- libgcc/config/aarch64/t-softfp.jj   2022-11-14 13:35:34.527155682 +0100
+++ libgcc/config/aarch64/t-softfp      2023-03-10 12:19:58.668882041 +0100
@@ -1,9 +1,10 @@
 softfp_float_modes := tf
 softfp_int_modes := si di ti
-softfp_extensions := sftf dftf hftf
-softfp_truncations := tfsf tfdf tfhf
+softfp_extensions := sftf dftf hftf bfsf
+softfp_truncations := tfsf tfdf tfhf tfbf dfbf sfbf hfbf
 softfp_exclude_libgcc2 := n
-softfp_extras := fixhfti fixunshfti floattihf floatuntihf
+softfp_extras := fixhfti fixunshfti floattihf floatuntihf \
+                floatdibf floatundibf floattibf floatuntibf
 
 TARGET_LIBGCC2_CFLAGS += -Wno-missing-prototypes
 
--- libgcc/config/aarch64/libgcc-softfp.ver.jj  2023-01-16 11:52:16.633725959 +0100
+++ libgcc/config/aarch64/libgcc-softfp.ver     2023-03-10 12:11:44.144082714 +0100
@@ -26,3 +26,16 @@ GCC_11.0 {
   __mulhc3
   __trunctfhf2
 }
+
+%inherit GCC_13.0.0 GCC_11.0.0
+GCC_13.0.0 {
+  __extendbfsf2
+  __floatdibf
+  __floattibf
+  __floatundibf
+  __floatuntibf
+  __truncdfbf2
+  __truncsfbf2
+  __trunctfbf2
+  __trunchfbf2
+}
--- libgcc/config/aarch64/sfp-machine.h.jj      2023-01-16 11:52:16.633725959 +0100
+++ libgcc/config/aarch64/sfp-machine.h 2023-03-10 11:49:35.985435685 +0100
@@ -43,10 +43,12 @@ typedef int __gcc_CMPtype __attribute__
 #define _FP_DIV_MEAT_Q(R,X,Y)  _FP_DIV_MEAT_2_udiv(Q,R,X,Y)
 
 #define _FP_NANFRAC_H          ((_FP_QNANBIT_H << 1) - 1)
+#define _FP_NANFRAC_B          ((_FP_QNANBIT_B << 1) - 1)
 #define _FP_NANFRAC_S          ((_FP_QNANBIT_S << 1) - 1)
 #define _FP_NANFRAC_D          ((_FP_QNANBIT_D << 1) - 1)
 #define _FP_NANFRAC_Q          ((_FP_QNANBIT_Q << 1) - 1), -1
 #define _FP_NANSIGN_H          0
+#define _FP_NANSIGN_B          0
 #define _FP_NANSIGN_S          0
 #define _FP_NANSIGN_D          0
 #define _FP_NANSIGN_Q          0
--- libgcc/soft-fp/floatundibf.c.jj     2023-03-10 12:10:40.143014939 +0100
+++ libgcc/soft-fp/floatundibf.c        2023-03-10 12:11:07.387618096 +0100
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   Convert a 64bit unsigned integer to bfloat16
+   Copyright (C) 2007-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "brain.h"
+
+BFtype
+__floatundibf (UDItype i)
+{
+  FP_DECL_EX;
+  FP_DECL_B (A);
+  BFtype a;
+
+  FP_INIT_ROUNDMODE;
+  FP_FROM_INT_B (A, i, DI_BITS, UDItype);
+  FP_PACK_RAW_B (a, A);
+  FP_HANDLE_EXCEPTIONS;
+
+  return a;
+}
--- libgcc/soft-fp/floatdibf.c.jj       2023-03-10 12:08:56.752520872 +0100
+++ libgcc/soft-fp/floatdibf.c  2023-03-10 12:09:56.934644288 +0100
@@ -0,0 +1,45 @@
+/* Software floating-point emulation.
+   Convert a 64bit signed integer to bfloat16
+   Copyright (C) 2007-2023 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Lesser General Public License restrictions do apply in
+   other respects; for example, they cover modification of the file,
+   and distribution when not linked into a combine executable.)
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "soft-fp.h"
+#include "brain.h"
+
+BFtype
+__floatdibf (DItype i)
+{
+  FP_DECL_EX;
+  FP_DECL_B (A);
+  BFtype a;
+
+  FP_INIT_ROUNDMODE;
+  FP_FROM_INT_B (A, i, DI_BITS, UDItype);
+  FP_PACK_RAW_B (a, A);
+  FP_HANDLE_EXCEPTIONS;
+
+  return a;
+}
--- libstdc++-v3/config/abi/pre/gnu.ver.jj      2023-03-07 18:57:13.135213321 +0100
+++ libstdc++-v3/config/abi/pre/gnu.ver 2023-03-10 11:52:27.870929478 +0100
@@ -2828,6 +2828,9 @@ CXXABI_1.3.14 {
     _ZTIDF[0-9]*[_bx];
     _ZTIPDF[0-9]*[_bx];
     _ZTIPKDF[0-9]*[_bx];
+    _ZTIu6__bf16;
+    _ZTIPu6__bf16;
+    _ZTIPKu6__bf16;
 
 } CXXABI_1.3.13;
 


        Jakub

Re: AArch64 bfloat16 mangling

Reply via email to