[gcc(refs/users/meissner/heads/work221-float)] Add initial bfloat16 support.

Michael Meissner via Gcc-cvs Fri, 12 Sep 2025 00:45:52 -0700

https://gcc.gnu.org/g:00a1e92a0309b61912c67f6449802944b428724a


commit 00a1e92a0309b61912c67f6449802944b428724a
Author: Michael Meissner <[email protected]>
Date:   Tue Sep 9 15:27:37 2025 -0400

    Add initial bfloat16 support.
    
    2025-09-09  Michael Meissner  <[email protected]>
    
    gcc/
    
            * config/rs6000/altivec.md (VM): Add initial bfloat16 support.
            (VM2): Likewise.
            (VI_char): Likewise.
            (VI_scalar): Likewise.
            (VI_unit): Likewise.
            (VU_char): Likewise.
            * config/rs6000/predicates.md (easy_fp_constant): Likewise.
            (fp16_xxspltiw_constant): Likewise.
            * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Likewise.
            (rs6000_init_builtins): Likewise.
            * config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Likewise.
            * config/rs6000/rs6000-modes.def (BFmode): Likewise.
            * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached):
            Likewise.
            (rs6000_modes_tieable_p): Likewise.
            (rs6000_debug_reg_global): Likewise.
            (rs6000_setup_reg_addr_masks): Likewise.
            (rs6000_init_hard_regno_mode_ok): Likewise.
            (rs6000_secondary_reload_simple_move): Likewise.
            (rs6000_preferred_reload_class): Likewise.
            (rs6000_can_change_mode_class): Likewise.
            (rs6000_function_value): Likewise.
            (rs6000_scalar_mode_supported_p): Likewise.
            (constant_fp_to_128bit_vector): Likewise.
            (constant_generates_xxspltiw): Likewise.
            * config/rs6000/rs6000.h (TARGET_BFLOAT16): Likewise.
            (FP16_SCALAR_P): Likewise.
            * config/rs6000/rs6000.md (FMOVE128_GPR): Likewise.
            (RELOAD): Likewise.
            (FP16): Likewise.
            (CONVERT_FP16): Likewise.
            (extendhf<mode>2): Likewise.
            (trunc<mode>hf2): Likewise.
            (mov<mode>, FP16 iterator): Likewise.
            (mov<mode>_xxspltiw): Likewise.
            (mov<mode>_internal): Fix load/store instructions.  Add bfloat16
            support.
            * config/rs6000/rs6000.opt (-mfloat16-gpr-args): Rename from
            -mieee16-gpr-args.
            * config/rs6000/vector.md (VEC_L): Add initial bfloat16 support.
            (VEC_M): Likewise.
            (VEC_E): Likewise.
            (VEC_base): Likewise.
            (VEC_base_l): Likewise.
            * config/rs6000/vsx.md (V8HI_V8HF): Likewise.
            (VSX_L): Likewise.
            (VSX_M): Likewise.
            (VSX_XXBR): Likewise.
            (VSm): Likewise.
            (VSr): Likewise.
            (VSisa): Likewise.
            (??r): Likewise.
            (nW): Likewise.
            (VSv): Likewise.
            (VM3): Likewise.
            (VM3_char): Likewise.
            (vsx_extract_<mode>_store_p9): Likewise.
            (*vsx_extract_<mode>_p8): Likewise.

Diff:
---
 gcc/config/rs6000/altivec.md        |  6 +++
 gcc/config/rs6000/predicates.md     | 10 ++---
 gcc/config/rs6000/rs6000-builtin.cc | 14 +++++++
 gcc/config/rs6000/rs6000-call.cc    |  2 +-
 gcc/config/rs6000/rs6000-modes.def  |  3 ++
 gcc/config/rs6000/rs6000.cc         | 77 +++++++++++++++++++++++++++----------
 gcc/config/rs6000/rs6000.h          |  9 ++++-
 gcc/config/rs6000/rs6000.md         | 66 ++++++++++++++++---------------
 gcc/config/rs6000/rs6000.opt        |  6 +--
 gcc/config/rs6000/vector.md         |  5 +++
 gcc/config/rs6000/vsx.md            | 18 ++++++++-
 11 files changed, 153 insertions(+), 63 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index b6f92a71f963..fb960f7ba966 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -191,6 +191,7 @@
 ;; otherwise handled by altivec (v2df, v2di, ti)
 (define_mode_iterator VM [V4SI
                          V8HI
+                         V8BF
                          V8HF
                          V16QI
                          V4SF
@@ -204,6 +205,7 @@
 ;; Like VM, except don't do TImode
 (define_mode_iterator VM2 [V4SI
                           V8HI
+                          V8BF
                           V8HF
                           V16QI
                           V4SF
@@ -227,15 +229,18 @@
 (define_mode_attr VI_char [(V2DI "d")
                           (V4SI "w")
                           (V8HI "h")
+                          (V8BF "h")
                           (V8HF "h")
                           (V16QI "b")])
 (define_mode_attr VI_scalar [(V2DI "DI")
                             (V4SI "SI")
                             (V8HI "HI")
+                            (V8BF "BF")
                             (V8HF "HF")
                             (V16QI "QI")])
 (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
                           (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
+                          (V8BF "VECTOR_UNIT_ALTIVEC_P (V8BFmode)")
                           (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)")
                           (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
                           (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
@@ -251,6 +256,7 @@
 (define_mode_attr VU_char [(V2DI "w")
                           (V4SI "h")
                           (V8HI "b")
+                          (V8BF "b")
                           (V8HF "b")])
 
 ;; Vector negate
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 2a4b38838d20..44a3fa003d36 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -602,8 +602,8 @@
     return 1;
 
   /* Power9 needs to load HFmode constants from memory, Power10 can use
-     XXSPLTIW.  */
-  if (mode == HFmode && !TARGET_POWER10)
+     XXSPLTIW for HFmode or BFmode constants.  */
+  if (FP16_SCALAR_P (mode) && !TARGET_POWER10)
     return 0;
 
   /* Constants that can be generated with ISA 3.1 instructions are easy.  */
@@ -2172,11 +2172,11 @@
        (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op)))
                    == SUBREG_BYTE (op)")))
 
-;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW.
-(define_predicate "ieee16_xxspltiw_constant"
+;; Return 1 if this is a HFmode/BFmode constant that can be loaded with XXSPLTIW.
+(define_predicate "fp16_xxspltiw_constant"
   (match_code "const_double")
 {
-  if (!TARGET_POWER10 || mode != HFmode)
+  if (!TARGET_POWER10 || !FP16_SCALAR_P (mode))
     return false;
 
   vec_const_128bit_type vsx_const;
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 05a730a8fdca..46c781b82568 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -493,6 +493,8 @@ const char *rs6000_type_string (tree type_node)
     return "_Float128";
   else if (type_node == float16_type_node)
     return "_Float16";
+  else if (type_node == bfloat16_type_node)
+    return "__bfloat16";
   else if (type_node == vector_pair_type_node)
     return "__vector_pair";
   else if (type_node == vector_quad_type_node)
@@ -758,6 +760,18 @@ rs6000_init_builtins (void)
   else
     ieee128_float_type_node = NULL_TREE;
 
+  /* __bfloat16 support.  */
+  if (TARGET_BFLOAT16)
+    {
+      bfloat16_type_node = make_node (REAL_TYPE);
+      TYPE_PRECISION (bfloat16_type_node) = 16;
+      SET_TYPE_MODE (bfloat16_type_node, BFmode);
+      layout_type (bfloat16_type_node);
+      t = build_qualified_type (bfloat16_type_node, TYPE_QUAL_CONST);
+      lang_hooks.types.register_builtin_type (bfloat16_type_node,
+                                             "__bfloat16");
+    }
+
   /* Vector pair and vector quad support.  */
   vector_pair_type_node = make_node (OPAQUE_TYPE);
   SET_TYPE_MODE (vector_pair_type_node, OOmode);
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index 3872d742d159..39466f969e47 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -86,7 +86,7 @@
   (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE)                       \
    && (CUM)->fregno <= FP_ARG_MAX_REG                          \
    && TARGET_HARD_FLOAT                                                \
-   && ((MODE) != HFmode || !TARGET_IEEE16_GPR_ARGS))
+   && (FP16_SCALAR_P (MODE) && !TARGET_FLOAT16_GPR_ARGS))
 
 
 /* Nonzero if we can use an AltiVec register to pass this arg.  */
diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def
index 04dc1d8c9194..28eb389787ff 100644
--- a/gcc/config/rs6000/rs6000-modes.def
+++ b/gcc/config/rs6000/rs6000-modes.def
@@ -48,6 +48,9 @@ FLOAT_MODE (IF, 16, ibm_extended_format);
 /* Explicit IEEE 16-bit floating point.  */
 FLOAT_MODE (HF, 2, ieee_half_format);
 
+/* Arm (Google Brain) 16-bit floating point (bfloat16).  */
+FLOAT_MODE (BF, 2, arm_bfloat_half_format);
+
 /* Add any extra modes needed to represent the condition code.
 
    For the RS/6000, we need separate modes when unsigned (logical) comparisons
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 7a6b16d30866..624fe7138703 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -1900,7 +1900,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
       if (ALTIVEC_REGNO_P (regno))
        {
          if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p
-             && mode != HFmode)
+             && !FP16_SCALAR_P (mode))
            return 0;
 
          return ALTIVEC_REGNO_P (last_regno);
@@ -1933,7 +1933,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
            return 1;
 
          if (TARGET_P9_VECTOR
-             && (mode == QImode || mode == HImode || mode == HFmode))
+             && (mode == QImode || mode == HImode || FP16_SCALAR_P (mode)))
            return 1;
        }
 
@@ -1992,7 +1992,7 @@ rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
 {
   if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
       || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode
-      || mode1 == HFmode || mode2 == HFmode)
+      || FP16_SCALAR_P (mode1) || FP16_SCALAR_P (mode2))
     return mode1 == mode2;
 
   if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
@@ -2258,6 +2258,7 @@ rs6000_debug_reg_global (void)
     DImode,
     TImode,
     PTImode,
+    BFmode,
     HFmode,
     SFmode,
     DFmode,
@@ -2279,6 +2280,7 @@ rs6000_debug_reg_global (void)
     V8SImode,
     V4DImode,
     V2TImode,
+    V8BFmode,
     V8HFmode,
     V4SFmode,
     V2DFmode,
@@ -2640,11 +2642,12 @@ rs6000_setup_reg_addr_masks (void)
         addressing on power7 and above, since we want to use the LFIWZX and
         STFIWZX instructions to load it.
 
-        Never allow offset addressing for HFmode, since it is expected that
-        16-bit floating point should always go into the vector registers and
-        we only have indexed and indirect 16-bit loads to VSR registers.  */
+        Never allow offset addressing for HFmode/BFmode, since it is expected
+        that 16-bit floating point should always go into the vector registers
+        and we only have indexed and indirect 16-bit loads to VSR
+        registers.  */
       bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK)
-                            || m == HFmode);
+                            || FP16_SCALAR_P (m));
 
       any_addr_mask = 0;
       for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
@@ -2694,6 +2697,7 @@ rs6000_setup_reg_addr_masks (void)
                  && (m != E_DFmode || !TARGET_VSX)
                  && (m != E_SFmode || !TARGET_P8_VECTOR)
                  && m != E_HFmode
+                 && m != E_BFmode
                  && !small_int_vsx_p)
                {
                  addr_mask |= RELOAD_REG_PRE_INCDEC;
@@ -2949,6 +2953,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[V8HFmode] = align64;
     }
 
+  if (TARGET_BFLOAT16)
+    {
+      rs6000_vector_unit[V8BFmode] = VECTOR_VSX;
+      rs6000_vector_mem[V8BFmode] = VECTOR_VSX;
+      rs6000_vector_align[V8BFmode] = align64;
+    }
+
   /* DFmode, see if we want to use the VSX unit.  Memory is handled
      differently, so don't set rs6000_vector_mem.  */
   if (TARGET_VSX)
@@ -2971,13 +2982,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
       rs6000_vector_align[TImode] = align64;
     }
 
-  /* Allow HFmode in VSX register and set the VSX memory macros.  */
+  /* Allow HFmode/BFmode in VSX register and set the VSX memory macros.  */
   if (TARGET_IEEE16)
     {
-      rs6000_vector_mem[HImode] = VECTOR_VSX;
+      rs6000_vector_mem[HFmode] = VECTOR_VSX;
       rs6000_vector_align[HFmode] = 16;
     }
 
+  if (TARGET_BFLOAT16)
+    {
+      rs6000_vector_mem[BFmode] = VECTOR_VSX;
+      rs6000_vector_align[BFmode] = 16;
+    }
+
   /* Add support for vector pairs and vector quad registers.  */
   if (TARGET_MMA)
     {
@@ -3037,6 +3054,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
          reg_addr[V16QImode].reload_load  = CODE_FOR_reload_v16qi_di_load;
          reg_addr[V8HImode].reload_store  = CODE_FOR_reload_v8hi_di_store;
          reg_addr[V8HImode].reload_load   = CODE_FOR_reload_v8hi_di_load;
+         reg_addr[V8BFmode].reload_store  = CODE_FOR_reload_v8bf_di_store;
+         reg_addr[V8BFmode].reload_load   = CODE_FOR_reload_v8bf_di_load;
          reg_addr[V8HFmode].reload_store  = CODE_FOR_reload_v8hf_di_store;
          reg_addr[V8HFmode].reload_load   = CODE_FOR_reload_v8hf_di_load;
          reg_addr[V4SImode].reload_store  = CODE_FOR_reload_v4si_di_store;
@@ -3074,6 +3093,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[HFmode].reload_load  = CODE_FOR_reload_hf_di_load;
            }
 
+         if (TARGET_BFLOAT16)
+           {
+             reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_di_store;
+             reg_addr[BFmode].reload_load  = CODE_FOR_reload_bf_di_load;
+           }
+
          /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
             available.  */
          if (TARGET_NO_SDMODE_STACK)
@@ -3096,6 +3121,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[V2DImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv2di;
              reg_addr[V4SFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4sf;
              reg_addr[V4SImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv4si;
+             reg_addr[V8BFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8bf;
              reg_addr[V8HFmode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hf;
              reg_addr[V8HImode].reload_gpr_vsx  = CODE_FOR_reload_gpr_from_vsxv8hi;
              reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
@@ -3107,6 +3133,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[V2DImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv2di;
              reg_addr[V4SFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4sf;
              reg_addr[V4SImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv4si;
+             reg_addr[V8BFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8bf;
              reg_addr[V8HFmode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hf;
              reg_addr[V8HImode].reload_vsx_gpr  = CODE_FOR_reload_vsx_from_gprv8hi;
              reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
@@ -3145,6 +3172,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
          reg_addr[V2DImode].reload_load   = CODE_FOR_reload_v2di_si_load;
          reg_addr[V1TImode].reload_store  = CODE_FOR_reload_v1ti_si_store;
          reg_addr[V1TImode].reload_load   = CODE_FOR_reload_v1ti_si_load;
+         reg_addr[V8BFmode].reload_store  = CODE_FOR_reload_v8bf_si_store;
+         reg_addr[V8BFmode].reload_load   = CODE_FOR_reload_v8bf_si_load;
          reg_addr[V8HFmode].reload_store  = CODE_FOR_reload_v8hf_si_store;
          reg_addr[V8HFmode].reload_load   = CODE_FOR_reload_v8hf_si_load;
          reg_addr[V4SFmode].reload_store  = CODE_FOR_reload_v4sf_si_store;
@@ -3176,6 +3205,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
              reg_addr[HFmode].reload_load  = CODE_FOR_reload_hf_si_load;
            }
 
+         if (TARGET_BFLOAT16)
+           {
+             reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_si_store;
+             reg_addr[BFmode].reload_load  = CODE_FOR_reload_bf_si_load;
+           }
+
          /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
             available.  */
          if (TARGET_NO_SDMODE_STACK)
@@ -12706,7 +12741,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
       && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
          || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
     {
-      if (TARGET_IEEE16 && mode == HFmode)
+      if (FP16_SCALAR_P (mode))
        return true;
 
       if (TARGET_POWERPC64)
@@ -12727,7 +12762,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
            return true;
 
          if (TARGET_P9_VECTOR
-             && (mode == HImode || mode == QImode || mode == HFmode))
+             && (mode == HImode || mode == QImode || FP16_SCALAR_P (mode)))
            return true;
        }
 
@@ -13497,9 +13532,9 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
          || mode_supports_dq_form (mode))
        return rclass;
 
-      /* IEEE 16-bit don't support offset addressing, but they can go in any
-        floating point/vector register.  */
-      if (mode == HFmode && TARGET_IEEE16)
+      /* IEEE 16-bit and __bfloat16 don't support offset addressing, but they
+        can go in any floating point/vector register.  */
+      if (FP16_SCALAR_P (mode))
        return rclass;
 
       /* If this is a scalar floating point value and we don't have D-form
@@ -13731,7 +13766,7 @@ rs6000_can_change_mode_class (machine_mode from,
   unsigned from_size = GET_MODE_SIZE (from);
   unsigned to_size = GET_MODE_SIZE (to);
 
-  if (from == HFmode || to == HFmode)
+  if (FP16_SCALAR_P (from) || FP16_SCALAR_P (to))
     return from_size == to_size;
 
   if (from_size != to_size)
@@ -24075,7 +24110,7 @@ rs6000_function_value (const_tree valtype,
   if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
     /* _Decimal128 must use an even/odd register pair.  */
     regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
-  else if (mode == HFmode && TARGET_IEEE16_GPR_ARGS)
+  else if (FP16_SCALAR_P (mode) && TARGET_FLOAT16_GPR_ARGS)
     regno = GP_ARG_RETURN;
   else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
           && !FLOAT128_VECTOR_P (mode))
@@ -24349,8 +24384,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode)
     return default_decimal_float_supported_p ();
   else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
     return true;
-  else if (mode == HFmode)
-    return TARGET_IEEE16;
+  else if (FP16_SCALAR_P (mode))
+    return true;
   else
     return default_scalar_mode_supported_p (mode);
 }
@@ -28971,7 +29006,7 @@ constant_fp_to_128bit_vector (rtx op,
 
   /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so
      deal with it here.  */
-  if (mode == HFmode)
+  if (FP16_SCALAR_P (mode))
     {
       real_to_target (real_words, rtype, mode);
       unsigned char hi = (unsigned char) (real_words[0] >> 8);
@@ -29272,8 +29307,8 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
   if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
     return 0;
 
-  /* HFmode constants can always use XXSPLTIW.  */
-  if (vsx_const->mode == HFmode)
+  /* HFmode/BFmode constants can always use XXSPLTIW.  */
+  if (FP16_SCALAR_P (vsx_const->mode))
     return 1;
 
   if (!vsx_const->all_words_same)
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 31c1d8f613a6..7fc074f93905 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -567,8 +567,15 @@ extern int rs6000_vector_align[];
    below.  */
 #define RS6000_FN_TARGET_INFO_HTM 1
 
-/* Support for IEEE 16-bit floating point.  */
+/* Support for 16-bit floating point formats.  Power9 has instructions to
+   convert vector and scalar _Float16 formats, Power10 has instructions to
+   convert vector __bfloat16 formats.  */
 #define TARGET_IEEE16          TARGET_P9_VECTOR
+#define TARGET_BFLOAT16                TARGET_POWER10
+
+#define FP16_SCALAR_P(MODE)                                            \
+  (((MODE) == HFmode && TARGET_IEEE16)                                 \
+   || ((MODE) == BFmode && TARGET_BFLOAT16))
 
 /* Whether the various reciprocal divide/square root estimate instructions
    exist, and whether we should automatically generate code for the instruction
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a81758e62a02..c6b6d7e9ad54 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -552,6 +552,7 @@
 (define_mode_iterator FMOVE128_GPR [TI
                                    V16QI
                                    V8HI
+                                   V8BF
                                    V8HF
                                    V4SI
                                    V4SF
@@ -838,8 +839,8 @@
 
 ;; Reload iterator for creating the function to allocate a base register to
 ;; supplement addressing modes.
-(define_mode_iterator RELOAD [V16QI V8HI V8HF V4SI V2DI V4SF V2DF V1TI
-                             SF SD SI DF DD DI TI PTI KF IF TF HF
+(define_mode_iterator RELOAD [V16QI V8HI V8BF V8HF V4SI V2DI V4SF V2DF V1TI
+                             SF SD SI DF DD DI TI PTI KF IF TF BF HF
                              OO XO])
 
 ;; Iterate over smin, smax
@@ -858,9 +859,12 @@
                                     (SF "TARGET_P8_VECTOR")
                                     (DI "TARGET_POWERPC64")])
 
+;; Mode iterator for supported 16-bit floating point types.
+(define_mode_iterator FP16 [HF BF])
+
 ;; Mode iterator for floating point modes other than SF/DFmode that we
 ;; convert to/from _Float16 (HFmode) via DFmode.
-(define_mode_iterator HF_CONVERT [TF KF IF SD DD TD])
+(define_mode_iterator CONVERT_FP16 [TF KF IF SD DD TD])
 
 (include "darwin.md")
 
@@ -5876,8 +5880,8 @@
 ;; Use DFmode to convert to/from HFmode for floating point types other
 ;; than SF/DFmode.
 (define_expand "extendhf<mode>2"
-  [(set (match_operand:HF_CONVERT 0 "vsx_register_operand" "=wa")
-       (float_extend:HF_CONVERT
+  [(set (match_operand:CONVERT_FP16 0 "vsx_register_operand" "=wa")
+       (float_extend:CONVERT_FP16
         (match_operand:HF 1 "vsx_register_operand" "wa")))]
   "TARGET_IEEE16"
 {
@@ -5890,7 +5894,7 @@
 (define_expand "trunc<mode>hf2"
   [(set (match_operand:HF 0 "vsx_register_operand" "=wa")
        (float_truncate:HF
-        (match_operand:HF_CONVERT 1 "vsx_register_operand" "wa")))]
+        (match_operand:CONVERT_FP16 1 "vsx_register_operand" "wa")))]
   "TARGET_IEEE16"
 {
   rtx df_tmp = gen_reg_rtx (DFmode);
@@ -8196,58 +8200,60 @@
                 p9v,       *,         *,         *")])
 
 
-(define_expand "movhf"
-  [(set (match_operand:HF 0 "nonimmediate_operand")
-       (match_operand:HF 1 "any_operand"))]
-  "TARGET_IEEE16"
+;; 16-bit floating point formats.  HFmode is _Float16 and BFmode is __bfloat16.
+(define_expand "mov<mode>"
+  [(set (match_operand:FP16 0 "nonimmediate_operand")
+       (match_operand:FP16 1 "any_operand"))]
+  "FP16_SCALAR_P (<MODE>mode)"
 {
   if (MEM_P (operands[0]) && !REG_P (operands[1]))
-    operands[1] = force_reg (HFmode, operands[1]);
+    operands[1] = force_reg (<MODE>mode, operands[1]);
 })
 
-;; On power10, we can load up HFmode constants with xxspltiw or pli.
-(define_insn "*movhf_xxspltiw"
-  [(set (match_operand:HF 0 "gpc_reg_operand" "=wa,r")
-       (match_operand:HF 1 "ieee16_xxspltiw_constant" "eP,eP"))]
-  "TARGET_IEEE16 && TARGET_POWER10 && TARGET_PREFIXED"
+;; On power10, we can load up HFmode/BFmode constants with xxspltiw or
+;; pli.
+(define_insn "*mov<mode>_xxspltiw"
+  [(set (match_operand:FP16 0 "gpc_reg_operand" "=wa,r")
+       (match_operand:FP16 1 "fp16_xxspltiw_constant" "eP,eP"))]
+  "FP16_SCALAR_P (<MODE>mode) && TARGET_POWER10 && TARGET_PREFIXED"
 {
   rtx op1 = operands[1];
   const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1);
   long real_words[VECTOR_128BIT_WORDS];
 
-  real_to_target (real_words, rtype, HFmode);
+  real_to_target (real_words, rtype, <MODE>mode);
   operands[2] = GEN_INT (real_words[0]);
-  return (vsx_register_operand (operands[0], HFmode)
+  return (vsx_register_operand (operands[0], <MODE>mode)
          ? "xxspltiw %x0,%2"
          : "li %0,%2");
 }
   [(set_attr "type" "vecperm,*")
    (set_attr "prefixed" "yes")])
 
-(define_insn "*movhf_internal"
-  [(set (match_operand:HF 0 "nonimmediate_operand"
+(define_insn "*mov<mode>_internal"
+  [(set (match_operand:FP16 0 "nonimmediate_operand"
                      "=wa,       wa,       Z,         r,          r,
                       m,         r,        wa,        wa,         r")
 
-       (match_operand:HF 1 "any_operand"
+       (match_operand:FP16 1 "any_operand"
                      "wa,        Z,        wa,        r,          m,
                       r,         wa,       r,         j,          j"))]
-  "TARGET_IEEE16
-   && (gpc_reg_operand (operands[0], HFmode)
-       || gpc_reg_operand (operands[1], HFmode))"
+  "FP16_SCALAR_P (<MODE>mode)
+   && (gpc_reg_operand (operands[0], <MODE>mode)
+       || gpc_reg_operand (operands[1], <MODE>mode))"
   "@
    xxlor %x0,%x1,%x1
-   lxsiwzx %x0,%y1
-   stxsiwx %x1,%y0
+   lxsihzx %x0,%y1
+   stxsihx %x1,%y0
    mr %0,%1
-   lwz%U1%X1 %0,%1
-   stw%U0%X0 %1,%0
+   lhz%U1%X1 %0,%1
+   sth%U0%X0 %1,%0
    mfvsrwz %0,%x1
    mtvsrwz %x0,%1
    xxspltib %x0,0
    li %0,0"
-  [(set_attr "type" "vecsimple, fpload,    fpstore,   *,         load,
-                       store,     mtvsr,     mfvsr,     vecsimple, *")])
+  [(set_attr "type"  "vecsimple, fpload,    fpstore,   *,         load,
+                      store,     mtvsr,     mfvsr,     vecsimple, *")])
 
 
 
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 5f81d3426a2c..c248f6c890a1 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -642,9 +642,9 @@ mieee128-constant
 Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save
 Generate (do not generate) code that uses the LXVKQ instruction.
 
-mieee16-gpr-args
-Target Undocumented Var(TARGET_IEEE16_GPR_ARGS) Init(1) Save
-Pass _Float16 in GPR registers.
+mfloat16-gpr-args
+Target Undocumented Var(TARGET_FLOAT16_GPR_ARGS) Init(1) Save
+Pass and return _Float16 and __bfloat16 in GPR registers.
 
 ; Documented parameters
 
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index ed427ea05e9b..0a9f092c1951 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -52,6 +52,7 @@
 ;; Vector logical modes
 (define_mode_iterator VEC_L [V16QI
                             V8HI
+                            V8BF
                             V8HF
                             V4SI
                             V2DI
@@ -68,6 +69,7 @@
                             V8HI
                             V4SI
                             V2DI
+                            V8BF
                             V8HF
                             V4SF
                             V2DF
@@ -85,6 +87,7 @@
                             V8HI
                             V4SI
                             V2DI
+                            V8BF
                             V8HF
                             V4SF
                             V2DF])
@@ -100,6 +103,7 @@
                            (V8HI  "HI")
                            (V4SI  "SI")
                            (V2DI  "DI")
+                           (V8BF  "BF")
                            (V8HF  "HF")
                            (V4SF  "SF")
                            (V2DF  "DF")
@@ -111,6 +115,7 @@
                              (V8HI  "hi")
                              (V4SI  "si")
                              (V2DI  "di")
+                             (V8BF  "bf")
                              (V8HF  "hf")
                              (V4SF  "sf")
                              (V2DF  "df")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 09b4d53813ba..be65b309c63a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -48,11 +48,13 @@
 
 ;; Iterator for 8 element vectors
 (define_mode_iterator V8HI_V8HF [V8HI
+                                (V8BF "TARGET_BFLOAT16")
                                 (V8HF "TARGET_IEEE16")])
 
 ;; Iterator for logical types supported by VSX
 (define_mode_iterator VSX_L [V16QI
                             V8HI
+                            (V8BF      "TARGET_BFLOAT16")
                             (V8HF      "TARGET_IEEE16")
                             V4SI
                             V2DI
@@ -66,6 +68,7 @@
 ;; Iterator for memory moves.
 (define_mode_iterator VSX_M [V16QI
                             V8HI
+                            (V8BF      "TARGET_BFLOAT16")
                             (V8HF      "TARGET_IEEE16")
                             V4SI
                             V2DI
@@ -77,6 +80,7 @@
                             TI])
 
 (define_mode_attr VSX_XXBR  [(V8HI  "h")
+                            (V8BF  "h")
                             (V8HF  "h")
                             (V4SI  "w")
                             (V4SF  "w")
@@ -87,6 +91,7 @@
 ;; Map into the appropriate load/store name based on the type
 (define_mode_attr VSm  [(V16QI "vw4")
                        (V8HI  "vw4")
+                       (V8BF  "vw4")
                        (V8HF  "vw4")
                        (V4SI  "vw4")
                        (V4SF  "vw4")
@@ -101,6 +106,7 @@
 ;; Map the register class used
 (define_mode_attr VSr  [(V16QI "v")
                         (V8HI  "v")
+                        (V8BF  "v")
                         (V8HF  "v")
                         (V4SI  "v")
                         (V4SF  "wa")
@@ -117,6 +123,7 @@
 ;; What value we need in the "isa" field, to make the IEEE QP float work.
 (define_mode_attr VSisa        [(V16QI "*")
                         (V8HI  "*")
+                        (V8BF  "p10")
                         (V8HF  "p9v")
                         (V4SI  "*")
                         (V4SF  "*")
@@ -134,6 +141,7 @@
 ;; integer modes.
 (define_mode_attr ??r  [(V16QI "??r")
                         (V8HI  "??r")
+                        (V8BF  "??r")
                         (V8HF  "??r")
                         (V4SI  "??r")
                         (V4SF  "??r")
@@ -147,6 +155,7 @@
 ;; A mode attribute used for 128-bit constant values.
 (define_mode_attr nW   [(V16QI "W")
                         (V8HI  "W")
+                        (V8BF  "W")
                         (V8HF  "W")
                         (V4SI  "W")
                         (V4SF  "W")
@@ -175,6 +184,7 @@
 ;; operation
 (define_mode_attr VSv  [(V16QI "v")
                         (V8HI  "v")
+                        (V8BF  "v")
                         (V8HF  "v")
                         (V4SI  "v")
                         (V4SF  "v")
@@ -409,6 +419,7 @@
 ;; Like VM2 in altivec.md, just do char, short, int, long, float and double
 (define_mode_iterator VM3 [V4SI
                           V8HI
+                          V8BF
                           V8HF
                           V16QI
                           V4SF
@@ -421,6 +432,7 @@
 (define_mode_attr VM3_char [(V2DI "d")
                           (V4SI "w")
                           (V8HI "h")
+                          (V8BF "h")
                           (V8HF "h")
                           (V16QI "b")
                           (V2DF  "d")
@@ -4095,7 +4107,8 @@
   if (which_alternative == 0
       && ((<MODE>mode == V16QImode
           && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8))
-         || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode)
+         || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode
+              || <MODE>mode == V8BFmode)
              && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 3 : 4))))
     {
       enum machine_mode dest_mode = GET_MODE (operands[0]);
@@ -4174,7 +4187,8 @@
       else
        vec_tmp = src;
     }
-  else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode)
+  else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode
+          || <MODE>mode == V8BFmode)
     {
       if (value != 3)
        emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));

[gcc(refs/users/meissner/heads/work221-float)] Add initial bfloat16 support.

Reply via email to