https://gcc.gnu.org/g:00a1e92a0309b61912c67f6449802944b428724a
commit 00a1e92a0309b61912c67f6449802944b428724a Author: Michael Meissner <[email protected]> Date: Tue Sep 9 15:27:37 2025 -0400 Add initial bfloat16 support. 2025-09-09 Michael Meissner <[email protected]> gcc/ * config/rs6000/altivec.md (VM): Add initial bfloat16 support. (VM2): Likewise. (VI_char): Likewise. (VI_scalar): Likewise. (VI_unit): Likewise. (VU_char): Likewise. * config/rs6000/predicates.md (easy_fp_constant): Likewise. (fp16_xxspltiw_constant): Rename from ieee16_xxspltiw_constant. Add initial bfloat16 support. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Likewise. (rs6000_init_builtins): Likewise. * config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Likewise. * config/rs6000/rs6000-modes.def (BFmode): Likewise. * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Likewise. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_can_change_mode_class): Likewise. (rs6000_function_value): Likewise. (rs6000_scalar_mode_supported_p): Likewise. (constant_fp_to_128bit_vector): Likewise. (constant_generates_xxspltiw): Likewise. * config/rs6000/rs6000.h (TARGET_BFLOAT16): Likewise. (FP16_SCALAR_P): Likewise. * config/rs6000/rs6000.md (FMOVE128_GPR): Likewise. (RELOAD): Likewise. (FP16): Likewise. (CONVERT_FP16): Rename from HF_CONVERT. (extendhf<mode>2): Add initial bfloat16 support. (trunc<mode>hf2): Likewise. (mov<mode>, FP16 iterator): Likewise. (mov<mode>_xxspltiw): Likewise. (mov<mode>_internal): Fix load/store instructions. Add bfloat16 support. * config/rs6000/rs6000.opt (-mfloat16-gpr-args): Rename from -mieee16-gpr-args. * config/rs6000/vector.md (VEC_L): Add initial bfloat16 support. (VEC_M): Likewise. (VEC_E): Likewise. (VEC_base): Likewise. (VEC_base_l): Likewise. * config/rs6000/vsx.md (V8HI_V8HF): Likewise. (VSX_L): Likewise. (VSX_M): Likewise. (VSX_XXBR): Likewise. (VSm): Likewise. (VSr): Likewise. 
(VSisa): Likewise. (??r): Likewise. (VSc): Likewise. (VM3): Likewise. (VM3_char): Likewise. (vsx_extract_<mode>_store_p9): Likewise. (*vsx_extract_<mode>_p8): Likewise. Diff: --- gcc/config/rs6000/altivec.md | 6 +++ gcc/config/rs6000/predicates.md | 10 ++--- gcc/config/rs6000/rs6000-builtin.cc | 14 +++++++ gcc/config/rs6000/rs6000-call.cc | 2 +- gcc/config/rs6000/rs6000-modes.def | 3 ++ gcc/config/rs6000/rs6000.cc | 77 +++++++++++++++++++++++++++---------- gcc/config/rs6000/rs6000.h | 9 ++++- gcc/config/rs6000/rs6000.md | 66 ++++++++++++++++--------------- gcc/config/rs6000/rs6000.opt | 6 +-- gcc/config/rs6000/vector.md | 5 +++ gcc/config/rs6000/vsx.md | 18 ++++++++- 11 files changed, 153 insertions(+), 63 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index b6f92a71f963..fb960f7ba966 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -191,6 +191,7 @@ ;; otherwise handled by altivec (v2df, v2di, ti) (define_mode_iterator VM [V4SI V8HI + V8BF V8HF V16QI V4SF @@ -204,6 +205,7 @@ ;; Like VM, except don't do TImode (define_mode_iterator VM2 [V4SI V8HI + V8BF V8HF V16QI V4SF @@ -227,15 +229,18 @@ (define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") + (V8BF "h") (V8HF "h") (V16QI "b")]) (define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") + (V8BF "BF") (V8HF "HF") (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") + (V8BF "VECTOR_UNIT_ALTIVEC_P (V8BFmode)") (V8HF "VECTOR_UNIT_ALTIVEC_P (V8HFmode)") (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) @@ -251,6 +256,7 @@ (define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b") + (V8BF "b") (V8HF "b")]) ;; Vector negate diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 2a4b38838d20..44a3fa003d36 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -602,8 
+602,8 @@ return 1; /* Power9 needs to load HFmode constants from memory, Power10 can use - XXSPLTIW. */ - if (mode == HFmode && !TARGET_POWER10) + XXSPLTIW for HFmode or BFmode constants. */ + if (FP16_SCALAR_P (mode) && !TARGET_POWER10) return 0; /* Constants that can be generated with ISA 3.1 instructions are easy. */ @@ -2172,11 +2172,11 @@ (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) -;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW. -(define_predicate "ieee16_xxspltiw_constant" +;; Return 1 if this is a HFmode/BFmode constant that can be loaded with XXSPLTIW. +(define_predicate "fp16_xxspltiw_constant" (match_code "const_double") { - if (!TARGET_POWER10 || mode != HFmode) + if (!TARGET_POWER10 || !FP16_SCALAR_P (mode)) return false; vec_const_128bit_type vsx_const; diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 05a730a8fdca..46c781b82568 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -493,6 +493,8 @@ const char *rs6000_type_string (tree type_node) return "_Float128"; else if (type_node == float16_type_node) return "_Float16"; + else if (type_node == bfloat16_type_node) + return "__bfloat16"; else if (type_node == vector_pair_type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) @@ -758,6 +760,18 @@ rs6000_init_builtins (void) else ieee128_float_type_node = NULL_TREE; + /* __bfloat16 support. */ + if (TARGET_BFLOAT16) + { + bfloat16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (bfloat16_type_node) = 16; + SET_TYPE_MODE (bfloat16_type_node, BFmode); + layout_type (bfloat16_type_node); + t = build_qualified_type (bfloat16_type_node, TYPE_QUAL_CONST); + lang_hooks.types.register_builtin_type (bfloat16_type_node, + "__bfloat16"); + } + /* Vector pair and vector quad support. 
*/ vector_pair_type_node = make_node (OPAQUE_TYPE); SET_TYPE_MODE (vector_pair_type_node, OOmode); diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 3872d742d159..39466f969e47 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -86,7 +86,7 @@ (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ && (CUM)->fregno <= FP_ARG_MAX_REG \ && TARGET_HARD_FLOAT \ - && ((MODE) != HFmode || !TARGET_IEEE16_GPR_ARGS)) + && (!FP16_SCALAR_P (MODE) || !TARGET_FLOAT16_GPR_ARGS)) /* Nonzero if we can use an AltiVec register to pass this arg. */ diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 04dc1d8c9194..28eb389787ff 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -48,6 +48,9 @@ FLOAT_MODE (IF, 16, ibm_extended_format); /* Explicit IEEE 16-bit floating point. */ FLOAT_MODE (HF, 2, ieee_half_format); +/* Arm (google brain) 16-bit floating point. */ +FLOAT_MODE (BF, 2, arm_bfloat_half_format); + /* Add any extra modes needed to represent the condition code. 
For the RS/6000, we need separate modes when unsigned (logical) comparisons diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 7a6b16d30866..624fe7138703 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1900,7 +1900,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p - && mode != HFmode) + && !FP16_SCALAR_P (mode)) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1933,7 +1933,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) return 1; if (TARGET_P9_VECTOR - && (mode == QImode || mode == HImode || mode == HFmode)) + && (mode == QImode || mode == HImode || FP16_SCALAR_P (mode))) return 1; } @@ -1992,7 +1992,7 @@ rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode - || mode1 == HFmode || mode2 == HFmode) + || FP16_SCALAR_P (mode1) || FP16_SCALAR_P (mode2)) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2258,6 +2258,7 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + BFmode, HFmode, SFmode, DFmode, @@ -2279,6 +2280,7 @@ rs6000_debug_reg_global (void) V8SImode, V4DImode, V2TImode, + V8BFmode, V8HFmode, V4SFmode, V2DFmode, @@ -2640,11 +2642,12 @@ rs6000_setup_reg_addr_masks (void) addressing on power7 and above, since we want to use the LFIWZX and STFIWZX instructions to load it. - Never allow offset addressing for HFmode, since it is expected that - 16-bit floating point should always go into the vector registers and - we only have indexed and indirect 16-bit loads to VSR registers. */ + Never allow offset addressing for HFmode/BFmode, since it is expected + that 16-bit floating point should always go into the vector registers + and we only have indexed and indirect 16-bit loads to VSR + registers. 
*/ bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) - || m == HFmode); + || FP16_SCALAR_P (m)); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2694,6 +2697,7 @@ rs6000_setup_reg_addr_masks (void) && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) && m != E_HFmode + && m != E_BFmode && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2949,6 +2953,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[V8HFmode] = align64; } + if (TARGET_BFLOAT16) + { + rs6000_vector_unit[V8BFmode] = VECTOR_VSX; + rs6000_vector_mem[V8BFmode] = VECTOR_VSX; + rs6000_vector_align[V8BFmode] = align64; + } + /* DFmode, see if we want to use the VSX unit. Memory is handled differently, so don't set rs6000_vector_mem. */ if (TARGET_VSX) @@ -2971,13 +2982,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[TImode] = align64; } - /* Allow HFmode in VSX register and set the VSX memory macros. */ + /* Allow HFmode/BFmode in VSX register and set the VSX memory macros. */ if (TARGET_IEEE16) { - rs6000_vector_mem[HImode] = VECTOR_VSX; + rs6000_vector_mem[HFmode] = VECTOR_VSX; rs6000_vector_align[HFmode] = 16; } + if (TARGET_BFLOAT16) + { + rs6000_vector_mem[BFmode] = VECTOR_VSX; + rs6000_vector_align[BFmode] = 16; + } + /* Add support for vector pairs and vector quad registers. 
*/ if (TARGET_MMA) { @@ -3037,6 +3054,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_di_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_di_load; reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_di_store; reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_di_load; reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; @@ -3074,6 +3093,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; } + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_di_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3096,6 +3121,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8BFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8bf; reg_addr[V8HFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hf; reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; @@ -3107,6 +3133,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8BFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8bf; reg_addr[V8HFmode].reload_vsx_gpr = 
CODE_FOR_reload_vsx_from_gprv8hf; reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; @@ -3145,6 +3172,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_si_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_si_load; reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_si_store; reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; @@ -3176,6 +3205,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_si_load; } + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_si_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -12706,7 +12741,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { - if (TARGET_IEEE16 && mode == HFmode) + if (FP16_SCALAR_P (mode)) return true; if (TARGET_POWERPC64) @@ -12727,7 +12762,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, return true; if (TARGET_P9_VECTOR - && (mode == HImode || mode == QImode || mode == HFmode)) + && (mode == HImode || mode == QImode || FP16_SCALAR_P (mode))) return true; } @@ -13497,9 +13532,9 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; - /* IEEE 16-bit don't support offset addressing, but they can go in any - floating point/vector register. 
*/ - if (mode == HFmode && TARGET_IEEE16) + /* IEEE 16-bit and __bfloat16 don't support offset addressing, but they + can go in any floating point/vector register. */ + if (FP16_SCALAR_P (mode)) return rclass; /* If this is a scalar floating point value and we don't have D-form @@ -13731,7 +13766,7 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); - if (from == HFmode || to == HFmode) + if (FP16_SCALAR_P (from) || FP16_SCALAR_P (to)) return from_size == to_size; if (from_size != to_size) @@ -24075,7 +24110,7 @@ rs6000_function_value (const_tree valtype, if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) /* _Decimal128 must use an even/odd register pair. */ regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; - else if (mode == HFmode && TARGET_IEEE16_GPR_ARGS) + else if (FP16_SCALAR_P (mode) && TARGET_FLOAT16_GPR_ARGS) regno = GP_ARG_RETURN; else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && !FLOAT128_VECTOR_P (mode)) @@ -24349,8 +24384,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; - else if (mode == HFmode) - return TARGET_IEEE16; + else if (FP16_SCALAR_P (mode)) + return true; else return default_scalar_mode_supported_p (mode); } @@ -28971,7 +29006,7 @@ constant_fp_to_128bit_vector (rtx op, /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so deal with it here. */ - if (mode == HFmode) + if (FP16_SCALAR_P (mode)) { real_to_target (real_words, rtype, mode); unsigned char hi = (unsigned char) (real_words[0] >> 8); @@ -29272,8 +29307,8 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; - /* HFmode constants can always use XXSPLTIW. 
*/ - if (vsx_const->mode == HFmode) + /* HFmode/BFmode constants can always use XXSPLTIW. */ + if (FP16_SCALAR_P (vsx_const->mode)) return 1; if (!vsx_const->all_words_same) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 31c1d8f613a6..7fc074f93905 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -567,8 +567,15 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 -/* Support for IEEE 16-bit floating point. */ +/* Support for 16-bit floating point formats. Power9 has instructions to + convert vector and scalar _Float16 formats, Power10 has instructions to + convert vector __bfloat16 formats. */ #define TARGET_IEEE16 TARGET_P9_VECTOR +#define TARGET_BFLOAT16 TARGET_POWER10 + +#define FP16_SCALAR_P(MODE) \ + (((MODE) == HFmode && TARGET_IEEE16) \ + || ((MODE) == BFmode && TARGET_BFLOAT16)) /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index a81758e62a02..c6b6d7e9ad54 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -552,6 +552,7 @@ (define_mode_iterator FMOVE128_GPR [TI V16QI V8HI + V8BF V8HF V4SI V4SF @@ -838,8 +839,8 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. -(define_mode_iterator RELOAD [V16QI V8HI V8HF V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI KF IF TF HF +(define_mode_iterator RELOAD [V16QI V8HI V8BF V8HF V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI PTI KF IF TF BF HF OO XO]) ;; Iterate over smin, smax @@ -858,9 +859,12 @@ (SF "TARGET_P8_VECTOR") (DI "TARGET_POWERPC64")]) +;; Mode iterator for supported 16-bit floating point types. +(define_mode_iterator FP16 [HF BF]) + ;; Mode iterator for floating point modes other than SF/DFmode that we ;; convert to/from _Float16 (HFmode) via DFmode. 
-(define_mode_iterator HF_CONVERT [TF KF IF SD DD TD]) +(define_mode_iterator CONVERT_FP16 [TF KF IF SD DD TD]) (include "darwin.md") @@ -5876,8 +5880,8 @@ ;; Use DFmode to convert to/from HFmode for floating point types other ;; than SF/DFmode. (define_expand "extendhf<mode>2" - [(set (match_operand:HF_CONVERT 0 "vsx_register_operand" "=wa") - (float_extend:HF_CONVERT + [(set (match_operand:CONVERT_FP16 0 "vsx_register_operand" "=wa") + (float_extend:CONVERT_FP16 (match_operand:HF 1 "vsx_register_operand" "wa")))] "TARGET_IEEE16" { @@ -5890,7 +5894,7 @@ (define_expand "trunc<mode>hf2" [(set (match_operand:HF 0 "vsx_register_operand" "=wa") (float_truncate:HF - (match_operand:HF_CONVERT 1 "vsx_register_operand" "wa")))] + (match_operand:CONVERT_FP16 1 "vsx_register_operand" "wa")))] "TARGET_IEEE16" { rtx df_tmp = gen_reg_rtx (DFmode); @@ -8196,58 +8200,60 @@ p9v, *, *, *")]) -(define_expand "movhf" - [(set (match_operand:HF 0 "nonimmediate_operand") - (match_operand:HF 1 "any_operand"))] - "TARGET_IEEE16" +;; 16-bit floating point formats. HFmode is _Float16 and BFmode is __bfloat16. +(define_expand "mov<mode>" + [(set (match_operand:FP16 0 "nonimmediate_operand") + (match_operand:FP16 1 "any_operand"))] + "FP16_SCALAR_P (<MODE>mode)" { if (MEM_P (operands[0]) && !REG_P (operands[1])) - operands[1] = force_reg (HFmode, operands[1]); + operands[1] = force_reg (<MODE>mode, operands[1]); }) -;; On power10, we can load up HFmode constants with xxspltiw or pli. -(define_insn "*movhf_xxspltiw" - [(set (match_operand:HF 0 "gpc_reg_operand" "=wa,r") - (match_operand:HF 1 "ieee16_xxspltiw_constant" "eP,eP"))] - "TARGET_IEEE16 && TARGET_POWER10 && TARGET_PREFIXED" +;; On power10, we can load up HFmode/BFmode constants with xxspltiw or +;; pli. 
+(define_insn "*mov<mode>_xxspltiw" + [(set (match_operand:FP16 0 "gpc_reg_operand" "=wa,r") + (match_operand:FP16 1 "fp16_xxspltiw_constant" "eP,eP"))] + "FP16_SCALAR_P (<MODE>mode) && TARGET_POWER10 && TARGET_PREFIXED" { rtx op1 = operands[1]; const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); long real_words[VECTOR_128BIT_WORDS]; - real_to_target (real_words, rtype, HFmode); + real_to_target (real_words, rtype, <MODE>mode); operands[2] = GEN_INT (real_words[0]); - return (vsx_register_operand (operands[0], HFmode) + return (vsx_register_operand (operands[0], <MODE>mode) ? "xxspltiw %x0,%2" : "li %0,%2"); } [(set_attr "type" "vecperm,*") (set_attr "prefixed" "yes")]) -(define_insn "*movhf_internal" - [(set (match_operand:HF 0 "nonimmediate_operand" +(define_insn "*mov<mode>_internal" + [(set (match_operand:FP16 0 "nonimmediate_operand" "=wa, wa, Z, r, r, m, r, wa, wa, r") - (match_operand:HF 1 "any_operand" + (match_operand:FP16 1 "any_operand" "wa, Z, wa, r, m, r, wa, r, j, j"))] - "TARGET_IEEE16 - && (gpc_reg_operand (operands[0], HFmode) - || gpc_reg_operand (operands[1], HFmode))" + "FP16_SCALAR_P (<MODE>mode) + && (gpc_reg_operand (operands[0], <MODE>mode) + || gpc_reg_operand (operands[1], <MODE>mode))" "@ xxlor %x0,%x1,%x1 - lxsiwzx %x0,%y1 - stxsiwx %x1,%y0 + lxsihzx %x0,%y1 + stxsihx %x1,%y0 mr %0,%1 - lwz%U1%X1 %0,%1 - stw%U0%X0 %1,%0 + lhz%U1%X1 %0,%1 + sth%U0%X0 %1,%0 mfvsrwz %0,%x1 mtvsrwz %x0,%1 xxspltib %x0,0 li %0,0" - [(set_attr "type" "vecsimple, fpload, fpstore, *, load, - store, mtvsr, mfvsr, vecsimple, *")]) + [(set_attr "type" "vecsimple, fpload, fpstore, *, load, + store, mtvsr, mfvsr, vecsimple, *")]) diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 5f81d3426a2c..c248f6c890a1 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -642,9 +642,9 @@ mieee128-constant Target Var(TARGET_IEEE128_CONSTANT) Init(1) Save Generate (do not generate) code that uses the LXVKQ 
instruction. -mieee16-gpr-args -Target Undocumented Var(TARGET_IEEE16_GPR_ARGS) Init(1) Save -Pass _Float16 in GPR registers. +mfloat16-gpr-args +Target Undocumented Var(TARGET_FLOAT16_GPR_ARGS) Init(1) Save +Pass and return _Float16 and __bfloat16 in GPR registers. ; Documented parameters diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index ed427ea05e9b..0a9f092c1951 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -52,6 +52,7 @@ ;; Vector logical modes (define_mode_iterator VEC_L [V16QI V8HI + V8BF V8HF V4SI V2DI @@ -68,6 +69,7 @@ V8HI V4SI V2DI + V8BF V8HF V4SF V2DF @@ -85,6 +87,7 @@ V8HI V4SI V2DI + V8BF V8HF V4SF V2DF]) @@ -100,6 +103,7 @@ (V8HI "HI") (V4SI "SI") (V2DI "DI") + (V8BF "BF") (V8HF "HF") (V4SF "SF") (V2DF "DF") @@ -111,6 +115,7 @@ (V8HI "hi") (V4SI "si") (V2DI "di") + (V8BF "bf") (V8HF "hf") (V4SF "sf") (V2DF "df") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 09b4d53813ba..be65b309c63a 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -48,11 +48,13 @@ ;; Iterator for 8 element vectors (define_mode_iterator V8HI_V8HF [V8HI + (V8BF "TARGET_BFLOAT16") (V8HF "TARGET_IEEE16")]) ;; Iterator for logical types supported by VSX (define_mode_iterator VSX_L [V16QI V8HI + (V8BF "TARGET_BFLOAT16") (V8HF "TARGET_IEEE16") V4SI V2DI @@ -66,6 +68,7 @@ ;; Iterator for memory moves. 
(define_mode_iterator VSX_M [V16QI V8HI + (V8BF "TARGET_BFLOAT16") (V8HF "TARGET_IEEE16") V4SI V2DI @@ -77,6 +80,7 @@ TI]) (define_mode_attr VSX_XXBR [(V8HI "h") + (V8BF "h") (V8HF "h") (V4SI "w") (V4SF "w") @@ -87,6 +91,7 @@ ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") + (V8BF "vw4") (V8HF "vw4") (V4SI "vw4") (V4SF "vw4") @@ -101,6 +106,7 @@ ;; Map the register class used (define_mode_attr VSr [(V16QI "v") (V8HI "v") + (V8BF "v") (V8HF "v") (V4SI "v") (V4SF "wa") @@ -117,6 +123,7 @@ ;; What value we need in the "isa" field, to make the IEEE QP float work. (define_mode_attr VSisa [(V16QI "*") (V8HI "*") + (V8BF "p10") (V8HF "p9v") (V4SI "*") (V4SF "*") @@ -134,6 +141,7 @@ ;; integer modes. (define_mode_attr ??r [(V16QI "??r") (V8HI "??r") + (V8BF "??r") (V8HF "??r") (V4SI "??r") (V4SF "??r") @@ -147,6 +155,7 @@ ;; A mode attribute used for 128-bit constant values. (define_mode_attr nW [(V16QI "W") (V8HI "W") + (V8BF "W") (V8HF "W") (V4SI "W") (V4SF "W") @@ -175,6 +184,7 @@ ;; operation (define_mode_attr VSv [(V16QI "v") (V8HI "v") + (V8BF "v") (V8HF "v") (V4SI "v") (V4SF "v") @@ -409,6 +419,7 @@ ;; Like VM2 in altivec.md, just do char, short, int, long, float and double (define_mode_iterator VM3 [V4SI V8HI + V8BF V8HF V16QI V4SF @@ -421,6 +432,7 @@ (define_mode_attr VM3_char [(V2DI "d") (V4SI "w") (V8HI "h") + (V8BF "h") (V8HF "h") (V16QI "b") (V2DF "d") @@ -4095,7 +4107,8 @@ if (which_alternative == 0 && ((<MODE>mode == V16QImode && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 7 : 8)) - || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode) + || ((<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 
3 : 4)))) { enum machine_mode dest_mode = GET_MODE (operands[0]); @@ -4174,7 +4187,8 @@ else vec_tmp = src; } - else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode) + else if (<MODE>mode == V8HImode || <MODE>mode == V8HFmode + || <MODE>mode == V8BFmode) { if (value != 3) emit_insn (gen_altivec_vsplth_direct (vec_tmp, src, element));
