https://gcc.gnu.org/g:075dcaac7c8eb38b7a328a87472d31c6f6f89bf6
commit 075dcaac7c8eb38b7a328a87472d31c6f6f89bf6 Author: Michael Meissner <[email protected]> Date: Thu Sep 11 16:01:48 2025 -0400 Update -mbfloat16 and -mieee16 support. 2025-09-11 Michael Meissner <[email protected]> gcc/ * config/rs6000/altivec.md (altivec_vsplth_v8bf): New insn. * config/rs6000/predicates.md (fp16_xxspltiw_constant): Rename insn, add bfloat16 support. * config/rs6000/rs6000-call.cc (USE_FP_FOR_ARG_P): Add bfloat16 support. * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Likewise. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_can_change_mode_class): Likewise. (rs6000_function_value): Likewise. (rs6000_scalar_mode_supported_p): Likewise. (constant_fp_to_128bit_vector): Likewise. (constant_generates_xxspltiw): Likewise. * config/rs6000/rs6000.h (FP16_SCALAR_MODE_P): New macro. * config/rs6000/rs6000.md (extendbf<mode>2): New insn. (mov<mode>_xxspltiw): Add bfloat16 support. * config/rs6000/vsx.md (vsx_xscvspdpn_sf): New insn. (xvcvbf16spn_v8bf): Likewise. 
Diff: --- gcc/config/rs6000/altivec.md | 9 +++++ gcc/config/rs6000/predicates.md | 7 ++-- gcc/config/rs6000/rs6000-call.cc | 2 +- gcc/config/rs6000/rs6000.cc | 82 +++++++++++++++++++++++++++------------- gcc/config/rs6000/rs6000.h | 5 +++ gcc/config/rs6000/rs6000.md | 41 +++++++++++++++++++- gcc/config/rs6000/vsx.md | 16 ++++++++ 7 files changed, 130 insertions(+), 32 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index fb960f7ba966..9bfa72018f9a 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2411,6 +2411,15 @@ "vsplth %0,%1,%2" [(set_attr "type" "vecperm")]) +(define_insn "altivec_vsplth_v8bf" + [(set (match_operand:V8BF 0 "register_operand" "=v") + (unspec:V8BF [(match_operand:BF 1 "register_operand" "v") + (match_operand:QI 2 "const_0_to_7_operand" "i")] + UNSPEC_VSPLT_DIRECT))] + "TARGET_BFLOAT16" + "vsplth %0,%1,%2" + [(set_attr "type" "vecperm")]) + (define_expand "altivec_vspltw" [(use (match_operand:V4SI 0 "register_operand")) (use (match_operand:V4SI 1 "register_operand")) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 2a4b38838d20..3dc9e020fd71 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2172,11 +2172,12 @@ (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) -;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW. -(define_predicate "ieee16_xxspltiw_constant" +;; Return 1 if this is a 16-bit floating point constant that can be +;; loaded with XXSPLTIW. 
+(define_predicate "fp16_xxspltiw_constant" (match_code "const_double") { - if (!TARGET_POWER10 || mode != HFmode) + if (!TARGET_POWER10 || !FP16_SCALAR_MODE_P (mode)) return false; vec_const_128bit_type vsx_const; diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 3872d742d159..a446897f842b 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -86,7 +86,7 @@ (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ && (CUM)->fregno <= FP_ARG_MAX_REG \ && TARGET_HARD_FLOAT \ - && ((MODE) != HFmode || !TARGET_IEEE16_GPR_ARGS)) + && (!FP16_SCALAR_MODE_P (MODE) || !TARGET_IEEE16_GPR_ARGS)) /* Nonzero if we can use an AltiVec register to pass this arg. */ diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 6eb53f3b4c8b..65ac03b70d84 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1900,7 +1900,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p - && mode != HFmode) + && !FP16_SCALAR_MODE_P (mode)) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1933,7 +1933,7 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) return 1; if (TARGET_P9_VECTOR - && (mode == QImode || mode == HImode || mode == HFmode)) + && (mode == QImode || mode == HImode)) return 1; } @@ -1992,7 +1992,7 @@ rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode - || mode1 == HFmode || mode2 == HFmode) + || FP16_SCALAR_MODE_P (mode1) || FP16_SCALAR_MODE_P (mode2)) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2258,6 +2258,7 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + BFmode, HFmode, SFmode, DFmode, @@ -2279,6 +2280,7 @@ rs6000_debug_reg_global (void) V8SImode, V4DImode, V2TImode, + V8BFmode, V8HFmode, 
V4SFmode, V2DFmode, @@ -2640,11 +2642,12 @@ rs6000_setup_reg_addr_masks (void) addressing on power7 and above, since we want to use the LFIWZX and STFIWZX instructions to load it. - Never allow offset addressing for HFmode, since it is expected that - 16-bit floating point should always go into the vector registers and - we only have indexed and indirect 16-bit loads to VSR registers. */ + Never allow offset addressing for 16-bit floating point modes, since + it is expected that 16-bit floating point should always go into the + vector registers and we only have indexed and indirect 16-bit loads to + VSR registers. */ bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) - || m == HFmode); + || FP16_SCALAR_MODE_P (m)); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2693,7 +2696,7 @@ rs6000_setup_reg_addr_masks (void) && !complex_p && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) - && m != E_HFmode + && !FP16_SCALAR_MODE_P (m) && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2947,6 +2950,20 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_unit[V8HFmode] = VECTOR_VSX; rs6000_vector_mem[V8HFmode] = VECTOR_VSX; rs6000_vector_align[V8HFmode] = align64; + + rs6000_vector_mem[HFmode] = VECTOR_VSX; + rs6000_vector_align[HFmode] = 16; + } + + /* _bfloat16 support. */ + if (TARGET_BFLOAT16) + { + rs6000_vector_unit[V8BFmode] = VECTOR_VSX; + rs6000_vector_mem[V8BFmode] = VECTOR_VSX; + rs6000_vector_align[V8BFmode] = align64; + + rs6000_vector_mem[BFmode] = VECTOR_VSX; + rs6000_vector_align[BFmode] = 16; } /* DFmode, see if we want to use the VSX unit. Memory is handled @@ -2971,13 +2988,6 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[TImode] = align64; } - /* Allow HFmode in VSX register and set the VSX memory macros. 
*/ - if (TARGET_IEEE16) - { - rs6000_vector_mem[HImode] = VECTOR_VSX; - rs6000_vector_align[HFmode] = 16; - } - /* Add support for vector pairs and vector quad registers. */ if (TARGET_MMA) { @@ -3037,6 +3047,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_di_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_di_load; reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_di_store; reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_di_load; reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; @@ -3074,6 +3086,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; } + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_di_store; + reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. 
*/ if (TARGET_NO_SDMODE_STACK) @@ -3096,6 +3114,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; + reg_addr[V8BFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8bf; reg_addr[V8HFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hf; reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; @@ -3107,6 +3126,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; + reg_addr[V8BFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8bf; reg_addr[V8HFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hf; reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; @@ -3145,6 +3165,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; + reg_addr[V8BFmode].reload_store = CODE_FOR_reload_v8bf_si_store; + reg_addr[V8BFmode].reload_load = CODE_FOR_reload_v8bf_si_load; reg_addr[V8HFmode].reload_store = CODE_FOR_reload_v8hf_si_store; reg_addr[V8HFmode].reload_load = CODE_FOR_reload_v8hf_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; @@ -3176,6 +3198,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_si_load; } + if (TARGET_BFLOAT16) + { + reg_addr[BFmode].reload_store = CODE_FOR_reload_bf_si_store; + 
reg_addr[BFmode].reload_load = CODE_FOR_reload_bf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -12722,7 +12750,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { - if (TARGET_IEEE16 && mode == HFmode) + if (FP16_SCALAR_MODE_P (mode)) return true; if (TARGET_POWERPC64) @@ -12743,7 +12771,7 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, return true; if (TARGET_P9_VECTOR - && (mode == HImode || mode == QImode || mode == HFmode)) + && (mode == HImode || mode == QImode)) return true; } @@ -13513,9 +13541,9 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; - /* IEEE 16-bit don't support offset addressing, but they can go in any - floating point/vector register. */ - if (mode == HFmode && TARGET_IEEE16) + /* IEEE 16-bit and bfloat16 don't support offset addressing, but they can + go in any floating point/vector register. */ + if (FP16_SCALAR_MODE_P (mode)) return rclass; /* If this is a scalar floating point value and we don't have D-form @@ -13747,7 +13775,7 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); - if (from == HFmode || to == HFmode) + if (FP16_SCALAR_MODE_P (from) || FP16_SCALAR_MODE_P (to)) return from_size == to_size; if (from_size != to_size) @@ -24091,7 +24119,7 @@ rs6000_function_value (const_tree valtype, if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT) /* _Decimal128 must use an even/odd register pair. */ regno = (mode == TDmode) ? 
FP_ARG_RETURN + 1 : FP_ARG_RETURN; - else if (mode == HFmode && TARGET_IEEE16_GPR_ARGS) + else if (FP16_SCALAR_MODE_P (mode) && TARGET_IEEE16_GPR_ARGS) regno = GP_ARG_RETURN; else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && !FLOAT128_VECTOR_P (mode)) @@ -24365,8 +24393,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; - else if (mode == HFmode) - return TARGET_IEEE16; + else if (FP16_SCALAR_MODE_P (mode)) + return true; else return default_scalar_mode_supported_p (mode); } @@ -28989,7 +29017,7 @@ constant_fp_to_128bit_vector (rtx op, /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so deal with it here. */ - if (mode == HFmode) + if (FP16_SCALAR_MODE_P (mode)) { real_to_target (real_words, rtype, mode); unsigned char hi = (unsigned char) (real_words[0] >> 8); @@ -29290,8 +29318,8 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; - /* HFmode constants can always use XXSPLTIW. */ - if (vsx_const->mode == HFmode) + /* HFmode/BFmode constants can always use XXSPLTIW. */ + if (FP16_SCALAR_MODE_P (vsx_const->mode)) return 1; if (!vsx_const->all_words_same) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index cffe2750ba9a..cd66310de12c 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -343,6 +343,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || ((MODE) == TDmode) \ || (!TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE))) +/* Is this a valid 16-bit scalar floating point mode? */ +#define FP16_SCALAR_MODE_P(MODE) \ + (((MODE) == HFmode && TARGET_IEEE16) \ + || ((MODE) == BFmode && TARGET_BFLOAT16)) + /* Return true for floating point that does not use a vector register. 
*/ #define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE) \ (SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE)) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3da282932395..10e81ad1eb2e 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5954,6 +5954,45 @@ DONE; }) +;; Convert BFmode to SFmode/DFmode. +;; 3 instructions are generated: +;; VSPLTH -- duplicate BFmode into all elements +;; XVCVBF16SPN -- convert even BFmode elements to SFmode +;; XSCVSPDPN -- convert memory format of SFmode to DFmode. +(define_insn_and_split "extendbf<mode>2" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (float_extend:SFDF + (match_operand:BF 1 "vsx_register_operand" "v"))) + (clobber (match_scratch:V8BF 2 "=v"))] + "TARGET_BFLOAT16" + "#" + "&& 1" + [(pc)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2_v8bf = operands[2]; + + if (GET_CODE (op2_v8bf) == SCRATCH) + op2_v8bf = gen_reg_rtx (V8BFmode); + + rtx op2_v4sf = gen_lowpart (V4SFmode, op2_v8bf); + + /* VSPLTH -- duplicate BFmode into all elements. */ + emit_insn (gen_altivec_vsplth_v8bf (op2_v8bf, op1, GEN_INT (3))); + + /* XVCVBF16SPN -- convert even V8BFmode elements to V4SFmode. */ + emit_insn (gen_xvcvbf16spn_v8bf (op2_v4sf, op2_v8bf)); + + /* XSCVSPDPN -- convert single V4SFmode element to DFmode. */ + emit_insn (GET_MODE (op0) == SFmode + ? gen_vsx_xscvspdpn_sf (op0, op2_v4sf) + : gen_vsx_xscvspdpn (op0, op2_v4sf)); + DONE; +} + [(set_attr "type" "fpsimple") + (set_attr "length" "12")]) + ;; Conversions to and from floating-point. @@ -8264,7 +8303,7 @@ ;; or pli. 
(define_insn "*mov<mode>_xxspltiw" [(set (match_operand:FP16 0 "gpc_reg_operand" "=wa,r") - (match_operand:FP16 1 "ieee16_xxspltiw_constant" "eP,eP"))] + (match_operand:FP16 1 "fp16_xxspltiw_constant" "eP,eP"))] "TARGET_POWER10 && TARGET_PREFIXED" { rtx op1 = operands[1]; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index aee4a9ba8c1a..d2cb299fe191 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -2511,6 +2511,14 @@ "xscvspdpn %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "vsx_xscvspdpn_sf" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" + "xscvspdpn %x0,%x1" + [(set_attr "type" "fp")]) + (define_insn "vsx_xscvdpspn_scalar" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] @@ -6497,6 +6505,14 @@ "<xvcvbf16> %x0,%x1" [(set_attr "type" "vecfloat")]) +(define_insn "xvcvbf16spn_v8bf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:V8BF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_XVCVBF16SPN))] + "TARGET_BFLOAT16" + "xvcvbf16spn %x0,%x1" + [(set_attr "type" "vecfloat")]) + (define_insn "vec_mtvsrbmi" [(set (match_operand:V16QI 0 "altivec_register_operand" "=v") (unspec:V16QI [(match_operand:QI 1 "u6bit_cint_operand" "n")]
