https://gcc.gnu.org/g:04be883c92e0243991232574dc289e56b40f6b93
commit 04be883c92e0243991232574dc289e56b40f6b93 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed Aug 13 00:09:18 2025 -0400 Add _Float16 support. 2025-08-13 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/predicates.md (easy_fp_constant): Add support for _Float16 constants. (ieee16_xxspltiw_constant): New predicate. * config/rs6000/rs6000-builtin.cc (rs6000_type_string): Add _Float16 support. * config/rs6000/rs6000-modes.def (HFmode): Add mode for _Float16. * config/rs6000/rs6000-p8swap.cc (rs6000_gen_stvx): Remove old code for V8HFmode that was never used. (rs6000_gen_lvx): Likewise. (replace_swapped_load_constant): Likewise. * config/rs6000/rs6000-protos.h (vec_const_128bit_type): Add mode field. * config/rs6000/rs6000.cc (rs6000_hard_regno_mode_ok_uncached): Add support for HFmode. (rs6000_modes_tieable_p): Likewise. (rs6000_debug_reg_global): Likewise. (rs6000_setup_reg_addr_masks): Likewise. (rs6000_init_hard_regno_mode_ok): Likewise. (rs6000_secondary_reload_simple_move): Likewise. (rs6000_preferred_reload_class): Likewise. (rs6000_can_change_mode_class): Likewise. (rs6000_mangle_type): Likewise. (rs6000_scalar_mode_supported_p): Likewise. (rs6000_floatn_mode): Likewise. (constant_fp_to_128bit_vector): Add support for _Float16 constants. (vec_const_128bit_to_bytes): Likewise. (constant_generates_xxspltiw): Likewise. * config/rs6000/rs6000.h (TARGET_IEEE16): New macro. * config/rs6000/rs6000.md (RELOAD): Add HFmode. (movhf): New define_expand. (movhf_xxspltiw): New insn. (movhf_internal): Likewise. * config/rs6000/vsx.md (extendhf<mode>2): Likewise. (trunc<mode>hf2): Likewise. 
Diff: --- gcc/config/rs6000/predicates.md | 19 ++++++ gcc/config/rs6000/rs6000-builtin.cc | 2 + gcc/config/rs6000/rs6000-modes.def | 3 + gcc/config/rs6000/rs6000-p8swap.cc | 14 +---- gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.cc | 113 +++++++++++++++++++++++++++++------- gcc/config/rs6000/rs6000.h | 3 + gcc/config/rs6000/rs6000.md | 51 +++++++++++++++- gcc/config/rs6000/vsx.md | 19 ++++++ 9 files changed, 191 insertions(+), 34 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..2a4b38838d20 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -601,6 +601,11 @@ if (TARGET_VSX && op == CONST0_RTX (mode)) return 1; + /* Power9 needs to load HFmode constants from memory, Power10 can use + XXSPLTIW. */ + if (mode == HFmode && !TARGET_POWER10) + return 0; + /* Constants that can be generated with ISA 3.1 instructions are easy. */ vec_const_128bit_type vsx_const; if (TARGET_POWER10 && vec_const_128bit_to_bytes (op, mode, &vsx_const)) @@ -2166,3 +2171,17 @@ (and (match_code "subreg") (match_test "subreg_lowpart_offset (mode, GET_MODE (SUBREG_REG (op))) == SUBREG_BYTE (op)"))) + +;; Return 1 if this is a HFmode constant that can be loaded with XXSPLTIW. 
+(define_predicate "ieee16_xxspltiw_constant" + (match_code "const_double") +{ + if (!TARGET_POWER10 || mode != HFmode) + return false; + + vec_const_128bit_type vsx_const; + if (!vec_const_128bit_to_bytes (op, mode, &vsx_const)) + return false; + + return constant_generates_xxspltiw (&vsx_const); +}) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index bc1580f051b0..05a730a8fdca 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -491,6 +491,8 @@ const char *rs6000_type_string (tree type_node) return "voidc*"; else if (type_node == float128_type_node) return "_Float128"; + else if (type_node == float16_type_node) + return "_Float16"; else if (type_node == vector_pair_type_node) return "__vector_pair"; else if (type_node == vector_quad_type_node) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index f89e4ef403c1..04dc1d8c9194 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -45,6 +45,9 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* IBM 128-bit floating point. */ FLOAT_MODE (IF, 16, ibm_extended_format); +/* Explicit IEEE 16-bit floating point. */ +FLOAT_MODE (HF, 2, ieee_half_format); + /* Add any extra modes needed to represent the condition code. 
For the RS/6000, we need separate modes when unsigned (logical) comparisons diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc index e92f01031270..4fb107c60a47 100644 --- a/gcc/config/rs6000/rs6000-p8swap.cc +++ b/gcc/config/rs6000/rs6000-p8swap.cc @@ -1598,10 +1598,6 @@ rs6000_gen_stvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) stvx = gen_altivec_stvx_v16qi (src_exp, dest_exp); else if (mode == V8HImode) stvx = gen_altivec_stvx_v8hi (src_exp, dest_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - stvx = gen_altivec_stvx_v8hf (src_exp, dest_exp); -#endif else if (mode == V4SImode) stvx = gen_altivec_stvx_v4si (src_exp, dest_exp); else if (mode == V4SFmode) @@ -1722,10 +1718,6 @@ rs6000_gen_lvx (enum machine_mode mode, rtx dest_exp, rtx src_exp) lvx = gen_altivec_lvx_v16qi (dest_exp, src_exp); else if (mode == V8HImode) lvx = gen_altivec_lvx_v8hi (dest_exp, src_exp); -#ifdef HAVE_V8HFmode - else if (mode == V8HFmode) - lvx = gen_altivec_lvx_v8hf (dest_exp, src_exp); -#endif else if (mode == V4SImode) lvx = gen_altivec_lvx_v4si (dest_exp, src_exp); else if (mode == V4SFmode) @@ -1930,11 +1922,7 @@ replace_swapped_load_constant (swap_web_entry *insn_entry, rtx swap_insn) rtx new_const_vector = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); new_mem = force_const_mem (mode, new_const_vector); } - else if ((mode == V8HImode) -#ifdef HAVE_V8HFmode - || (mode == V8HFmode) -#endif - ) + else if (mode == V8HImode) { rtx vals = gen_rtx_PARALLEL (mode, rtvec_alloc (8)); int i; diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4619142d197b..9bf971370d41 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -250,6 +250,7 @@ typedef struct { bool all_words_same; /* Are the words all equal? */ bool all_half_words_same; /* Are the half words all equal? */ bool all_bytes_same; /* Are the bytes all equal? */ + machine_mode mode; /* Original constant mode. 
*/ } vec_const_128bit_type; extern bool vec_const_128bit_to_bytes (rtx, machine_mode, diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 17219bb5402e..cd244047ce5e 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -1899,7 +1899,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (ALTIVEC_REGNO_P (regno)) { - if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p) + if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p + && mode != HFmode) return 0; return ALTIVEC_REGNO_P (last_regno); @@ -1931,7 +1932,8 @@ rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) if (TARGET_POPCNTD && mode == SImode) return 1; - if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode)) + if (TARGET_P9_VECTOR + && (mode == QImode || mode == HImode || mode == HFmode)) return 1; } @@ -1989,7 +1991,8 @@ static bool rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) { if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode - || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode) + || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode + || mode1 == HFmode || mode2 == HFmode) return mode1 == mode2; if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) @@ -2255,6 +2258,7 @@ rs6000_debug_reg_global (void) DImode, TImode, PTImode, + HFmode, SFmode, DFmode, TFmode, @@ -2633,8 +2637,13 @@ rs6000_setup_reg_addr_masks (void) /* SDmode is special in that we want to access it only via REG+REG addressing on power7 and above, since we want to use the LFIWZX and - STFIWZX instructions to load it. */ - bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK); + STFIWZX instructions to load it. + + Never allow offset addressing for HFmode, since it is expected that + 16-bit floating point should always go into the vector registers and + we only have indexed and indirect 16-bit loads to VSR registers. 
*/ + bool indexed_only_p = ((m == SDmode && TARGET_NO_SDMODE_STACK) + || m == HFmode); any_addr_mask = 0; for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++) @@ -2683,6 +2692,7 @@ rs6000_setup_reg_addr_masks (void) && !complex_p && (m != E_DFmode || !TARGET_VSX) && (m != E_SFmode || !TARGET_P8_VECTOR) + && m != E_HFmode && !small_int_vsx_p) { addr_mask |= RELOAD_REG_PRE_INCDEC; @@ -2952,6 +2962,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_align[TImode] = align64; } + /* Allow HFmode in VSX register and set the VSX memory macros. */ + if (TARGET_IEEE16) + { + rs6000_vector_mem[HFmode] = VECTOR_VSX; + rs6000_vector_align[HFmode] = 16; + } + /* Add support for vector pairs and vector quad registers. */ if (TARGET_MMA) { @@ -3040,6 +3057,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_di_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_di_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -3132,6 +3155,12 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; } + if (TARGET_IEEE16) + { + reg_addr[HFmode].reload_store = CODE_FOR_reload_hf_si_store; + reg_addr[HFmode].reload_load = CODE_FOR_reload_hf_si_load; + } + /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are available. */ if (TARGET_NO_SDMODE_STACK) @@ -12662,6 +12691,9 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) { + if (TARGET_IEEE16 && mode == HFmode) + return true; + if (TARGET_POWERPC64) { /* ISA 2.07: MTVSRD or MVFVSRD. 
*/ @@ -12679,7 +12711,8 @@ rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, if (mode == SImode) return true; - if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) + if (TARGET_P9_VECTOR + && (mode == HImode || mode == QImode || mode == HFmode)) return true; } @@ -13449,6 +13482,11 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) || mode_supports_dq_form (mode)) return rclass; + /* IEEE 16-bit don't support offset addressing, but they can go in any + floating point/vector register. */ + if (mode == HFmode && TARGET_IEEE16) + return rclass; + /* If this is a scalar floating point value and we don't have D-form addressing, prefer the traditional floating point registers so that we can use D-form (register+offset) addressing. */ @@ -13678,6 +13716,9 @@ rs6000_can_change_mode_class (machine_mode from, unsigned from_size = GET_MODE_SIZE (from); unsigned to_size = GET_MODE_SIZE (to); + if (from == HFmode || to == HFmode) + return from_size == to_size; + if (from_size != to_size) { enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS; @@ -24291,6 +24332,8 @@ rs6000_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) return true; + else if (mode == HFmode) + return TARGET_IEEE16; else return default_scalar_mode_supported_p (mode); } @@ -24342,6 +24385,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? SFmode : opt_scalar_float_mode (); + case 32: return DFmode; @@ -24363,6 +24409,9 @@ rs6000_floatn_mode (int n, bool extended) { switch (n) { + case 16: + return TARGET_IEEE16 ? 
HFmode : opt_scalar_float_mode (); + case 32: return SFmode; @@ -28903,24 +28952,43 @@ constant_fp_to_128bit_vector (rtx op, const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op); long real_words[VECTOR_128BIT_WORDS]; - /* Make sure we don't overflow the real_words array and that it is - filled completely. */ - gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); + /* For IEEE 16-bit, the constant doesn't fill the whole 32-bit word, so + deal with it here. */ + if (mode == HFmode) + { + real_to_target (real_words, rtype, mode); + unsigned char hi = (unsigned char) (real_words[0] >> 8); + unsigned char lo = (unsigned char) real_words[0]; - real_to_target (real_words, rtype, mode); + if (!BYTES_BIG_ENDIAN) + std::swap (hi, lo); - /* Iterate over each 32-bit word in the floating point constant. The - real_to_target function puts out words in target endian fashion. We need - to arrange the order so that the bytes are written in big endian order. */ - for (unsigned num = 0; num < num_words; num++) + info->bytes[0] = hi; + info->bytes[1] = lo; + } + + else { - unsigned endian_num = (BYTES_BIG_ENDIAN - ? num - : num_words - 1 - num); + /* Make sure we don't overflow the real_words array and that it is filled + completely. */ + gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0); - unsigned uvalue = real_words[endian_num]; - for (int shift = 32 - 8; shift >= 0; shift -= 8) - info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + real_to_target (real_words, rtype, mode); + + /* Iterate over each 32-bit word in the floating point constant. The + real_to_target function puts out words in target endian fashion. We + need to arrange the order so that the bytes are written in big endian + order. */ + for (unsigned num = 0; num < num_words; num++) + { + unsigned endian_num = (BYTES_BIG_ENDIAN + ? 
num + : num_words - 1 - num); + + unsigned uvalue = real_words[endian_num]; + for (int shift = 32 - 8; shift >= 0; shift -= 8) + info->bytes[byte_num++] = (uvalue >> shift) & 0xff; + } } /* Mark that this constant involves floating point. */ @@ -28959,6 +29027,7 @@ vec_const_128bit_to_bytes (rtx op, return false; /* Set up the bits. */ + info->mode = mode; switch (GET_CODE (op)) { /* Integer constants, default to double word. */ @@ -29186,6 +29255,10 @@ constant_generates_xxspltiw (vec_const_128bit_type *vsx_const) if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX) return 0; + /* HFmode constants can always use XXSPLTIW. */ + if (vsx_const->mode == HFmode) + return 1; + if (!vsx_const->all_words_same) return 0; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index cffe2750ba9a..31c1d8f613a6 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -567,6 +567,9 @@ extern int rs6000_vector_align[]; below. */ #define RS6000_FN_TARGET_INFO_HTM 1 +/* Support for IEEE 16-bit floating point. */ +#define TARGET_IEEE16 TARGET_P9_VECTOR + /* Whether the various reciprocal divide/square root estimate instructions exist, and whether we should automatically generate code for the instruction by default. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e31ee40aa870..b4af2eb4b9c0 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -838,7 +838,7 @@ ;; Reload iterator for creating the function to allocate a base register to ;; supplement addressing modes. 
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI - SF SD SI DF DD DI TI PTI KF IF TF + SF SD SI DF DD DI TI PTI KF IF TF HF OO XO]) ;; Iterate over smin, smax @@ -8145,6 +8145,55 @@ p9v, p9v, p9v, p9v, p9v, p9v, p9v, *, *, *")]) + +(define_expand "movhf" + [(set (match_operand:HF 0 "nonimmediate_operand") + (match_operand:HF 1 "any_operand"))] + "TARGET_IEEE16") + +;; On power10, we can load up HFmode constants with xxspltiw. +(define_insn "*movhf_xxspltiw" + [(set (match_operand:HF 0 "vsx_register_operand" "=wa") + (match_operand:HF 1 "ieee16_xxspltiw_constant" "eP"))] + "TARGET_IEEE16 && TARGET_POWER10" +{ + rtx op1 = operands[1]; + const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op1); + long real_words[VECTOR_128BIT_WORDS]; + + real_to_target (real_words, rtype, HFmode); + operands[2] = GEN_INT (real_words[0]); + return "xxspltiw %x0,%2"; +} + [(set_attr "type" "vecperm") + (set_attr "prefixed" "yes")]) + +(define_insn "*movhf_internal" + [(set (match_operand:HF 0 "nonimmediate_operand" + "=wa, wa, Z, r, r, + m, r, wa, wa, r") + + (match_operand:HF 1 "any_operand" + "wa, Z, wa, r, m, + r, wa, r, j, j"))] + "TARGET_IEEE16 + && (gpc_reg_operand (operands[0], HFmode) + || gpc_reg_operand (operands[1], HFmode))" + "@ + xxlor %x0,%x1,%x1 + lxsihzx %x0,%y1 + stxsihx %x1,%y0 + mr %0,%1 + lhz%U1%X1 %0,%1 + sth%U0%X0 %1,%0 + mfvsrwz %0,%x1 + mtvsrwz %x0,%1 + xxspltib %x0,0 + li %0,0" + [(set_attr "type" "vecsimple, fpload, fpstore, *, load, + store, mfvsr, mtvsr, vecsimple, *")]) + + ;; Here is how to move condition codes around. When we store CC data in ;; an integer register or memory, we store just the high-order 4 bits. diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index dd3573b80868..f84fda3677f4 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -3120,6 +3120,25 @@ "xvrdpiz %x0,%x1" [(set_attr "type" "vecdouble")]) + +;; Convert IEEE 16-bit floating point to/from SF and DF modes. 
+ +(define_insn "extendhf<mode>2" + [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") + (float_extend:SFDF + (match_operand:HF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvhpdp %x0,%x1" + [(set_attr "type" "fpsimple")]) + +(define_insn "trunc<mode>hf2" + [(set (match_operand:HF 0 "vsx_register_operand" "=wa") + (float_truncate:HF + (match_operand:SFDF 1 "vsx_register_operand" "wa")))] + "TARGET_IEEE16" + "xscvdphp %x0,%x1" + [(set_attr "type" "fpsimple")]) + ;; Permute operations