https://gcc.gnu.org/g:cbcdfe7d9f268b12d00b655695d008e1d5441024
commit cbcdfe7d9f268b12d00b655695d008e1d5441024 Author: Michael Meissner <[email protected]> Date: Mon Nov 10 21:22:52 2025 -0500 Revert changes Diff: --- gcc/ChangeLog.ibm | 17 +-- gcc/config/rs6000/float16.cc | 150 -------------------- gcc/config/rs6000/float16.md | 282 -------------------------------------- gcc/config/rs6000/predicates.md | 76 ---------- gcc/config/rs6000/rs6000-protos.h | 2 - 5 files changed, 5 insertions(+), 522 deletions(-) diff --git a/gcc/ChangeLog.ibm b/gcc/ChangeLog.ibm index 0069b22009c3..ee46756009e9 100644 --- a/gcc/ChangeLog.ibm +++ b/gcc/ChangeLog.ibm @@ -1,15 +1,4 @@ -==================== Branch ibm/gcc-16-future-float16, patch #109 ==================== - -Tell user if we have hardware support for 16-bit floating point. - -2025-11-10 Michael Meissner <[email protected]> - -gcc/ - - * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros); Define - __BFLOAT16_HW__ if we have hardware support for __bflot16 conversions. - Define __FLOAT16_HW__ if we have hardware support for _Float16 - conversions. +==================== Branch ibm/gcc-16-future-float16, patch #109 was reverted ==================== ==================== Branch ibm/gcc-16-future-float16, patch #108 ==================== @@ -158,6 +147,8 @@ gcc/ (xxspltw_<mode>): Likewise. (xvcvbf16spn_bf): Likewise. (xvcvspbf16_bf): Likewise. + * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define + __BFLOAT16_HW__ if we have hardware support for __bfloat16. ==================== Branch ibm/gcc-16-future-float16, patch #103 ==================== @@ -174,6 +165,8 @@ gcc/ (extendhf<mode>2): Add support converting between HFmode and SFmode/DFmoded if we are on power9 or later. (trunc<mode>hf2): Likewise. + * config/rs6000/rs6000-c.cc (rs6000_target_modify_macros): Define + __FLOAT16_HW__ if we have hardware support for _Float16. ==================== Branch ibm/gcc-16-future-float16, patch #102 ==================== diff --git a/gcc/config/rs6000/float16.cc b/gcc/config/rs6000/float16.cc index 2c7b6278a16a..5274a0df962f 100644 --- a/gcc/config/rs6000/float16.cc +++ b/gcc/config/rs6000/float16.cc @@ -183,153 +183,3 @@ fp16_vectorization (enum rtx_code icode, return; } - -/* Expand a bfloat16 scalar floating point operation: - - ICODE: Operation to perform. - RESULT: Result of the operation. - OP1: Input operand1. - OP2: Input operand2. - OP3: Input operand3 or NULL_RTX. - SUBTYPE: Describe the operation. - - The operation is done as a V4SFmode vector operation. This is because - converting BFmode from a scalar BFmode to SFmode to do the operation and - back again takes quite a bit of time. GCC will only generate the native - operation if -Ofast is used. The float16.md code that calls this function - adds various combine operations to do the operation in V4SFmode instead of - SFmode. */ - -void -bfloat16_operation_as_v4sf (enum rtx_code icode, - rtx result, - rtx op1, - rtx op2, - rtx op3, - enum fp16_operation subtype) -{ - gcc_assert (can_create_pseudo_p ()); - - rtx result_v4sf = gen_reg_rtx (V4SFmode); - rtx ops_orig[3] = { op1, op2, op3 }; - rtx ops_v4sf[3]; - size_t n_opts; - - switch (subtype) - { - case FP16_BINARY: - n_opts = 2; - gcc_assert (op3 == NULL_RTX); - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - gcc_assert (icode == FMA); - n_opts = 3; - break; - - default: - gcc_unreachable (); - } - - for (size_t i = 0; i < n_opts; i++) - { - rtx op = ops_orig[i]; - rtx tmp = ops_v4sf[i] = gen_reg_rtx (V4SFmode); - - gcc_assert (op != NULL_RTX); - - /* Remove truncation/extend added. */ - if (GET_CODE (op) == FLOAT_EXTEND || GET_CODE (op) == FLOAT_TRUNCATE) - op = XEXP (op, 0); - - /* Convert operands to V4SFmode format. We use SPLAT for registers to - get the value into the upper 32-bits. We can use XXSPLTW to splat - words instead of VSPLTIH since the XVCVBF16SPN instruction ignores the - odd half-words, and XXSPLTW can operate on all VSX registers instead - of just the Altivec registers. Using SPLAT instead of a shift also - insure that other bits are not a signalling NaN. If we are using - XXSPLTIW or XXSPLTIB to load the constant the other bits are - duplicated. */ - - if (op == CONST0_RTX (SFmode) || op == CONST0_RTX (BFmode)) - emit_move_insn (tmp, CONST0_RTX (V4SFmode)); - - else if (GET_MODE (op) == BFmode) - { - emit_insn (gen_xxspltw_bf (tmp, force_reg (BFmode, op))); - emit_insn (gen_xvcvbf16spn_bf (tmp, tmp)); - } - - else if (GET_MODE (op) == SFmode) - { - if (GET_CODE (op) == CONST_DOUBLE) - { - rtvec v = rtvec_alloc (4); - - for (size_t i = 0; i < 4; i++) - RTVEC_ELT (v, i) = op; - - emit_insn (gen_rtx_SET (tmp, - gen_rtx_CONST_VECTOR (V4SFmode, v))); - } - - else - emit_insn (gen_vsx_splat_v4sf (tmp, - force_reg (SFmode, op))); - } - - else - gcc_unreachable (); - } - - /* Do the operation in V4SFmode. */ - switch (subtype) - { - case FP16_BINARY: - emit_insn (gen_rtx_SET (result_v4sf, - gen_rtx_fmt_ee (icode, V4SFmode, - ops_v4sf[0], - ops_v4sf[1]))); - break; - - case FP16_FMA: - case FP16_FMS: - case FP16_NFMA: - case FP16_NFMS: - { - rtx op1 = ops_v4sf[0]; - rtx op2 = ops_v4sf[1]; - rtx op3 = ops_v4sf[2]; - - if (subtype == FP16_FMS || subtype == FP16_NFMS) - op3 = gen_rtx_NEG (V4SFmode, op3); - - rtx op_fma = gen_rtx_FMA (V4SFmode, op1, op2, op3); - - if (subtype == FP16_NFMA || subtype == FP16_NFMS) - op_fma = gen_rtx_NEG (V4SFmode, op_fma); - - emit_insn (gen_rtx_SET (result_v4sf, op_fma)); - } - break; - - default: - gcc_unreachable (); - } - - /* Convert V4SF result back to scalar mode. */ - if (GET_MODE (result) == BFmode) - emit_insn (gen_xvcvspbf16_bf (result, result_v4sf)); - - else if (GET_MODE (result) == SFmode) - { - rtx element = GEN_INT (WORDS_BIG_ENDIAN ? 2 : 3); - emit_insn (gen_vsx_extract_v4sf (result, result_v4sf, element)); - } - - else - gcc_unreachable (); -} diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md index cb8de4eb4c2f..55cca8fa7e19 100644 --- a/gcc/config/rs6000/float16.md +++ b/gcc/config/rs6000/float16.md @@ -701,288 +701,6 @@ %q3 %0,%1,%2" [(set_attr "type" "veclogical,logical")]) -;; Optimize __bfloat16 binary operations. Unlike _Float16 where we -;; have instructions to convert between HFmode and SFmode as scalar -;; values, with BFmode, we only have vector conversions. Thus to do: -;; -;; __bfloat16 a, b, c; -;; a = b + c; -;; -;; the GCC compiler would normally generate: -;; -;; lxsihzx 0,4,2 // load __bfloat16 value b -;; lxsihzx 12,5,2 // load __bfloat16 value c -;; xxsldwi 0,0,0,1 // shift b into bits 16..31 -;; xxsldwi 12,12,12,1 // shift c into bits 16..31 -;; xvcvbf16spn 0,0 // vector convert b into V4SFmode -;; xvcvbf16spn 12,12 // vector convert c into V4SFmode -;; xscvspdpn 0,0 // convert b into SFmode scalar -;; xscvspdpn 12,12 // convert c into SFmode scalar -;; fadds 0,0,12 // add b+c -;; xscvdpspn 0,0 // convert b+c into SFmode memory format -;; xvcvspbf16 0,0 // convert b+c into BFmode memory format -;; stxsihx 0,3,2 // store b+c -;; -;; Using the following combiner patterns, the code generated would now -;; be: -;; -;; lxsihzx 12,4,2 // load __bfloat16 value b -;; lxsihzx 0,5,2 // load __bfloat16 value c -;; xxspltw 12,12,1 // shift b into bits 16..31 -;; xxspltw 0,0,1 // shift c into bits 16..31 -;; xvcvbf16spn 12,12 // vector convert b into V4SFmode -;; xvcvbf16spn 0,0 // vector convert c into V4SFmode -;; xvaddsp 0,0,12 // vector b+c in V4SFmode -;; xvcvspbf16 0,0 // convert b+c into BFmode memory format -;; stxsihx 0,3,2 // store b+c -;; -;; We cannot just define insns like 'addbf3' to keep the operation as -;; BFmode because GCC will not generate these patterns unless the user -;; uses -Ofast. Without -Ofast, it will always convert BFmode into -;; SFmode. - -(define_insn_and_split "*bfloat16_binary_op_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (match_operator:SF 1 "fp16_binary_operator" - [(match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")]))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[2], SFmode) - || bfloat16_bf_operand (operands[3], SFmode))" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); - DONE; -}) - -(define_insn_and_split "*bfloat16_binary_op_internal2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (match_operator:SF 1 "fp16_binary_operator" - [(match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")])))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[2], SFmode) - || bfloat16_bf_operand (operands[3], SFmode))" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (GET_CODE (operands[1]), operands[0], operands[2], - operands[3], NULL_RTX, FP16_BINARY); - DONE; -}) - -(define_insn_and_split "*bfloat16_fma_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_fma_internal2" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_fms_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_fms_interna2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_FMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand"))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal2" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (float_truncate:BF - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfma_internal3" - [(set (match_operand:BF 0 "vsx_register_operand" "=wa") - (neg:BF - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMA); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal1" - [(set (match_operand:SF 0 "vsx_register_operand") - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand")))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal2" - [(set (match_operand:BF 0 "vsx_register_operand") - (float_truncate:BF - (neg:SF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - -(define_insn_and_split "*bfloat16_nfms_internal3" - [(set (match_operand:BF 0 "vsx_register_operand") - (neg:BF - (float_truncate:BF - (fma:SF - (match_operand:SF 1 "bfloat16_v4sf_operand") - (match_operand:SF 2 "bfloat16_v4sf_operand") - (neg:SF - (match_operand:SF 3 "bfloat16_v4sf_operand"))))))] - "TARGET_BFLOAT16_HW && can_create_pseudo_p () - && (bfloat16_bf_operand (operands[1], SFmode) - + bfloat16_bf_operand (operands[2], SFmode) - + bfloat16_bf_operand (operands[3], SFmode) >= 2)" - "#" - "&& 1" - [(pc)] -{ - bfloat16_operation_as_v4sf (FMA, operands[0], operands[1], operands[2], - operands[3], FP16_NFMS); - DONE; -}) - ;; Add vectorization support for 16-bit floating point. ;; Binary operators being vectorized. diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 41fcae44c84a..7df6f5bcb00d 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2224,79 +2224,3 @@ return constant_generates_xxspltiw (&vsx_const); }) - -;; Return 1 if this is a 16-bit floating point operand that can be used -;; in an add, subtract, or multiply operation that uses the vector -;; conversion function. -(define_predicate "fp16_reg_or_constant_operand" - (match_code "reg,subreg,const_double") -{ - if (REG_P (op) || SUBREG_P (op)) - return vsx_register_operand (op, mode); - - if (CONST_DOUBLE_P (op)) - return fp16_xxspltiw_constant (op, mode); - - return false; -}) - -;; Match binary operators where we convert a BFmode operand into a -;; SFmode operand so that we can optimize the BFmode operation to do -;; the operation in vector mode rather than convverting the BFmode to a -;; V8BFmode vector, converting that V8BFmode vector to V4SFmode, and -;; then converting the V4SFmode element to SFmode scalar. -(define_predicate "fp16_binary_operator" - (match_code "plus,minus,mult,smax,smin")) - -;; Match bfloat16/float operands that can be optimized to do the -;; operation in V4SFmode. -(define_predicate "bfloat16_v4sf_operand" - (match_code "reg,subreg,const_double,float_extend,float_truncate") -{ - if (mode != BFmode && mode != SFmode) - return false; - - if (REG_P (op) || SUBREG_P (op)) - return register_operand (op, mode); - - if (CONST_DOUBLE_P (op)) - return true; - - if (GET_CODE (op) == FLOAT_EXTEND) - { - rtx op_arg = XEXP (op, 0); - return (mode == SFmode - && GET_MODE (op_arg) == BFmode - && (REG_P (op_arg) || SUBREG_P (op_arg))); - } - - if (GET_CODE (op) == FLOAT_TRUNCATE) - { - rtx op_arg = XEXP (op, 0); - return (mode == BFmode - && GET_MODE (op_arg) == SFmode - && (REG_P (op_arg) || SUBREG_P (op_arg))); - } - - return false; -}) - -;; Match an operand that originally was an BFmode value to prevent -;; operations involing only SFmode values from being converted to -;; BFmode. -(define_predicate "bfloat16_bf_operand" - (match_code "reg,subreg,const_double,float_extend") -{ - if (mode == BFmode || GET_MODE (op) == BFmode) - return true; - - if (mode != SFmode) - return false; - - if (GET_MODE (op) == SFmode - && GET_CODE (op) == FLOAT_EXTEND - && GET_MODE (XEXP (op, 0)) == BFmode) - return true; - - return false; -}) diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 7499b0db4972..e8ad7d637c90 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -271,8 +271,6 @@ enum fp16_operation { extern void fp16_vectorization (enum rtx_code, rtx, rtx, rtx, rtx, enum fp16_operation); -extern void bfloat16_operation_as_v4sf (enum rtx_code, rtx, rtx, rtx, rtx, - enum fp16_operation); #endif /* RTX_CODE */ #ifdef TREE_CODE
