https://gcc.gnu.org/g:78401db6cfe4db4f5f1bf1a0d114102fecb2e8eb
commit 78401db6cfe4db4f5f1bf1a0d114102fecb2e8eb Author: Michael Meissner <[email protected]> Date: Sat Oct 25 02:40:21 2025 -0400 Revert changes Diff: --- gcc/ChangeLog.float | 19 +--------- gcc/config/rs6000/float16.md | 89 ++++++++++++++++++++++---------------------- gcc/config/rs6000/rs6000.opt | 2 +- 3 files changed, 46 insertions(+), 64 deletions(-) diff --git a/gcc/ChangeLog.float b/gcc/ChangeLog.float index 44172798ed86..56ddfdbb61e1 100644 --- a/gcc/ChangeLog.float +++ b/gcc/ChangeLog.float @@ -1,21 +1,4 @@ -==================== Branch work223-float, patch #315 ==================== - -Just use VSLD to convert bfloat16 to SF/DF. - -2025-10-25 Michael Meissner <[email protected]> - -gcc/ - - * config/rs6000/float16.md (UNSPEC_FP16_SHIFT_LEFT_32BIT): Delete. - (UNSPEC_VSLD_BF): New unspec. - (extendbf<mode>2): Rewrite to avoid doing xscvspdpnp. - (extendbf<mode>2_internal): Likewise. - (xscvdpspn_sf): Likewise. - (shift_left_bf): Likewise. - (xscvspdpn_<mode>): Likewise. - ("<fp16_vector8>_shift_left_32bit): Likewise. - (xscvdpspn_sf): Likewise. - * config/rs6000/rs6000.md (-mbfloat16-combine): Default to 0. +==================== Branch work223-float, patch #315 was reverted ==================== ==================== Branch work223-float, patch #314 ==================== diff --git a/gcc/config/rs6000/float16.md b/gcc/config/rs6000/float16.md index 040e1bc89f5d..1365086f5751 100644 --- a/gcc/config/rs6000/float16.md +++ b/gcc/config/rs6000/float16.md @@ -81,7 +81,7 @@ ;; UNSPEC constants (define_c_enum "unspec" - [UNSPEC_VSLD_BF + [UNSPEC_FP16_SHIFT_LEFT_32BIT UNSPEC_CVT_FP16_TO_V4SF UNSPEC_XXSPLTW_FP16 UNSPEC_XVCVSPBF16_BF @@ -298,27 +298,14 @@ ;; Convert BFmode to SFmode/DFmode. ;; 3 instructions are generated: -;; PLXSD -- load up shift amount -;; VSLD -- shift BF left 48 bits +;; VSPLTH -- duplicate BFmode into all elements +;; XVCVBF16SPN -- convert even BFmode elements to SFmode ;; XSCVSPNDP -- convert memory format of SFmode to DFmode. - -(define_expand "extendbf<mode>2" - [(parallel [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") - (float_extend:SFDF - (match_operand:BF 1 "altivec_register_operand" "v"))) - (use (match_dup 2)) - (clobber (match_scratch:DI 3 "=&v"))])] - "TARGET_BFLOAT16_HW" -{ - operands[2] = force_reg (DImode, GEN_INT (48)); -}) - -(define_insn_and_split "*extendbf<mode>2_internal" +(define_insn_and_split "extendbf<mode>2" [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") (float_extend:SFDF - (match_operand:BF 1 "altivec_register_operand" "v"))) - (use (match_operand:DI 2 "altivec_register_operand" "v")) - (clobber (match_scratch:DI 3 "=&v"))] + (match_operand:BF 1 "vsx_register_operand" "v"))) + (clobber (match_scratch:V8BF 2 "=v"))] "TARGET_BFLOAT16_HW" "#" "&& 1" @@ -326,48 +313,60 @@ { rtx op0 = operands[0]; rtx op1 = operands[1]; - rtx op2 = operands[2]; - rtx op3 = operands[2]; + rtx op2_v8bf = operands[2]; + + if (GET_CODE (op2_v8bf) == SCRATCH) + op2_v8bf = gen_reg_rtx (V8BFmode); - if (GET_CODE (op3) == SCRATCH) - op3 = gen_reg_rtx (DImode); + rtx op2_v4sf = gen_lowpart (V4SFmode, op2_v8bf); - /* Shift BFmode into the upper 16 bits. */ - emit_insn (gen_shift_left_bf (op3, op1, op2)); + /* XXSLDWI -- shift BFmode element into the upper 32 bits. */ + emit_insn (gen_v8bf_shift_left_32bit (op2_v8bf, op1)); - /* XXSLDWI -- shift BFmode element into the upper 16 bits. */ - emit_insn (gen_shift_left_bf (op3, op1, op2)); + /* XVCVBF16SPN -- convert even V8BFmode elements to V4SFmode. */ + emit_insn (gen_cvt_fp16_to_v4sf_v8bf (op2_v4sf, op2_v8bf)); - /* XSCVSPDPN -- convert single V4SFmode element to DFmode. */ + /* XSCVSPNDP -- convert single V4SFmode element to DFmode. */ emit_insn (GET_MODE (op0) == SFmode - ? gen_xscvspdpn_sf (op0, op3) - : gen_xscvspdpn_df (op0, op3)); + ? gen_xscvspdpn_sf (op0, op2_v4sf) + : gen_vsx_xscvspdpn (op0, op2_v4sf)); DONE; } [(set_attr "type" "fpsimple") (set_attr "length" "12")]) -;; Shift BFmode left -(define_insn "shift_left_bf" - [(set (match_operand:DI 0 "altivec_register_operand" "=v") - (unspec:DI [(match_operand:BF 1 "altivec_register_operand" "v") - (match_operand:DI 2 "altivec_register_operand" "v")] - UNSPEC_VSLD_BF))] - "TARGET_BFLOAT16" - "vsld %0,%1,%2" - [(set_attr "type" "vecsimple")]) +;; Convert a SFmode scalar represented as DFmode to elements 0 and 1 of +;; V4SFmode. +(define_insn "xscvdpspn_sf" + [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") + (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDP))] + "VECTOR_UNIT_VSX_P (SFmode)" + "xscvdpspn %x0,%x1" + [(set_attr "type" "fp")]) ;; Convert element 0 of a V4SFmode to scalar SFmode (which on the ;; PowerPC uses the DFmode encoding). -(define_insn "xscvspdpn_<mode>" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=wa") - (unspec:SFDF [(match_operand:DI 1 "vsx_register_operand" "wa")] - UNSPEC_VSX_CVSPDPN))] - "TARGET_BFLOAT16" +(define_insn "xscvspdpn_sf" + [(set (match_operand:SF 0 "vsx_register_operand" "=wa") + (unspec:SF [(match_operand:V4SF 1 "vsx_register_operand" "wa")] + UNSPEC_VSX_CVSPDPN))] + "TARGET_XSCVSPDPN" "xscvspdpn %x0,%x1" [(set_attr "type" "fp")]) +;; Vector shift left by 32 bits to get the 16-bit floating point value +;; into the upper 32 bits for the conversion. +(define_insn "<fp16_vector8>_shift_left_32bit" + [(set (match_operand:<FP16_VECTOR8> 0 "vsx_register_operand" "=wa") + (unspec:<FP16_VECTOR8> + [(match_operand:FP16_HW 1 "vsx_register_operand" "wa")] + UNSPEC_FP16_SHIFT_LEFT_32BIT))] + "" + "xxsldwi %x0,%x1,%x1,1" + [(set_attr "type" "vecperm")]) + ;; Convert SFmode/DFmode to BFmode. ;; 2 instructions are generated: ;; XSCVDPSPN -- convert SFmode/DFmode scalar to V4SFmode @@ -399,7 +398,7 @@ } [(set_attr "type" "fpsimple")]) -(define_insn "xscvdpspn_sf" +(define_insn "vsx_xscvdpspn_sf" [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "wa")] UNSPEC_VSX_CVDPSPN))] diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index 7bae64f2405d..053183b004c6 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -655,7 +655,7 @@ Target Mask(BFLOAT16) Var(rs6000_isa_flags) Enable or disable __bfloat16 support. mbfloat16-combine -Target Undocumented Var(TARGET_BFLOAT16_COMBINE) Init(0) Save +Target Undocumented Var(TARGET_BFLOAT16_COMBINE) Init(1) Save Enable or disable __bfloat16 combine optimizations mbfloat16-pack
