https://gcc.gnu.org/g:df83b062fdd5258040293929891f6028afeeeee6
commit df83b062fdd5258040293929891f6028afeeeee6 Author: Surya Kumari Jangala <jskum...@linux.ibm.com> Date: Fri Aug 22 00:50:38 2025 -0500 MMA+: Add float16 ger builtins Add builtins __builtin_mma_dmxvf16gerx2, __builtin_mma_dmxvf16gerx2nn, __builtin_mma_dmxvf16gerx2np, __builtin_mma_dmxvf16gerx2pn, __builtin_mma_dmxvf16gerx2pp, __builtin_mma_pmdmxvf16gerx2, __builtin_mma_pmdmxvf16gerx2nn, __builtin_mma_pmdmxvf16gerx2np, __builtin_mma_pmdmxvf16gerx2pn, __builtin_mma_pmdmxvf16gerx2pp Diff: --- gcc/config/rs6000/mma.md | 46 +++++-- gcc/config/rs6000/rs6000-builtins.def | 70 ++++++++++ gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c | 168 +++++++++++++++++++++++ 3 files changed, 276 insertions(+), 8 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index c7ec9d0fb56d..78052822e651 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -115,6 +115,16 @@ UNSPEC_DMF_PMDMXVBF16GERX2PN UNSPEC_DMF_PMDMXVBF16GERX2NP UNSPEC_DMF_PMDMXVBF16GERX2NN + UNSPEC_DMF_DMXVF16GERX2 + UNSPEC_DMF_DMXVF16GERX2PP + UNSPEC_DMF_DMXVF16GERX2PN + UNSPEC_DMF_DMXVF16GERX2NP + UNSPEC_DMF_DMXVF16GERX2NN + UNSPEC_DMF_PMDMXVF16GERX2 + UNSPEC_DMF_PMDMXVF16GERX2PP + UNSPEC_DMF_PMDMXVF16GERX2PN + UNSPEC_DMF_PMDMXVF16GERX2NP + UNSPEC_DMF_PMDMXVF16GERX2NN ]) (define_c_enum "unspecv" @@ -159,7 +169,8 @@ ;; DMF instructions with 1 vector pair and 1 vector arguments (define_int_iterator DMF_PV [UNSPEC_DMF_DMXVI8GERX4 - UNSPEC_DMF_DMXVBF16GERX2]) + UNSPEC_DMF_DMXVBF16GERX2 + UNSPEC_DMF_DMXVF16GERX2]) ;; MMA instructions with 1 accumulator, 1 vector pair and 1 vector arguments (define_int_iterator MMA_APV [UNSPEC_MMA_XVF64GERPP @@ -173,7 +184,11 @@ UNSPEC_DMF_DMXVBF16GERX2PP UNSPEC_DMF_DMXVBF16GERX2PN UNSPEC_DMF_DMXVBF16GERX2NP - UNSPEC_DMF_DMXVBF16GERX2NN]) + UNSPEC_DMF_DMXVBF16GERX2NN + UNSPEC_DMF_DMXVF16GERX2PP + UNSPEC_DMF_DMXVF16GERX2PN + UNSPEC_DMF_DMXVF16GERX2NP + UNSPEC_DMF_DMXVF16GERX2NN]) ;; MMA instructions with 2 vector, 2 4-bit and 1 8-bit arguments (define_int_iterator MMA_VVI4I4I8 [UNSPEC_MMA_PMXVI4GER8]) @@ -235,14 +250,19 @@ ;; DMF instructions with 1 vector pair, 1 vector, 1 8-bit, 1 4-bit ;; and 1 2-bit arguments -(define_int_iterator DMF_PVI8I4I2 [UNSPEC_DMF_PMDMXVBF16GERX2]) +(define_int_iterator DMF_PVI8I4I2 [UNSPEC_DMF_PMDMXVBF16GERX2 + UNSPEC_DMF_PMDMXVF16GERX2]) ;; DMF instructions with 1dmr, 1 vector pair, 1 vector, 1 8-bit, ;; 1 4-bit and 1 2-bit arguments (define_int_iterator DMF_DPVI8I4I2 [UNSPEC_DMF_PMDMXVBF16GERX2PP UNSPEC_DMF_PMDMXVBF16GERX2PN UNSPEC_DMF_PMDMXVBF16GERX2NP - UNSPEC_DMF_PMDMXVBF16GERX2NN]) + UNSPEC_DMF_PMDMXVBF16GERX2NN + UNSPEC_DMF_PMDMXVF16GERX2PP + UNSPEC_DMF_PMDMXVF16GERX2PN + UNSPEC_DMF_PMDMXVF16GERX2NP + UNSPEC_DMF_PMDMXVF16GERX2NN]) (define_int_attr acc [(UNSPEC_MMA_XXMFACC "xxmfacc") (UNSPEC_MMA_XXMTACC "xxmtacc")]) @@ -275,7 +295,8 @@ (define_int_attr pv [(UNSPEC_MMA_XVF64GER "xvf64ger") (UNSPEC_DMF_DMXVI8GERX4 "dmxvi8gerx4") - (UNSPEC_DMF_DMXVBF16GERX2 "dmxvbf16gerx2")]) + (UNSPEC_DMF_DMXVBF16GERX2 "dmxvbf16gerx2") + (UNSPEC_DMF_DMXVF16GERX2 "dmxvf16gerx2")]) (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp") (UNSPEC_MMA_XVF64GERPN "xvf64gerpn") @@ -287,7 +308,11 @@ (UNSPEC_DMF_DMXVBF16GERX2PP "dmxvbf16gerx2pp") (UNSPEC_DMF_DMXVBF16GERX2PN "dmxvbf16gerx2pn") (UNSPEC_DMF_DMXVBF16GERX2NP "dmxvbf16gerx2np") - (UNSPEC_DMF_DMXVBF16GERX2NN "dmxvbf16gerx2nn")]) + (UNSPEC_DMF_DMXVBF16GERX2NN "dmxvbf16gerx2nn") + (UNSPEC_DMF_DMXVF16GERX2PP "dmxvf16gerx2pp") + (UNSPEC_DMF_DMXVF16GERX2PN "dmxvf16gerx2pn") + (UNSPEC_DMF_DMXVF16GERX2NP "dmxvf16gerx2np") + (UNSPEC_DMF_DMXVF16GERX2NN "dmxvf16gerx2nn")]) ;; The "pm" prefix is not in these expansions, so that we can generate ;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems @@ -336,12 +361,17 @@ (define_int_attr dpvi8i4i4 [(UNSPEC_DMF_PMDMXVI8GERX4PP "pmdmxvi8gerx4pp") (UNSPEC_DMF_PMDMXVI8GERX4SPP "pmdmxvi8gerx4spp")]) -(define_int_attr pvi8i4i2 [(UNSPEC_DMF_PMDMXVBF16GERX2 "pmdmxvbf16gerx2")]) +(define_int_attr pvi8i4i2 [(UNSPEC_DMF_PMDMXVBF16GERX2 "pmdmxvbf16gerx2") + (UNSPEC_DMF_PMDMXVF16GERX2 "pmdmxvf16gerx2")]) (define_int_attr dpvi8i4i2 [(UNSPEC_DMF_PMDMXVBF16GERX2PP "pmdmxvbf16gerx2pp") (UNSPEC_DMF_PMDMXVBF16GERX2PN "pmdmxvbf16gerx2pn") (UNSPEC_DMF_PMDMXVBF16GERX2NP "pmdmxvbf16gerx2np") - (UNSPEC_DMF_PMDMXVBF16GERX2NN "pmdmxvbf16gerx2nn")]) + (UNSPEC_DMF_PMDMXVBF16GERX2NN "pmdmxvbf16gerx2nn") + (UNSPEC_DMF_PMDMXVF16GERX2PP "pmdmxvf16gerx2pp") + (UNSPEC_DMF_PMDMXVF16GERX2PN "pmdmxvf16gerx2pn") + (UNSPEC_DMF_PMDMXVF16GERX2NP "pmdmxvf16gerx2np") + (UNSPEC_DMF_PMDMXVF16GERX2NN "pmdmxvf16gerx2nn")]) ;; Vector pair support. OOmode can only live in VSRs. (define_expand "movoo" diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 7ba1715b89cd..a107932ca0f7 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -4076,6 +4076,76 @@ const int<4>, const int<2>); PMDMXVBF16GERX2NN_INTERNAL dmf_pmdmxvbf16gerx2nn {dm,pair} + void __builtin_mma_dmxvf16gerx2 (dm1024 *, v256, vuc); + DMXVF16GERX2 nothing {dm,dmint} + + dm1024 __builtin_mma_dmxvf16gerx2_internal (v256, vuc); + DMXVF16GERX2_INTERNAL dmf_dmxvf16gerx2 {dm} + + void __builtin_mma_dmxvf16gerx2pp (dm1024 *, v256, vuc); + DMXVF16GERX2PP nothing {dm,dmint,dmr} + + dm1024 __builtin_mma_dmxvf16gerx2pp_internal (dm1024, v256, vuc); + DMXVF16GERX2PP_INTERNAL dmf_dmxvf16gerx2pp {dm} + + void __builtin_mma_dmxvf16gerx2pn (dm1024 *, v256, vuc); + DMXVF16GERX2PN nothing {dm,dmint,dmr} + + dm1024 __builtin_mma_dmxvf16gerx2pn_internal (dm1024, v256, vuc); + DMXVF16GERX2PN_INTERNAL dmf_dmxvf16gerx2pn {dm} + + void __builtin_mma_dmxvf16gerx2np (dm1024 *, v256, vuc); + DMXVF16GERX2NP nothing {dm,dmint,dmr} + + dm1024 __builtin_mma_dmxvf16gerx2np_internal (dm1024, v256, vuc); + DMXVF16GERX2NP_INTERNAL dmf_dmxvf16gerx2np {dm} + + void __builtin_mma_dmxvf16gerx2nn (dm1024 *, v256, vuc); + DMXVF16GERX2NN nothing {dm,dmint,dmr} + + dm1024 __builtin_mma_dmxvf16gerx2nn_internal (dm1024, v256, vuc); + DMXVF16GERX2NN_INTERNAL dmf_dmxvf16gerx2nn {dm} + + void __builtin_mma_pmdmxvf16gerx2 (dm1024 *, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2 nothing {dm,pair,dmint} + + dm1024 __builtin_mma_pmdmxvf16gerx2_internal (v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2_INTERNAL dmf_pmdmxvf16gerx2 {dm,pair} + + void __builtin_mma_pmdmxvf16gerx2pp (dm1024 *, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2PP nothing {dm,pair,dmint,dmr} + + dm1024 __builtin_mma_pmdmxvf16gerx2pp_internal (dm1024, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2PP_INTERNAL dmf_pmdmxvf16gerx2pp {dm,pair} + + void __builtin_mma_pmdmxvf16gerx2pn (dm1024 *, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2PN nothing {dm,pair,dmint,dmr} + + dm1024 __builtin_mma_pmdmxvf16gerx2pn_internal (dm1024, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2PN_INTERNAL dmf_pmdmxvf16gerx2pn {dm,pair} + + void __builtin_mma_pmdmxvf16gerx2np (dm1024 *, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2NP nothing {dm,pair,dmint,dmr} + + dm1024 __builtin_mma_pmdmxvf16gerx2np_internal (dm1024, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2NP_INTERNAL dmf_pmdmxvf16gerx2np {dm,pair} + + void __builtin_mma_pmdmxvf16gerx2nn (dm1024 *, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2NN nothing {dm,pair,dmint,dmr} + + dm1024 __builtin_mma_pmdmxvf16gerx2nn_internal (dm1024, v256, vuc, const int<8>, \ + const int<4>, const int<2>); + PMDMXVF16GERX2NN_INTERNAL dmf_pmdmxvf16gerx2nn {dm,pair} + [future] const signed int __builtin_saturate_subtract32 (signed int, signed int); SAT_SUBSI sat_subsi3 {} diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c b/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c new file mode 100644 index 000000000000..b733de1f6f9c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dmf-builtin-2.c @@ -0,0 +1,168 @@ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmsetdmrz (&dmr); + __builtin_mma_dmxvf16gerx2 (&dmr, vp, vec); + *dst = dmr; +} + +void +bar (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr = dst[0];; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmxvf16gerx2 (&dmr, vp, vec); + dst[1] = dmr; +} + +/* { dg-final { scan-assembler-times {\mdmxvf16gerx2\M} 2 } } */ + +void +foo_1 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmsetdmrz (&dmr); + __builtin_mma_dmxvf16gerx2nn (&dmr, vp, vec); + *dst = dmr; +} + +void +bar_1 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr = dst[0];; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmxvf16gerx2nn (&dmr, vp, vec); + dst[1] = dmr; +} + +/* { dg-final { scan-assembler-times {\mdmxvf16gerx2nn\M} 2 } } */ + +void +foo_2 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmsetdmrz (&dmr); + __builtin_mma_dmxvf16gerx2np (&dmr, vp, vec); + *dst = dmr; +} + +void +bar_2 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr = dst[0];; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmxvf16gerx2np (&dmr, vp, vec); + dst[1] = dmr; +} + +/* { dg-final { scan-assembler-times {\mdmxvf16gerx2np\M} 2 } } */ + +void +foo_3 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmsetdmrz (&dmr); + __builtin_mma_dmxvf16gerx2pn (&dmr, vp, vec); + *dst = dmr; +} + +void +bar_3 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr = dst[0];; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmxvf16gerx2pn (&dmr, vp, vec); + dst[1] = dmr; +} + +/* { dg-final { scan-assembler-times {\mdmxvf16gerx2pn\M} 2 } } */ + +void +foo_4 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmsetdmrz (&dmr); + __builtin_mma_dmxvf16gerx2pp (&dmr, vp, vec); + *dst = dmr; +} + +void +bar_4 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __dmr1024 dmr = dst[0];; + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_dmxvf16gerx2pp (&dmr, vp, vec); + dst[1] = dmr; +} + +/* { dg-final { scan-assembler-times {\mdmxvf16gerx2pp\M} 2 } } */ + +void +foo_5 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_pmdmxvf16gerx2 (dst, vp, vec, 255, 15, 2); +} + +/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2\M} 1 } } */ + +void +foo_6 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_pmdmxvf16gerx2nn (dst, vp, vec, 255, 15, 2); +} + +/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2nn\M} 1 } } */ + +void +foo_7 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_pmdmxvf16gerx2np (dst, vp, vec, 255, 15, 2); +} + +/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2np\M} 1 } } */ + +void +foo_8 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_pmdmxvf16gerx2pn (dst, vp, vec, 255, 15, 2); +} + +/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2pn\M} 1 } } */ + +void +foo_9 (__dmr1024 *dst, __vector_pair *vpp, vec_t *src) +{ + __vector_pair vp = *vpp; + vec_t vec = *src; + __builtin_mma_pmdmxvf16gerx2pp (dst, vp, vec, 255, 15, 2); +} + +/* { dg-final { scan-assembler-times {\mpmdmxvf16gerx2pp\M} 1 } } */