On Fri, Nov 5, 2021 at 3:20 PM Kong, Lingling via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > > This patch is to support fold _mm512_fmadd_pch (a, _mm512_set1_pch(*(b)), c) > to 1 instruction vfmaddcph (%rsp){1to16}, %zmm1, %zmm2. > OK for master? > LGTM. > gcc/ChangeLog: > > * config/i386/sse.md (fma_<complexpairopname>_<mode>_pair): > Add new define_insn. > (fma_<mode>_fmaddc_bcst): Add new define_insn_and_split. > (fma_<mode>_fcmaddc_bcst): Likewise > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16vl-complex-broadcast-1.c: New test. > --- > gcc/config/i386/sse.md | 62 +++++++++++++++++++ > .../i386/avx512fp16vl-complex-broadcast-1.c | 25 ++++++++ > 2 files changed, 87 insertions(+) > create mode 100644 > gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index > 0a7f5b178f9..eba8e77515f 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -193,7 +193,9 @@ > > ;; For AVX512FP16 suppport > UNSPEC_COMPLEX_FMA > + UNSPEC_COMPLEX_FMA_PAIR > UNSPEC_COMPLEX_FCMA > + UNSPEC_COMPLEX_FCMA_PAIR > UNSPEC_COMPLEX_FMUL > UNSPEC_COMPLEX_FCMUL > UNSPEC_COMPLEX_MASK > @@ -5913,6 +5915,9 @@ > (define_int_iterator UNSPEC_COMPLEX_F_C_MA > [UNSPEC_COMPLEX_FMA UNSPEC_COMPLEX_FCMA]) > > +(define_int_iterator UNSPEC_COMPLEX_F_C_MA_PAIR > + [UNSPEC_COMPLEX_FMA_PAIR UNSPEC_COMPLEX_FCMA_PAIR]) > + > (define_int_iterator UNSPEC_COMPLEX_F_C_MUL > [UNSPEC_COMPLEX_FMUL UNSPEC_COMPLEX_FCMUL]) > > @@ -5922,6 +5927,10 @@ > (UNSPEC_COMPLEX_FMUL "fmulc") > (UNSPEC_COMPLEX_FCMUL "fcmulc")]) > > +(define_int_attr complexpairopname > + [(UNSPEC_COMPLEX_FMA_PAIR "fmaddc") > + (UNSPEC_COMPLEX_FCMA_PAIR "fcmaddc")]) > + > (define_mode_attr complexmove > [(V32HF "avx512f_loadv16sf") > (V16HF "avx512vl_loadv8sf") > @@ -6067,6 +6076,59 @@ > [(match_dup 1) (match_dup 2) (match_dup 4)] > UNSPEC_COMPLEX_F_C_MA))]) > > +(define_insn "fma_<complexpairopname>_<mode>_pair" > + [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=&v") > + (unspec:VF1_AVX512VL > + [(match_operand:VF1_AVX512VL 1 "vector_operand" "%v") > + (match_operand:VF1_AVX512VL 2 "bcst_vector_operand" "vmBr") > + (match_operand:VF1_AVX512VL 3 "vector_operand" "0")] > + UNSPEC_COMPLEX_F_C_MA_PAIR))] > + "TARGET_AVX512FP16" > + "v<complexpairopname>ph\t{%2, %1, %0|%0, %1, %2}" > + [(set_attr "type" "ssemuladd")]) > + > +(define_insn_and_split "fma_<mode>_fmaddc_bcst" > + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") > + (unspec:VF_AVX512FP16VL > + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") > + (subreg:VF_AVX512FP16VL > + (match_operand:<ssePSmode> 2 "bcst_vector_operand") 0) > + (match_operand:VF_AVX512FP16VL 3 "vector_operand")] > + UNSPEC_COMPLEX_FMA))] > + "TARGET_AVX512FP16" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:<ssePSmode> > + [(match_dup 1) (match_dup 2) (match_dup 3)] > + UNSPEC_COMPLEX_FMA_PAIR))] > + { > + operands[0] = lowpart_subreg (<ssePSmode>mode, operands[0], <MODE>mode); > + operands[1] = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode); > + operands[3] = lowpart_subreg (<ssePSmode>mode, operands[3], > +<MODE>mode); > + }) > + > +(define_insn_and_split "fma_<mode>_fcmaddc_bcst" > + [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") > + (unspec:VF_AVX512FP16VL > + [(match_operand:VF_AVX512FP16VL 1 "vector_operand") > + (subreg:VF_AVX512FP16VL > + (match_operand:<ssePSmode> 2 "bcst_vector_operand") 0) > + (match_operand:VF_AVX512FP16VL 3 "vector_operand")] > + UNSPEC_COMPLEX_FCMA))] > + "TARGET_AVX512FP16" > + "#" > + "&& 1" > + [(set (match_dup 0) > + (unspec:<ssePSmode> > + [(match_dup 1) (match_dup 2) (match_dup 3)] > + UNSPEC_COMPLEX_FCMA_PAIR))] > + { > + operands[0] = lowpart_subreg (<ssePSmode>mode, operands[0], <MODE>mode); > + operands[1] = lowpart_subreg (<ssePSmode>mode, operands[1], <MODE>mode); > + operands[3] = lowpart_subreg (<ssePSmode>mode, operands[3], > +<MODE>mode); > + }) > + > (define_insn "<avx512>_<complexopname>_<mode>_mask<round_name>" > [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") > (vec_merge:VF_AVX512FP16VL > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c > b/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c > new file mode 100644 > index 00000000000..3c8e84230f3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16vl-complex-broadcast-1.c > @@ -0,0 +1,25 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */ > +/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to4\\\}" 2 } } */ > +/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to8\\\}" 2 } } */ > +/* { dg-final { scan-assembler-times "\[^\n\]*\\\{1to16\\\}" 2 } } */ > + > +#include <immintrin.h> > + > +volatile __m512h res0, a0, c0; > +volatile __m256h res1, a1, c1; > +volatile __m128h res2, a2, c2; > +volatile _Float16 *b; > + > +void extern > +avx_test(void) > +{ > + res0 = _mm512_fmadd_pch (a0, _mm512_set1_pch(*(b + 2 * 6)), c0); > + res0 = _mm512_fcmadd_pch (a0, _mm512_set1_pch(*(b + 2 * 6)), c0); > + > + res1 = _mm256_fmadd_pch (a1, _mm256_set1_pch(*(b + 2 * 6)), c1); > + res1 = _mm256_fcmadd_pch (a1, _mm256_set1_pch(*(b + 2 * 6)), c1); > + > + res2 = _mm_fmadd_pch (a2, _mm_set1_pch(*(b + 2 * 6)), c2); > + res2 = _mm_fcmadd_pch (a2, _mm_set1_pch(*(b + 2 * 6)), c2); } > -- > 2.18.1 >
-- BR, Hongtao