On Wed, Mar 5, 2025 at 3:23 PM Haochen Jiang <haochen.ji...@intel.com> wrote: > > Hi all, > > For bf8 -> fp16 convert, when dst is 256 bit, the mask should be > 16 bit since 16*16=256, not the 8 bit in the current intrin. In > 512 bit intrin, the mask width is also halved. This patch will fix > both of them. > > Ok for trunk? Ok. > > Thx, > Haochen > > gcc/ChangeLog: > > * config/i386/avx10_2-512convertintrin.h > (_mm512_mask_cvtbf8_ph): Correct mask width. > (_mm512_maskz_cvtbf8_ph): Ditto. > * config/i386/avx10_2convertintrin.h > (_mm256_mask_cvtbf8_ph): Ditto. > (_mm256_maskz_cvtbf8_ph): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx10_2-512-convert-1.c: Change function call. > * gcc.target/i386/avx10_2-convert-1.c: Ditto. > --- > gcc/config/i386/avx10_2-512convertintrin.h | 4 ++-- > gcc/config/i386/avx10_2convertintrin.h | 4 ++-- > gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c | 4 ++-- > gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c | 4 ++-- > 4 files changed, 8 insertions(+), 8 deletions(-) > > diff --git a/gcc/config/i386/avx10_2-512convertintrin.h > b/gcc/config/i386/avx10_2-512convertintrin.h > index 1079e0a2bda..a44481e0b4e 100644 > --- a/gcc/config/i386/avx10_2-512convertintrin.h > +++ b/gcc/config/i386/avx10_2-512convertintrin.h > @@ -550,7 +550,7 @@ _mm512_cvtbf8_ph (__m256i __A) > > extern __inline __m512h > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm512_mask_cvtbf8_ph (__m512h __S, __mmask16 __U, __m256i __A) > +_mm512_mask_cvtbf8_ph (__m512h __S, __mmask32 __U, __m256i __A) > { > return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_mask_slli_epi16 ( > (__m512i) __S, __U, (__m512i) _mm512_cvtepi8_epi16 (__A), 8)); > @@ -558,7 +558,7 @@ _mm512_mask_cvtbf8_ph (__m512h __S, __mmask16 __U, > __m256i __A) > > extern __inline __m512h > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm512_maskz_cvtbf8_ph (__mmask16 __U, __m256i __A) > +_mm512_maskz_cvtbf8_ph (__mmask32 __U, 
__m256i __A) > { > return (__m512h) _mm512_castsi512_ph ((__m512i) _mm512_slli_epi16 ( > (__m512i) _mm512_maskz_cvtepi8_epi16 (__U, __A), 8)); > diff --git a/gcc/config/i386/avx10_2convertintrin.h > b/gcc/config/i386/avx10_2convertintrin.h > index 3fc51b17435..7c9c238a3b4 100644 > --- a/gcc/config/i386/avx10_2convertintrin.h > +++ b/gcc/config/i386/avx10_2convertintrin.h > @@ -1004,7 +1004,7 @@ _mm256_cvtbf8_ph (__m128i __A) > > extern __inline __m256h > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm256_mask_cvtbf8_ph (__m256h __S, __mmask8 __U, __m128i __A) > +_mm256_mask_cvtbf8_ph (__m256h __S, __mmask16 __U, __m128i __A) > { > return (__m256h) _mm256_castsi256_ph ((__m256i) _mm256_mask_slli_epi16 ( > (__m256i) __S, __U, (__m256i) _mm256_cvtepi8_epi16 (__A), 8)); > @@ -1012,7 +1012,7 @@ _mm256_mask_cvtbf8_ph (__m256h __S, __mmask8 __U, > __m128i __A) > > extern __inline __m256h > __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) > -_mm256_maskz_cvtbf8_ph (__mmask8 __U, __m128i __A) > +_mm256_maskz_cvtbf8_ph (__mmask16 __U, __m128i __A) > { > return (__m256h) _mm256_castsi256_ph ((__m256i) _mm256_slli_epi16 ( > (__m256i) _mm256_maskz_cvtepi8_epi16 (__U, __A), 8)); > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c > b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c > index bda74b5776b..c1e44efdb2f 100644 > --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-convert-1.c > @@ -183,6 +183,6 @@ void extern > avx10_2_512_cvtbf8_fp16_test (void) > { > y = _mm512_cvtbf8_ph (z1); > - y = _mm512_mask_cvtbf8_ph (z, m16, z1); > - y = _mm512_maskz_cvtbf8_ph (m16, z1); > + y = _mm512_mask_cvtbf8_ph (z, m32, z1); > + y = _mm512_maskz_cvtbf8_ph (m32, z1); > } > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c > b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c > index 57b5fce7fb6..729496f7173 100644 > --- 
a/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-convert-1.c > @@ -289,6 +289,6 @@ avx10_2_cvtbf8_fp16_test (void) > y = _mm_maskz_cvtbf8_ph (m8, z3); > > y2 = _mm256_cvtbf8_ph (z3); > - y2 = _mm256_mask_cvtbf8_ph (z2, m8, z3); > - y2 = _mm256_maskz_cvtbf8_ph (m8, z3); > + y2 = _mm256_mask_cvtbf8_ph (z2, m16, z3); > + y2 = _mm256_maskz_cvtbf8_ph (m16, z3); > } > -- > 2.31.1 >
-- BR, Hongtao