On Fri, Nov 22, 2024 at 4:08 PM Haochen Jiang <haochen.ji...@intel.com> wrote: > > Hi all, > > Under FP8, we should not use AVX512F_LEN_HALF to get the mask size since > it will get 16 instead of 8 and drop into wrong if condition. Correct > the usage for vcvtneph2[b,h]f8[,s] runtime test. > > Tested under sde. Ok for trunk? Ok. > > Thx, > Haochen > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c: Correct 128bit > mask usage. > * gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c: Ditto. > * gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c: Ditto. > * gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c: Ditto. > --- > .../i386/avx10_2-512-vcvtneph2bf8-2.c | 25 +++++++++++-------- > .../i386/avx10_2-512-vcvtneph2bf8s-2.c | 25 +++++++++++-------- > .../i386/avx10_2-512-vcvtneph2hf8-2.c | 23 ++++++++++------- > .../i386/avx10_2-512-vcvtneph2hf8s-2.c | 23 ++++++++++------- > 4 files changed, 58 insertions(+), 38 deletions(-) > > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c > b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c > index d5ba911334c..96ca7e80c4d 100644 > --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8-2.c > @@ -11,8 +11,8 @@ > #include "avx10-helper.h" > #include "fp8-helper.h" > > -#define SIZE_SRC (AVX512F_LEN / 16) > -#define SIZE (AVX512F_LEN_HALF / 8) > +#define SIZE (AVX512F_LEN / 16) > +#define SIZE_DST (AVX512F_LEN_HALF / 8) > #include "avx512f-mask-type.h" > > void > @@ -23,14 +23,14 @@ CALC (unsigned char *r, _Float16 *s) > hf8_bf8 = 1; > saturate = 0; > > - for (i = 0; i < SIZE; i++) > + for (i = 0; i < SIZE_DST; i++) > { > r[i] = 0; > - if (i < SIZE_SRC) > + if (i < SIZE) > { > Float16Union usrc = {.f16 = s[i]}; > r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); > } > } > } > > @@ -41,17 +41,22 @@ TEST (void) > UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; > UNION_TYPE (AVX512F_LEN, h) src; > MASK_TYPE mask = MASK_VALUE; > - unsigned char res_ref[SIZE]; > + unsigned char res_ref[SIZE_DST]; > > sign = 1; > - for (i = 0; i < SIZE_SRC; i++) > + for (i = 0; i < SIZE; i++) > { > src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); > sign = -sign; > } > > +#if AVX512F_LEN > 128 > + for (i = 0; i < SIZE_DST; i++) > + res2.a[i] = DEFAULT_VALUE; > +#else > for (i = 0; i < SIZE; i++) > res2.a[i] = DEFAULT_VALUE; > +#endif > > CALC(res_ref, src.a); > > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c > b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c > index 49e170aa428..c458f1ebb77 100644 > --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2bf8s-2.c > @@ -11,8 +11,8 @@ > #include "avx10-helper.h" > #include "fp8-helper.h" > > -#define SIZE_SRC (AVX512F_LEN / 16) > -#define SIZE (AVX512F_LEN_HALF / 8) > +#define SIZE (AVX512F_LEN / 16) > +#define SIZE_DST (AVX512F_LEN_HALF / 8) > #include "avx512f-mask-type.h" > > void > @@ -23,14 +23,14 @@ CALC (unsigned char *r, _Float16 *s) > hf8_bf8 = 1; > saturate = 1; > > - for (i = 0; i < SIZE; i++) > + for (i = 0; i < SIZE_DST; i++) > { > r[i] = 0; > - if (i < SIZE_SRC) > + if (i < SIZE) > { > Float16Union usrc = {.f16 = s[i]}; > r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); > } > } > } > > @@ -41,17 +41,22 @@ TEST (void) > UNION_TYPE (AVX512F_LEN_HALF, i_b) res1, res2, res3; > UNION_TYPE (AVX512F_LEN, h) src; > MASK_TYPE mask = MASK_VALUE; > - unsigned char res_ref[SIZE]; > + unsigned char res_ref[SIZE_DST]; > > sign = 1; > - for (i = 0; i < SIZE_SRC; i++) > + for (i = 0; i < SIZE; i++) > { > src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); > sign = -sign; > } > > +#if AVX512F_LEN > 128 > + for (i = 0; i < SIZE_DST; i++) > + res2.a[i] = DEFAULT_VALUE; > +#else > for (i = 0; i < SIZE; i++) > res2.a[i] = DEFAULT_VALUE; > +#endif > > CALC(res_ref, src.a); > > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c > b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c > index f481b72cc71..cb9cdbb89c1 100644 > --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8-2.c > @@ -11,8 +11,8 @@ > #include "avx10-helper.h" > #include "fp8-helper.h" > > -#define SIZE_SRC (AVX512F_LEN / 16) > -#define SIZE (AVX512F_LEN_HALF / 8) > +#define SIZE (AVX512F_LEN / 16) > +#define SIZE_DST (AVX512F_LEN_HALF / 8) > #include "avx512f-mask-type.h" > > void > @@ -23,14 +23,14 @@ CALC (unsigned char *r, _Float16 *s) > hf8_bf8 = 0; > saturate = 0; > > - for (i = 0; i < SIZE; i++) > + for (i = 0; i < SIZE_DST; i++) > { > r[i] = 0; > - if (i < SIZE_SRC) > + if (i < SIZE) > { > Float16Union usrc = {.f16 = s[i]}; > r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); > } > } > } > > @@ -44,14 +44,19 @@ TEST (void) > unsigned char res_ref[SIZE]; > > sign = 1; > - for (i = 0; i < SIZE_SRC; i++) > + for (i = 0; i < SIZE; i++) > { > src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); > sign = -sign; > } > > +#if AVX512F_LEN > 128 > + for (i = 0; i < SIZE_DST; i++) > + res2.a[i] = DEFAULT_VALUE; > +#else > for (i = 0; i < SIZE; i++) > res2.a[i] = DEFAULT_VALUE; > +#endif > > CALC(res_ref, src.a); > > diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c > b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c > index 4782b038f3b..4827af4df53 100644 > --- a/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx10_2-512-vcvtneph2hf8s-2.c > @@ -11,8 +11,8 @@ > #include "avx10-helper.h" > #include "fp8-helper.h" > > -#define SIZE_SRC (AVX512F_LEN / 16) > -#define SIZE (AVX512F_LEN_HALF / 8) > +#define SIZE (AVX512F_LEN / 16) > +#define SIZE_DST (AVX512F_LEN_HALF / 8) > #include "avx512f-mask-type.h" > > void > @@ -23,14 +23,14 @@ CALC (unsigned char *r, _Float16 *s) > hf8_bf8 = 0; > saturate = 1; > > - for (i = 0; i < SIZE; i++) > + for (i = 0; i < SIZE_DST; i++) > { > r[i] = 0; > - if (i < SIZE_SRC) > + if (i < SIZE) > { > Float16Union usrc = {.f16 = s[i]}; > r[i] = convert_fp16_to_fp8(usrc.f16, 0, hf8_bf8, saturate); > } > } > } > > @@ -44,14 +44,19 @@ TEST (void) > unsigned char res_ref[SIZE]; > > sign = 1; > - for (i = 0; i < SIZE_SRC; i++) > + for (i = 0; i < SIZE; i++) > { > src.a[i] = (_Float16)(sign * (2.5 * (1 << (i % 3)))); > sign = -sign; > } > > +#if AVX512F_LEN > 128 > + for (i = 0; i < SIZE_DST; i++) > + res2.a[i] = DEFAULT_VALUE; > +#else > for (i = 0; i < SIZE; i++) > res2.a[i] = DEFAULT_VALUE; > +#endif > > CALC(res_ref, src.a); > > -- > 2.31.1 >
-- BR, Hongtao