On Tue, Sep 3, 2024 at 2:24 PM Haochen Jiang <haochen.ji...@intel.com> wrote: > > Hi all, > > The intrin for the non-optimized case has a typo in the mask type, which causes > the high bits of __mmask32 to be unexpectedly zeroed. > > The test does not fail under O0 with current 1b since the testcase is > wrong. We need to include avx512f-mask-type.h after SIZE is defined, or > it will always be __mmask8. That problem also happened in AVX10.2 testcases. > I will write a separate patch to fix that. > > Bootstrapped and tested on x86-64-pc-linux-gnu. Ok for trunk? Ok, please backport. > > Thx, > Haochen > > gcc/ChangeLog: > > * config/i386/avx512fp16intrin.h > (_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32. > (_mm512_fpclass_ph_mask): Ditto. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test. > --- > gcc/config/i386/avx512fp16intrin.h | 4 +- > .../i386/avx512fp16-vfpclassph-1c.c | 77 +++++++++++++++++++ > 2 files changed, 79 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c > > diff --git a/gcc/config/i386/avx512fp16intrin.h > b/gcc/config/i386/avx512fp16intrin.h > index 1869a920dd3..c3096b74ad2 100644 > --- a/gcc/config/i386/avx512fp16intrin.h > +++ b/gcc/config/i386/avx512fp16intrin.h > @@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm) > #else > #define _mm512_mask_fpclass_ph_mask(u, x, c) \ > ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ > - (int) (c),(__mmask8)(u))) > + (int) (c),(__mmask32)(u))) > > #define _mm512_fpclass_ph_mask(x, c) \ > ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ > - (int) (c),(__mmask8)-1)) > + (int) (c),(__mmask32)-1)) > #endif /* __OPIMTIZE__ */ > > /* Intrinsics vgetexpph. 
*/ > diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c > b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c > new file mode 100644 > index 00000000000..4739f1228e3 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c > @@ -0,0 +1,77 @@ > +/* { dg-do run } */ > +/* { dg-options "-O0 -mavx512fp16" } */ > +/* { dg-require-effective-target avx512fp16 } */ > + > +#define AVX512FP16 > +#include "avx512f-helper.h" > + > +#include <math.h> > +#include <limits.h> > +#include <float.h> > +#define SIZE (AVX512F_LEN / 16) > +#include "avx512f-mask-type.h" > + > +#ifndef __FPCLASSPH__ > +#define __FPCLASSPH__ > +int check_fp_class_hp (_Float16 src, int imm) > +{ > + int qNaN_res = isnan (src); > + int sNaN_res = isnan (src); > + int Pzero_res = (src == 0.0); > + int Nzero_res = (src == -0.0); > + int PInf_res = (isinf (src) == 1); > + int NInf_res = (isinf (src) == -1); > + int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); > + int FinNeg_res = __builtin_finite (src) && (src < 0); > + > + int result = (((imm & 1) && qNaN_res) > + || (((imm >> 1) & 1) && Pzero_res) > + || (((imm >> 2) & 1) && Nzero_res) > + || (((imm >> 3) & 1) && PInf_res) > + || (((imm >> 4) & 1) && NInf_res) > + || (((imm >> 5) & 1) && Denorm_res) > + || (((imm >> 6) & 1) && FinNeg_res) > + || (((imm >> 7) & 1) && sNaN_res)); > + return result; > +} > +#endif > + > +MASK_TYPE > +CALC (_Float16 *s1, int imm) > +{ > + int i; > + MASK_TYPE res = 0; > + > + for (i = 0; i < SIZE; i++) > + if (check_fp_class_hp(s1[i], imm)) > + res = res | (1 << i); > + > + return res; > +} > + > +void > +TEST (void) > +{ > + int i; > + UNION_TYPE (AVX512F_LEN, h) src; > + MASK_TYPE res1, res2, res_ref = 0; > + MASK_TYPE mask = MASK_VALUE; > + > + src.a[SIZE - 1] = NAN; > + src.a[SIZE - 2] = 1.0 / 0.0; > + for (i = 0; i < SIZE - 2; i++) > + { > + src.a[i] = -24.43 + 0.6 * i; > + } > + > + res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); > + res2 = INTRINSIC 
(_mask_fpclass_ph_mask) (mask, src.x, 0xFF); > + > + res_ref = CALC (src.a, 0xFF); > + > + if (res_ref != res1) > + abort (); > + > + if ((mask & res_ref) != res2) > + abort (); > +} > -- > 2.31.1 >
-- BR, Hongtao