Re: [PATCH] i386: Fix vfpclassph non-optimized intrin

Hongtao Liu Tue, 03 Sep 2024 00:04:59 -0700

On Tue, Sep 3, 2024 at 2:24 PM Haochen Jiang <haochen.ji...@intel.com> wrote:
>
> Hi all,
>
> The non-optimized version of the intrinsic has a typo in the mask type,
> which causes the high bits of the __mmask32 to be unexpectedly zeroed.
>
> The test does not fail at -O0 with the current 1b testcase because the
> testcase itself is wrong: avx512f-mask-type.h must be included after SIZE
> is defined, or MASK_TYPE will always be __mmask8. The same problem exists in
> the AVX10.2 testcases; I will write a separate patch to fix that.
>
> Bootstrapped and tested on x86-64-pc-linux-gnu. Ok for trunk?
Ok, please backport.
>
> Thx,
> Haochen
>
> gcc/ChangeLog:
>
>         * config/i386/avx512fp16intrin.h
>         (_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32.
>         (_mm512_fpclass_ph_mask): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test.
> ---
>  gcc/config/i386/avx512fp16intrin.h            |  4 +-
>  .../i386/avx512fp16-vfpclassph-1c.c           | 77 +++++++++++++++++++
>  2 files changed, 79 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
>
> diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
> index 1869a920dd3..c3096b74ad2 100644
> --- a/gcc/config/i386/avx512fp16intrin.h
> +++ b/gcc/config/i386/avx512fp16intrin.h
> @@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
>  #else
>  #define _mm512_mask_fpclass_ph_mask(u, x, c)                           \
>    ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
> -                                                (int) (c),(__mmask8)(u)))
> +                                                (int) (c),(__mmask32)(u)))
>
>  #define _mm512_fpclass_ph_mask(x, c)                                    \
>    ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
> -                                                (int) (c),(__mmask8)-1))
> +                                                (int) (c),(__mmask32)-1))
>  #endif /* __OPIMTIZE__ */
>
>  /* Intrinsics vgetexpph.  */
> diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
> new file mode 100644
> index 00000000000..4739f1228e3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
> @@ -0,0 +1,77 @@
> +/* { dg-do run } */
> +/* { dg-options "-O0 -mavx512fp16" } */
> +/* { dg-require-effective-target avx512fp16 } */
> +
> +#define AVX512FP16
> +#include "avx512f-helper.h"
> +
> +#include <math.h>
> +#include <limits.h>
> +#include <float.h>
> +#define SIZE (AVX512F_LEN / 16)
> +#include "avx512f-mask-type.h"
> +
> +#ifndef __FPCLASSPH__
> +#define __FPCLASSPH__
> +int check_fp_class_hp (_Float16 src, int imm)
> +{
> +  int qNaN_res = isnan (src);
> +  int sNaN_res = isnan (src);
> +  int Pzero_res = (src == 0.0);
> +  int Nzero_res = (src == -0.0);
> +  int PInf_res = (isinf (src) == 1);
> +  int NInf_res = (isinf (src) == -1);
> +  int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
> +  int FinNeg_res = __builtin_finite (src) && (src < 0);
> +
> +  int result = (((imm & 1) && qNaN_res)
> +               || (((imm >> 1) & 1) && Pzero_res)
> +               || (((imm >> 2) & 1) && Nzero_res)
> +               || (((imm >> 3) & 1) && PInf_res)
> +               || (((imm >> 4) & 1) && NInf_res)
> +               || (((imm >> 5) & 1) && Denorm_res)
> +               || (((imm >> 6) & 1) && FinNeg_res)
> +               || (((imm >> 7) & 1) && sNaN_res));
> +  return result;
> +}
> +#endif
> +
> +MASK_TYPE
> +CALC (_Float16 *s1, int imm)
> +{
> +  int i;
> +  MASK_TYPE res = 0;
> +
> +  for (i = 0; i < SIZE; i++)
> +    if (check_fp_class_hp(s1[i], imm))
> +      res = res | (1 << i);
> +
> +  return res;
> +}
> +
> +void
> +TEST (void)
> +{
> +  int i;
> +  UNION_TYPE (AVX512F_LEN, h) src;
> +  MASK_TYPE res1, res2, res_ref = 0;
> +  MASK_TYPE mask = MASK_VALUE;
> +
> +  src.a[SIZE - 1] = NAN;
> +  src.a[SIZE - 2] = 1.0 / 0.0;
> +  for (i = 0; i < SIZE - 2; i++)
> +    {
> +      src.a[i] = -24.43 + 0.6 * i;
> +    }
> +
> +  res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF);
> +  res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF);
> +
> +  res_ref = CALC (src.a, 0xFF);
> +
> +  if (res_ref != res1)
> +    abort ();
> +
> +  if ((mask & res_ref) != res2)
> +    abort ();
> +}
> --
> 2.31.1
>



-- 
BR,
Hongtao

Re: [PATCH] i386: Fix vfpclassph non-optimized intrin

Reply via email to