Hi!

The following patch fixes a bunch of pastos (copy-and-paste typos) in the
-O0 macros of the PR89784 implementation and adds testcase coverage that
FAILs without the header change and PASSes with it (the tests were
previously run only at -O2, where they exercise the inline functions and
not the macros).
Because at -O0 the C expression x * y + z isn't contracted into an FMA,
two of the tests show a small precision difference with the originally
chosen constants, so I've changed them to constants for which a precision
difference isn't really possible.
I think the constants weren't chosen very well: either we just want
some basic testing, for which even the adjusted ones are ok, or we want
to specifically check for FMA, in which case we should check some FMA
corner cases where the result without FMA is completely different from the
one with FMA.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

And sorry for screwing it up.

2019-04-17  Hongtao Liu  <hongtao....@intel.com>

        PR target/90125
        * config/i386/avx512fintrin.h (_mm_maskz_fmadd_round_sd,
        _mm_maskz_fmadd_round_ss, _mm_maskz_fmsub_round_sd,
        _mm_maskz_fmsub_round_ss, _mm_maskz_fnmadd_round_sd,
        _mm_maskz_fnmadd_round_ss, _mm_maskz_fnmsub_round_sd,
        _mm_maskz_fnmsub_round_ss): Use _maskz builtin instead of _mask3.

2019-04-17  Jakub Jelinek  <ja...@redhat.com>

        PR target/90125
        * gcc.target/i386/avx512f-vfmsubXXXss-2.c (avx512f_test): Adjust
        constants to ensure precise result even when not using fma.
        * gcc.target/i386/avx512f-vfnmaddXXXss-2.c (avx512f_test): Likewise.
        * gcc.target/i386/avx512f-vfmaddXXXsd-3.c: New test.
        * gcc.target/i386/avx512f-vfmaddXXXss-3.c: New test.
        * gcc.target/i386/avx512f-vfmsubXXXsd-3.c: New test.
        * gcc.target/i386/avx512f-vfmsubXXXss-3.c: New test.
        * gcc.target/i386/avx512f-vfnmaddXXXsd-3.c: New test.
        * gcc.target/i386/avx512f-vfnmaddXXXss-3.c: New test.
        * gcc.target/i386/avx512f-vfnmsubXXXsd-3.c: New test.
        * gcc.target/i386/avx512f-vfnmsubXXXss-3.c: New test.

--- gcc/config/i386/avx512fintrin.h.jj  2019-03-22 11:07:00.699948784 +0100
+++ gcc/config/i386/avx512fintrin.h     2019-04-17 11:24:53.683695473 +0200
@@ -12104,10 +12104,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
     (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
 
 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R)            \
-    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
+    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
 
 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R)            \
-    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
+    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
 
 #define _mm_mask_fmsub_round_sd(A, U, B, C, R)            \
     (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
@@ -12122,10 +12122,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
     (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
 
 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R)            \
-    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, -(C), U, R)
+    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
 
 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R)            \
-    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, -(C), U, R)
+    (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
 
 #define _mm_mask_fnmadd_round_sd(A, U, B, C, R)            \
     (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
@@ -12140,10 +12140,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
     (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
 
 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R)            \
-    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
+    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
 
 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R)            \
-    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
+    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
 
 #define _mm_mask_fnmsub_round_sd(A, U, B, C, R)            \
     (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
@@ -12158,10 +12158,10 @@ _mm_maskz_fnmsub_round_ss (__mmask8 __U,
     (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
 
 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R)            \
-    (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), -(C), U, R)
+    (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
 
 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R)            \
-    (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), -(C), U, R)
+    (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
 #endif
 
 #ifdef __OPTIMIZE__
--- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c.jj    2019-03-22 11:07:00.701948752 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-2.c       2019-04-17 11:35:57.314481901 +0200
@@ -41,8 +41,8 @@ avx512f_test (void)
   for (i = 0; i < SIZE; i++)
     {
       src1.a[i] = DEFAULT_VALUE;
-      src2.a[i] = 56.78 * (i + 1) * sign;
-      src3.a[i] = 90.12 * (i + 2) * sign;
+      src2.a[i] = 56.75 * (i + 1) * sign;
+      src3.a[i] = 90.25 * (i + 2) * sign;
       sign = sign * -1;
     }
   for (i = 0; i < SIZE; i++)
--- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c.jj   2019-03-22 11:07:00.701948752 +0100
+++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-2.c      2019-04-17 11:36:40.372755625 +0200
@@ -41,8 +41,8 @@ avx512f_test (void)
   for (i = 0; i < SIZE; i++)
     {
       src1.a[i] = DEFAULT_VALUE;
-      src2.a[i] = 56.78 * (i + 1) * sign;
-      src3.a[i] = 90.12 * (i + 2) * sign;
+      src2.a[i] = 56.75 * (i + 1) * sign;
+      src3.a[i] = 90.25 * (i + 2) * sign;
       sign = sign * -1;
     }
   for (i = 0; i < SIZE; i++)
--- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c.jj    2019-04-17 11:11:13.483563310 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXsd-3.c       2019-04-17 11:11:30.553279159 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfmaddXXXsd-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c.jj    2019-04-17 11:11:41.806091847 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfmaddXXXss-3.c       2019-04-17 11:11:55.740859882 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfmaddXXXss-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c.jj    2019-04-17 11:13:02.975730705 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXsd-3.c       2019-04-17 11:13:24.260370262 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfmsubXXXsd-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c.jj    2019-04-17 11:13:02.977730671 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfmsubXXXss-3.c       2019-04-17 11:13:42.046069079 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfmsubXXXss-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c.jj   2019-04-17 11:13:02.979730637 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXsd-3.c      2019-04-17 11:13:54.807852972 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfnmaddXXXsd-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c.jj   2019-04-17 11:13:02.981730603 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfnmaddXXXss-3.c      2019-04-17 11:14:05.945664352 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfnmaddXXXss-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c.jj   2019-04-17 11:13:02.983730569 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXsd-3.c      2019-04-17 11:14:19.444435772 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfnmsubXXXsd-2.c"
--- gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c.jj   2019-04-17 11:13:02.985730535 +0200
+++ gcc/testsuite/gcc.target/i386/avx512f-vfnmsubXXXss-3.c      2019-04-17 11:15:08.390606912 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-vfnmsubXXXss-2.c"

        Jakub

Reply via email to