Jiong Wang writes:

> On 07/07/16 10:34, James Greenhalgh wrote:
>>
>> To make backporting easier, could you please write a very simple
>> standalone test that exposes this bug, and submit this patch with just
>> that simple test?  I've already OKed the functional part of this patch,
>> and I'm happy to pre-approve a simple testcase.
>>
>> With that committed to trunk, this needs to go to all active release
>> branches please.
>
> Committed the attached patch to trunk as r238166.  The fmax/fmin patterns
> were introduced by [1], which is only available since GCC 6, so I have
> backported the fix to the GCC 6 branch as r238167.
Here is the GCC 5 backport patch.  It is slightly different from the GCC 6
backport patch because the fmax/fmin patterns are not present on the GCC 5
branch.  OK to backport?

gcc/
2016-07-29  Jiong Wang  <jiong.w...@arm.com>

	* config/aarch64/aarch64-simd-builtins.def (smax, smin): Don't
	register float variants.
	(fmax, fmin): New builtins for VDQF modes.
	* config/aarch64/arm_neon.h (vmaxnm_f32): Use
	__builtin_aarch64_fmaxv2sf.
	(vmaxnmq_f32): Likewise.
	(vmaxnmq_f64): Likewise.
	(vminnm_f32): Likewise.
	(vminnmq_f32): Likewise.
	(vminnmq_f64): Likewise.
	* config/aarch64/iterators.md (UNSPEC_FMAXNM, UNSPEC_FMINNM): New.
	(FMAXMIN_UNS): Support UNSPEC_FMAXNM and UNSPEC_FMINNM.
	(maxmin_uns, maxmin_uns_op): Likewise.

gcc/testsuite/
2016-07-29  Jiong Wang  <jiong.w...@arm.com>

	* gcc.target/aarch64/simd/vminmaxnm_1.c: New.

--
Regards,
Jiong
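For reference (not part of the patch), a minimal sketch of what the fix
guarantees; the function names are made up for illustration.  Compiling
something like this at -O2 for aarch64 should now emit fmaxnm/fminnm for the
intrinsics, preserving the IEEE 754 maxNum/minNum behaviour that the new test
checks (a quiet NaN operand loses against a numeric operand):

#include <arm_neon.h>

/* Expected to compile to a single fmaxnm on the 2S arrangement.  */
float32x2_t
use_maxnm (float32x2_t a, float32x2_t b)
{
  return vmaxnm_f32 (a, b);
}

/* Expected to compile to a single fminnm on the 2S arrangement.  */
float32x2_t
use_minnm (float32x2_t a, float32x2_t b)
{
  return vminnm_f32 (a, b);
}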
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index dd2bc47..446d826 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -240,15 +240,16 @@
   BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
   BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
 
-  /* Implemented by <maxmin><mode>3.
-     smax variants map to fmaxnm,
-     smax_nan variants map to fmax.  */
-  BUILTIN_VDQIF (BINOP, smax, 3)
-  BUILTIN_VDQIF (BINOP, smin, 3)
+  /* Implemented by <maxmin><mode>3.  */
+  BUILTIN_VDQ_BHSI (BINOP, smax, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smin, 3)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3)
+  /* Implemented by <maxmin_uns><mode>3.  */
   BUILTIN_VDQF (BINOP, smax_nan, 3)
   BUILTIN_VDQF (BINOP, smin_nan, 3)
+  BUILTIN_VDQF (BINOP, fmax, 3)
+  BUILTIN_VDQF (BINOP, fmin, 3)
 
   /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4c15312..283000e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17733,19 +17733,19 @@ vpminnms_f32 (float32x2_t a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smaxv2sf (__a, __b);
+  return __builtin_aarch64_fmaxv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smaxv4sf (__a, __b);
+  return __builtin_aarch64_fmaxv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smaxv2df (__a, __b);
+  return __builtin_aarch64_fmaxv2df (__a, __b);
 }
 
 /* vmaxv */
@@ -17963,19 +17963,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vminnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_sminv2sf (__a, __b);
+  return __builtin_aarch64_fminv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_sminv4sf (__a, __b);
+  return __builtin_aarch64_fminv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_sminv2df (__a, __b);
+  return __builtin_aarch64_fminv2df (__a, __b);
 }
 
 /* vminv */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2efbfab..c7e1d0c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -186,9 +186,11 @@
     UNSPEC_ASHIFT_UNSIGNED	; Used in aarch64-simd.md.
     UNSPEC_ABS		; Used in aarch64-simd.md.
     UNSPEC_FMAX		; Used in aarch64-simd.md.
+    UNSPEC_FMAXNM	; Used in aarch64-simd.md.
     UNSPEC_FMAXNMV	; Used in aarch64-simd.md.
     UNSPEC_FMAXV	; Used in aarch64-simd.md.
     UNSPEC_FMIN		; Used in aarch64-simd.md.
+    UNSPEC_FMINNM	; Used in aarch64-simd.md.
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
@@ -876,7 +878,8 @@
 (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
			         UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
 
-(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
+				  UNSPEC_FMAXNM UNSPEC_FMINNM])
 
 (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
 
@@ -941,9 +944,11 @@
			     (UNSPEC_SMAXV "smax")
			     (UNSPEC_SMINV "smin")
			     (UNSPEC_FMAX "smax_nan")
+			     (UNSPEC_FMAXNM "fmax")
			     (UNSPEC_FMAXNMV "smax")
			     (UNSPEC_FMAXV "smax_nan")
			     (UNSPEC_FMIN "smin_nan")
+			     (UNSPEC_FMINNM "fmin")
			     (UNSPEC_FMINNMV "smin")
			     (UNSPEC_FMINV "smin_nan")])
 
@@ -952,9 +957,11 @@
			        (UNSPEC_SMAXV "smax")
			        (UNSPEC_SMINV "smin")
			        (UNSPEC_FMAX "fmax")
+			        (UNSPEC_FMAXNM "fmaxnm")
			        (UNSPEC_FMAXNMV "fmaxnm")
			        (UNSPEC_FMAXV "fmax")
			        (UNSPEC_FMIN "fmin")
+			        (UNSPEC_FMINNM "fminnm")
			        (UNSPEC_FMINNMV "fminnm")
			        (UNSPEC_FMINV "fmin")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
new file mode 100644
index 0000000..96608eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
@@ -0,0 +1,82 @@
+/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+#define CHECK(T, N, R, E) \
+  {\
+    int i = 0;\
+    for (; i < N; i++)\
+      if (* (T *) &R[i] != * (T *) &E[i])\
+	abort ();\
+  }
+
+int
+main (int argc, char **argv)
+{
+  float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
+  float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_exp_maxnm = vdup_n_f32 (0.0);
+  float32x2_t f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+  float32x2_t f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
+  f32x2_input2 = vdup_n_f32 (1.0);
+  f32x2_exp_minnm = vdup_n_f32 (1.0);
+  f32x2_exp_maxnm = vdup_n_f32 (1.0);
+  f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+  f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_exp_maxnm = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  float32x4_t f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
+  f32x4_input2 = vdupq_n_f32 (-1.0);
+  f32x4_exp_minnm = vdupq_n_f32 (-1.0);
+  f32x4_exp_maxnm = vdupq_n_f32 (-1.0);
+  f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
+  f64x2_input2 = vdupq_n_f64 (1.0);
+  f64x2_exp_minnm = vdupq_n_f64 (1.0);
+  f64x2_exp_maxnm = vdupq_n_f64 (1.0);
+  f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  return 0;
+}
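Also not part of the patch: a quick scalar cross-check along the same lines,
since C99 fmaxf/fminf follow the same quiet-NaN-loses rule as FMAXNM/FMINNM.
The layout below is only illustrative.

#include <arm_neon.h>
#include <math.h>
#include <stdlib.h>

int
main (void)
{
  float32x2_t a = vdup_n_f32 (__builtin_nanf (""));
  float32x2_t b = vdup_n_f32 (2.0f);
  float32x2_t max = vmaxnm_f32 (a, b);
  float32x2_t min = vminnm_f32 (a, b);

  /* fmaxf (NaN, 2.0f) and fminf (NaN, 2.0f) both return 2.0f, which is
     also what every vmaxnm/vminnm lane should hold here.  */
  if (vget_lane_f32 (max, 0) != fmaxf (__builtin_nanf (""), 2.0f)
      || vget_lane_f32 (min, 0) != fminf (__builtin_nanf (""), 2.0f))
    abort ();

  return 0;
}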