Jiong Wang writes:
> On 07/07/16 10:34, James Greenhalgh wrote:
>>
>> To make backporting easier, could you please write a very simple
>> standalone test that exposes this bug, and submit this patch with just
>> that simple test? I've already OKed the functional part of this patch, and
>> I'm happy to pre-approve a simple testcase.
>>
>> With that committed to trunk, this needs to go to all active release
>> branches please.
>
> Committed attached patch to trunk as r238166, fmax/fmin pattern were
> introduced by [1] which is available since gcc 6, so backported to
> gcc 6 branch as r238167.
Here is the gcc 5 backport patch, it's slightly different from gcc 6
backport patch as fmin/fmax are not introduced yet.
OK to backport?
gcc/
2016-07-29 Jiong Wang <[email protected]>
* config/aarch64/aarch64-simd-builtins.def (smax, smin): Don't
register float variants.
(fmax, fmin): New builtins for VDQF modes.
* config/aarch64/arm_neon.h (vmaxnm_f32): Use
__builtin_aarch64_fmaxv2sf.
(vmaxnmq_f32): Likewise.
(vmaxnmq_f64): Likewise.
(vminnm_f32): Likewise.
(vminnmq_f32): Likewise.
(vminnmq_f64): Likewise.
* config/aarch64/iterators.md (UNSPEC_FMAXNM, UNSPEC_FMINNM): New.
(FMAXMIN_UNS): Support UNSPEC_FMAXNM and UNSPEC_FMINNM.
(maxmin_uns, maxmin_uns_op): Likewise.
gcc/testsuite/
2016-07-29 Jiong Wang <[email protected]>
* gcc.target/aarch64/simd/vminmaxnm_1.c: New.
--
Regards,
Jiong
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index dd2bc47..446d826 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -240,15 +240,16 @@
BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
- /* Implemented by <maxmin><mode>3.
- smax variants map to fmaxnm,
- smax_nan variants map to fmax. */
- BUILTIN_VDQIF (BINOP, smax, 3)
- BUILTIN_VDQIF (BINOP, smin, 3)
+ /* Implemented by <maxmin><mode>3. */
+ BUILTIN_VDQ_BHSI (BINOP, smax, 3)
+ BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
+ /* Implemented by <maxmin_uns><mode>3. */
BUILTIN_VDQF (BINOP, smax_nan, 3)
BUILTIN_VDQF (BINOP, smin_nan, 3)
+ BUILTIN_VDQF (BINOP, fmax, 3)
+ BUILTIN_VDQF (BINOP, fmin, 3)
/* Implemented by aarch64_<maxmin_uns>p<mode>. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4c15312..283000e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17733,19 +17733,19 @@ vpminnms_f32 (float32x2_t a)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
{
- return __builtin_aarch64_smaxv2sf (__a, __b);
+ return __builtin_aarch64_fmaxv2sf (__a, __b);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_aarch64_smaxv4sf (__a, __b);
+ return __builtin_aarch64_fmaxv4sf (__a, __b);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
{
- return __builtin_aarch64_smaxv2df (__a, __b);
+ return __builtin_aarch64_fmaxv2df (__a, __b);
}
/* vmaxv */
@@ -17963,19 +17963,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vminnm_f32 (float32x2_t __a, float32x2_t __b)
{
- return __builtin_aarch64_sminv2sf (__a, __b);
+ return __builtin_aarch64_fminv2sf (__a, __b);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vminnmq_f32 (float32x4_t __a, float32x4_t __b)
{
- return __builtin_aarch64_sminv4sf (__a, __b);
+ return __builtin_aarch64_fminv4sf (__a, __b);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vminnmq_f64 (float64x2_t __a, float64x2_t __b)
{
- return __builtin_aarch64_sminv2df (__a, __b);
+ return __builtin_aarch64_fminv2df (__a, __b);
}
/* vminv */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2efbfab..c7e1d0c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -186,9 +186,11 @@
UNSPEC_ASHIFT_UNSIGNED ; Used in aarch64-simd.md.
UNSPEC_ABS ; Used in aarch64-simd.md.
UNSPEC_FMAX ; Used in aarch64-simd.md.
+ UNSPEC_FMAXNM ; Used in aarch64-simd.md.
UNSPEC_FMAXNMV ; Used in aarch64-simd.md.
UNSPEC_FMAXV ; Used in aarch64-simd.md.
UNSPEC_FMIN ; Used in aarch64-simd.md.
+ UNSPEC_FMINNM ; Used in aarch64-simd.md.
UNSPEC_FMINNMV ; Used in aarch64-simd.md.
UNSPEC_FMINV ; Used in aarch64-simd.md.
UNSPEC_FADDV ; Used in aarch64-simd.md.
@@ -876,7 +878,8 @@
(define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
-(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
+ UNSPEC_FMAXNM UNSPEC_FMINNM])
(define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
@@ -941,9 +944,11 @@
(UNSPEC_SMAXV "smax")
(UNSPEC_SMINV "smin")
(UNSPEC_FMAX "smax_nan")
+ (UNSPEC_FMAXNM "fmax")
(UNSPEC_FMAXNMV "smax")
(UNSPEC_FMAXV "smax_nan")
(UNSPEC_FMIN "smin_nan")
+ (UNSPEC_FMINNM "fmin")
(UNSPEC_FMINNMV "smin")
(UNSPEC_FMINV "smin_nan")])
@@ -952,9 +957,11 @@
(UNSPEC_SMAXV "smax")
(UNSPEC_SMINV "smin")
(UNSPEC_FMAX "fmax")
+ (UNSPEC_FMAXNM "fmaxnm")
(UNSPEC_FMAXNMV "fmaxnm")
(UNSPEC_FMAXV "fmax")
(UNSPEC_FMIN "fmin")
+ (UNSPEC_FMINNM "fminnm")
(UNSPEC_FMINNMV "fminnm")
(UNSPEC_FMINV "fmin")])
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
new file mode 100644
index 0000000..96608eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
@@ -0,0 +1,82 @@
+/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */
+
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+#define CHECK(T, N, R, E) \
+ {\
+ int i = 0;\
+ for (; i < N; i++)\
+ if (* (T *) &R[i] != * (T *) &E[i])\
+ abort ();\
+ }
+
+int
+main (int argc, char **argv)
+{
+ float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
+ float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
+ float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
+ float32x2_t f32x2_exp_maxnm = vdup_n_f32 (0.0);
+ float32x2_t f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+ float32x2_t f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+ CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+ CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+ f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
+ f32x2_input2 = vdup_n_f32 (1.0);
+ f32x2_exp_minnm = vdup_n_f32 (1.0);
+ f32x2_exp_maxnm = vdup_n_f32 (1.0);
+ f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+ f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+ CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+ CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+ float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
+ float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
+ float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
+ float32x4_t f32x4_exp_maxnm = vdupq_n_f32 (77.0);
+ float32x4_t f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+ float32x4_t f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+ CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+ CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+ f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
+ f32x4_input2 = vdupq_n_f32 (-1.0);
+ f32x4_exp_minnm = vdupq_n_f32 (-1.0);
+ f32x4_exp_maxnm = vdupq_n_f32 (-1.0);
+ f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+ f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+ CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+ CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+ float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
+ float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
+ float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
+ float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
+ float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+ float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+ CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+ CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+ f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
+ f64x2_input2 = vdupq_n_f64 (1.0);
+ f64x2_exp_minnm = vdupq_n_f64 (1.0);
+ f64x2_exp_maxnm = vdupq_n_f64 (1.0);
+ f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+ f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+ CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+ CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+ return 0;
+}