Jiong Wang writes:

> On 07/07/16 10:34, James Greenhalgh wrote:
>>
>> To make backporting easier, could you please write a very simple
>> standalone test that exposes this bug, and submit this patch with just
>> that simple test?  I've already OKed the functional part of this patch,
>> and I'm happy to pre-approve a simple testcase.
>>
>> With that committed to trunk, this needs to go to all active release
>> branches please.
>
> Committed the attached patch to trunk as r238166.  The fmax/fmin patterns
> were introduced by [1], which is only available since GCC 6, so I have
> backported the fix to the GCC 6 branch as r238167.
Here is the GCC 5 backport patch.  It is slightly different from the GCC 6
backport patch because the fmax/fmin patterns are not present on the GCC 5
branch.  OK to backport?

gcc/
2016-07-29  Jiong Wang  <jiong.w...@arm.com>

	* config/aarch64/aarch64-simd-builtins.def (smax, smin): Don't
	register float variants.
	(fmax, fmin): New builtins for VDQF modes.
	* config/aarch64/arm_neon.h (vmaxnm_f32): Use
	__builtin_aarch64_fmaxv2sf.
	(vmaxnmq_f32): Likewise.
	(vmaxnmq_f64): Likewise.
	(vminnm_f32): Likewise.
	(vminnmq_f32): Likewise.
	(vminnmq_f64): Likewise.
	* config/aarch64/iterators.md (UNSPEC_FMAXNM, UNSPEC_FMINNM): New.
	(FMAXMIN_UNS): Support UNSPEC_FMAXNM and UNSPEC_FMINNM.
	(maxmin_uns, maxmin_uns_op): Likewise.

gcc/testsuite/
2016-07-29  Jiong Wang  <jiong.w...@arm.com>

	* gcc.target/aarch64/simd/vminmaxnm_1.c: New.

--
Regards,
Jiong
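For reference (not part of the patch), a minimal sketch of what the fix
guarantees; the function names are made up for illustration.  Compiling
something like this at -O2 for aarch64 should now emit fmaxnm/fminnm for the
intrinsics, preserving the IEEE 754 maxNum/minNum behaviour that the new test
checks (a quiet NaN operand loses against a numeric operand):

#include <arm_neon.h>

/* Expected to compile to a single fmaxnm on the 2S arrangement.  */
float32x2_t
use_maxnm (float32x2_t a, float32x2_t b)
{
  return vmaxnm_f32 (a, b);
}

/* Expected to compile to a single fminnm on the 2S arrangement.  */
float32x2_t
use_minnm (float32x2_t a, float32x2_t b)
{
  return vminnm_f32 (a, b);
}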
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index dd2bc47..446d826 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -240,15 +240,16 @@
   BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
   BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
 
-  /* Implemented by <maxmin><mode>3.
-     smax variants map to fmaxnm,
-     smax_nan variants map to fmax.  */
-  BUILTIN_VDQIF (BINOP, smax, 3)
-  BUILTIN_VDQIF (BINOP, smin, 3)
+  /* Implemented by <maxmin><mode>3.  */
+  BUILTIN_VDQ_BHSI (BINOP, smax, 3)
+  BUILTIN_VDQ_BHSI (BINOP, smin, 3)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3)
+  /* Implemented by <maxmin_uns><mode>3.  */
   BUILTIN_VDQF (BINOP, smax_nan, 3)
   BUILTIN_VDQF (BINOP, smin_nan, 3)
+  BUILTIN_VDQF (BINOP, fmax, 3)
+  BUILTIN_VDQF (BINOP, fmin, 3)
 
   /* Implemented by aarch64_<maxmin_uns>p<mode>.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 4c15312..283000e 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -17733,19 +17733,19 @@ vpminnms_f32 (float32x2_t a)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_smaxv2sf (__a, __b);
+  return __builtin_aarch64_fmaxv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_smaxv4sf (__a, __b);
+  return __builtin_aarch64_fmaxv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_smaxv2df (__a, __b);
+  return __builtin_aarch64_fmaxv2df (__a, __b);
 }
 
 /* vmaxv */
@@ -17963,19 +17963,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b)
 __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
 vminnm_f32 (float32x2_t __a, float32x2_t __b)
 {
-  return __builtin_aarch64_sminv2sf (__a, __b);
+  return __builtin_aarch64_fminv2sf (__a, __b);
 }
 
 __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
 vminnmq_f32 (float32x4_t __a, float32x4_t __b)
 {
-  return __builtin_aarch64_sminv4sf (__a, __b);
+  return __builtin_aarch64_fminv4sf (__a, __b);
 }
 
 __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
 vminnmq_f64 (float64x2_t __a, float64x2_t __b)
 {
-  return __builtin_aarch64_sminv2df (__a, __b);
+  return __builtin_aarch64_fminv2df (__a, __b);
 }
 
 /* vminv */
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 2efbfab..c7e1d0c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -186,9 +186,11 @@
     UNSPEC_ASHIFT_UNSIGNED	; Used in aarch64-simd.md.
     UNSPEC_ABS		; Used in aarch64-simd.md.
     UNSPEC_FMAX		; Used in aarch64-simd.md.
+    UNSPEC_FMAXNM	; Used in aarch64-simd.md.
     UNSPEC_FMAXNMV	; Used in aarch64-simd.md.
     UNSPEC_FMAXV	; Used in aarch64-simd.md.
     UNSPEC_FMIN		; Used in aarch64-simd.md.
+    UNSPEC_FMINNM	; Used in aarch64-simd.md.
     UNSPEC_FMINNMV	; Used in aarch64-simd.md.
     UNSPEC_FMINV	; Used in aarch64-simd.md.
     UNSPEC_FADDV	; Used in aarch64-simd.md.
@@ -876,7 +878,8 @@
 (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2
			         UNSPEC_SUBHN2 UNSPEC_RSUBHN2])
 
-(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN])
+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN
+				  UNSPEC_FMAXNM UNSPEC_FMINNM])
 
 (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH])
 
@@ -941,9 +944,11 @@
			     (UNSPEC_SMAXV "smax")
			     (UNSPEC_SMINV "smin")
			     (UNSPEC_FMAX "smax_nan")
+			     (UNSPEC_FMAXNM "fmax")
			     (UNSPEC_FMAXNMV "smax")
			     (UNSPEC_FMAXV "smax_nan")
			     (UNSPEC_FMIN "smin_nan")
+			     (UNSPEC_FMINNM "fmin")
			     (UNSPEC_FMINNMV "smin")
			     (UNSPEC_FMINV "smin_nan")])
 
@@ -952,9 +957,11 @@
			        (UNSPEC_SMAXV "smax")
			        (UNSPEC_SMINV "smin")
			        (UNSPEC_FMAX "fmax")
+			        (UNSPEC_FMAXNM "fmaxnm")
			        (UNSPEC_FMAXNMV "fmaxnm")
			        (UNSPEC_FMAXV "fmax")
			        (UNSPEC_FMIN "fmin")
+			        (UNSPEC_FMINNM "fminnm")
			        (UNSPEC_FMINNMV "fminnm")
			        (UNSPEC_FMINV "fmin")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
new file mode 100644
index 0000000..96608eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
@@ -0,0 +1,82 @@
+/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+#define CHECK(T, N, R, E) \
+  {\
+    int i = 0;\
+    for (; i < N; i++)\
+      if (* (T *) &R[i] != * (T *) &E[i])\
+	abort ();\
+  }
+
+int
+main (int argc, char **argv)
+{
+  float32x2_t f32x2_input1 = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_input2 = vdup_n_f32 (0.0);
+  float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0);
+  float32x2_t f32x2_exp_maxnm = vdup_n_f32 (0.0);
+  float32x2_t f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+  float32x2_t f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  f32x2_input1 = vdup_n_f32 (__builtin_nanf (""));
+  f32x2_input2 = vdup_n_f32 (1.0);
+  f32x2_exp_minnm = vdup_n_f32 (1.0);
+  f32x2_exp_maxnm = vdup_n_f32 (1.0);
+  f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2);
+  f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2);
+
+  CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm);
+  CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm);
+
+  float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_input2 = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0);
+  float32x4_t f32x4_exp_maxnm = vdupq_n_f32 (77.0);
+  float32x4_t f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  float32x4_t f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  f32x4_input1 = vdupq_n_f32 (-__builtin_nanf (""));
+  f32x4_input2 = vdupq_n_f32 (-1.0);
+  f32x4_exp_minnm = vdupq_n_f32 (-1.0);
+  f32x4_exp_maxnm = vdupq_n_f32 (-1.0);
+  f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2);
+  f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2);
+
+  CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm);
+  CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm);
+
+  float64x2_t f64x2_input1 = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_input2 = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23);
+  float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56);
+  float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  f64x2_input1 = vdupq_n_f64 (-__builtin_nan (""));
+  f64x2_input2 = vdupq_n_f64 (1.0);
+  f64x2_exp_minnm = vdupq_n_f64 (1.0);
+  f64x2_exp_maxnm = vdupq_n_f64 (1.0);
+  f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2);
+  f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2);
+
+  CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm);
+  CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm);
+
+  return 0;
+}
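Also not part of the patch: a quick scalar cross-check along the same lines,
since C99 fmaxf/fminf follow the same quiet-NaN-loses rule as FMAXNM/FMINNM.
The layout below is only illustrative.

#include <arm_neon.h>
#include <math.h>
#include <stdlib.h>

int
main (void)
{
  float32x2_t a = vdup_n_f32 (__builtin_nanf (""));
  float32x2_t b = vdup_n_f32 (2.0f);
  float32x2_t max = vmaxnm_f32 (a, b);
  float32x2_t min = vminnm_f32 (a, b);

  /* fmaxf (NaN, 2.0f) and fminf (NaN, 2.0f) both return 2.0f, which is
     also what every vmaxnm/vminnm lane should hold here.  */
  if (vget_lane_f32 (max, 0) != fmaxf (__builtin_nanf (""), 2.0f)
      || vget_lane_f32 (min, 0) != fminf (__builtin_nanf (""), 2.0f))
    abort ();

  return 0;
}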