[AArch32][NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsics.

Bilyan Borisov Fri, 08 Jan 2016 00:57:44 -0800

This patch implements the vmaxnmQ_ST and vminnmQ_ST intrinsics. The
current builtin registration code is deficient since it can't access
standard pattern names, to which vmaxnmQ_ST and vminnmQ_ST map
directly. Thus, to enable the vectoriser to have access to these
intrinsics, we implement them using builtin functions, which we
expand to the proper standard pattern using a define_expand.


This patch also implements the __ARM_FEATURE_NUMERIC_MAXMIN macro,
which is defined when __ARM_ARCH >= 8, and which enables the
intrinsics.

Cross tested on arm-none-eabi, armeb-none-eabi,
arm-none-linux-gnueabi, and arm-none-linux-gnueabihf. Bootstrapped
and tested on arm-none-linux-gnueabihf.

---

gcc/

2015-XX-XX  Bilyan Borisov  <bilyan.bori...@arm.com>

        * config/arm/arm-c.c (arm_cpu_builtins): New macro definition.
        * config/arm/arm_neon.h (vmaxnm_f32): New intrinsinc.
        (vmaxnmq_f32): Likewise.
        (vminnm_f32): Likewise.
        (vminnmq_f32): Likewise.
        * config/arm/arm_neon_builtins.def (vmaxnm): New builtin.
        (vminnm): Likewise.
        * config/arm/neon.md (neon_<fmaxmin_op><mode>, VCVTF): New
        expander.

gcc/testsuite/

2015-XX-XX  Bilyan Borisov  <bilyan.bori...@arm.com>

        * gcc.target/arm/simd/vmaxnm_f32_1.c: New.
        * gcc.target/arm/simd/vmaxnmq_f32_1.c: Likewise.
        * gcc.target/arm/simd/vminnm_f32_1.c: Likewise.
        * gcc.target/arm/simd/vminnmq_f32_1.c: Likewise.

diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 7dee28ec52df68f8c7a60fe66e1b049fed39c1c0..7b63bdcf86c079288611f79ed89d6540b348fe82 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -83,6 +83,9 @@ arm_cpu_builtins (struct cpp_reader* pfile)
 		      ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB)
 		       || TARGET_ARM_ARCH_ISA_THUMB >=2));
 
+  def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN",
+		      TARGET_ARM_ARCH >= 8);
+
   def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD);
 
   builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 0a33d21f2fcf8a1074fb62e89f4418295d446db5..2e28621cd3a3bf6682ce5353cfb1fd5d8d6c55ad 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -2889,6 +2889,34 @@ vmaxq_f32 (float32x4_t __a, float32x4_t __b)
   return (float32x4_t)__builtin_neon_vmaxfv4sf (__a, __b);
 }
 
+#pragma GCC push_options
+#pragma GCC target ("fpu=neon-fp-armv8")
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vmaxnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return (float32x2_t)__builtin_neon_vmaxnmv2sf (a, b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vmaxnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return (float32x4_t)__builtin_neon_vmaxnmv4sf (a, b);
+}
+
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vminnm_f32 (float32x2_t a, float32x2_t b)
+{
+  return (float32x2_t)__builtin_neon_vminnmv2sf (a, b);
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vminnmq_f32 (float32x4_t a, float32x4_t b)
+{
+  return (float32x4_t)__builtin_neon_vminnmv4sf (a, b);
+}
+#pragma GCC pop_options
+
+
 __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
 vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
 {
diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def
index 0b719df760747af7642bd14ab14a9b2144d43359..1d3b6e9b6a08a3cf3b0d6f76bf340208919c9b13 100644
--- a/gcc/config/arm/arm_neon_builtins.def
+++ b/gcc/config/arm/arm_neon_builtins.def
@@ -126,6 +126,9 @@ VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si)
 VAR2 (BINOP, vminf, v2sf, v4sf)
 
+VAR2 (BINOP, vmaxnm, v2sf, v4sf)
+VAR2 (BINOP, vminnm, v2sf, v4sf)
+
 VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si)
 VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si)
 VAR1 (BINOP, vpmaxf, v2sf)
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 94c63fd3dbc071291844fbe7732435465fbc0ada..b745199fe25d7afd7d93598e12e74d8efb4863fe 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2354,6 +2354,18 @@
   [(set_attr "type" "neon_fp_minmax_s<q>")]
 )
 
+;; Expander for v<maxmin>nm intrinsics.
+(define_expand "neon_<fmaxmin_op><mode>"
+  [(unspec:VCVTF [(match_operand:VCVTF 0 "s_register_operand" "")
+   (match_operand:VCVTF 1 "s_register_operand" "")
+   (match_operand:VCVTF 2 "s_register_operand" "")]
+		  VMAXMINFNM)]
+  "TARGET_NEON && TARGET_FPU_ARMV8"
+{
+  emit_insn (gen_<fmaxmin><mode>3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
 (define_insn "<fmaxmin><mode>3"
   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3a9f3671b36a1491ed6d33dc894a3b4b559c4ae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c
@@ -0,0 +1,159 @@
+/* Test the `vmaxnmf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-save-temps -O3 -march=armv8-a" } */
+/* { dg-add-options arm_v8_neon } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != b1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2};
+  float32_t b1[] = {1,4};
+  float32_t e[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__quiet_NaN_one_arg ()
+{
+  /* When given a quiet NaN, vmaxnm returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__quiet_NaN_both_args ()
+{
+  /* When given a quiet NaN, vmaxnm returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2};
+  float32_t b1[] = {1,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__zero_both_args ()
+{
+  /* For 0 and -0, vmaxnm returns 0.  Since 0 == -0, check sign bit.  */
+  float32_t a1[] = {0.0, 0.0};
+  float32_t b1[] = {-0.0, -0.0};
+  float32_t e[] = {0.0, 0.0};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) != 0)
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__inf_both_args ()
+{
+  /* The max of inf and inf is inf.  The max of -inf and -inf is -inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf};
+  float32_t b1[] = {inf, -inf};
+  float32_t e[] = {inf, -inf};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnm_f32__two_quiet_NaNs_both_args ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {n,n};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vmaxnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vmaxnm_f32__regular_input1 ();
+  test_vmaxnm_f32__regular_input2 ();
+  test_vmaxnm_f32__quiet_NaN_one_arg ();
+  test_vmaxnm_f32__quiet_NaN_both_args ();
+  test_vmaxnm_f32__zero_both_args ();
+  test_vmaxnm_f32__inf_both_args ();
+  test_vmaxnm_f32__two_quiet_NaNs_both_args ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..80c4e9aa18810fea318b865e8c4e503238e826f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c
@@ -0,0 +1,160 @@
+/* Test the `vmaxnmqf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-save-temps -O3 -march=armv8-a" } */
+/* { dg-add-options arm_v8_neon } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2,5,6};
+  float32_t b1[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != b1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2,7,6};
+  float32_t b1[] = {1,4,5,8};
+  float32_t e[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__quiet_NaN_one_arg ()
+{
+  /* When given a quiet NaN, vmaxnmq returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2,3,4};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__quiet_NaN_both_args ()
+{
+  /* When given a quiet NaN, vmaxnmq returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2,n,4};
+  float32_t b1[] = {1,n,3,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__zero_both_args ()
+{
+  /* For 0 and -0, vmaxnmq returns 0.  Since 0 == -0, check sign bit.  */
+  float32_t a1[] = {0.0, 0.0, -0.0, -0.0};
+  float32_t b1[] = {-0.0, -0.0, 0.0, 0.0};
+  float32_t e[] = {0.0, 0.0, 0.0, 0.0};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) != 0)
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__inf_both_args ()
+{
+  /* The max of inf and inf is inf.  The max of -inf and -inf is -inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf, inf, inf};
+  float32_t b1[] = {inf, -inf, -inf, -inf};
+  float32_t e[] = {inf, -inf, inf, inf};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vmaxnmq_f32__two_quiet_NaNs_both_args ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n,n,n};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {n,n};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vmaxnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vmaxnmq_f32__regular_input1 ();
+  test_vmaxnmq_f32__regular_input2 ();
+  test_vmaxnmq_f32__quiet_NaN_one_arg ();
+  test_vmaxnmq_f32__quiet_NaN_both_args ();
+  test_vmaxnmq_f32__zero_both_args ();
+  test_vmaxnmq_f32__inf_both_args ();
+  test_vmaxnmq_f32__two_quiet_NaNs_both_args ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a1d097911748108591a11f3bd7fbf3e44adebaa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c
@@ -0,0 +1,159 @@
+/* Test the `vminnmf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-save-temps -O3 -march=armv8-a" } */
+/* { dg-add-options arm_v8_neon } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vminnm_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {3,4};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != a1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2};
+  float32_t b1[] = {1,4};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__quiet_NaN_one_arg ()
+{
+  /* When given a quiet NaN, vminnm returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__quiet_NaN_both_args ()
+{
+  /* When given a quiet NaN, vminnm returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2};
+  float32_t b1[] = {1,n};
+  float32_t e[] = {1,2};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__zero_both_args ()
+{
+  /* For 0 and -0, vminnm returns -0.  Since 0 == -0, check sign bit.  */
+  float32_t a1[] = {0.0,0.0};
+  float32_t b1[] = {-0.0, -0.0};
+  float32_t e[] = {-0.0, -0.0};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) == 0)
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__inf_both_args ()
+{
+  /* The min of inf and inf is inf.  The min of -inf and -inf is -inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf};
+  float32_t b1[] = {inf, -inf};
+  float32_t e[] = {inf, -inf};
+
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+
+  float32_t actual1[2];
+  vst1_f32 (actual1, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnm_f32__two_quiet_NaNs_both_args ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n};
+  float32_t b1[] = {n,n};
+  float32_t e[] = {n,n};
+  float32x2_t a = vld1_f32 (a1);
+  float32x2_t b = vld1_f32 (b1);
+  float32x2_t c = vminnm_f32 (a, b);
+  float32_t actual[2];
+  vst1_f32 (actual, c);
+
+  for (int i = 0; i < 2; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vminnm_f32__regular_input1 ();
+  test_vminnm_f32__regular_input2 ();
+  test_vminnm_f32__quiet_NaN_one_arg ();
+  test_vminnm_f32__quiet_NaN_both_args ();
+  test_vminnm_f32__zero_both_args ();
+  test_vminnm_f32__inf_both_args ();
+  test_vminnm_f32__two_quiet_NaNs_both_args ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..a778abecd857e9ea83d249e0ab52886209030aa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c
@@ -0,0 +1,159 @@
+/* Test the `vminnmqf32' ARM Neon intrinsic.  */
+
+/* { dg-do run } */
+/* { dg-require-effective-target arm_v8_neon_ok } */
+/* { dg-options "-save-temps -O3 -march=armv8-a" } */
+/* { dg-add-options arm_v8_neon } */
+
+#include "arm_neon.h"
+
+extern void abort ();
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__regular_input1 ()
+{
+  float32_t a1[] = {1,2,5,6};
+  float32_t b1[] = {3,4,7,8};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != a1[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__regular_input2 ()
+{
+  float32_t a1[] = {3,2,7,6};
+  float32_t b1[] = {1,4,5,8};
+  float32_t e[] = {1,2,5,6};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__quiet_NaN_one_arg ()
+{
+  /* When given a quiet NaN, vminnmq returns the other operand.
+     In this test case we have NaNs in only one operand.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {1,2,3,4};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__quiet_NaN_both_args ()
+{
+  /* When given a quiet NaN, vminnmq returns the other operand.
+     In this test case we have NaNs in both operands.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,2,n,4};
+  float32_t b1[] = {1,n,3,n};
+  float32_t e[] = {1,2,3,4};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__zero_both_args ()
+{
+  /* For 0 and -0, vminnmq returns -0.  Since 0 == -0, check sign bit.  */
+  float32_t a1[] = {0.0, 0.0, -0.0, -0.0};
+  float32_t b1[] = {-0.0, -0.0, 0.0, 0.0};
+  float32_t e[] = {-0.0, -0.0, -0.0, -0.0};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) == 0)
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__inf_both_args ()
+{
+  /* The min of inf and inf is inf.  The min of -inf and -inf is -inf.  */
+  float32_t inf = __builtin_huge_valf ();
+  float32_t a1[] = {inf, -inf, inf, inf};
+  float32_t b1[] = {inf, -inf, -inf, -inf};
+  float32_t e[] = {inf, -inf, -inf, -inf};
+
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+
+  float32_t actual1[4];
+  vst1q_f32 (actual1, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (actual1[i] != e[i])
+      abort ();
+}
+
+void __attribute__ ((noinline))
+test_vminnmq_f32__two_quiet_NaNs_both_args ()
+{
+  /* When given 2 NaNs, return a NaN.  Since a NaN is not equal to anything,
+     not even another NaN, use __builtin_isnan () to check.  */
+  float32_t n = __builtin_nanf ("");
+  float32_t a1[] = {n,n,n,n};
+  float32_t b1[] = {n,n,n,n};
+  float32_t e[] = {n,n};
+  float32x4_t a = vld1q_f32 (a1);
+  float32x4_t b = vld1q_f32 (b1);
+  float32x4_t c = vminnmq_f32 (a, b);
+  float32_t actual[4];
+  vst1q_f32 (actual, c);
+
+  for (int i = 0; i < 4; ++i)
+    if (!__builtin_isnan (actual[i]))
+      abort ();
+}
+
+int
+main ()
+{
+  test_vminnmq_f32__regular_input1 ();
+  test_vminnmq_f32__regular_input2 ();
+  test_vminnmq_f32__quiet_NaN_one_arg ();
+  test_vminnmq_f32__quiet_NaN_both_args ();
+  test_vminnmq_f32__zero_both_args ();
+  test_vminnmq_f32__inf_both_args ();
+  test_vminnmq_f32__two_quiet_NaNs_both_args ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */

[AArch32][NEON] Implementing vmaxnmQ_ST and vminnmQ_ST intrinsics.

Reply via email to