https://gcc.gnu.org/g:924d2596c9ad1deb0acb78c32762608838ea7db4

commit 924d2596c9ad1deb0acb78c32762608838ea7db4
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Thu Oct 3 00:31:49 2024 -0400

    Add vector-pair.h runtime tests.
    
    2024-10-03  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/testsuite
    
            * gcc.target/powerpc/vpair-3-not-p10.c: New test.
            * gcc.target/powerpc/vpair-3-p10.c: Likewise.
            * gcc.target/powerpc/vpair-3.h: New test include.
            * gcc.target/powerpc/vpair-4-not-p10.c: New test.
            * gcc.target/powerpc/vpair-4-p10.c: Likewise.
            * gcc.target/powerpc/vpair-4.h: New test include.

Diff:
---
 gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c |  15 +
 gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c     |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-3.h         | 435 +++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c |  15 +
 gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c     |  14 +
 gcc/testsuite/gcc.target/powerpc/vpair-4.h         | 435 +++++++++++++++++++++
 6 files changed, 928 insertions(+)

diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
new file mode 100644
index 000000000000..d1a1029417f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c
@@ -0,0 +1,15 @@
+/* { dg-do run { target { vsx_hw } } } */
+/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are not
+ * available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mno-mma option disables GCC from enabling the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
new file mode 100644
index 000000000000..d78faf3fed47
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c
@@ -0,0 +1,14 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */
+
+/*
+ * This test of the double (f64) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mmma option makes sure GCC enables the __vector_pair type.
+ */
+
+#include "vpair-3.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
new file mode 100644
index 000000000000..e61ad23dd57e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
@@ -0,0 +1,435 @@
+/* Common include file to test the vector pair double functions.  This is run
+   two times, once compiled for a non-power10 system that does not have the
+   vector pair load and store instructions, and once with power10 defaults that
+   has load/store vector pair.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <vector-pair.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#ifndef NUM
+#define NUM    16
+#endif
+
+/* The vpair_* tests access these arrays through vector_pair_f64_t pointers,
+   so give them that type's alignment instead of relying on double's.  */
+#define VP_ALIGN __attribute__ ((aligned (__alignof__ (vector_pair_f64_t))))
+static double result1[NUM] VP_ALIGN, result2[NUM] VP_ALIGN;
+static double in_a[NUM] VP_ALIGN, in_b[NUM] VP_ALIGN, in_c[NUM] VP_ALIGN;
+
+/* vector pair tests.  */
+
+/* Run vpair_f64_abs on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_abs (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *src = (vector_pair_f64_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f64_abs (dst, src);
+}
+
+/* Run vpair_f64_nabs on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_nabs (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *src = (vector_pair_f64_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f64_nabs (dst, src);
+}
+
+/* Run vpair_f64_neg on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_neg (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *src = (vector_pair_f64_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f64_neg (dst, src);
+}
+
+/* Run vpair_f64_sqrt on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_sqrt (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *src = (vector_pair_f64_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f64_sqrt (dst, src);
+}
+
+/* Run vpair_f64_add on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_add (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_add (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_sub on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_sub (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_sub (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_mul on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_mul (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_mul (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_div on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_div (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_div (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_min on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_min (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_min (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_max on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_max (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *lhs = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *rhs = (vector_pair_f64_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f64_max (dst, lhs, rhs);
+}
+
+/* Run vpair_f64_fma on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_fma (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *pa = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *pb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *pc = (vector_pair_f64_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f64_fma (dst, pa, pb, pc);
+}
+
+/* Run vpair_f64_fms on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_fms (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *pa = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *pb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *pc = (vector_pair_f64_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f64_fms (dst, pa, pb, pc);
+}
+
+/* Run vpair_f64_nfma on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_nfma (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *pa = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *pb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *pc = (vector_pair_f64_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f64_nfma (dst, pa, pb, pc);
+}
+
+/* Run vpair_f64_nfms on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_nfms (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f64_t) / sizeof (double));
+  vector_pair_f64_t *dst = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *pa = (vector_pair_f64_t *)a;
+  vector_pair_f64_t *pb = (vector_pair_f64_t *)b;
+  vector_pair_f64_t *pc = (vector_pair_f64_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f64_nfms (dst, pa, pb, pc);
+}
+
+
+/* scalar tests.  */
+
+/* Scalar reference for "abs": r[i] is a[i], negated when a[i] is below zero,
+   for NUM elements.  B and C are unused.  */
+void
+scalar_abs (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = (*a < 0.0) ? -*a : *a;
+}
+
+/* Scalar reference for "nabs": r[i] is a[i] when a[i] is below zero, else
+   -a[i], for NUM elements.  B and C are unused.  */
+void
+scalar_nabs (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = (*a < 0.0) ? *a : -*a;
+}
+
+/* Scalar reference for "neg": r[i] = -a[i] for NUM elements.  B and C are
+   unused.  */
+void
+scalar_neg (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = -*a;
+}
+
+/* Scalar reference for "sqrt": r[i] = __builtin_sqrt (a[i]) for NUM
+   elements.  B and C are unused.  */
+void
+scalar_sqrt (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = __builtin_sqrt (*a);
+}
+
+/* Scalar reference for "add": r[i] = a[i] + b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_add (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a + *b;
+}
+
+/* Scalar reference for "sub": r[i] = a[i] - b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_sub (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a - *b;
+}
+
+/* Scalar reference for "mul": r[i] = a[i] * b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_mul (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a * *b;
+}
+
+/* Scalar reference for "div": r[i] = a[i] / b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_div (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a / *b;
+}
+
+/* Scalar reference for "min": r[i] is a[i] when a[i] < b[i], else b[i], for
+   NUM elements.  C is unused.  */
+void
+scalar_min (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = (*a < *b) ? *a : *b;
+}
+
+/* Scalar reference for "max": r[i] is a[i] when a[i] > b[i], else b[i], for
+   NUM elements.  C is unused.  */
+void
+scalar_max (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = (*a > *b) ? *a : *b;
+}
+
+/* Scalar reference for "fma": r[i] = __builtin_fma (a[i], b[i], c[i]) for
+   NUM elements.  */
+void
+scalar_fma (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = __builtin_fma (*a, *b, *c);
+}
+
+/* Scalar reference for "fms": r[i] = __builtin_fma (a[i], b[i], -c[i]) for
+   NUM elements.  */
+void
+scalar_fms (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = __builtin_fma (*a, *b, -*c);
+}
+
+/* Scalar reference for "nfma": r[i] = -__builtin_fma (a[i], b[i], c[i]) for
+   NUM elements.  */
+void
+scalar_nfma (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = - __builtin_fma (*a, *b, *c);
+}
+
+/* Scalar reference for "nfms": r[i] = -__builtin_fma (a[i], b[i], -c[i]) for
+   NUM elements.  */
+void
+scalar_nfms (double *r, double *a, double *b, double *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = - __builtin_fma (*a, *b, -*c);
+}
+
+
+/* Compare result1[] (vector pair output) against result2[] (scalar output)
+   element by element and abort on the first mismatch.  NAME identifies the
+   operation and is only used by the DEBUG diagnostic.  */
+void
+check (const char *name)
+{
+  size_t idx = 0;
+
+  while (idx < NUM)
+    {
+      if (result1[idx] == result2[idx])
+        {
+          idx++;
+          continue;
+        }
+
+#ifdef DEBUG
+      printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n",
+              (long)idx, result1[idx], result2[idx], name,
+              in_a[idx], in_b[idx], in_c[idx]);
+#endif
+      abort ();
+    }
+}
+
+/* Signature shared by all test functions: (result, a, b, c, element count).  */
+typedef void func_t (double *, double *, double *, double *, size_t);
+/* Table of tests: a vector pair function, its scalar reference, and the name
+   printed by the DEBUG diagnostic in check.  */
+struct
+{
+  func_t *vpair_test, *scalar_test;
+  const char *name;
+} tests[] = {
+  { .vpair_test = vpair_abs,  .scalar_test = scalar_abs,  .name = "abs"  },
+  { .vpair_test = vpair_nabs, .scalar_test = scalar_nabs, .name = "nabs" },
+  { .vpair_test = vpair_neg,  .scalar_test = scalar_neg,  .name = "neg"  },
+  { .vpair_test = vpair_sqrt, .scalar_test = scalar_sqrt, .name = "sqrt" },
+  { .vpair_test = vpair_add,  .scalar_test = scalar_add,  .name = "add"  },
+  { .vpair_test = vpair_sub,  .scalar_test = scalar_sub,  .name = "sub"  },
+  { .vpair_test = vpair_mul,  .scalar_test = scalar_mul,  .name = "mul"  },
+  { .vpair_test = vpair_div,  .scalar_test = scalar_div,  .name = "div"  },
+  { .vpair_test = vpair_min,  .scalar_test = scalar_min,  .name = "min"  },
+  { .vpair_test = vpair_max,  .scalar_test = scalar_max,  .name = "max"  },
+  { .vpair_test = vpair_fma,  .scalar_test = scalar_fma,  .name = "fma"  },
+  { .vpair_test = vpair_fms,  .scalar_test = scalar_fms,  .name = "fms"  },
+  { .vpair_test = vpair_nfma, .scalar_test = scalar_nfma, .name = "nfma" },
+  { .vpair_test = vpair_nfms, .scalar_test = scalar_nfms, .name = "nfms" },
+};
+
+/* Test driver: fill the inputs, then for every entry in tests[] run the
+   vector pair version and the scalar version and compare their outputs.  */
+int
+main (void)
+{
+  const size_t num_tests = sizeof (tests) / sizeof (tests[0]);
+  size_t n;
+
+  /* Element k gets a = (k + 1)^2, b = k + 1 and c = k + 3, so every input
+     is a positive whole number.  */
+  for (n = 0; n < NUM; n++)
+    {
+      double val = (double)(n + 1);
+      in_b[n] = val;
+      in_a[n] = val * val;
+      in_c[n] = val + 2.0;
+    }
+
+#ifdef DEBUG
+  printf ("Start tests\n");
+#endif
+
+  for (n = 0; n < num_tests; n++)
+    {
+      tests[n].vpair_test (result1, in_a, in_b, in_c, NUM);
+      tests[n].scalar_test (result2, in_a, in_b, in_c, NUM);
+      check (tests[n].name);
+    }
+
+#ifdef DEBUG
+  printf ("End tests\n");
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c
new file mode 100644
index 000000000000..f57fbbf8b050
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c
@@ -0,0 +1,15 @@
+/* { dg-do run { target { vsx_hw } } } */
+/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */
+
+/*
+ * This test of the float (f32) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are not
+ * available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mno-mma option disables GCC from enabling the __vector_pair type.
+ */
+
+#include "vpair-4.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c
new file mode 100644
index 000000000000..12291202c163
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c
@@ -0,0 +1,14 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */
+
+/*
+ * This test of the float (f32) vector pair functions in vector-pair.h is run
+ * on VSX systems when the load/store vector pair instructions are available.
+ *
+ * The -ffast-math option is used to just use the hardware sqrt, min, and max
+ * instructions without calling into the library.
+ *
+ * The -mmma option makes sure GCC enables the __vector_pair type.
+ */
+
+#include "vpair-4.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4.h b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
new file mode 100644
index 000000000000..1a80cf5e639a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
@@ -0,0 +1,435 @@
+/* Common include file to test the vector pair float functions.  This is run
+   two times, once compiled for a non-power10 system that does not have the
+   vector pair load and store instructions, and once with power10 defaults that
+   has load/store vector pair.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <vector-pair.h>
+
+#ifdef DEBUG
+#include <stdio.h>
+#endif
+
+#ifndef NUM
+#define NUM    16
+#endif
+
+/* The vpair_* tests access these arrays through vector_pair_f32_t pointers,
+   so give them that type's alignment instead of relying on float's.  */
+#define VP_ALIGN __attribute__ ((aligned (__alignof__ (vector_pair_f32_t))))
+static float result1[NUM] VP_ALIGN, result2[NUM] VP_ALIGN;
+static float in_a[NUM] VP_ALIGN, in_b[NUM] VP_ALIGN, in_c[NUM] VP_ALIGN;
+
+/* vector pair tests.  */
+
+/* Run vpair_f32_abs on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_abs (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *src = (vector_pair_f32_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f32_abs (dst, src);
+}
+
+/* Run vpair_f32_nabs on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_nabs (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *src = (vector_pair_f32_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f32_nabs (dst, src);
+}
+
+/* Run vpair_f32_neg on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_neg (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *src = (vector_pair_f32_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f32_neg (dst, src);
+}
+
+/* Run vpair_f32_sqrt on R and A one vector pair at a time.  B and C are
+   unused; NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_sqrt (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *src = (vector_pair_f32_t *)a;
+
+  for (; left > 0; left--, dst++, src++)
+    vpair_f32_sqrt (dst, src);
+}
+
+/* Run vpair_f32_add on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_add (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_add (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_sub on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_sub (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_sub (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_mul on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_mul (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_mul (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_div on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_div (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_div (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_min on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_min (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_min (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_max on R, A and B one vector pair at a time.  C is unused;
+   NUM counts elements, and any partial trailing pair is skipped.  */
+void
+vpair_max (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *lhs = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *rhs = (vector_pair_f32_t *)b;
+
+  for (; left > 0; left--, dst++, lhs++, rhs++)
+    vpair_f32_max (dst, lhs, rhs);
+}
+
+/* Run vpair_f32_fma on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_fma (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *pa = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *pb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *pc = (vector_pair_f32_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f32_fma (dst, pa, pb, pc);
+}
+
+/* Run vpair_f32_fms on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_fms (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *pa = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *pb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *pc = (vector_pair_f32_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f32_fms (dst, pa, pb, pc);
+}
+
+/* Run vpair_f32_nfma on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_nfma (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *pa = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *pb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *pc = (vector_pair_f32_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f32_nfma (dst, pa, pb, pc);
+}
+
+/* Run vpair_f32_nfms on R, A, B and C one vector pair at a time.  NUM counts
+   elements, and any partial trailing pair is skipped.  */
+void
+vpair_nfms (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t left = num / (sizeof (vector_pair_f32_t) / sizeof (float));
+  vector_pair_f32_t *dst = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *pa = (vector_pair_f32_t *)a;
+  vector_pair_f32_t *pb = (vector_pair_f32_t *)b;
+  vector_pair_f32_t *pc = (vector_pair_f32_t *)c;
+
+  for (; left > 0; left--, dst++, pa++, pb++, pc++)
+    vpair_f32_nfms (dst, pa, pb, pc);
+}
+
+
+/* scalar tests.  */
+
+/* Scalar reference for "abs": r[i] is a[i], negated when a[i] is below zero,
+   for NUM elements.  B and C are unused.  */
+void
+scalar_abs (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = (*a < 0.0) ? -*a : *a;
+}
+
+/* Scalar reference for "nabs": r[i] is a[i] when a[i] is below zero, else
+   -a[i], for NUM elements.  B and C are unused.  */
+void
+scalar_nabs (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = (*a < 0.0) ? *a : -*a;
+}
+
+/* Scalar reference for "neg": r[i] = -a[i] for NUM elements.  B and C are
+   unused.  */
+void
+scalar_neg (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = -*a;
+}
+
+/* Scalar reference for "sqrt": r[i] = __builtin_sqrtf (a[i]) for NUM
+   elements, staying in float like the data.  B and C are unused.  */
+void
+scalar_sqrt (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++)
+    *r = __builtin_sqrtf (*a);
+}
+
+/* Scalar reference for "add": r[i] = a[i] + b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_add (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a + *b;
+}
+
+/* Scalar reference for "sub": r[i] = a[i] - b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_sub (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a - *b;
+}
+
+/* Scalar reference for "mul": r[i] = a[i] * b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_mul (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a * *b;
+}
+
+/* Scalar reference for "div": r[i] = a[i] / b[i] for NUM elements.  C is
+   unused.  */
+void
+scalar_div (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = *a / *b;
+}
+
+/* Scalar reference for "min": r[i] is a[i] when a[i] < b[i], else b[i], for
+   NUM elements.  C is unused.  */
+void
+scalar_min (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = (*a < *b) ? *a : *b;
+}
+
+/* Scalar reference for "max": r[i] is a[i] when a[i] > b[i], else b[i], for
+   NUM elements.  C is unused.  */
+void
+scalar_max (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++)
+    *r = (*a > *b) ? *a : *b;
+}
+
+/* Scalar reference for "fma": r[i] = __builtin_fmaf (a[i], b[i], c[i]) for
+   NUM elements.  The float builtin rounds once, directly to float.  */
+void
+scalar_fma (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = __builtin_fmaf (*a, *b, *c);
+}
+
+/* Scalar reference for "fms": r[i] = __builtin_fmaf (a[i], b[i], -c[i]) for
+   NUM elements.  The float builtin rounds once, directly to float.  */
+void
+scalar_fms (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = __builtin_fmaf (*a, *b, -*c);
+}
+
+/* Scalar reference for "nfma": r[i] = -__builtin_fmaf (a[i], b[i], c[i]) for
+   NUM elements.  The float builtin rounds once, directly to float.  */
+void
+scalar_nfma (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = - __builtin_fmaf (*a, *b, *c);
+}
+
+/* Scalar reference for "nfms": r[i] = -__builtin_fmaf (a[i], b[i], -c[i])
+   for NUM elements.  The float builtin rounds once, directly to float.  */
+void
+scalar_nfms (float *r, float *a, float *b, float *c, size_t num)
+{
+  for (; num > 0; num--, r++, a++, b++, c++)
+    *r = - __builtin_fmaf (*a, *b, -*c);
+}
+
+
+/* Compare result1[] (vector pair output) against result2[] (scalar output)
+   element by element and abort on the first mismatch.  NAME identifies the
+   operation and is only used by the DEBUG diagnostic.  */
+void
+check (const char *name)
+{
+  size_t idx = 0;
+
+  while (idx < NUM)
+    {
+      if (result1[idx] == result2[idx])
+        {
+          idx++;
+          continue;
+        }
+
+#ifdef DEBUG
+      printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n",
+              (long)idx, result1[idx], result2[idx], name,
+              in_a[idx], in_b[idx], in_c[idx]);
+#endif
+      abort ();
+    }
+}
+
+/* Signature shared by all test functions: (result, a, b, c, element count).  */
+typedef void func_t (float *, float *, float *, float *, size_t);
+/* Table of tests: a vector pair function, its scalar reference, and the name
+   printed by the DEBUG diagnostic in check.  */
+struct
+{
+  func_t *vpair_test, *scalar_test;
+  const char *name;
+} tests[] = {
+  { .vpair_test = vpair_abs,  .scalar_test = scalar_abs,  .name = "abs"  },
+  { .vpair_test = vpair_nabs, .scalar_test = scalar_nabs, .name = "nabs" },
+  { .vpair_test = vpair_neg,  .scalar_test = scalar_neg,  .name = "neg"  },
+  { .vpair_test = vpair_sqrt, .scalar_test = scalar_sqrt, .name = "sqrt" },
+  { .vpair_test = vpair_add,  .scalar_test = scalar_add,  .name = "add"  },
+  { .vpair_test = vpair_sub,  .scalar_test = scalar_sub,  .name = "sub"  },
+  { .vpair_test = vpair_mul,  .scalar_test = scalar_mul,  .name = "mul"  },
+  { .vpair_test = vpair_div,  .scalar_test = scalar_div,  .name = "div"  },
+  { .vpair_test = vpair_min,  .scalar_test = scalar_min,  .name = "min"  },
+  { .vpair_test = vpair_max,  .scalar_test = scalar_max,  .name = "max"  },
+  { .vpair_test = vpair_fma,  .scalar_test = scalar_fma,  .name = "fma"  },
+  { .vpair_test = vpair_fms,  .scalar_test = scalar_fms,  .name = "fms"  },
+  { .vpair_test = vpair_nfma, .scalar_test = scalar_nfma, .name = "nfma" },
+  { .vpair_test = vpair_nfms, .scalar_test = scalar_nfms, .name = "nfms" },
+};
+
+/* Test driver: fill the inputs, then for every entry in tests[] run the
+   vector pair version and the scalar version and compare their outputs.  */
+int
+main (void)
+{
+  const size_t num_tests = sizeof (tests) / sizeof (tests[0]);
+  size_t n;
+
+  /* Element k gets a = (k + 1)^2, b = k + 1 and c = k + 3, so every input
+     is a positive whole number.  */
+  for (n = 0; n < NUM; n++)
+    {
+      float val = (float)(n + 1);
+      in_b[n] = val;
+      in_a[n] = val * val;
+      in_c[n] = val + 2.0f;
+    }
+
+#ifdef DEBUG
+  printf ("Start tests\n");
+#endif
+
+  for (n = 0; n < num_tests; n++)
+    {
+      tests[n].vpair_test (result1, in_a, in_b, in_c, NUM);
+      tests[n].scalar_test (result2, in_a, in_b, in_c, NUM);
+      check (tests[n].name);
+    }
+
+#ifdef DEBUG
+  printf ("End tests\n");
+#endif
+  return 0;
+}

Reply via email to