https://gcc.gnu.org/g:924d2596c9ad1deb0acb78c32762608838ea7db4
commit 924d2596c9ad1deb0acb78c32762608838ea7db4 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Thu Oct 3 00:31:49 2024 -0400 Add vector-pair.h runtime tests. 2024-10-03 Michael Meissner <meiss...@linux.ibm.com> gcc/testsuite * gcc.target/powerpc/vpair-3-not-p10.c: New test. * gcc.target/powerpc/vpair-3-p10.c: Likewise. * gcc.target/powerpc/vpair-3.h: New test include. * gcc.target/powerpc/vpair-4-not-p10.c: New test. * gcc.target/powerpc/vpair-4-p10.c: Likewise. * gcc.target/powerpc/vpair-4.h: New test include. Diff: --- gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c | 15 + gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c | 14 + gcc/testsuite/gcc.target/powerpc/vpair-3.h | 435 +++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c | 15 + gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c | 14 + gcc/testsuite/gcc.target/powerpc/vpair-4.h | 435 +++++++++++++++++++++ 6 files changed, 928 insertions(+) diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c new file mode 100644 index 000000000000..d1a1029417f0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-not-p10.c @@ -0,0 +1,15 @@ +/* { dg-do run { target { vsx_hw } } } */ +/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */ + +/* + * This test of the double (f64) vector pair functions in vector-pair.h is run + * on VSX systems when the load/store vector pair instructions are not + * available. + * + * The -ffast-math option is used to just use the hardware sqrt, min, and max + * instructions without calling into the library. + * + * The -mno-mma option disables GCC from enabling the __vector_pair type. 
+ */ + +#include "vpair-3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c new file mode 100644 index 000000000000..d78faf3fed47 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-3-p10.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { power10_hw } } } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */ + +/* + * This test of the double (f64) vector pair functions in vector-pair.h is run + * on VSX systems when the load/store vector pair instructions are available. + * + * The -ffast-math option is used to just use the hardware sqrt, min, and max + * instructions without calling into the library. + * + * The -mmma option makes sure GCC enables the __vector_pair type. + */ + +#include "vpair-3.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h b/gcc/testsuite/gcc.target/powerpc/vpair-3.h new file mode 100644 index 000000000000..e61ad23dd57e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h @@ -0,0 +1,435 @@ +/* Common include file to test the vector pair double functions. This is run + two times, once compiled for a non-power10 system that does not have the + vector pair load and store instructions, and once with power10 defaults that + has load/store vector pair. */ + +#include <stddef.h> +#include <stdlib.h> +#include <vector-pair.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#ifndef NUM +#define NUM 16 +#endif + +static double result1[NUM]; +static double result2[NUM]; +static double in_a[NUM]; +static double in_b[NUM]; +static double in_c[NUM]; + +/* vector pair tests. 
*/ + +void +vpair_abs (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_abs (vr + i, va + i); +} + +void +vpair_nabs (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_nabs (vr + i, va + i); +} + +void +vpair_neg (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_neg (vr + i, va + i); +} + +void +vpair_sqrt (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_sqrt (vr + i, va + i); +} + +void +vpair_add (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_add (vr + i, va + i, vb + i); +} + +void +vpair_sub (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof 
(vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_sub (vr + i, va + i, vb + i); +} + +void +vpair_mul (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_mul (vr + i, va + i, vb + i); +} + +void +vpair_div (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_div (vr + i, va + i, vb + i); +} + +void +vpair_min (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_min (vr + i, va + i, vb + i); +} + +void +vpair_max (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_max (vr + i, va + i, vb + i); +} + +void +vpair_fma (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + vector_pair_f64_t *vc = (vector_pair_f64_t *)c; + + size_t i; + size_t num2 = num / (sizeof 
(vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_fma (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_fms (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + vector_pair_f64_t *vc = (vector_pair_f64_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_fms (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_nfma (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + vector_pair_f64_t *vc = (vector_pair_f64_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_nfma (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_nfms (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + vector_pair_f64_t *vb = (vector_pair_f64_t *)b; + vector_pair_f64_t *vc = (vector_pair_f64_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_nfms (vr + i, va + i, vb + i, vc + i); +} + + +/* scalar tests. */ + +void +scalar_abs (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < 0.0) ? -a[i] : a[i]; +} + +void +scalar_nabs (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < 0.0) ? 
a[i] : -a[i]; +} + +void +scalar_neg (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = -a[i]; +} + +void +scalar_sqrt (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_sqrt (a[i]); +} + +void +scalar_add (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] + b[i]; +} + +void +scalar_sub (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] - b[i]; +} + +void +scalar_mul (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] * b[i]; +} + +void +scalar_div (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] / b[i]; +} + +void +scalar_min (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < b[i]) ? a[i] : b[i]; +} + +void +scalar_max (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] > b[i]) ? a[i] : b[i]; +} + +void +scalar_fma (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_fma (a[i], b[i], c[i]); +} + +void +scalar_fms (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_fma (a[i], b[i], -c[i]); +} + +void +scalar_nfma (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = - __builtin_fma (a[i], b[i], c[i]); +} + +void +scalar_nfms (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = - __builtin_fma (a[i], b[i], -c[i]); +} + + +/* Check results. 
*/ +void +check (const char *name) +{ + size_t i; + + for (i = 0; i < NUM; i++) + if (result1[i] != result2[i]) + { +#ifdef DEBUG + printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n", + (long)i, + result1[i], + result2[i], + name, + in_a[i], + in_b[i], + in_c[i]); +#endif + abort (); + } + + return; +} + +typedef void func_t (double *, double *, double *, double *, size_t); + +/* tests to run. */ +struct +{ + func_t *vpair_test; + func_t *scalar_test; + const char *name; +} tests[] = { + { vpair_abs, scalar_abs, "abs" }, + { vpair_nabs, scalar_nabs, "nabs" }, + { vpair_neg, scalar_neg, "neg" }, + { vpair_sqrt, scalar_sqrt, "sqrt" }, + { vpair_add, scalar_add, "add" }, + { vpair_sub, scalar_sub, "sub" }, + { vpair_mul, scalar_mul, "mul" }, + { vpair_div, scalar_div, "div" }, + { vpair_min, scalar_min, "min" }, + { vpair_max, scalar_max, "max" }, + { vpair_fma, scalar_fma, "fma" }, + { vpair_fms, scalar_fms, "fms" }, + { vpair_nfma, scalar_nfma, "nfma" }, + { vpair_nfms, scalar_nfms, "nfms" }, +}; + +/* Run tests. */ + +int +main (void) +{ + size_t i; + + /* Initialize the inputs. */ + for (i = 0; i < NUM; i++) + { + double d = (double)(i + 1); + in_a[i] = d * d; + in_b[i] = d; + in_c[i] = d + 2.0; + } + +#ifdef DEBUG + printf ("Start tests\n"); +#endif + + /* Run the tests. 
*/ + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) + { + tests[i].vpair_test (result1, in_a, in_b, in_c, NUM); + tests[i].scalar_test (result2, in_a, in_b, in_c, NUM); + check (tests[i].name); + } + +#ifdef DEBUG + printf ("End tests\n"); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c new file mode 100644 index 000000000000..f57fbbf8b050 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-not-p10.c @@ -0,0 +1,15 @@ +/* { dg-do run { target { vsx_hw } } } */ +/* { dg-options "-mvsx -O2 -ffast-math -mno-mma" } */ + +/* + * This test of the float (f32) vector pair functions in vector-pair.h is run + * on VSX systems when the load/store vector pair instructions are not + * available. + * + * The -ffast-math option is used to just use the hardware sqrt, min, and max + * instructions without calling into the library. + * + * The -mno-mma option disables GCC from enabling the __vector_pair type. + */ + +#include "vpair-4.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c new file mode 100644 index 000000000000..12291202c163 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-4-p10.c @@ -0,0 +1,14 @@ +/* { dg-do run { target { power10_hw } } } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2 -ffast-math -mmma" } */ + +/* + * This test of the float (f32) vector pair functions in vector-pair.h is run + * on VSX systems when the load/store vector pair instructions are available. + * + * The -ffast-math option is used to just use the hardware sqrt, min, and max + * instructions without calling into the library. + * + * The -mmma option makes sure GCC enables the __vector_pair type. 
+ */ + +#include "vpair-4.h" diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4.h b/gcc/testsuite/gcc.target/powerpc/vpair-4.h new file mode 100644 index 000000000000..1a80cf5e639a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vpair-4.h @@ -0,0 +1,435 @@ +/* Common include file to test the vector pair float functions. This is run + two times, once compiled for a non-power10 system that does not have the + vector pair load and store instructions, and once with power10 defaults that + has load/store vector pair. */ + +#include <stddef.h> +#include <stdlib.h> +#include <vector-pair.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +#ifndef NUM +#define NUM 16 +#endif + +static float result1[NUM]; +static float result2[NUM]; +static float in_a[NUM]; +static float in_b[NUM]; +static float in_c[NUM]; + +/* vector pair tests. */ + +void +vpair_abs (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_abs (vr + i, va + i); +} + +void +vpair_nabs (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_nabs (vr + i, va + i); +} + +void +vpair_neg (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_neg (vr + i, va + i); +} + +void +vpair_sqrt (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; 
+ + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_sqrt (vr + i, va + i); +} + +void +vpair_add (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_add (vr + i, va + i, vb + i); +} + +void +vpair_sub (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_sub (vr + i, va + i, vb + i); +} + +void +vpair_mul (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_mul (vr + i, va + i, vb + i); +} + +void +vpair_div (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_div (vr + i, va + i, vb + i); +} + +void +vpair_min (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + 
for (i = 0; i < num2; i++) + vpair_f32_min (vr + i, va + i, vb + i); +} + +void +vpair_max (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_max (vr + i, va + i, vb + i); +} + +void +vpair_fma (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + vector_pair_f32_t *vc = (vector_pair_f32_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_fma (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_fms (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + vector_pair_f32_t *vc = (vector_pair_f32_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_fms (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_nfma (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = (vector_pair_f32_t *)b; + vector_pair_f32_t *vc = (vector_pair_f32_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_nfma (vr + i, va + i, vb + i, vc + i); +} + +void +vpair_nfms (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + vector_pair_f32_t *vb = 
(vector_pair_f32_t *)b; + vector_pair_f32_t *vc = (vector_pair_f32_t *)c; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_nfms (vr + i, va + i, vb + i, vc + i); +} + + +/* scalar tests. */ + +void +scalar_abs (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < 0.0) ? -a[i] : a[i]; +} + +void +scalar_nabs (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < 0.0) ? a[i] : -a[i]; +} + +void +scalar_neg (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = -a[i]; +} + +void +scalar_sqrt (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_sqrt (a[i]); +} + +void +scalar_add (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] + b[i]; +} + +void +scalar_sub (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] - b[i]; +} + +void +scalar_mul (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] * b[i]; +} + +void +scalar_div (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = a[i] / b[i]; +} + +void +scalar_min (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] < b[i]) ? a[i] : b[i]; +} + +void +scalar_max (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = (a[i] > b[i]) ? 
a[i] : b[i]; +} + +void +scalar_fma (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_fma (a[i], b[i], c[i]); +} + +void +scalar_fms (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = __builtin_fma (a[i], b[i], -c[i]); +} + +void +scalar_nfma (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = - __builtin_fma (a[i], b[i], c[i]); +} + +void +scalar_nfms (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i++) + r[i] = - __builtin_fma (a[i], b[i], -c[i]); +} + + +/* Check results. */ +void +check (const char *name) +{ + size_t i; + + for (i = 0; i < NUM; i++) + if (result1[i] != result2[i]) + { +#ifdef DEBUG + printf ("test #%ld failed, %g != %g, %s (%g, %g, %g).\n", + (long)i, + result1[i], + result2[i], + name, + in_a[i], + in_b[i], + in_c[i]); +#endif + abort (); + } + + return; +} + +typedef void func_t (float *, float *, float *, float *, size_t); + +/* tests to run. */ +struct +{ + func_t *vpair_test; + func_t *scalar_test; + const char *name; +} tests[] = { + { vpair_abs, scalar_abs, "abs" }, + { vpair_nabs, scalar_nabs, "nabs" }, + { vpair_neg, scalar_neg, "neg" }, + { vpair_sqrt, scalar_sqrt, "sqrt" }, + { vpair_add, scalar_add, "add" }, + { vpair_sub, scalar_sub, "sub" }, + { vpair_mul, scalar_mul, "mul" }, + { vpair_div, scalar_div, "div" }, + { vpair_min, scalar_min, "min" }, + { vpair_max, scalar_max, "max" }, + { vpair_fma, scalar_fma, "fma" }, + { vpair_fms, scalar_fms, "fms" }, + { vpair_nfma, scalar_nfma, "nfma" }, + { vpair_nfms, scalar_nfms, "nfms" }, +}; + +/* Run tests. */ + +int +main (void) +{ + size_t i; + + /* Initialize the inputs. */ + for (i = 0; i < NUM; i++) + { + float f = (float)(i + 1); + in_a[i] = f * f; + in_b[i] = f; + in_c[i] = f + 2.0f; + } + +#ifdef DEBUG + printf ("Start tests\n"); +#endif + + /* Run the tests. 
*/ + for (i = 0; i < sizeof (tests) / sizeof (tests[0]); i++) + { + tests[i].vpair_test (result1, in_a, in_b, in_c, NUM); + tests[i].scalar_test (result2, in_a, in_b, in_c, NUM); + check (tests[i].name); + } + +#ifdef DEBUG + printf ("End tests\n"); +#endif + + return 0; +}