https://gcc.gnu.org/g:4ce2ca4ed9fee43f9fc7a99168a99d395736244b

commit 4ce2ca4ed9fee43f9fc7a99168a99d395736244b
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Sat Nov 2 00:11:55 2024 -0400

    Add vector pair swap even and odd.
    
    This patch adds 2 additional functions:
    
            vpair_f32_swap_odd_even
            vpair_f64_swap_odd_even
    
    These functions swap the odd and even elements within a vector pair.  The
    intent is to let libraries that implement complex multiplication swap the
    real and imaginary elements as part of the multiply operation.
    
    2024-11-01  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/vector-pair.h (vpair_f32_swap_odd_even): New function.
            (vpair_f64_swap_odd_even): Likewise.

Diff:
---
 gcc/config/rs6000/vector-pair.h            | 54 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vpair-1.c | 13 +++++--
 gcc/testsuite/gcc.target/powerpc/vpair-2.c | 13 +++++--
 gcc/testsuite/gcc.target/powerpc/vpair-3.h | 26 ++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vpair-4.h | 26 ++++++++++++++
 5 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index ceb28c4e974c..848c876beffc 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -516,4 +516,58 @@ vpair_f32_nfms (vector_pair_f32_t       *__r,
                   "xvnmsubasp",
                   __builtin_vsx_xvnmsubsp);
 }
+
+
+/* Swap even/odd operations.  */
+
+static inline void
+vpair_f32_swap_odd_even (vector_pair_f32_t       *__r,
+                        const vector_pair_f32_t *__a)
+{
+  /* Rotating each 64-bit element by 32 bits swaps its 32-bit halves.  */
+  vector unsigned long long __rotate = { 32, 32 };
+#if __MMA__ && !__VPAIR_NOP10__
+  /* Power10: "&" keeps %0/%L0 out of %2, which the 2nd vrld still reads.  */
+  __asm__ ("vrld %0,%1,%2\n\tvrld %L0,%L1,%2"
+          : "=&v" (__r->__vpair)
+          : "v"  (__a->__vpair), "v" (__rotate));
+#else
+  /* vector pair not available; rotate each vector of the pair separately.  */
+  vector unsigned long long *__r_ll = (vector unsigned long long *)__r;
+  const vector unsigned long long *__a_ll
+    = (const vector unsigned long long *)__a;
+  __r_ll[0] = __builtin_vec_vrld (__a_ll[0], __rotate);
+  __r_ll[1] = __builtin_vec_vrld (__a_ll[1], __rotate);
+#endif /* power10/not power10.  */
+}
+
+
+static inline void
+vpair_f64_swap_odd_even (vector_pair_f64_t       *__r,
+                        const vector_pair_f64_t *__a)
+{
+  /* Store in *__r each vector of *__a with its two doubles exchanged.  */
+#if __MMA__ && !__VPAIR_NOP10__
+#if __VPAIR__USE_FPR__ || !__GNUC__ || (!__linux__ && !__ELF__)
+
+  /* Use vector pair and use %0 and %L0 on traditional FPR registers.  */
+  __asm__ ("xxpermdi %0,%1,%1,2\n\txxpermdi %L0,%L1,%L1,2"
+          : "=d" (__r->__vpair)
+          : "d"  (__a->__vpair));
+
+#else
+  /* Use vector pair and use %x0 and %x0+1 on all VSX registers.  */
+  __asm__ ("xxpermdi %x0,%x1,%x1,2\n\txxpermdi %x0+1,%x1+1,%x1+1,2"
+          : "=wa" (__r->__vpair)
+          : "wa"  (__a->__vpair));
+#endif
+#else
+  /* vector pair not available.  */
+  __r->__vp_f64[0]
+    = __builtin_vsx_xxpermdi_2df (__a->__vp_f64[0], __a->__vp_f64[0], 2);
+  __r->__vp_f64[1]
+    = __builtin_vsx_xxpermdi_2df (__a->__vp_f64[1], __a->__vp_f64[1], 2);
+#endif
+}
+
 #endif /* _VECTOR_PAIR_H.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-1.c b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
index 55772cc44e31..f56e99a1d044 100644
--- a/gcc/testsuite/gcc.target/powerpc/vpair-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-1.c
@@ -124,8 +124,16 @@ test_nfms (vector_pair_f64_t *dest,
   vpair_f64_nfms (dest, x, y, z);
 }
 
-/* { dg-final { scan-assembler-times {\mlxvp\M}       26 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M}      13 } } */
+void
+test_swap (vector_pair_f64_t *dest,
+          vector_pair_f64_t *x)
+{
+  /* Expect 1 lxvp, 2 xxpermdi (one per vector in the pair), 1 stxvp.  */
+  vpair_f64_swap_odd_even (dest, x);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       27 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}      14 } } */
 /* { dg-final { scan-assembler-times {\mxvabsdp\M}     2 } } */
 /* { dg-final { scan-assembler-times {\mxvadddp\M}     2 } } */
 /* { dg-final { scan-assembler-times {\mxvmadd.dp\M}   2 } } */
@@ -139,3 +147,4 @@ test_nfms (vector_pair_f64_t *dest,
 /* { dg-final { scan-assembler-times {\mxvnmsub.dp\M}  2 } } */
 /* { dg-final { scan-assembler-times {\mxvsqrtdp\M}    2 } } */
 /* { dg-final { scan-assembler-times {\mxvsubdp\M}     2 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M}    2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-2.c b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
index 3030b0b33380..0f84a74e94a1 100644
--- a/gcc/testsuite/gcc.target/powerpc/vpair-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-2.c
@@ -124,8 +124,17 @@ test_nfms (vector_pair_f32_t *dest,
   vpair_f32_nfms (dest, x, y, z);
 }
 
-/* { dg-final { scan-assembler-times {\mlxvp\M}       26 } } */
-/* { dg-final { scan-assembler-times {\mstxvp\M}      13 } } */
+void
+test_swap (vector_pair_f32_t *dest,
+          vector_pair_f32_t *x)
+{
+  /* Expect 1 lxvp, 2 vrld (one per vector in the pair), 1 stxvp.  */
+  vpair_f32_swap_odd_even (dest, x);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       27 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}      14 } } */
+/* { dg-final { scan-assembler-times {\mvrld\M}        2 } } */
 /* { dg-final { scan-assembler-times {\mxvabssp\M}     2 } } */
 /* { dg-final { scan-assembler-times {\mxvaddsp\M}     2 } } */
 /* { dg-final { scan-assembler-times {\mxvmadd.sp\M}   2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
index e61ad23dd57e..656488dbb627 100644
--- a/gcc/testsuite/gcc.target/powerpc/vpair-3.h
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h
@@ -219,6 +219,19 @@ vpair_nfms (double *r, double *a, double *b, double *c, size_t num)
     vpair_f64_nfms (vr + i, va + i, vb + i, vc + i);
 }
 
+void
+vpair_swap (double *r, double *a, double *b, double *c, size_t num)
+{
+  /* Swap odd/even doubles one vector pair at a time; b and c are unused.  */
+  const size_t per_pair = sizeof (vector_pair_f64_t) / sizeof (double);
+  vector_pair_f64_t *dest = (vector_pair_f64_t *)r;
+  vector_pair_f64_t *src = (vector_pair_f64_t *)a;
+  size_t n_pairs = num / per_pair;
+
+  while (n_pairs-- > 0)
+    vpair_f64_swap_odd_even (dest++, src++);
+}
+
 
 /* scalar tests.  */
 
@@ -347,6 +360,18 @@ scalar_nfms (double *r, double *a, double *b, double *c, size_t num)
   for (i = 0; i < num; i++)
     r[i] = - __builtin_fma (a[i], b[i], -c[i]);
 }
+
+void
+scalar_swap (double *r, double *a, double *b, double *c, size_t num)
+{
+  size_t i;
+  /* Exchange each (even, odd) pair of A into R; B and C are unused.  */
+  for (i = 0; i + 1 < num; i += 2)  /* Never index past an odd NUM.  */
+    {
+      r[i] = a[i+1];
+      r[i+1] = a[i];
+    }
+}
 
 
 /* Check results.  */
@@ -397,6 +422,7 @@ struct
   { vpair_fms,  scalar_fms,     "fms"  }, 
   { vpair_nfma, scalar_nfma,    "nfma" }, 
   { vpair_nfms, scalar_nfms,    "nfms" }, 
+  { vpair_swap, scalar_swap,    "swap" }, 
 };
 
 /* Run tests.  */
diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4.h b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
index 1a80cf5e639a..49384e279740 100644
--- a/gcc/testsuite/gcc.target/powerpc/vpair-4.h
+++ b/gcc/testsuite/gcc.target/powerpc/vpair-4.h
@@ -219,6 +219,19 @@ vpair_nfms (float *r, float *a, float *b, float *c, size_t num)
     vpair_f32_nfms (vr + i, va + i, vb + i, vc + i);
 }
 
+void
+vpair_swap (float *r, float *a, float *b, float *c, size_t num)
+{
+  /* Swap odd/even floats one vector pair at a time; b and c are unused.  */
+  const size_t per_pair = sizeof (vector_pair_f32_t) / sizeof (float);
+  vector_pair_f32_t *dest = (vector_pair_f32_t *)r;
+  vector_pair_f32_t *src = (vector_pair_f32_t *)a;
+  size_t n_pairs = num / per_pair;
+
+  while (n_pairs-- > 0)
+    vpair_f32_swap_odd_even (dest++, src++);
+}
+
 
 /* scalar tests.  */
 
@@ -347,6 +360,18 @@ scalar_nfms (float *r, float *a, float *b, float *c, size_t num)
   for (i = 0; i < num; i++)
     r[i] = - __builtin_fma (a[i], b[i], -c[i]);
 }
+
+void
+scalar_swap (float *r, float *a, float *b, float *c, size_t num)
+{
+  size_t i;
+  /* Exchange each (even, odd) pair of A into R; B and C are unused.  */
+  for (i = 0; i + 1 < num; i += 2)  /* Never index past an odd NUM.  */
+    {
+      r[i] = a[i+1];
+      r[i+1] = a[i];
+    }
+}
 
 
 /* Check results.  */
@@ -397,6 +422,7 @@ struct
   { vpair_fms,  scalar_fms,     "fms"  }, 
   { vpair_nfma, scalar_nfma,    "nfma" }, 
   { vpair_nfms, scalar_nfms,    "nfms" }, 
+  { vpair_swap, scalar_swap,    "swap" }, 
 };
 
 /* Run tests.  */

Reply via email to