https://gcc.gnu.org/g:4ce2ca4ed9fee43f9fc7a99168a99d395736244b
commit 4ce2ca4ed9fee43f9fc7a99168a99d395736244b Author: Michael Meissner <meiss...@linux.ibm.com> Date: Sat Nov 2 00:11:55 2024 -0400 Add vector pair swap even and odd. This patch adds 2 additional functions: vpair_f32_swap_odd_even vpair_f64_swap_odd_even This swaps the odd and even elements in a vector pair. The intention is for libraries that are doing complex multiply to be able to swap the real and imaginary elements to do the multiply operation. 2024-11-01 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/vector-pair.h (vpair_f32_swap_odd_even): New function. (vpair_f64_swap_odd_even): Likewise. Diff: --- gcc/config/rs6000/vector-pair.h | 54 ++++++++++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/vpair-1.c | 13 +++++-- gcc/testsuite/gcc.target/powerpc/vpair-2.c | 13 +++++-- gcc/testsuite/gcc.target/powerpc/vpair-3.h | 26 ++++++++++++++ gcc/testsuite/gcc.target/powerpc/vpair-4.h | 26 ++++++++++++++ 5 files changed, 128 insertions(+), 4 deletions(-) diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index ceb28c4e974c..848c876beffc 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -516,4 +516,58 @@ vpair_f32_nfms (vector_pair_f32_t *__r, "xvnmsubasp", __builtin_vsx_xvnmsubsp); } + + +/* Swap even/odd operations. */ + +static inline void +vpair_f32_swap_odd_even (vector_pair_f32_t *__r, + const vector_pair_f32_t *__a) +{ + vector unsigned long long __rotate = { 32, 32 }; + +#if __MMA__ && !__VPAIR_NOP10__ + /* Power10 vector pair support. */ + __asm__ ("vrld %0,%1,%2\n\tvrld %L0,%L1,%2" + : "=v" (__r->__vpair) + : "v" (__a->__vpair), "v" (__rotate)); + +#else + /* vector pair not available. */ + vector unsigned long long *__r_ll = (vector unsigned long long *)__r; + vector unsigned long long *__a_ll = (vector unsigned long long *)__a; + __r_ll[0] = __builtin_vec_vrld (__a_ll[0], __rotate); + __r_ll[1] = __builtin_vec_vrld (__a_ll[1], __rotate); +#endif /* power10/not power10. 
*/ +} + + +static inline void +vpair_f64_swap_odd_even (vector_pair_f64_t *__r, + const vector_pair_f64_t *__a) +{ +#if __MMA__ && !__VPAIR_NOP10__ +#if __VPAIR__USE_FPR__ || !__GNUC__ || (!__linux__ && !__ELF__) + + /* Use vector pair and use %0 and %L0 on traditional FPR registers. */ + __asm__ ("xxpermdi %0,%1,%1,2\n\txxpermdi %L0,%L1,%L1,2" + : "=d" (__r->__vpair) + : "d" (__a->__vpair)); + +#else + /* Use vector pair and use %x0 and %x0+ on all VSX registers. */ + __asm__ ("xxpermdi %x0,%x1,%x1,2\n\txxpermdi %x0+1,%x1+1,%x1+1,2" + : "=wa" (__r->__vpair) + : "wa" (__a->__vpair)); +#endif + +#else + /* vector pair not available. */ + __r->__vp_f64[0] + = __builtin_vsx_xxpermdi_2df (__a->__vp_f64[0], __a->__vp_f64[0], 2); + __r->__vp_f64[1] + = __builtin_vsx_xxpermdi_2df (__a->__vp_f64[1], __a->__vp_f64[1], 2); +#endif +} + #endif /* _VECTOR_PAIR_H. */ diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-1.c b/gcc/testsuite/gcc.target/powerpc/vpair-1.c index 55772cc44e31..f56e99a1d044 100644 --- a/gcc/testsuite/gcc.target/powerpc/vpair-1.c +++ b/gcc/testsuite/gcc.target/powerpc/vpair-1.c @@ -124,8 +124,16 @@ test_nfms (vector_pair_f64_t *dest, vpair_f64_nfms (dest, x, y, z); } -/* { dg-final { scan-assembler-times {\mlxvp\M} 26 } } */ -/* { dg-final { scan-assembler-times {\mstxvp\M} 13 } } */ +void +test_swap (vector_pair_f64_t *dest, + vector_pair_f64_t *x) +{ + /* 1 lxvp, 2 xxpermdi, 1 stxvp. 
*/ + vpair_f64_swap_odd_even (dest, x); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 27 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 14 } } */ /* { dg-final { scan-assembler-times {\mxvabsdp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvadddp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvmadd.dp\M} 2 } } */ @@ -139,3 +147,4 @@ test_nfms (vector_pair_f64_t *dest, /* { dg-final { scan-assembler-times {\mxvnmsub.dp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvsqrtdp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvsubdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-2.c b/gcc/testsuite/gcc.target/powerpc/vpair-2.c index 3030b0b33380..0f84a74e94a1 100644 --- a/gcc/testsuite/gcc.target/powerpc/vpair-2.c +++ b/gcc/testsuite/gcc.target/powerpc/vpair-2.c @@ -124,8 +124,17 @@ test_nfms (vector_pair_f32_t *dest, vpair_f32_nfms (dest, x, y, z); } -/* { dg-final { scan-assembler-times {\mlxvp\M} 26 } } */ -/* { dg-final { scan-assembler-times {\mstxvp\M} 13 } } */ +void +test_swap (vector_pair_f32_t *dest, + vector_pair_f32_t *x) +{ + /* 1 lxvp, 2 vrld, 1 stxvp. 
*/ + vpair_f32_swap_odd_even (dest, x); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 27 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 14 } } */ +/* { dg-final { scan-assembler-times {\mvrld\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvabssp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvaddsp\M} 2 } } */ /* { dg-final { scan-assembler-times {\mxvmadd.sp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-3.h b/gcc/testsuite/gcc.target/powerpc/vpair-3.h index e61ad23dd57e..656488dbb627 100644 --- a/gcc/testsuite/gcc.target/powerpc/vpair-3.h +++ b/gcc/testsuite/gcc.target/powerpc/vpair-3.h @@ -219,6 +219,19 @@ vpair_nfms (double *r, double *a, double *b, double *c, size_t num) vpair_f64_nfms (vr + i, va + i, vb + i, vc + i); } +void +vpair_swap (double *r, double *a, double *b, double *c, size_t num) +{ + vector_pair_f64_t *vr = (vector_pair_f64_t *)r; + vector_pair_f64_t *va = (vector_pair_f64_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f64_t) / sizeof (double)); + + for (i = 0; i < num2; i++) + vpair_f64_swap_odd_even (vr + i, va + i); +} + /* scalar tests. */ @@ -347,6 +360,18 @@ scalar_nfms (double *r, double *a, double *b, double *c, size_t num) for (i = 0; i < num; i++) r[i] = - __builtin_fma (a[i], b[i], -c[i]); } + +void +scalar_swap (double *r, double *a, double *b, double *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i += 2) + { + r[i] = a[i+1]; + r[i+1] = a[i]; + } +} /* Check results. */ @@ -397,6 +422,7 @@ struct { vpair_fms, scalar_fms, "fms" }, { vpair_nfma, scalar_nfma, "nfma" }, { vpair_nfms, scalar_nfms, "nfms" }, + { vpair_swap, scalar_swap, "swap" }, }; /* Run tests. 
*/ diff --git a/gcc/testsuite/gcc.target/powerpc/vpair-4.h b/gcc/testsuite/gcc.target/powerpc/vpair-4.h index 1a80cf5e639a..49384e279740 100644 --- a/gcc/testsuite/gcc.target/powerpc/vpair-4.h +++ b/gcc/testsuite/gcc.target/powerpc/vpair-4.h @@ -219,6 +219,19 @@ vpair_nfms (float *r, float *a, float *b, float *c, size_t num) vpair_f32_nfms (vr + i, va + i, vb + i, vc + i); } +void +vpair_swap (float *r, float *a, float *b, float *c, size_t num) +{ + vector_pair_f32_t *vr = (vector_pair_f32_t *)r; + vector_pair_f32_t *va = (vector_pair_f32_t *)a; + + size_t i; + size_t num2 = num / (sizeof (vector_pair_f32_t) / sizeof (float)); + + for (i = 0; i < num2; i++) + vpair_f32_swap_odd_even (vr + i, va + i); +} + /* scalar tests. */ @@ -347,6 +360,18 @@ scalar_nfms (float *r, float *a, float *b, float *c, size_t num) for (i = 0; i < num; i++) r[i] = - __builtin_fma (a[i], b[i], -c[i]); } + +void +scalar_swap (float *r, float *a, float *b, float *c, size_t num) +{ + size_t i; + + for (i = 0; i < num; i += 2) + { + r[i] = a[i+1]; + r[i+1] = a[i]; + } +} /* Check results. */ @@ -397,6 +422,7 @@ struct { vpair_fms, scalar_fms, "fms" }, { vpair_nfma, scalar_nfma, "nfma" }, { vpair_nfms, scalar_nfms, "nfms" }, + { vpair_swap, scalar_swap, "swap" }, }; /* Run tests. */