https://gcc.gnu.org/g:b5dc5d0e1843ec1bb40270b684e161a06359b6a8
commit b5dc5d0e1843ec1bb40270b684e161a06359b6a8 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Mon Sep 30 13:25:21 2024 -0400 Rewrite vector-pair.h. 2024-09-30 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/vector-pair.h: Rewrite. Diff: --- gcc/config/rs6000/vector-pair.h | 1447 ++++++++++++++++++++++++++------------- 1 file changed, 973 insertions(+), 474 deletions(-) diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index e0023842f331..78dfc74c0d59 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -30,47 +30,9 @@ #ifndef _VECTOR_PAIR_H #define _VECTOR_PAIR_H 1 -/* During testing, allow vector-pair.h to be included multiple times. */ -#undef vector_pair_t -#undef vector_pair_f64_t -#undef vector_pair_f32_t - -#undef vpair_f64_abs -#undef vpair_f64_add -#undef vpair_f64_div -#undef vpair_f64_fma -#undef vpair_f64_fms -#undef vpair_f64_max -#undef vpair_f64_min -#undef vpair_f64_mul -#undef vpair_f64_nabs -#undef vpair_f64_neg -#undef vpair_f64_nfma -#undef vpair_f64_nfms -#undef vpair_f64_splat -#undef vpair_f64_sqrt -#undef vpair_f64_sub - -#undef vpair_f32_abs -#undef vpair_f32_add -#undef vpair_f32_div -#undef vpair_f32_fma -#undef vpair_f32_fms -#undef vpair_f32_max -#undef vpair_f32_min -#undef vpair_f32_mul -#undef vpair_f32_nabs -#undef vpair_f32_neg -#undef vpair_f32_nfma -#undef vpair_f32_nfms -#undef vpair_f32_splat -#undef vpair_f32_sqrt -#undef vpair_f32_sub - -/* Union of the various vector pair types. For testing, allow vector-pair.h to - be included multiple times, so protect the union from re-declaration. */ -#ifndef __VECTOR_PAIR_UNION__ -#define __VECTOR_PAIR_UNION__ 1 +/* Union of the various vector pair types. 
*/ +#ifndef __VPAIR_UNION__ +#define __VPAIR_UNION__ 1 union __vpair_union { @@ -87,11 +49,13 @@ typedef union __vpair_union vector_pair_t; typedef union __vpair_union vector_pair_f64_t; typedef union __vpair_union vector_pair_f32_t; typedef union __vpair_union *__vpair_ptr_t; +#endif /* __VPAIR_UNION__. */ -#endif /* __VECTOR_PAIR_UNION__. */ +#if !__VPAIR_BUILTIN__ && !__VPAIR_ASM__ && !__VPAIR_NOP10__ +#if __MMA__ && __VPAIR__ +#define __VPAIR_BUILTIN__ 1 -#if !__VPAIR_ASM__ && !__VPAIR_NOP10__ -#if __MMA__ +#elif __MMA__ #define __VPAIR_ASM__ 1 #else @@ -99,9 +63,379 @@ typedef union __vpair_union *__vpair_ptr_t; #endif #endif -/* ISA 3.1 (power10/power11) support with explicit vector pair type. */ + +/* ISA 3.1 (power10/power11) support with explicit vector pair type and + built-in functions for the vector pair operations. */ + +#if __VPAIR_BUILTIN__ && __MMA__ + +/* Allow vector-pair.h to be included multiple times during testing. */ +#undef vpair_f64_abs +#undef vpair_f64_add +#undef vpair_f64_div +#undef vpair_f64_fma +#undef vpair_f64_fms +#undef vpair_f64_max +#undef vpair_f64_min +#undef vpair_f64_mul +#undef vpair_f64_nabs +#undef vpair_f64_neg +#undef vpair_f64_nfma +#undef vpair_f64_nfms +#undef vpair_f64_splat +#undef vpair_f64_sqrt +#undef vpair_f64_sub + +#undef vpair_f32_abs +#undef vpair_f32_add +#undef vpair_f32_div +#undef vpair_f32_fma +#undef vpair_f32_fms +#undef vpair_f32_max +#undef vpair_f32_min +#undef vpair_f32_mul +#undef vpair_f32_nabs +#undef vpair_f32_neg +#undef vpair_f32_nfma +#undef vpair_f32_nfms +#undef vpair_f32_splat +#undef vpair_f32_sqrt +#undef vpair_f32_sub + +#define vpair_f64_abs __vpair_f64_abs_builtin +#define vpair_f64_add __vpair_f64_add_builtin +#define vpair_f64_div __vpair_f64_div_builtin +#define vpair_f64_fma __vpair_f64_fma_builtin +#define vpair_f64_fms __vpair_f64_fms_builtin +#define vpair_f64_max __vpair_f64_max_builtin +#define vpair_f64_min __vpair_f64_min_builtin +#define vpair_f64_mul 
__vpair_f64_mul_builtin +#define vpair_f64_nabs __vpair_f64_nabs_builtin +#define vpair_f64_neg __vpair_f64_neg_builtin +#define vpair_f64_nfma __vpair_f64_nfma_builtin +#define vpair_f64_nfms __vpair_f64_nfms_builtin +#define vpair_f64_splat __vpair_f64_splat_builtin +#define vpair_f64_sqrt __vpair_f64_sqrt_builtin +#define vpair_f64_sub __vpair_f64_sub_builtin + +#define vpair_f32_abs __vpair_f32_abs_builtin +#define vpair_f32_add __vpair_f32_add_builtin +#define vpair_f32_div __vpair_f32_div_builtin +#define vpair_f32_fma __vpair_f32_fma_builtin +#define vpair_f32_fms __vpair_f32_fms_builtin +#define vpair_f32_max __vpair_f32_max_builtin +#define vpair_f32_min __vpair_f32_min_builtin +#define vpair_f32_mul __vpair_f32_mul_builtin +#define vpair_f32_nabs __vpair_f32_nabs_builtin +#define vpair_f32_neg __vpair_f32_neg_builtin +#define vpair_f32_nfma __vpair_f32_nfma_builtin +#define vpair_f32_nfms __vpair_f32_nfms_builtin +#define vpair_f32_splat __vpair_f32_splat_builtin +#define vpair_f32_sqrt __vpair_f32_sqrt_builtin +#define vpair_f32_sub __vpair_f32_sub_builtin + +/* vector pair double operations on power10/power11. 
*/ +static inline void +vpair_f64_splat (vector_pair_f64_t *__r, double x) +{ + __r->__vpair = __builtin_vpair_f64_splat (x); +} + +static inline void +vpair_f64_abs (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __r->__vpair = __builtin_vpair_f64_abs (__a->_-vpair); +} + +static inline void +vpair_f64_nabs (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __r->__vpair = __builtin_vpair_f64_nabs (__a->_-vpair); +} + +static inline void +vpair_f64_neg (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __r->__vpair = __builtin_vpair_f64_neg (__a->_-vpair); +} + +static inline void +vpair_f64_sqrt (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __r->__vpair = __builtin_vpair_f64_sqrt (__a->_-vpair); +} + +static inline void +vpair_f64_add (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __r->__vpair = __builtin_vpair_f64_add (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f64_max (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __r->__vpair = __builtin_vpair_f64_max (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f64_min (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __r->__vpair = __builtin_vpair_f64_min (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f64_mul (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __r->__vpair = __builtin_vpair_f64_mul (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f64_sub (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __r->__vpair = __builtin_vpair_f64_sub (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f64_fma (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __r->__vpair = __builtin_vpair_f64_fma (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f64_fms (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + 
vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __r->__vpair = __builtin_vpair_f64_fms (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f64_nfma (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __r->__vpair = __builtin_vpair_f64_nfma (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f64_nfms (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __r->__vpair = __builtin_vpair_f64_nfms (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +/* vector pair float operations on power10/power11. */ + +static inline void +vpair_f32_splat (vector_pair_f32_t *__r, float x) +{ + __r->__vpair = __builtin_vpair_f32_splat (x); +} + +static inline void +vpair_f32_abs (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __r->__vpair = __builtin_vpair_f32_abs (__a->_-vpair); +} + +static inline void +vpair_f32_nabs (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __r->__vpair = __builtin_vpair_f32_nabs (__a->_-vpair); +} + +static inline void +vpair_f32_neg (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __r->__vpair = __builtin_vpair_f32_neg (__a->_-vpair); +} + +static inline void +vpair_f32_sqrt (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __r->__vpair = __builtin_vpair_f32_sqrt (__a->_-vpair); +} + +static inline void +vpair_f32_add (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __r->__vpair = __builtin_vpair_f32_add (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f32_max (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __r->__vpair = __builtin_vpair_f32_max (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f32_min (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __r->__vpair = __builtin_vpair_f32_min (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f32_mul 
(vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __r->__vpair = __builtin_vpair_f32_mul (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f32_sub (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __r->__vpair = __builtin_vpair_f32_sub (__a->__vpair, __b->__vpair); +} + +static inline void +vpair_f32_fma (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __r->__vpair = __builtin_vpair_f32_fma (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f32_fms (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __r->__vpair = __builtin_vpair_f32_fms (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f32_nfma (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __r->__vpair = __builtin_vpair_f32_nfma (__a->__vpair, + __b->__vpair, + __c->__vpair); +} + +static inline void +vpair_f32_nfms (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __r->__vpair = __builtin_vpair_f32_nfms (__a->__vpair, + __b->__vpair, + __c->__vpair); +} -#if __VPAIR_ASM__ && __MMA__ + +/* ISA 3.1 (power10/power11) support with explicit vector pair type, using + __asm__ to do the vector pair operations. */ + +#elif __VPAIR_ASM__ && __MMA__ + + +/* Allow vector-pair.h to be included multiple times during testing. 
*/ +#undef vpair_f64_abs +#undef vpair_f64_add +#undef vpair_f64_div +#undef vpair_f64_fma +#undef vpair_f64_fms +#undef vpair_f64_max +#undef vpair_f64_min +#undef vpair_f64_mul +#undef vpair_f64_nabs +#undef vpair_f64_neg +#undef vpair_f64_nfma +#undef vpair_f64_nfms +#undef vpair_f64_splat +#undef vpair_f64_sqrt +#undef vpair_f64_sub + +#undef vpair_f32_abs +#undef vpair_f32_add +#undef vpair_f32_div +#undef vpair_f32_fma +#undef vpair_f32_fms +#undef vpair_f32_max +#undef vpair_f32_min +#undef vpair_f32_mul +#undef vpair_f32_nabs +#undef vpair_f32_neg +#undef vpair_f32_nfma +#undef vpair_f32_nfms +#undef vpair_f32_splat +#undef vpair_f32_sqrt +#undef vpair_f32_sub + +#define vpair_f64_abs __vpair_f64_abs_asm +#define vpair_f64_add __vpair_f64_add_asm +#define vpair_f64_div __vpair_f64_div_asm +#define vpair_f64_fma __vpair_f64_fma_asm +#define vpair_f64_fms __vpair_f64_fms_asm +#define vpair_f64_max __vpair_f64_max_asm +#define vpair_f64_min __vpair_f64_min_asm +#define vpair_f64_mul __vpair_f64_mul_asm +#define vpair_f64_nabs __vpair_f64_nabs_asm +#define vpair_f64_neg __vpair_f64_neg_asm +#define vpair_f64_nfma __vpair_f64_nfma_asm +#define vpair_f64_nfms __vpair_f64_nfms_asm +#define vpair_f64_splat __vpair_f64_splat_asm +#define vpair_f64_sqrt __vpair_f64_sqrt_asm +#define vpair_f64_sub __vpair_f64_sub_asm + +#define vpair_f32_abs __vpair_f32_abs_asm +#define vpair_f32_add __vpair_f32_add_asm +#define vpair_f32_div __vpair_f32_div_asm +#define vpair_f32_fma __vpair_f32_fma_asm +#define vpair_f32_fms __vpair_f32_fms_asm +#define vpair_f32_max __vpair_f32_max_asm +#define vpair_f32_min __vpair_f32_min_asm +#define vpair_f32_mul __vpair_f32_mul_asm +#define vpair_f32_nabs __vpair_f32_nabs_asm +#define vpair_f32_neg __vpair_f32_neg_asm +#define vpair_f32_nfma __vpair_f32_nfma_asm +#define vpair_f32_nfms __vpair_f32_nfms_asm +#define vpair_f32_splat __vpair_f32_splat_asm +#define vpair_f32_sqrt __vpair_f32_sqrt_asm +#define vpair_f32_sub __vpair_f32_sub_asm 
#undef __VPAIR_FP_UNARY_ASM #define __VPAIR_FP_UNARY_ASM(OPCODE, R, A) \ @@ -125,439 +459,604 @@ typedef union __vpair_union *__vpair_ptr_t; "wa" (((__vpair_ptr_t)(B))->__vpair), \ "0" (((__vpair_ptr_t)(C))->__vpair)); -#define vpair_f64_splat(R, A) \ - __asm__ ("xxlor %x0+1,%x1,%x1" \ - : "=wa" (((__vpair_ptr_t)(R))->__vpair) \ - : "0" (__builtin_vec_splats ((double) (A)))) - -#define vpair_f64_abs(R,A) __VPAIR_FP_UNARY_ASM ("xvabsdp", R, A) -#define vpair_f64_nabs(R,A) __VPAIR_FP_UNARY_ASM ("xvnabsdp", R, A) -#define vpair_f64_neg(R,A) __VPAIR_FP_UNARY_ASM ("xvnegdp", R, A) -#define vpair_f64_sqrt(R,A) __VPAIR_FP_UNARY_ASM ("xvsqrtdp", R, A) - -#define vpair_f64_add(R,A,B) __VPAIR_FP_BINARY_ASM ("xvadddp", R, A, B) -#define vpair_f64_div(R,A,B) __VPAIR_FP_BINARY_ASM ("xvdivdp", R, A, B) -#define vpair_f64_max(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmaxdp", R, A, B) -#define vpair_f64_min(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmindp", R, A, B) -#define vpair_f64_mul(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmuldp", R, A, B) -#define vpair_f64_sub(R,A,B) __VPAIR_FP_BINARY_ASM ("xvsubdp", R, A, B) - -#define vpair_f64_fma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmaddadp", R, A, B, C) -#define vpair_f64_fms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmsubadp", R, A, B, C) -#define vpair_f64_nfma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmaddadp", R, A, B, C) -#define vpair_f64_nfms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmsubadp", R, A, B, C) - -#define vpair_f32_splat(R, A) \ - __asm__ ("xxlor %x0+1,%x1,%x1" \ - : "=wa" (((__vpair_ptr_t)(R))->__vpair) \ - : "0" (__builtin_vec_splats ((float) (A)))) - -#define vpair_f32_abs(R,A) __VPAIR_FP_UNARY_ASM ("xvabssp", R, A) -#define vpair_f32_nabs(R,A) __VPAIR_FP_UNARY_ASM ("xvnabssp", R, A) -#define vpair_f32_neg(R,A) __VPAIR_FP_UNARY_ASM ("xvnegsp", R, A) -#define vpair_f32_sqrt(R,A) __VPAIR_FP_UNARY_ASM ("xvsqrtsp", R, A) - -#define vpair_f32_add(R,A,B) __VPAIR_FP_BINARY_ASM ("xvaddsp", R, A, B) -#define vpair_f32_div(R,A,B) __VPAIR_FP_BINARY_ASM ("xvdivsp", R, A, B) -#define 
vpair_f32_max(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmaxsp", R, A, B) -#define vpair_f32_min(R,A,B) __VPAIR_FP_BINARY_ASM ("xvminsp", R, A, B) -#define vpair_f32_mul(R,A,B) __VPAIR_FP_BINARY_ASM ("xvmulsp", R, A, B) -#define vpair_f32_sub(R,A,B) __VPAIR_FP_BINARY_ASM ("xvsubsp", R, A, B) - -#define vpair_f32_fma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmaddasp", R, A, B, C) -#define vpair_f32_fms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvmsubasp", R, A, B, C) -#define vpair_f32_nfma(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmaddasp", R, A, B, C) -#define vpair_f32_nfms(R,A,B,C) __VPAIR_FP_FMA_ASM ("xvnmsubasp", R, A, B, C) +/* vector pair double operations on power10/power11. */ +static inline void +vpair_f64_splat (vector_pair_f64_t *__r, double x) +{ + __asm__ ("xxlor %x0+1,%x1,%x1" + : "=wa" (__r->__vpair) + : "0" (__builtin_vec_splats (x))); +} + +static inline void +vpair_f64_abs (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvabsdp", __r, __a); +} + +static inline void +vpair_f64_nabs (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvnabsdp", __r, __a); +} + +static inline void +vpair_f64_neg (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvnegdp", __r, __a); +} + +static inline void +vpair_f64_sqrt (vector_ptr_f64_t *__r, vector_pair_f64_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvsqrtdp", __r, __a); +} + +static inline void +vpair_f64_add (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvadddp", __r, __a, __b); +} + +static inline void +vpair_f64_max (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvmaxdp", __r, __a, __b); +} + +static inline void +vpair_f64_min (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvmindp", __r, __a, __b); +} + +static inline void +vpair_f64_mul (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ 
+ __VPAIR_FP_BINARY_ASM ("xvmuldp", __r, __a, __b); +} + +static inline void +vpair_f64_sub (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvsubdp", __r, __a, __b); +} + +static inline void +vpair_f64_fma (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvmaddadp", __r, __a, __b, __c); +} + +static inline void +vpair_f64_fms (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvmsubadp", __r, __a, __b, __c); +} + +static inline void +vpair_f64_nfma (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvnmaddadp", __r, __a, __b, __c); +} + +static inline void +vpair_f64_nfms (vector_ptr_f64_t *__r, + vector_ptr_f64_t *__a, + vector_ptr_f64_t *__b, + vector_ptr_f64_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvnmsubadp", __r, __a, __b, __c); +} +static inline void +vpair_f32_splat (vector_pair_f32_t *__r, float x) +{ + __asm__ ("xxlor %x0+1,%x1,%x1" + : "=wa" (__r->__vpair) + : "0" (__builtin_vec_splats (x))); +} + +static inline void +vpair_f32_abs (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvabssp", __r, __a); +} + +static inline void +vpair_f32_nabs (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvnabssp", __r, __a); +} + +static inline void +vpair_f32_neg (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvnegsp", __r, __a); +} + +static inline void +vpair_f32_sqrt (vector_ptr_f32_t *__r, vector_pair_f32_t *__a) +{ + __VPAIR_FP_UNARY_ASM ("xvsqrtsp", __r, __a); +} + +static inline void +vpair_f32_add (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvaddsp", __r, __a, __b); +} + +static inline void +vpair_f32_max (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, 
+ vector_ptr_f32_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvmaxsp", __r, __a, __b); +} + +static inline void +vpair_f32_min (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvminsp", __r, __a, __b); +} + +static inline void +vpair_f32_mul (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvmulsp", __r, __a, __b); +} + +static inline void +vpair_f32_sub (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b) +{ + __VPAIR_FP_BINARY_ASM ("xvsubsp", __r, __a, __b); +} + +static inline void +vpair_f32_fma (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvmaddasp", __r, __a, __b, __c); +} + +static inline void +vpair_f32_fms (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvmsubasp", __r, __a, __b, __c); +} + +static inline void +vpair_f32_nfma (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvnmaddasp", __r, __a, __b, __c); +} + +static inline void +vpair_f32_nfms (vector_ptr_f32_t *__r, + vector_ptr_f32_t *__a, + vector_ptr_f32_t *__b, + vector_ptr_f32_t *__c) +{ + __VPAIR_FP_FMA_ASM ("xvnmsubasp", __r, __a, __b, __c); +} + +/* vector pair float operations on power10/power11. */ #else /* ISA 2.8/3.0 support for machines without vector pair support. */ /* vector pair double operations on power8/power9. 
*/ -#define vpair_f64_splat(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vr->__vp_f64[0] = __vr->__vp_f64[1] \ - = __builtin_vec_splats ((double)(A)); \ - } \ - while (0) - -#define vpair_f64_abs(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f64[0] = __builtin_vsx_xvabsdp (__va->__vp_f64[0]); \ - __vr->__vp_f64[1] = __builtin_vsx_xvabsdp (__va->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_nabs(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f64[0] = __builtin_vsx_xvnabsdp (__va->__vp_f64[0]); \ - __vr->__vp_f64[1] = __builtin_vsx_xvnabsdp (__va->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_neg(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f64[0] = - __va->__vp_f64[0]; \ - __vr->__vp_f64[1] = - __va->__vp_f64[1]; \ - } \ - while (0) - -#define vpair_f64_sqrt(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f64[0] = __builtin_vsx_xvsqrtdp (__va->__vp_f64[0]); \ - __vr->__vp_f64[1] = __builtin_vsx_xvsqrtdp (__va->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_add(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] = __va->__vp_f64[0] + __vb->__vp_f64[0]; \ - __vr->__vp_f64[1] = __va->__vp_f64[1] + __vb->__vp_f64[1]; \ - } \ - while (0) - -#define vpair_f64_div(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] = __va->__vp_f64[0] / __vb->__vp_f64[0]; \ - __vr->__vp_f64[1] = __va->__vp_f64[1] / __vb->__vp_f64[1]; \ - } \ - while (0) - -#define vpair_f64_max(R, A, B) 
\ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvmaxdp (__va->__vp_f64[0], __vb->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvmaxdp (__va->__vp_f64[1], __vb->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_min(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvmindp (__va->__vp_f64[0], __vb->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvmindp (__va->__vp_f64[1], __vb->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_mul(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] = __va->__vp_f64[0] * __vb->__vp_f64[0]; \ - __vr->__vp_f64[1] = __va->__vp_f64[1] * __vb->__vp_f64[1]; \ - } \ - while (0) - -#define vpair_f64_sub(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f64[0] = __va->__vp_f64[0] - __vb->__vp_f64[0]; \ - __vr->__vp_f64[1] = __va->__vp_f64[1] - __vb->__vp_f64[1]; \ - } \ - while (0) - -#define vpair_f64_fma(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvmadddp (__va->__vp_f64[0], \ - __vb->__vp_f64[0], \ - __vc->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvmadddp (__va->__vp_f64[1], \ - __vb->__vp_f64[1], \ - __vc->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_fms(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = 
(__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvmsubdp (__va->__vp_f64[0], \ - __vb->__vp_f64[0], \ - __vc->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvmsubdp (__va->__vp_f64[1], \ - __vb->__vp_f64[1], \ - __vc->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_nfma(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvnmadddp (__va->__vp_f64[0], \ - __vb->__vp_f64[0], \ - __vc->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvnmadddp (__va->__vp_f64[1], \ - __vb->__vp_f64[1], \ - __vc->__vp_f64[1]); \ - } \ - while (0) - -#define vpair_f64_nfms(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f64[0] \ - = __builtin_vsx_xvnmsubdp (__va->__vp_f64[0], \ - __vb->__vp_f64[0], \ - __vc->__vp_f64[0]); \ - __vr->__vp_f64[1] \ - = __builtin_vsx_xvnmsubdp (__va->__vp_f64[1], \ - __vb->__vp_f64[1], \ - __vc->__vp_f64[1]); \ - } \ - while (0) - -/* vector pair float operations on power8/power9. 
*/ - -#define vpair_f32_splat(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vr->__vp_f32[0] = __vr->__vp_f32[1] \ - = __builtin_vec_splats ((float)(A)); \ - } \ - while (0) - -#define vpair_f32_abs(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f32[0] = __builtin_vsx_xvabssp (__va->__vp_f32[0]); \ - __vr->__vp_f32[1] = __builtin_vsx_xvabssp (__va->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_nabs(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f32[0] = __builtin_vsx_xvnabssp (__va->__vp_f32[0]); \ - __vr->__vp_f32[1] = __builtin_vsx_xvnabssp (__va->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_neg(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f32[0] = - __va->__vp_f32[0]; \ - __vr->__vp_f32[1] = - __va->__vp_f32[1]; \ - } \ - while (0) - -#define vpair_f32_sqrt(R, A) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vr->__vp_f32[0] = __builtin_vsx_xvsqrtsp (__va->__vp_f32[0]); \ - __vr->__vp_f32[1] = __builtin_vsx_xvsqrtsp (__va->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_add(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] = __va->__vp_f32[0] + __vb->__vp_f32[0]; \ - __vr->__vp_f32[1] = __va->__vp_f32[1] + __vb->__vp_f32[1]; \ - } \ - while (0) - -#define vpair_f32_div(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] = __va->__vp_f32[0] / __vb->__vp_f32[0]; \ - __vr->__vp_f32[1] = __va->__vp_f32[1] / __vb->__vp_f32[1]; \ - } \ - while (0) - -#define vpair_f32_max(R, A, 
B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvmaxsp (__va->__vp_f32[0], __vb->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvmaxsp (__va->__vp_f32[1], __vb->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_min(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvminsp (__va->__vp_f32[0], __vb->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvminsp (__va->__vp_f32[1], __vb->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_mul(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] = __va->__vp_f32[0] * __vb->__vp_f32[0]; \ - __vr->__vp_f32[1] = __va->__vp_f32[1] * __vb->__vp_f32[1]; \ - } \ - while (0) - -#define vpair_f32_sub(R, A, B) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vr->__vp_f32[0] = __va->__vp_f32[0] - __vb->__vp_f32[0]; \ - __vr->__vp_f32[1] = __va->__vp_f32[1] - __vb->__vp_f32[1]; \ - } \ - while (0) - -#define vpair_f32_fma(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvmaddsp (__va->__vp_f32[0], \ - __vb->__vp_f32[0], \ - __vc->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvmaddsp (__va->__vp_f32[1], \ - __vb->__vp_f32[1], \ - __vc->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_fms(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = 
(__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvmsubsp (__va->__vp_f32[0], \ - __vb->__vp_f32[0], \ - __vc->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvmsubsp (__va->__vp_f32[1], \ - __vb->__vp_f32[1], \ - __vc->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_nfma(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvnmaddsp (__va->__vp_f32[0], \ - __vb->__vp_f32[0], \ - __vc->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvnmaddsp (__va->__vp_f32[1], \ - __vb->__vp_f32[1], \ - __vc->__vp_f32[1]); \ - } \ - while (0) - -#define vpair_f32_nfms(R, A, B, C) \ - do \ - { \ - __vpair_ptr_t __vr = (__vpair_ptr_t)(R); \ - __vpair_ptr_t __va = (__vpair_ptr_t)(A); \ - __vpair_ptr_t __vb = (__vpair_ptr_t)(B); \ - __vpair_ptr_t __vc = (__vpair_ptr_t)(C); \ - __vr->__vp_f32[0] \ - = __builtin_vsx_xvnmsubsp (__va->__vp_f32[0], \ - __vb->__vp_f32[0], \ - __vc->__vp_f32[0]); \ - __vr->__vp_f32[1] \ - = __builtin_vsx_xvnmsubsp (__va->__vp_f32[1], \ - __vb->__vp_f32[1], \ - __vc->__vp_f32[1]); \ - } \ - while (0) - +/* Allow vector-pair.h to be included multiple times during testing. 
*/
+#undef vpair_f64_abs
+#undef vpair_f64_add
+#undef vpair_f64_div
+#undef vpair_f64_fma
+#undef vpair_f64_fms
+#undef vpair_f64_max
+#undef vpair_f64_min
+#undef vpair_f64_mul
+#undef vpair_f64_nabs
+#undef vpair_f64_neg
+#undef vpair_f64_nfma
+#undef vpair_f64_nfms
+#undef vpair_f64_splat
+#undef vpair_f64_sqrt
+#undef vpair_f64_sub
+
+#undef vpair_f32_abs
+#undef vpair_f32_add
+#undef vpair_f32_div
+#undef vpair_f32_fma
+#undef vpair_f32_fms
+#undef vpair_f32_max
+#undef vpair_f32_min
+#undef vpair_f32_mul
+#undef vpair_f32_nabs
+#undef vpair_f32_neg
+#undef vpair_f32_nfma
+#undef vpair_f32_nfms
+#undef vpair_f32_splat
+#undef vpair_f32_sqrt
+#undef vpair_f32_sub
+
+#define vpair_f64_abs    __vpair_f64_abs_nop10
+#define vpair_f64_add    __vpair_f64_add_nop10
+#define vpair_f64_div    __vpair_f64_div_nop10
+#define vpair_f64_fma    __vpair_f64_fma_nop10
+#define vpair_f64_fms    __vpair_f64_fms_nop10
+#define vpair_f64_max    __vpair_f64_max_nop10
+#define vpair_f64_min    __vpair_f64_min_nop10
+#define vpair_f64_mul    __vpair_f64_mul_nop10
+#define vpair_f64_nabs   __vpair_f64_nabs_nop10
+#define vpair_f64_neg    __vpair_f64_neg_nop10
+#define vpair_f64_nfma   __vpair_f64_nfma_nop10
+#define vpair_f64_nfms   __vpair_f64_nfms_nop10
+#define vpair_f64_splat  __vpair_f64_splat_nop10
+#define vpair_f64_sqrt   __vpair_f64_sqrt_nop10
+#define vpair_f64_sub    __vpair_f64_sub_nop10
+
+#define vpair_f32_abs    __vpair_f32_abs_nop10
+#define vpair_f32_add    __vpair_f32_add_nop10
+#define vpair_f32_div    __vpair_f32_div_nop10
+#define vpair_f32_fma    __vpair_f32_fma_nop10
+#define vpair_f32_fms    __vpair_f32_fms_nop10
+#define vpair_f32_max    __vpair_f32_max_nop10
+#define vpair_f32_min    __vpair_f32_min_nop10
+#define vpair_f32_mul    __vpair_f32_mul_nop10
+#define vpair_f32_nabs   __vpair_f32_nabs_nop10
+#define vpair_f32_neg    __vpair_f32_neg_nop10
+#define vpair_f32_nfma   __vpair_f32_nfma_nop10
+#define vpair_f32_nfms   __vpair_f32_nfms_nop10
+#define vpair_f32_splat  __vpair_f32_splat_nop10
+#define vpair_f32_sqrt   __vpair_f32_sqrt_nop10
+#define vpair_f32_sub    __vpair_f32_sub_nop10
+
+
+/* Vector pair double operations for power8/power9 systems (the _nop10
+   variants selected by the #define lines in this section).  Each function
+   applies the same operation to both vector double halves of the pair,
+   __vp_f64[0] and __vp_f64[1].  In the comments below R, A, B and C stand
+   for the objects pointed to by __r, __a, __b and __c; R is the only
+   object written.  */
+
+/* Set every element of both halves of R to __x.  */
+static inline void
+vpair_f64_splat (vector_pair_f64_t *__r, double __x)
+{
+  __r->__vp_f64[0] = __r->__vp_f64[1] = __builtin_vec_splats (__x);
+}
+
+/* R = absolute value of each element of A (xvabsdp).  */
+static inline void
+vpair_f64_abs (vector_pair_f64_t *__r, vector_pair_f64_t *__a)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvabsdp (__a->__vp_f64[0]);
+  __r->__vp_f64[1] = __builtin_vsx_xvabsdp (__a->__vp_f64[1]);
+}
+
+/* R = negated absolute value of each element of A (xvnabsdp).  */
+static inline void
+vpair_f64_nabs (vector_pair_f64_t *__r, vector_pair_f64_t *__a)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvnabsdp (__a->__vp_f64[0]);
+  __r->__vp_f64[1] = __builtin_vsx_xvnabsdp (__a->__vp_f64[1]);
+}
+
+/* R = -A for each element.  */
+static inline void
+vpair_f64_neg (vector_pair_f64_t *__r, vector_pair_f64_t *__a)
+{
+  __r->__vp_f64[0] = -__a->__vp_f64[0];
+  __r->__vp_f64[1] = -__a->__vp_f64[1];
+}
+
+/* R = square root of each element of A (xvsqrtdp).  */
+static inline void
+vpair_f64_sqrt (vector_pair_f64_t *__r, vector_pair_f64_t *__a)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvsqrtdp (__a->__vp_f64[0]);
+  __r->__vp_f64[1] = __builtin_vsx_xvsqrtdp (__a->__vp_f64[1]);
+}
+
+/* R = A + B for each element.  */
+static inline void
+vpair_f64_add (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __a->__vp_f64[0] + __b->__vp_f64[0];
+  __r->__vp_f64[1] = __a->__vp_f64[1] + __b->__vp_f64[1];
+}
+
+/* R = A / B for each element.  */
+static inline void
+vpair_f64_div (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __a->__vp_f64[0] / __b->__vp_f64[0];
+  __r->__vp_f64[1] = __a->__vp_f64[1] / __b->__vp_f64[1];
+}
+
+/* R = element-wise maximum of A and B (xvmaxdp).  */
+static inline void
+vpair_f64_max (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvmaxdp (__a->__vp_f64[0],
+                                            __b->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvmaxdp (__a->__vp_f64[1],
+                                            __b->__vp_f64[1]);
+}
+
+/* R = element-wise minimum of A and B (xvmindp).  */
+static inline void
+vpair_f64_min (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvmindp (__a->__vp_f64[0],
+                                            __b->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvmindp (__a->__vp_f64[1],
+                                            __b->__vp_f64[1]);
+}
+
+/* R = A * B for each element.  */
+static inline void
+vpair_f64_mul (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __a->__vp_f64[0] * __b->__vp_f64[0];
+  __r->__vp_f64[1] = __a->__vp_f64[1] * __b->__vp_f64[1];
+}
+
+/* R = A - B for each element.  */
+static inline void
+vpair_f64_sub (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b)
+{
+  __r->__vp_f64[0] = __a->__vp_f64[0] - __b->__vp_f64[0];
+  __r->__vp_f64[1] = __a->__vp_f64[1] - __b->__vp_f64[1];
+}
+
+/* Fused multiply-add: R = (A * B) + C for each element (xvmadddp).  */
+static inline void
+vpair_f64_fma (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b,
+               vector_pair_f64_t *__c)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvmadddp (__a->__vp_f64[0],
+                                             __b->__vp_f64[0],
+                                             __c->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvmadddp (__a->__vp_f64[1],
+                                             __b->__vp_f64[1],
+                                             __c->__vp_f64[1]);
+}
+
+/* Fused multiply-subtract: R = (A * B) - C for each element (xvmsubdp).  */
+static inline void
+vpair_f64_fms (vector_pair_f64_t *__r,
+               vector_pair_f64_t *__a,
+               vector_pair_f64_t *__b,
+               vector_pair_f64_t *__c)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvmsubdp (__a->__vp_f64[0],
+                                             __b->__vp_f64[0],
+                                             __c->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvmsubdp (__a->__vp_f64[1],
+                                             __b->__vp_f64[1],
+                                             __c->__vp_f64[1]);
+}
+
+/* Negated fused multiply-add: R = -((A * B) + C) for each element
+   (xvnmadddp).  */
+static inline void
+vpair_f64_nfma (vector_pair_f64_t *__r,
+                vector_pair_f64_t *__a,
+                vector_pair_f64_t *__b,
+                vector_pair_f64_t *__c)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvnmadddp (__a->__vp_f64[0],
+                                              __b->__vp_f64[0],
+                                              __c->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvnmadddp (__a->__vp_f64[1],
+                                              __b->__vp_f64[1],
+                                              __c->__vp_f64[1]);
+}
+
+/* Negated fused multiply-subtract: R = -((A * B) - C) for each element
+   (xvnmsubdp).  */
+static inline void
+vpair_f64_nfms (vector_pair_f64_t *__r,
+                vector_pair_f64_t *__a,
+                vector_pair_f64_t *__b,
+                vector_pair_f64_t *__c)
+{
+  __r->__vp_f64[0] = __builtin_vsx_xvnmsubdp (__a->__vp_f64[0],
+                                              __b->__vp_f64[0],
+                                              __c->__vp_f64[0]);
+
+  __r->__vp_f64[1] = __builtin_vsx_xvnmsubdp (__a->__vp_f64[1],
+                                              __b->__vp_f64[1],
+                                              __c->__vp_f64[1]);
+}
+
+/* Vector pair float operations for power8/power9 systems (the _nop10
+   variants).  Same conventions as the double functions, operating on the
+   vector float halves __vp_f32[0] and __vp_f32[1].  */
+
+/* Set every element of both halves of R to __x.  */
+static inline void
+vpair_f32_splat (vector_pair_f32_t *__r, float __x)
+{
+  __r->__vp_f32[0] = __r->__vp_f32[1] = __builtin_vec_splats (__x);
+}
+
+/* R = absolute value of each element of A (xvabssp).  */
+static inline void
+vpair_f32_abs (vector_pair_f32_t *__r, vector_pair_f32_t *__a)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvabssp (__a->__vp_f32[0]);
+  __r->__vp_f32[1] = __builtin_vsx_xvabssp (__a->__vp_f32[1]);
+}
+
+/* R = negated absolute value of each element of A (xvnabssp).  */
+static inline void
+vpair_f32_nabs (vector_pair_f32_t *__r, vector_pair_f32_t *__a)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvnabssp (__a->__vp_f32[0]);
+  __r->__vp_f32[1] = __builtin_vsx_xvnabssp (__a->__vp_f32[1]);
+}
+
+/* R = -A for each element.  */
+static inline void
+vpair_f32_neg (vector_pair_f32_t *__r, vector_pair_f32_t *__a)
+{
+  __r->__vp_f32[0] = -__a->__vp_f32[0];
+  __r->__vp_f32[1] = -__a->__vp_f32[1];
+}
+
+/* R = square root of each element of A (xvsqrtsp).  */
+static inline void
+vpair_f32_sqrt (vector_pair_f32_t *__r, vector_pair_f32_t *__a)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvsqrtsp (__a->__vp_f32[0]);
+  __r->__vp_f32[1] = __builtin_vsx_xvsqrtsp (__a->__vp_f32[1]);
+}
+
+/* R = A + B for each element.  */
+static inline void
+vpair_f32_add (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __a->__vp_f32[0] + __b->__vp_f32[0];
+  __r->__vp_f32[1] = __a->__vp_f32[1] + __b->__vp_f32[1];
+}
+
+/* R = A / B for each element.  */
+static inline void
+vpair_f32_div (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __a->__vp_f32[0] / __b->__vp_f32[0];
+  __r->__vp_f32[1] = __a->__vp_f32[1] / __b->__vp_f32[1];
+}
+
+/* R = element-wise maximum of A and B (xvmaxsp).  */
+static inline void
+vpair_f32_max (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvmaxsp (__a->__vp_f32[0],
+                                            __b->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvmaxsp (__a->__vp_f32[1],
+                                            __b->__vp_f32[1]);
+}
+
+/* R = element-wise minimum of A and B (xvminsp).  */
+static inline void
+vpair_f32_min (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvminsp (__a->__vp_f32[0],
+                                            __b->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvminsp (__a->__vp_f32[1],
+                                            __b->__vp_f32[1]);
+}
+
+/* R = A * B for each element.  */
+static inline void
+vpair_f32_mul (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __a->__vp_f32[0] * __b->__vp_f32[0];
+  __r->__vp_f32[1] = __a->__vp_f32[1] * __b->__vp_f32[1];
+}
+
+/* R = A - B for each element.  */
+static inline void
+vpair_f32_sub (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b)
+{
+  __r->__vp_f32[0] = __a->__vp_f32[0] - __b->__vp_f32[0];
+  __r->__vp_f32[1] = __a->__vp_f32[1] - __b->__vp_f32[1];
+}
+
+/* Fused multiply-add: R = (A * B) + C for each element (xvmaddsp).  */
+static inline void
+vpair_f32_fma (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b,
+               vector_pair_f32_t *__c)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvmaddsp (__a->__vp_f32[0],
+                                             __b->__vp_f32[0],
+                                             __c->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvmaddsp (__a->__vp_f32[1],
+                                             __b->__vp_f32[1],
+                                             __c->__vp_f32[1]);
+}
+
+/* Fused multiply-subtract: R = (A * B) - C for each element (xvmsubsp).  */
+static inline void
+vpair_f32_fms (vector_pair_f32_t *__r,
+               vector_pair_f32_t *__a,
+               vector_pair_f32_t *__b,
+               vector_pair_f32_t *__c)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvmsubsp (__a->__vp_f32[0],
+                                             __b->__vp_f32[0],
+                                             __c->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvmsubsp (__a->__vp_f32[1],
+                                             __b->__vp_f32[1],
+                                             __c->__vp_f32[1]);
+}
+
+/* Negated fused multiply-add: R = -((A * B) + C) for each element
+   (xvnmaddsp).  */
+static inline void
+vpair_f32_nfma (vector_pair_f32_t *__r,
+                vector_pair_f32_t *__a,
+                vector_pair_f32_t *__b,
+                vector_pair_f32_t *__c)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvnmaddsp (__a->__vp_f32[0],
+                                              __b->__vp_f32[0],
+                                              __c->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvnmaddsp (__a->__vp_f32[1],
+                                              __b->__vp_f32[1],
+                                              __c->__vp_f32[1]);
+}
+
+/* Negated fused multiply-subtract: R = -((A * B) - C) for each element
+   (xvnmsubsp).  */
+static inline void
+vpair_f32_nfms (vector_pair_f32_t *__r,
+                vector_pair_f32_t *__a,
+                vector_pair_f32_t *__b,
+                vector_pair_f32_t *__c)
+{
+  __r->__vp_f32[0] = __builtin_vsx_xvnmsubsp (__a->__vp_f32[0],
+                                              __b->__vp_f32[0],
+                                              __c->__vp_f32[0]);
+
+  __r->__vp_f32[1] = __builtin_vsx_xvnmsubsp (__a->__vp_f32[1],
+                                              __b->__vp_f32[1],
+                                              __c->__vp_f32[1]);
+}
 #endif /* Vector pair support for power8/power9 systems.  */
-
 #endif /* _VECTOR_PAIR_H.  */