https://gcc.gnu.org/g:bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8
commit bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed Sep 4 22:55:54 2024 -0400 Add support for vector pair fma operations. 2024-09-04 Michael Meissner <meiss...@linux.ibm.com> gcc/ * config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New built-in. (__builtin_vpair_f32_fms): Likewise. (__builtin_vpair_f32_nfma): Likewise. (__builtin_vpair_f32_nfms): Likewise. (__builtin_vpair_f64_fma): Likewise. (__builtin_vpair_f64_fms): Likewise. (__builtin_vpair_f64_nfma): Likewise. (__builtin_vpair_f64_nfms): Likewise. * config/rs6000/rs6000-protos.h (enum vpair_split_fma): New enumeration. (vpair_split_fma): New declaration. * config/rs6000/rs6000.cc (vpair_split_fma): New function to split vector pair FMA operations. * config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec. (vpair_stdname): Add UNSPEC_VPAIR_FMA. (VPAIR_OP): Likewise. (vpair_fma_<vpair_modename>4): New insns. (vpair_fms_<vpair_modename>4): Likewise. (vpair_nfma_<vpair_modename>4): Likewise. (vpair_nfms_<vpair_modename>4): Likewise. * config/rs6000/vector-pair.h: Update to use the power10 vector pair built-in functions. * doc/extend.texi (PowerPC Vector Pair Built-in Functions): Document new vector pair fma built-in functions. gcc/testsuite/ * gcc.target/powerpc/vector-pair-3.c: New test. * gcc.target/powerpc/vector-pair-4.c: Likewise.
Diff: --- gcc/config/rs6000/rs6000-builtins.def | 24 ++++++ gcc/config/rs6000/rs6000-protos.h | 13 ++++ gcc/config/rs6000/rs6000.cc | 71 ++++++++++++++++++ gcc/config/rs6000/vector-pair.h | 57 +++----------- gcc/config/rs6000/vector-pair.md | 96 ++++++++++++++++++++++++ gcc/doc/extend.texi | 25 ++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-3.c | 57 ++++++++++++++ gcc/testsuite/gcc.target/powerpc/vector-pair-4.c | 57 ++++++++++++++ 8 files changed, 354 insertions(+), 46 deletions(-) diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index cf22389542d..2bac0e58971 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3944,6 +3944,12 @@ v256 __builtin_vpair_f32_div (v256, v256); VPAIR_F32_DIV vpair_div_v8sf3 {mma} + v256 __builtin_vpair_f32_fma (v256, v256, v256); + VPAIR_F32_FMA vpair_fma_v8sf4 {mma} + + v256 __builtin_vpair_f32_fms (v256, v256, v256); + VPAIR_F32_FMS vpair_fms_v8sf4 {mma} + v256 __builtin_vpair_f32_max (v256, v256); VPAIR_F32_MAX vpair_smax_v8sf3 {mma} @@ -3962,6 +3968,12 @@ v256 __builtin_vpair_f32_sqrt (v256); VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma} + v256 __builtin_vpair_f32_nfma (v256, v256, v256); + VPAIR_F32_NFMA vpair_nfma_v8sf4 {mma} + + v256 __builtin_vpair_f32_nfms (v256, v256, v256); + VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma} + v256 __builtin_vpair_f32_sub (v256, v256); VPAIR_F32_SUB vpair_sub_v8sf3 {mma} @@ -3975,6 +3987,12 @@ v256 __builtin_vpair_f64_div (v256, v256); VPAIR_F64_DIV vpair_div_v4df3 {mma} + v256 __builtin_vpair_f64_fma (v256, v256, v256); + VPAIR_F64_FMA vpair_fma_v4df4 {mma} + + v256 __builtin_vpair_f64_fms (v256, v256, v256); + VPAIR_F64_FMS vpair_fms_v4df4 {mma} + v256 __builtin_vpair_f64_max (v256, v256); VPAIR_F64_MAX vpair_smax_v4df3 {mma} @@ -3993,5 +4011,11 @@ v256 __builtin_vpair_f64_sqrt (v256); VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma} + v256 __builtin_vpair_f64_nfma (v256, v256, v256); + VPAIR_F64_NFMA vpair_nfma_v4df4 {mma} + + 
v256 __builtin_vpair_f64_nfms (v256, v256, v256); + VPAIR_F64_NFMS vpair_nfms_v4df4 {mma} + v256 __builtin_vpair_f64_sub (v256, v256); VPAIR_F64_SUB vpair_sub_v4df3 {mma} diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 7b8b3b0c237..bab5fb437c2 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -173,6 +173,19 @@ extern void vpair_split_unary (rtx [], machine_mode, enum rtx_code, enum vpair_split_unary); extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code); +/* When we are splitting a vector pair FMA operation into two vector operations, we + may need to modify the code generated. This enumeration encodes the + different choices. */ + +enum vpair_split_fma { + VPAIR_SPLIT_FMA, /* Fused multiply-add. */ + VPAIR_SPLIT_FMS, /* Fused multiply-subtract. */ + VPAIR_SPLIT_NFMA, /* Fused negate multiply-add. */ + VPAIR_SPLIT_NFMS /* Fused negate multiply-subtract. */ +}; + +extern void vpair_split_fma (rtx [], machine_mode, enum vpair_split_fma); + /* Different PowerPC instruction formats that are used by GCC. There are various other instruction formats used by the PowerPC hardware, but these formats are not currently used by GCC. */ diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index f6d24b4706e..a9452ee0efb 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -29676,6 +29676,77 @@ vpair_split_binary (rtx operands[], /* Dest, 2 inputs. */ emit_insn (gen_rtx_SET (op0_b, operation_b)); return; } + +/* Split vector pair fma operations. */ + +void +vpair_split_fma (rtx operands[], /* Dest, 3 inputs. */ + machine_mode vmode, /* Vector mode. */ + enum vpair_split_fma action) /* Action to take. 
*/ +{ + rtx op0 = operands[0]; + machine_mode mode0 = GET_MODE (op0); + gcc_assert (GET_MODE_SIZE (mode0) == 32); + rtx op0_a = simplify_gen_subreg (vmode, op0, mode0, 0); + rtx op0_b = simplify_gen_subreg (vmode, op0, mode0, 16); + + rtx op1 = operands[1]; + machine_mode mode1 = GET_MODE (op1); + gcc_assert (GET_MODE_SIZE (mode1) == 32); + rtx op1_a = simplify_gen_subreg (vmode, op1, mode1, 0); + rtx op1_b = simplify_gen_subreg (vmode, op1, mode1, 16); + + rtx op2 = operands[2]; + machine_mode mode2 = GET_MODE (op2); + gcc_assert (GET_MODE_SIZE (mode2) == 32); + rtx op2_a = simplify_gen_subreg (vmode, op2, mode2, 0); + rtx op2_b = simplify_gen_subreg (vmode, op2, mode2, 16); + + rtx op3 = operands[3]; + machine_mode mode3 = GET_MODE (op3); + gcc_assert (GET_MODE_SIZE (mode3) == 32); + rtx op3_a = simplify_gen_subreg (vmode, op3, mode3, 0); + rtx op3_b = simplify_gen_subreg (vmode, op3, mode3, 16); + + switch (action) + { + case VPAIR_SPLIT_FMA: + case VPAIR_SPLIT_NFMA: + break; + + case VPAIR_SPLIT_FMS: + case VPAIR_SPLIT_NFMS: + op3_a = gen_rtx_NEG (vmode, op3_a); + op3_b = gen_rtx_NEG (vmode, op3_b); + break; + + default: + gcc_unreachable (); + } + + rtx operation_a = gen_rtx_fmt_eee (FMA, vmode, op1_a, op2_a, op3_a); + rtx operation_b = gen_rtx_fmt_eee (FMA, vmode, op1_b, op2_b, op3_b); + + switch (action) + { + case VPAIR_SPLIT_FMA: + case VPAIR_SPLIT_FMS: + break; + + case VPAIR_SPLIT_NFMA: + case VPAIR_SPLIT_NFMS: + operation_a = gen_rtx_NEG (vmode, operation_a); + operation_b = gen_rtx_NEG (vmode, operation_b); + break; + + default: + gcc_unreachable (); + } + + emit_insn (gen_rtx_SET (op0_a, operation_a)); + emit_insn (gen_rtx_SET (op0_b, operation_b)); + return; +} struct gcc_target targetm = TARGET_INITIALIZER; diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h index bf056cea2cf..e399e89e8e4 100644 --- a/gcc/config/rs6000/vector-pair.h +++ b/gcc/config/rs6000/vector-pair.h @@ -56,30 +56,12 @@ typedef __vector_pair 
vector_pair_t; #define vpair_f64_mul(R,A,B) (*R) = __builtin_vpair_f64_mul (*A, *B) #define vpair_f64_sub(R,A,B) (*R) = __builtin_vpair_f64_sub (*A, *B) -#define vpair_f64_fma(R,A,B,C) \ - __asm__ ("xvmaddadp %x0,%x1,%x2" "\n\t" \ - "xvmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f64_fms(R,A,B,C) \ - __asm__ ("xvmsubadp %x0,%x1,%x2" "\n\t" \ - "xvmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f64_nfma(R,A,B,C) \ - __asm__ ("xvnmaddadp %x0,%x1,%x2" "\n\t" \ - "xvnmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f64_nfms(R,A,B,C) \ - __asm__ ("xvnmsubadp %x0,%x1,%x2" "\n\t" \ - "xvnmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) +#define vpair_f64_fma(R,A,B,C) (*R) = __builtin_vpair_f64_fma (*A, *B, *C) +#define vpair_f64_fms(R,A,B,C) (*R) = __builtin_vpair_f64_fms (*A, *B, *C) +#define vpair_f64_nfma(R,A,B,C) (*R) = __builtin_vpair_f64_nfma (*A, *B, *C) +#define vpair_f64_nfms(R,A,B,C) (*R) = __builtin_vpair_f64_nfms (*A, *B, *C) + /* vector pair float operations on power10. 
*/ #define vpair_f32_splat(R, A) \ __asm__ ("xscvdpspn %x0,%x1" "\n\t" \ @@ -100,30 +82,12 @@ typedef __vector_pair vector_pair_t; #define vpair_f32_mul(R,A,B) (*R) = __builtin_vpair_f32_mul (*A, *B) #define vpair_f32_sub(R,A,B) (*R) = __builtin_vpair_f32_sub (*A, *B) -#define vpair_f32_fma(R,A,B,C) \ - __asm__ ("xvmaddasp %x0,%x1,%x2" "\n\t" \ - "xvmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f32_fms(R,A,B,C) \ - __asm__ ("xvmsubasp %x0,%x1,%x2" "\n\t" \ - "xvmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f32_nfma(R,A,B,C) \ - __asm__ ("xvnmaddasp %x0,%x1,%x2" "\n\t" \ - "xvnmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) - -#define vpair_f32_nfms(R,A,B,C) \ - __asm__ ("xvnmsubasp %x0,%x1,%x2" "\n\t" \ - "xvnmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" VPAIR_FP_SECOND "2" \ - : "=" VPAIR_FP_CONSTRAINT (*R) \ - : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C))) +#define vpair_f32_fma(R,A,B,C) (*R) = __builtin_vpair_f32_fma (*A, *B, *C) +#define vpair_f32_fms(R,A,B,C) (*R) = __builtin_vpair_f32_fms (*A, *B, *C) +#define vpair_f32_nfma(R,A,B,C) (*R) = __builtin_vpair_f32_nfma (*A, *B, *C) +#define vpair_f32_nfms(R,A,B,C) (*R) = __builtin_vpair_f32_nfms (*A, *B, *C) + #else /* !__MMA__. */ typedef union { /* Double vector pairs. */ @@ -235,6 +199,7 @@ typedef union { (B)->__vpair_vec_f64[1], \ (C)->__vpair_vec_f64[1])))) + /* vector pair float operations on power8/power9. 
*/ #define vpair_f32_splat(R,A) \ ((R)->__vpair_vec_f32[0] = (R)->__vpair_vec_f32[1] \ diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md index a188f0b79cf..fe8004b75d5 100644 --- a/gcc/config/rs6000/vector-pair.md +++ b/gcc/config/rs6000/vector-pair.md @@ -32,6 +32,7 @@ (define_c_enum "unspec" [UNSPEC_VPAIR_ABS UNSPEC_VPAIR_DIV + UNSPEC_VPAIR_FMA UNSPEC_VPAIR_MINUS UNSPEC_VPAIR_MULT UNSPEC_VPAIR_NEG @@ -73,6 +74,7 @@ ;; Map the vpair operator unspec number to the standard name. (define_int_attr vpair_stdname [(UNSPEC_VPAIR_ABS "abs") (UNSPEC_VPAIR_DIV "div") + (UNSPEC_VPAIR_FMA "fma") (UNSPEC_VPAIR_MINUS "sub") (UNSPEC_VPAIR_MULT "mul") (UNSPEC_VPAIR_NEG "neg") @@ -84,6 +86,7 @@ ;; Map the vpair operator unspec number to the RTL operator. (define_int_attr VPAIR_OP [(UNSPEC_VPAIR_ABS "ABS") (UNSPEC_VPAIR_DIV "DIV") + (UNSPEC_VPAIR_FMA "FMA") (UNSPEC_VPAIR_MINUS "MINUS") (UNSPEC_VPAIR_MULT "MULT") (UNSPEC_VPAIR_NEG "NEG") @@ -162,3 +165,96 @@ (set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV") (const_string "<vpair_divtype>") (const_string "<vpair_type>")))]) + +;; Vector pair fused-multiply (FMA) operations. The last argument in the +;; UNSPEC is a CONST_INT which identifies what the scalar element is. 
+(define_insn_and_split "vpair_fma_<vpair_modename>4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_FMA); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Vector pair fused multiply-subtract +(define_insn_and_split "vpair_fms_<vpair_modename>4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_FMS); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Vector pair negate fused multiply-add +(define_insn_and_split "vpair_nfma_<vpair_modename>4" + [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_NFMA); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) + +;; Vector pair negate fused multiply-subtract +(define_insn_and_split "vpair_nfms_<vpair_modename>4" + [(set (match_operand:OO 0 
"vsx_register_operand" "=wa,wa") + (unspec:OO + [(unspec:OO + [(match_operand:OO 1 "vsx_register_operand" "%wa,wa") + (match_operand:OO 2 "vsx_register_operand" "wa,0") + (unspec:OO + [(match_operand:OO 3 "vsx_register_operand" "0,wa") + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_FMA) + (const_int VPAIR_FP_ELEMENT)] + UNSPEC_VPAIR_NEG))] + "TARGET_MMA" + "#" + "&& reload_completed" + [(const_int 0)] +{ + vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_NFMS); + DONE; +} + [(set_attr "length" "8") + (set_attr "type" "<vpair_type>")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 23ed920a45a..a54f1194378 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -24264,6 +24264,15 @@ store instruction. The @code{nabs} built-in is a combination of @code{neg} and @code{abs}. +The @code{fms} built-in is a combination of @code{fma} and @code{neg} +of the third element. + +The @code{nfma} built-in is a combination of @code{neg} of the +@code{fma} built-in. + +The @code{nfms} built-in is a combination of @code{neg} of the +@code{fms} built-in. 
+ The following built-in functions operate on pairs of @code{vector float} values: @@ -24271,11 +24280,19 @@ The following built-in functions operate on pairs of __vector_pair __builtin_vpair_f32_abs (__vector_pair); __vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_div (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair, + __vector_pair); +__vector_pair __builtin_vpair_f32_fms (__vector_pair, __vector_pair, + __vector_pair); __vector_pair __builtin_vpair_f32_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_min (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f32_nabs (__vector_pair); __vector_pair __builtin_vpair_f32_neg (__vector_pair); +__vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair, + __vector_pair); +__vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair, + __vector_pair); __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair); @end smallexample @@ -24286,11 +24303,19 @@ The following built-in functions operate on pairs of __vector_pair __builtin_vpair_f64_abs (__vector_pair); __vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_div (__vector_pair, __vector_pair); +__vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair, + __vector_pair); +__vector_pair __builtin_vpair_f64_fms (__vector_pair, __vector_pair, + __vector_pair); __vector_pair __builtin_vpair_f64_max (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_min (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair); __vector_pair __builtin_vpair_f64_nabs (__vector_pair); __vector_pair __builtin_vpair_f64_neg (__vector_pair); +__vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair, + __vector_pair); +__vector_pair 
__builtin_vpair_f64_nfms (__vector_pair, __vector_pair, + __vector_pair); __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair); @end smallexample diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c new file mode 100644 index 00000000000..43b91461759 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected FMA instructions + for vector pairs with 4 double elements. */ + +void +test_fma (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvmadd{a,m}dp, 1 stxvp. */ + *dest = __builtin_vpair_f64_fma (*x, *y, *z); +} + +void +test_fms (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvmsub{a,m}dp, 1 stxvp. */ + __vector_pair n = __builtin_vpair_f64_neg (*z); + *dest = __builtin_vpair_f64_fma (*x, *y, n); +} + +void +test_nfma (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvnmadd{a,m}dp, 1 stxvp. */ + __vector_pair w = __builtin_vpair_f64_fma (*x, *y, *z); + *dest = __builtin_vpair_f64_neg (w); +} + +void +test_nfms (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvnmsub{a,m}dp, 1 stxvp. 
*/ + __vector_pair n = __builtin_vpair_f64_neg (*z); + __vector_pair w = __builtin_vpair_f64_fma (*x, *y, n); + *dest = __builtin_vpair_f64_neg (w); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.dp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c b/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c new file mode 100644 index 00000000000..d5c55d3883c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c @@ -0,0 +1,57 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +/* Test whether the vector builtin code generates the expected FMA instructions + for vector pairs with 8 float elements. */ + +void +test_fma (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvmadd{a,q}sp, 1 stxvp. */ + *dest = __builtin_vpair_f32_fma (*x, *y, *z); +} + +void +test_fms (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvmsub{a,q}sp, 1 stxvp. */ + __vector_pair n = __builtin_vpair_f32_neg (*z); + *dest = __builtin_vpair_f32_fma (*x, *y, n); +} + +void +test_nfma (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvnmadd{a,q}sp, 1 stxvp. */ + __vector_pair w = __builtin_vpair_f32_fma (*x, *y, *z); + *dest = __builtin_vpair_f32_neg (w); +} + +void +test_nfms (__vector_pair *dest, + __vector_pair *x, + __vector_pair *y, + __vector_pair *z) +{ + /* 3 lxvp, 2 xvnmsub{a,q}sp, 1 stxvp. 
*/ + __vector_pair n = __builtin_vpair_f32_neg (*z); + __vector_pair w = __builtin_vpair_f32_fma (*x, *y, n); + *dest = __builtin_vpair_f32_neg (w); +} + +/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxvmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxvmsub.sp\M} 2 } } */