[gcc(refs/users/meissner/heads/work177-vpair)] Add support for vector pair fma operations.

Michael Meissner via Gcc-cvs Wed, 04 Sep 2024 20:25:51 -0700

https://gcc.gnu.org/g:bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8


commit bbb392f70a0774dc47e3cf31a8d4c74e3e7572e8
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Wed Sep 4 22:55:54 2024 -0400

    Add support for vector pair fma operations.
    
    2024-09-04  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000-builtins.def (__builtin_vpair_f32_fma): New
            built-in.
            (__builtin_vpair_f32_fms): Likewise.
            (__builtin_vpair_f32_nfma): Likewise.
            (__builtin_vpair_f32_nfms): Likewise.
            (__builtin_vpair_f64_fma): Likewise.
            (__builtin_vpair_f64_fms): Likewise.
            (__builtin_vpair_f64_nfma): Likewise.
            (__builtin_vpair_f64_nfms): Likewise.
            * config/rs6000/rs6000-protos.h (enum vpair_split_fma): New
            enumeration.
            (vpair_split_fma): New declaration.
            * config/rs6000/rs6000.cc (vpair_split_fma): New function to split
            vector pair FMA operations.
            * config/rs6000/vector-pair.md (UNSPEC_VPAIR_FMA): New unspec.
            (vpair_stdname): Add UNSPEC_VPAIR_FMA.
            (VPAIR_OP): Likewise.
            (vpair_fma_<vpair_modename>4): New insns.
            (vpair_fms_<vpair_modename>4): Likewise.
            (vpair_nfma_<vpair_modename>4): Likewise.
            (vpair_nfms_<vpair_modename>4): Likewise.
            * config/rs6000/vector-pair.h: Update to use the power10 vector pair
            built-in functions.
            * doc/extend.texi (PowerPC Vector Pair Built-in Functions):
            Document new vector pair fma built-in functions.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/vector-pair-3.c: New test.
            * gcc.target/powerpc/vector-pair-4.c: Likewise.

Diff:
---
 gcc/config/rs6000/rs6000-builtins.def            | 24 ++++++
 gcc/config/rs6000/rs6000-protos.h                | 13 ++++
 gcc/config/rs6000/rs6000.cc                      | 71 ++++++++++++++++++
 gcc/config/rs6000/vector-pair.h                  | 57 +++-----------
 gcc/config/rs6000/vector-pair.md                 | 96 ++++++++++++++++++++++++
 gcc/doc/extend.texi                              | 25 ++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-3.c | 57 ++++++++++++++
 gcc/testsuite/gcc.target/powerpc/vector-pair-4.c | 57 ++++++++++++++
 8 files changed, 354 insertions(+), 46 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtins.def 
b/gcc/config/rs6000/rs6000-builtins.def
index cf22389542d..2bac0e58971 100644
--- a/gcc/config/rs6000/rs6000-builtins.def
+++ b/gcc/config/rs6000/rs6000-builtins.def
@@ -3944,6 +3944,12 @@
   v256 __builtin_vpair_f32_div (v256, v256);
     VPAIR_F32_DIV vpair_div_v8sf3 {mma}
 
+  v256 __builtin_vpair_f32_fma (v256, v256, v256);
+    VPAIR_F32_FMA vpair_fma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_fms (v256, v256, v256);
+    VPAIR_F32_FMS vpair_fms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_max (v256, v256);
     VPAIR_F32_MAX vpair_smax_v8sf3 {mma}
 
@@ -3962,6 +3968,12 @@
   v256 __builtin_vpair_f32_sqrt (v256);
     VPAIR_F32_SQRT vpair_sqrt_v8sf2 {mma}
 
+  v256 __builtin_vpair_f32_nfma (v256, v256, v256);
+    VPAIR_F32_NFMA vpair_nfma_v8sf4 {mma}
+
+  v256 __builtin_vpair_f32_nfms (v256, v256, v256);
+    VPAIR_F32_NFMS vpair_nfms_v8sf4 {mma}
+
   v256 __builtin_vpair_f32_sub (v256, v256);
     VPAIR_F32_SUB vpair_sub_v8sf3 {mma}
 
@@ -3975,6 +3987,12 @@
   v256 __builtin_vpair_f64_div (v256, v256);
     VPAIR_F64_DIV vpair_div_v4df3 {mma}
 
+  v256 __builtin_vpair_f64_fma (v256, v256, v256);
+    VPAIR_F64_FMA vpair_fma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_fms (v256, v256, v256);
+    VPAIR_F64_FMS vpair_fms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_max (v256, v256);
     VPAIR_F64_MAX vpair_smax_v4df3 {mma}
 
@@ -3993,5 +4011,11 @@
   v256 __builtin_vpair_f64_sqrt (v256);
     VPAIR_F64_SQRT vpair_sqrt_v4df2 {mma}
 
+  v256 __builtin_vpair_f64_nfma (v256, v256, v256);
+    VPAIR_F64_NFMA vpair_nfma_v4df4 {mma}
+
+  v256 __builtin_vpair_f64_nfms (v256, v256, v256);
+    VPAIR_F64_NFMS vpair_nfms_v4df4 {mma}
+
   v256 __builtin_vpair_f64_sub (v256, v256);
     VPAIR_F64_SUB vpair_sub_v4df3 {mma}
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 7b8b3b0c237..bab5fb437c2 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -173,6 +173,19 @@ extern void vpair_split_unary (rtx [], machine_mode, enum 
rtx_code,
                               enum vpair_split_unary);
 extern void vpair_split_binary (rtx [], machine_mode, enum rtx_code);
 
+/* When we are splitting a vector pair FMA operation into two vector
+   operations, we may need to modify the code generated.  This enumeration
+   encodes the different choices.  */
+
+enum vpair_split_fma {
+  VPAIR_SPLIT_FMA,             /* Fused multiply-add.  */
+  VPAIR_SPLIT_FMS,             /* Fused multiply-subtract.  */
+  VPAIR_SPLIT_NFMA,            /* Negated fused multiply-add.  */
+  VPAIR_SPLIT_NFMS             /* Negated fused multiply-subtract.  */
+};
+
+extern void vpair_split_fma (rtx [], machine_mode, enum vpair_split_fma);
+
 /* Different PowerPC instruction formats that are used by GCC.  There are
    various other instruction formats used by the PowerPC hardware, but these
    formats are not currently used by GCC.  */
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index f6d24b4706e..a9452ee0efb 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -29676,6 +29676,77 @@ vpair_split_binary (rtx operands[],                    
/* Dest, 2 inputs.  */
   emit_insn (gen_rtx_SET (op0_b, operation_b));
   return;
 }
+
+/* Split a vector pair FMA into two VMODE FMAs (subregs at bytes 0, 16).  */
+
+void
+vpair_split_fma (rtx operands[],                       /* Dest, 3 inputs.  */
+                machine_mode vmode,                    /* Vector mode.  */
+                enum vpair_split_fma action)           /* Action to take.  */
+{
+  rtx op0 = operands[0];                       /* Destination.  */
+  machine_mode mode0 = GET_MODE (op0);
+  gcc_assert (GET_MODE_SIZE (mode0) == 32);
+  rtx op0_a = simplify_gen_subreg (vmode, op0, mode0, 0);
+  rtx op0_b = simplify_gen_subreg (vmode, op0, mode0, 16);
+
+  rtx op1 = operands[1];                       /* First FMA input.  */
+  machine_mode mode1 = GET_MODE (op1);
+  gcc_assert (GET_MODE_SIZE (mode1) == 32);
+  rtx op1_a = simplify_gen_subreg (vmode, op1, mode1, 0);
+  rtx op1_b = simplify_gen_subreg (vmode, op1, mode1, 16);
+
+  rtx op2 = operands[2];                       /* Second FMA input.  */
+  machine_mode mode2 = GET_MODE (op2);
+  gcc_assert (GET_MODE_SIZE (mode2) == 32);
+  rtx op2_a = simplify_gen_subreg (vmode, op2, mode2, 0);
+  rtx op2_b = simplify_gen_subreg (vmode, op2, mode2, 16);
+
+  rtx op3 = operands[3];                       /* Third FMA input (addend).  */
+  machine_mode mode3 = GET_MODE (op3);
+  gcc_assert (GET_MODE_SIZE (mode3) == 32);
+  rtx op3_a = simplify_gen_subreg (vmode, op3, mode3, 0);
+  rtx op3_b = simplify_gen_subreg (vmode, op3, mode3, 16);
+
+  switch (action)
+    {
+    case VPAIR_SPLIT_FMA:
+    case VPAIR_SPLIT_NFMA:
+      break;                           /* Addend is used unchanged.  */
+
+    case VPAIR_SPLIT_FMS:
+    case VPAIR_SPLIT_NFMS:
+      op3_a = gen_rtx_NEG (vmode, op3_a);      /* Subtract: negate addend.  */
+      op3_b = gen_rtx_NEG (vmode, op3_b);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  rtx operation_a = gen_rtx_fmt_eee (FMA, vmode, op1_a, op2_a, op3_a);
+  rtx operation_b = gen_rtx_fmt_eee (FMA, vmode, op1_b, op2_b, op3_b);
+
+  switch (action)
+    {
+    case VPAIR_SPLIT_FMA:
+    case VPAIR_SPLIT_FMS:
+      break;                           /* FMA result is used unchanged.  */
+
+    case VPAIR_SPLIT_NFMA:
+    case VPAIR_SPLIT_NFMS:
+      operation_a = gen_rtx_NEG (vmode, operation_a);  /* Negate result.  */
+      operation_b = gen_rtx_NEG (vmode, operation_b);
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  emit_insn (gen_rtx_SET (op0_a, operation_a));        /* Half at byte 0.  */
+  emit_insn (gen_rtx_SET (op0_b, operation_b));        /* Half at byte 16.  */
+  return;
+}
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
diff --git a/gcc/config/rs6000/vector-pair.h b/gcc/config/rs6000/vector-pair.h
index bf056cea2cf..e399e89e8e4 100644
--- a/gcc/config/rs6000/vector-pair.h
+++ b/gcc/config/rs6000/vector-pair.h
@@ -56,30 +56,12 @@ typedef __vector_pair vector_pair_t;
 #define vpair_f64_mul(R,A,B)   (*R) = __builtin_vpair_f64_mul (*A, *B)
 #define vpair_f64_sub(R,A,B)   (*R) = __builtin_vpair_f64_sub (*A, *B)
 
-#define vpair_f64_fma(R,A,B,C)                                         \
-  __asm__ ("xvmaddadp %x0,%x1,%x2" "\n\t"                              \
-           "xvmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f64_fms(R,A,B,C)                                         \
-  __asm__ ("xvmsubadp %x0,%x1,%x2" "\n\t"                              \
-           "xvmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f64_nfma(R,A,B,C)                                                
\
-  __asm__ ("xvnmaddadp %x0,%x1,%x2" "\n\t"                             \
-           "xvnmaddadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f64_nfms(R,A,B,C)                                                
\
-  __asm__ ("xvnmsubadp %x0,%x1,%x2" "\n\t"                             \
-           "xvnmsubadp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+#define vpair_f64_fma(R,A,B,C) (*R) = __builtin_vpair_f64_fma (*A, *B, *C)
+#define vpair_f64_fms(R,A,B,C) (*R) = __builtin_vpair_f64_fms (*A, *B, *C)
+#define vpair_f64_nfma(R,A,B,C)        (*R) = __builtin_vpair_f64_nfma (*A, 
*B, *C)
+#define vpair_f64_nfms(R,A,B,C)        (*R) = __builtin_vpair_f64_nfms (*A, 
*B, *C)
 
+
 /* vector pair float operations on power10.  */
 #define vpair_f32_splat(R, A)                                          \
   __asm__ ("xscvdpspn %x0,%x1" "\n\t"                                  \
@@ -100,30 +82,12 @@ typedef __vector_pair vector_pair_t;
 #define vpair_f32_mul(R,A,B)   (*R) = __builtin_vpair_f32_mul (*A, *B)
 #define vpair_f32_sub(R,A,B)   (*R) = __builtin_vpair_f32_sub (*A, *B)
 
-#define vpair_f32_fma(R,A,B,C)                                         \
-  __asm__ ("xvmaddasp %x0,%x1,%x2" "\n\t"                              \
-           "xvmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f32_fms(R,A,B,C)                                         \
-  __asm__ ("xvmsubasp %x0,%x1,%x2" "\n\t"                              \
-           "xvmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f32_nfma(R,A,B,C)                                                
\
-  __asm__ ("xvnmaddasp %x0,%x1,%x2" "\n\t"                             \
-           "xvnmaddasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
-
-#define vpair_f32_nfms(R,A,B,C)                                                
\
-  __asm__ ("xvnmsubasp %x0,%x1,%x2" "\n\t"                             \
-           "xvnmsubasp %" VPAIR_FP_SECOND "0,%" VPAIR_FP_SECOND "1,%" 
VPAIR_FP_SECOND "2" \
-           : "=" VPAIR_FP_CONSTRAINT (*R)                              \
-          : VPAIR_FP_CONSTRAINT ((*A)), VPAIR_FP_CONSTRAINT ((*B)), "0" ((*C)))
+#define vpair_f32_fma(R,A,B,C) (*R) = __builtin_vpair_f32_fma (*A, *B, *C)
+#define vpair_f32_fms(R,A,B,C) (*R) = __builtin_vpair_f32_fms (*A, *B, *C)
+#define vpair_f32_nfma(R,A,B,C)        (*R) = __builtin_vpair_f32_nfma (*A, 
*B, *C)
+#define vpair_f32_nfms(R,A,B,C)        (*R) = __builtin_vpair_f32_nfms (*A, *B, *C)
 
+
 #else  /* !__MMA__.  */
 typedef union {
   /* Double vector pairs.  */
@@ -235,6 +199,7 @@ typedef union {
                               (B)->__vpair_vec_f64[1],                 \
                               (C)->__vpair_vec_f64[1]))))
 
+
 /* vector pair float operations on power8/power9.  */
 #define vpair_f32_splat(R,A)                                           \
   ((R)->__vpair_vec_f32[0] = (R)->__vpair_vec_f32[1]                   \
diff --git a/gcc/config/rs6000/vector-pair.md b/gcc/config/rs6000/vector-pair.md
index a188f0b79cf..fe8004b75d5 100644
--- a/gcc/config/rs6000/vector-pair.md
+++ b/gcc/config/rs6000/vector-pair.md
@@ -32,6 +32,7 @@
 (define_c_enum "unspec"
   [UNSPEC_VPAIR_ABS
    UNSPEC_VPAIR_DIV
+   UNSPEC_VPAIR_FMA
    UNSPEC_VPAIR_MINUS
    UNSPEC_VPAIR_MULT
    UNSPEC_VPAIR_NEG
@@ -73,6 +74,7 @@
 ;; Map the vpair operator unspec number to the standard name.
 (define_int_attr vpair_stdname [(UNSPEC_VPAIR_ABS    "abs")
                                (UNSPEC_VPAIR_DIV    "div")
+                               (UNSPEC_VPAIR_FMA    "fma")
                                (UNSPEC_VPAIR_MINUS  "sub")
                                (UNSPEC_VPAIR_MULT   "mul")
                                (UNSPEC_VPAIR_NEG    "neg")
@@ -84,6 +86,7 @@
 ;; Map the vpair operator unspec number to the RTL operator.
 (define_int_attr VPAIR_OP [(UNSPEC_VPAIR_ABS    "ABS")
                           (UNSPEC_VPAIR_DIV    "DIV")
+                          (UNSPEC_VPAIR_FMA    "FMA")
                           (UNSPEC_VPAIR_MINUS  "MINUS")
                           (UNSPEC_VPAIR_MULT   "MULT")
                           (UNSPEC_VPAIR_NEG    "NEG")
@@ -162,3 +165,96 @@
    (set (attr "type") (if_then_else (match_test "<VPAIR_OP> == DIV")
                                    (const_string "<vpair_divtype>")
                                    (const_string "<vpair_type>")))])
+
+;; Vector pair fused multiply-add (FMA) operations.  The last argument in the
+;; UNSPEC is a CONST_INT which identifies what the scalar element is.
+(define_insn_and_split "vpair_fma_<vpair_modename>4"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+       (unspec:OO
+        [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+         (match_operand:OO 2 "vsx_register_operand" "wa,0")
+         (match_operand:OO 3 "vsx_register_operand" "0,wa")
+         (const_int VPAIR_FP_ELEMENT)]
+        UNSPEC_VPAIR_FMA))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_FMA);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Vector pair fused multiply-subtract
+(define_insn_and_split "vpair_fms_<vpair_modename>4"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+       (unspec:OO
+        [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+         (match_operand:OO 2 "vsx_register_operand" "wa,0")
+         (unspec:OO
+          [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+           (const_int VPAIR_FP_ELEMENT)]
+          UNSPEC_VPAIR_NEG)
+         (const_int VPAIR_FP_ELEMENT)]
+        UNSPEC_VPAIR_FMA))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_FMS);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Vector pair negate fused multiply-add
+(define_insn_and_split "vpair_nfma_<vpair_modename>4"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+       (unspec:OO
+        [(unspec:OO
+          [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+           (match_operand:OO 2 "vsx_register_operand" "wa,0")
+           (match_operand:OO 3 "vsx_register_operand" "0,wa")
+           (const_int VPAIR_FP_ELEMENT)]
+          UNSPEC_VPAIR_FMA)
+         (const_int VPAIR_FP_ELEMENT)]
+        UNSPEC_VPAIR_NEG))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_NFMA);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
+
+;; Vector pair negate fused multiply-subtract
+(define_insn_and_split "vpair_nfms_<vpair_modename>4"
+  [(set (match_operand:OO 0 "vsx_register_operand" "=wa,wa")
+       (unspec:OO
+        [(unspec:OO
+          [(match_operand:OO 1 "vsx_register_operand" "%wa,wa")
+           (match_operand:OO 2 "vsx_register_operand" "wa,0")
+           (unspec:OO
+            [(match_operand:OO 3 "vsx_register_operand" "0,wa")
+             (const_int VPAIR_FP_ELEMENT)]
+            UNSPEC_VPAIR_NEG)
+           (const_int VPAIR_FP_ELEMENT)]
+          UNSPEC_VPAIR_FMA)
+         (const_int VPAIR_FP_ELEMENT)]
+        UNSPEC_VPAIR_NEG))]
+  "TARGET_MMA"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  vpair_split_fma (operands, <VPAIR_VMODE>mode, VPAIR_SPLIT_NFMS);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "<vpair_type>")])
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 23ed920a45a..a54f1194378 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -24264,6 +24264,15 @@ store instruction.
 The @code{nabs} built-in is a combination of @code{neg} and
 @code{abs}.
 
+The @code{fms} built-in is a combination of @code{fma} and @code{neg}
+of the third argument.
+
+The @code{nfma} built-in is the @code{neg} of the result of the
+@code{fma} built-in.
+
+The @code{nfms} built-in is the @code{neg} of the result of the
+@code{fms} built-in.
+
 The following built-in functions operate on pairs of
 @code{vector float} values:
 
@@ -24271,11 +24280,19 @@ The following built-in functions operate on pairs of
 __vector_pair __builtin_vpair_f32_abs (__vector_pair);
 __vector_pair __builtin_vpair_f32_add (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f32_div (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_f32_fma (__vector_pair, __vector_pair,
+                                       __vector_pair);
+__vector_pair __builtin_vpair_f32_fms (__vector_pair, __vector_pair,
+                                       __vector_pair);
 __vector_pair __builtin_vpair_f32_max (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f32_min (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f32_mul (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f32_nabs (__vector_pair);
 __vector_pair __builtin_vpair_f32_neg (__vector_pair);
+__vector_pair __builtin_vpair_f32_nfma (__vector_pair, __vector_pair,
+                                       __vector_pair);
+__vector_pair __builtin_vpair_f32_nfms (__vector_pair, __vector_pair,
+                                       __vector_pair);
 __vector_pair __builtin_vpair_f32_sub (__vector_pair, __vector_pair);
 @end smallexample
 
@@ -24286,11 +24303,19 @@ The following built-in functions operate on pairs of
 __vector_pair __builtin_vpair_f64_abs (__vector_pair);
 __vector_pair __builtin_vpair_f64_add (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f64_div (__vector_pair, __vector_pair);
+__vector_pair __builtin_vpair_f64_fma (__vector_pair, __vector_pair,
+                                       __vector_pair);
+__vector_pair __builtin_vpair_f64_fms (__vector_pair, __vector_pair,
+                                       __vector_pair);
 __vector_pair __builtin_vpair_f64_max (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f64_min (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f64_mul (__vector_pair, __vector_pair);
 __vector_pair __builtin_vpair_f64_nabs (__vector_pair);
 __vector_pair __builtin_vpair_f64_neg (__vector_pair);
+__vector_pair __builtin_vpair_f64_nfma (__vector_pair, __vector_pair,
+                                       __vector_pair);
+__vector_pair __builtin_vpair_f64_nfms (__vector_pair, __vector_pair,
+                                       __vector_pair);
 __vector_pair __builtin_vpair_f64_sub (__vector_pair, __vector_pair);
 @end smallexample
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c 
b/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c
new file mode 100644
index 00000000000..43b91461759
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-3.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected FMA instructions
+   for vector pairs with 4 double elements.  */
+
+void
+test_fma (__vector_pair *dest,
+         __vector_pair *x,
+         __vector_pair *y,
+         __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvmadd{a,m}dp, 1 stxvp.  */
+  *dest = __builtin_vpair_f64_fma (*x, *y, *z);
+}
+
+void
+test_fms (__vector_pair *dest,
+         __vector_pair *x,
+         __vector_pair *y,
+         __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvmsub{a,m}dp, 1 stxvp.  */
+  __vector_pair n = __builtin_vpair_f64_neg (*z);
+  *dest = __builtin_vpair_f64_fma (*x, *y, n);
+}
+
+void
+test_nfma (__vector_pair *dest,
+          __vector_pair *x,
+          __vector_pair *y,
+          __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvnmadd{a,m}dp, 1 stxvp.  */
+  __vector_pair w = __builtin_vpair_f64_fma (*x, *y, *z);
+  *dest = __builtin_vpair_f64_neg (w);
+}
+
+void
+test_nfms (__vector_pair *dest,
+          __vector_pair *x,
+          __vector_pair *y,
+          __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvnmsub{a,m}dp, 1 stxvp.  */
+  __vector_pair n = __builtin_vpair_f64_neg (*z);
+  __vector_pair w = __builtin_vpair_f64_fma (*x, *y, n);
+  *dest = __builtin_vpair_f64_neg (w);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.dp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.dp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.dp\M}   2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c 
b/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c
new file mode 100644
index 00000000000..d5c55d3883c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vector-pair-4.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* Test whether the vector builtin code generates the expected FMA instructions
+   for vector pairs with 8 float elements.  */
+
+void
+test_fma (__vector_pair *dest,
+         __vector_pair *x,
+         __vector_pair *y,
+         __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvmadd{a,m}sp, 1 stxvp.  */
+  *dest = __builtin_vpair_f32_fma (*x, *y, *z);
+}
+
+void
+test_fms (__vector_pair *dest,
+         __vector_pair *x,
+         __vector_pair *y,
+         __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvmsub{a,m}sp, 1 stxvp.  */
+  __vector_pair n = __builtin_vpair_f32_neg (*z);
+  *dest = __builtin_vpair_f32_fma (*x, *y, n);
+}
+
+void
+test_nfma (__vector_pair *dest,
+          __vector_pair *x,
+          __vector_pair *y,
+          __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvnmadd{a,m}sp, 1 stxvp.  */
+  __vector_pair w = __builtin_vpair_f32_fma (*x, *y, *z);
+  *dest = __builtin_vpair_f32_neg (w);
+}
+
+void
+test_nfms (__vector_pair *dest,
+          __vector_pair *x,
+          __vector_pair *y,
+          __vector_pair *z)
+{
+  /* 3 lxvp, 2 xvnmsub{a,m}sp, 1 stxvp.  */
+  __vector_pair n = __builtin_vpair_f32_neg (*z);
+  __vector_pair w = __builtin_vpair_f32_fma (*x, *y, n);
+  *dest = __builtin_vpair_f32_neg (w);
+}
+
+/* { dg-final { scan-assembler-times {\mlxvp\M}       12 } } */
+/* { dg-final { scan-assembler-times {\mstxvp\M}       4 } } */
+/* { dg-final { scan-assembler-times {\mxvmadd.sp\M}   2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmadd.sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvnmsub.sp\M}  2 } } */
+/* { dg-final { scan-assembler-times {\mxvmsub.sp\M}   2 } } */

[gcc(refs/users/meissner/heads/work177-vpair)] Add support for vector pair fma operations.

Reply via email to