The following allows SLP build to succeed when mixing .FMA/.FMS
in different lanes, similar to how we handle mixed plus/minus.  This
does not yet address SLP pattern matching not being able to form
a FMADDSUB from this.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

While the testcases are x86 specific I've kept them in vect/ with the
hope that we'd get better general dejagnu target_fma handling...

        PR tree-optimization/120808
        * tree-vectorizer.h (compatible_calls_p): Add flag to
        indicate a FMA/FMS pair is allowed.
        * tree-vect-slp.cc (compatible_calls_p): Likewise.
        (vect_build_slp_tree_1): Allow mixed .FMA/.FMS as two-operator.
        (vect_build_slp_tree_2): Handle calls in two-operator SLP build.
        * tree-vect-slp-patterns.cc (compatible_complex_nodes_p):
        Adjust.

        * gcc.dg/vect/bb-slp-pr120808.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c | 12 +++++
 gcc/tree-vect-slp-patterns.cc               |  2 +-
 gcc/tree-vect-slp.cc                        | 52 ++++++++++++++-------
 gcc/tree-vectorizer.h                       |  2 +-
 4 files changed, 50 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c

diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
new file mode 100644
index 00000000000..c334d6ad8d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ffp-contract=on" } */
+/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */
+
+void f(double x[restrict], double *y, double *z)
+{
+    x[0] = x[0] * y[0] + z[0];
+    x[1] = x[1] * y[1] - z[1];
+}
+
+/* The following should check for SLP build covering the loads.  */
+/* { dg-final { scan-tree-dump "transform load" "slp2" { target { x86_64-*-* i?86-*-* } } } } */
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index c0dff90d9ba..24ae203e6ff 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -786,7 +786,7 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
   if (is_gimple_call (a_stmt))
     {
        if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
-                                dyn_cast <gcall *> (b_stmt)))
+                                dyn_cast <gcall *> (b_stmt), false))
          return false;
     }
   else if (!is_gimple_assign (a_stmt))
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 9f0cb978a5a..155da099d95 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -990,13 +990,18 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
    to be combined into the same SLP group.  */
 
 bool
-compatible_calls_p (gcall *call1, gcall *call2)
+compatible_calls_p (gcall *call1, gcall *call2, bool allow_two_operators)
 {
   unsigned int nargs = gimple_call_num_args (call1);
   if (nargs != gimple_call_num_args (call2))
     return false;
 
-  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
+  auto cfn1 = gimple_call_combined_fn (call1);
+  auto cfn2 = gimple_call_combined_fn (call2);
+  if (cfn1 != cfn2
+      && (!allow_two_operators
+         || !((cfn1 == CFN_FMA || cfn1 == CFN_FMS)
+              && (cfn2 == CFN_FMA || cfn2 == CFN_FMS))))
     return false;
 
   if (gimple_call_internal_p (call1))
@@ -1358,10 +1363,14 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                   || rhs_code != IMAGPART_EXPR)
               /* Handle mismatches in plus/minus by computing both
                  and merging the results.  */
-              && !((first_stmt_code == PLUS_EXPR
-                    || first_stmt_code == MINUS_EXPR)
-                   && (alt_stmt_code == PLUS_EXPR
-                       || alt_stmt_code == MINUS_EXPR)
+              && !((((first_stmt_code == PLUS_EXPR
+                      || first_stmt_code == MINUS_EXPR)
+                     && (alt_stmt_code == PLUS_EXPR
+                         || alt_stmt_code == MINUS_EXPR))
+                    || ((first_stmt_code == CFN_FMA
+                         || first_stmt_code == CFN_FMS)
+                        && (alt_stmt_code == CFN_FMA
+                            || alt_stmt_code == CFN_FMS)))
                    && rhs_code == alt_stmt_code)
               && !(first_stmt_code.is_tree_code ()
                    && rhs_code.is_tree_code ()
@@ -1410,7 +1419,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            {
              if (!is_a <gcall *> (stmts[0]->stmt)
                  || !compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
-                                         call_stmt))
+                                         call_stmt, true))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -3059,24 +3068,35 @@ fail:
       SLP_TREE_CODE (node) = VEC_PERM_EXPR;
       SLP_TREE_CHILDREN (node).quick_push (one);
       SLP_TREE_CHILDREN (node).quick_push (two);
-      gassign *stmt = as_a <gassign *> (stmts[0]->stmt);
-      enum tree_code code0 = gimple_assign_rhs_code (stmt);
+      enum tree_code code0 = ERROR_MARK;
       enum tree_code ocode = ERROR_MARK;
+      if (gassign *stmt = dyn_cast <gassign *> (stmts[0]->stmt))
+       code0 = gimple_assign_rhs_code (stmt);
       stmt_vec_info ostmt_info;
       unsigned j = 0;
       FOR_EACH_VEC_ELT (stmts, i, ostmt_info)
        {
-         gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
-         if (gimple_assign_rhs_code (ostmt) != code0)
+         int op = 0;
+         if (gassign *ostmt = dyn_cast <gassign *> (ostmt_info->stmt))
            {
-             SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, 
i));
-             ocode = gimple_assign_rhs_code (ostmt);
-             j = i;
+             if (gimple_assign_rhs_code (ostmt) != code0)
+               {
+                 ocode = gimple_assign_rhs_code (ostmt);
+                 op = 1;
+                 j = i;
+               }
            }
          else
-           SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i));
+           {
+             if (gimple_call_combined_fn (stmts[0]->stmt)
+                 != gimple_call_combined_fn (ostmt_info->stmt))
+               {
+                 op = 1;
+                 j = i;
+               }
+           }
+         SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (op, i));
        }
-
       SLP_TREE_CODE (one) = code0;
       SLP_TREE_CODE (two) = ocode;
       SLP_TREE_LANES (one) = stmts.length ();
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5f63447133a..eb08342d629 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2702,7 +2702,7 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
 extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
 extern void vect_free_slp_tree (slp_tree);
-extern bool compatible_calls_p (gcall *, gcall *);
+extern bool compatible_calls_p (gcall *, gcall *, bool);
 extern int vect_slp_child_index_for_operand (const gimple *, int op, bool);
 
 extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree,
-- 
2.43.0

Reply via email to