The following adds SLP build operand swapping for .FMA, which is
both a ternary operator and a call.  The current code only handles
binary operators in assignments, so the patch extends it to
handle calls as well as assignments, and ternary as well as binary
operators.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

        * tree-vect-slp.cc (vect_build_slp_2): Handle ternary
        and call operators when swapping operands.

        * gcc.target/i386/vect-pr82426.c: Pass explicit -ffp-contract=fast.
        * gcc.target/i386/vect-pr82426-2.c: New testcase variant with
        -ffp-contract=on.
---
 .../gcc.target/i386/vect-pr82426-2.c          | 31 ++++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-pr82426.c  |  2 +-
 gcc/tree-vect-slp.cc                          | 37 ++++++++++++++-----
 3 files changed, 60 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/vect-pr82426-2.c

diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
new file mode 100644
index 00000000000..525940866ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
@@ -0,0 +1,31 @@
+/* i?86 does not have V2SF, x32 does though.  */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=on" } */
+
+struct Matrix
+{
+  float m11;
+  float m12;
+  float m21;
+  float m22;
+  float dx;
+  float dy;
+};
+
+struct Matrix multiply(const struct Matrix *a, const struct Matrix *b)
+{
+  struct Matrix out;
+  out.m11 = a->m11*b->m11 + a->m12*b->m21;
+  out.m12 = a->m11*b->m12 + a->m12*b->m22;
+  out.m21 = a->m21*b->m11 + a->m22*b->m21;
+  out.m22 = a->m21*b->m12 + a->m22*b->m22;
+
+  out.dx = a->dx*b->m11  + a->dy*b->m21 + b->dx;
+  out.dy = a->dx*b->m12  + a->dy*b->m22 + b->dy;
+  return out;
+}
+
+/* The whole kernel should be vectorized with V4SF and V2SF operations.  */
+/* { dg-final { scan-assembler-times "vadd" 1 } } */
+/* { dg-final { scan-assembler-times "vmul" 2 } } */
+/* { dg-final { scan-assembler-times "vfma" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426.c b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
index 03b10adff9b..8ce8fe78a91 100644
--- a/gcc/testsuite/gcc.target/i386/vect-pr82426.c
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
@@ -1,6 +1,6 @@
 /* i?86 does not have V2SF, x32 does though.  */
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -mavx -mfma" } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=fast" } */
 
 struct Matrix
 {
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6842ca90f1c..a69cbb92739 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2853,9 +2853,10 @@ out:
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
-         && nops == 2
+         && (nops == 2 || nops == 3)
          && oprnds_info[1]->first_dt == vect_internal_def
-         && is_gimple_assign (stmt_info->stmt)
+         && (is_gimple_assign (stmt_info->stmt)
+             || is_gimple_call (stmt_info->stmt))
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_REDUC_IDX (stmt_info) == -1)
        {
@@ -2870,14 +2871,32 @@ out:
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
-                 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
-                 if (!stmt
-                     || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
+                 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
                    {
-                     if (!swap_not_matching)
-                       goto fail;
-                     swap_not_matching = false;
-                     break;
+                     tree_code code = gimple_assign_rhs_code (stmt);
+                     if (! commutative_tree_code (code)
+                         && ! commutative_ternary_tree_code (code))
+                       {
+                         if (!swap_not_matching)
+                           goto fail;
+                         swap_not_matching = false;
+                         break;
+                       }
+                   }
+                 else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+                   {
+                     internal_fn fn = (gimple_call_internal_p (call)
+                                       ? gimple_call_internal_fn (call)
+                                       : IFN_LAST);
+                     if ((! commutative_binary_fn_p (fn)
+                          && ! commutative_ternary_fn_p (fn))
+                         || first_commutative_argument (fn) != 0)
+                       {
+                         if (!swap_not_matching)
+                           goto fail;
+                         swap_not_matching = false;
+                         break;
+                       }
                    }
                }
            }
-- 
2.43.0

Reply via email to