The following allows SLP build to succeed when mixing .FMA/.FMS in different lanes, similar to how we handle mixed plus/minus. This does not yet address SLP pattern matching not being able to form a FMADDSUB from this.
Bootstrapped and tested on x86_64-unknown-linux-gnu. While the testcases are x86 specific I've kept them in vect/ with the hope that we'd get better general dejagnu target_fma handling... PR tree-optimization/120808 * tree-vectorizer.h (compatible_calls_p): Add flag to indicate a FMA/FMS pair is allowed. * tree-vect-slp.cc (compatible_calls_p): Likewise. (vect_build_slp_tree_1): Allow mixed .FMA/.FMS as two-operator. (vect_build_slp_tree_2): Handle calls in two-operator SLP build. * tree-vect-slp-patterns.cc (compatible_complex_nodes_p): Adjust. * gcc.dg/vect/bb-slp-pr120808.c: New testcase. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c | 12 +++++ gcc/tree-vect-slp-patterns.cc | 2 +- gcc/tree-vect-slp.cc | 52 ++++++++++++++------- gcc/tree-vectorizer.h | 2 +- 4 files changed, 50 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c new file mode 100644 index 00000000000..c334d6ad8d3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr120808.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-ffp-contract=on" } */ +/* { dg-additional-options "-mfma" { target { x86_64-*-* i?86-*-* } } } */ + +void f(double x[restrict], double *y, double *z) +{ + x[0] = x[0] * y[0] + z[0]; + x[1] = x[1] * y[1] - z[1]; +} + +/* The following should check for SLP build covering the loads. 
*/ +/* { dg-final { scan-tree-dump "transform load" "slp2" { target { x86_64-*-* i?86-*-* } } } } */ diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc index c0dff90d9ba..24ae203e6ff 100644 --- a/gcc/tree-vect-slp-patterns.cc +++ b/gcc/tree-vect-slp-patterns.cc @@ -786,7 +786,7 @@ compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache, if (is_gimple_call (a_stmt)) { if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt), - dyn_cast <gcall *> (b_stmt))) + dyn_cast <gcall *> (b_stmt), false)) return false; } else if (!is_gimple_assign (a_stmt)) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9f0cb978a5a..155da099d95 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -990,13 +990,18 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, to be combined into the same SLP group. */ bool -compatible_calls_p (gcall *call1, gcall *call2) +compatible_calls_p (gcall *call1, gcall *call2, bool allow_two_operators) { unsigned int nargs = gimple_call_num_args (call1); if (nargs != gimple_call_num_args (call2)) return false; - if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2)) + auto cfn1 = gimple_call_combined_fn (call1); + auto cfn2 = gimple_call_combined_fn (call2); + if (cfn1 != cfn2 + && (!allow_two_operators + || !((cfn1 == CFN_FMA || cfn1 == CFN_FMS) + && (cfn2 == CFN_FMA || cfn2 == CFN_FMS)))) return false; if (gimple_call_internal_p (call1)) @@ -1358,10 +1363,14 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, || rhs_code != IMAGPART_EXPR) /* Handle mismatches in plus/minus by computing both and merging the results. 
*/ - && !((first_stmt_code == PLUS_EXPR - || first_stmt_code == MINUS_EXPR) - && (alt_stmt_code == PLUS_EXPR - || alt_stmt_code == MINUS_EXPR) + && !((((first_stmt_code == PLUS_EXPR + || first_stmt_code == MINUS_EXPR) + && (alt_stmt_code == PLUS_EXPR + || alt_stmt_code == MINUS_EXPR)) + || ((first_stmt_code == CFN_FMA + || first_stmt_code == CFN_FMS) + && (alt_stmt_code == CFN_FMA + || alt_stmt_code == CFN_FMS))) && rhs_code == alt_stmt_code) && !(first_stmt_code.is_tree_code () && rhs_code.is_tree_code () @@ -1410,7 +1419,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, { if (!is_a <gcall *> (stmts[0]->stmt) || !compatible_calls_p (as_a <gcall *> (stmts[0]->stmt), - call_stmt)) + call_stmt, true)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -3059,24 +3068,35 @@ fail: SLP_TREE_CODE (node) = VEC_PERM_EXPR; SLP_TREE_CHILDREN (node).quick_push (one); SLP_TREE_CHILDREN (node).quick_push (two); - gassign *stmt = as_a <gassign *> (stmts[0]->stmt); - enum tree_code code0 = gimple_assign_rhs_code (stmt); + enum tree_code code0 = ERROR_MARK; enum tree_code ocode = ERROR_MARK; + if (gassign *stmt = dyn_cast <gassign *> (stmts[0]->stmt)) + code0 = gimple_assign_rhs_code (stmt); stmt_vec_info ostmt_info; unsigned j = 0; FOR_EACH_VEC_ELT (stmts, i, ostmt_info) { - gassign *ostmt = as_a <gassign *> (ostmt_info->stmt); - if (gimple_assign_rhs_code (ostmt) != code0) + int op = 0; + if (gassign *ostmt = dyn_cast <gassign *> (ostmt_info->stmt)) { - SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, i)); - ocode = gimple_assign_rhs_code (ostmt); - j = i; + if (gimple_assign_rhs_code (ostmt) != code0) + { + ocode = gimple_assign_rhs_code (ostmt); + op = 1; + j = i; + } } else - SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i)); + { + if (gimple_call_combined_fn (stmts[0]->stmt) + != gimple_call_combined_fn (ostmt_info->stmt)) + { + op = 1; + j = i; + } + } + SLP_TREE_LANE_PERMUTATION 
(node).safe_push (std::make_pair (op, i)); } - SLP_TREE_CODE (one) = code0; SLP_TREE_CODE (two) = ocode; SLP_TREE_LANES (one) = stmts.length (); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 5f63447133a..eb08342d629 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2702,7 +2702,7 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree, extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); extern slp_tree vect_create_new_slp_node (unsigned, tree_code); extern void vect_free_slp_tree (slp_tree); -extern bool compatible_calls_p (gcall *, gcall *); +extern bool compatible_calls_p (gcall *, gcall *, bool); extern int vect_slp_child_index_for_operand (const gimple *, int op, bool); extern tree prepare_vec_mask (loop_vec_info, tree, tree, tree, -- 2.43.0