This avoids detecting random unrelated nodes as possible entries to regions of the SLP graph that are not backwards reachable. Instead, explicitly add the problematic nodes.
This temporarily XFAILs gcc.dg/vect/pr67790.c until I get the permute propagation adjusted to handle the case where it needs more than one optimistic iteration. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2021-06-29 Richard Biener <rguent...@suse.de> PR tree-optimization/101242 * tree-vect-slp.c (vect_slp_build_vertices): Force-add PHIs with not represented initial values as leafs. * gcc.dg/vect/bb-slp-pr101242.c: New testcase. * gcc.dg/vect/pr67790.c: XFAIL scan for zero VEC_PERM_EXPR. --- gcc/testsuite/gcc.dg/vect/bb-slp-pr101242.c | 38 +++++++++++++++++++++ gcc/testsuite/gcc.dg/vect/pr67790.c | 2 +- gcc/tree-vect-slp.c | 24 ++++++------- 3 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr101242.c diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr101242.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr101242.c new file mode 100644 index 00000000000..d8854468df4 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr101242.c @@ -0,0 +1,38 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Ofast" } */ + +typedef struct { + double real; + double imag; +} complex; +typedef struct { + complex e[3][3]; +} su3_matrix; +su3_matrix check_su3_c; +double check_su3_ar, check_su3_ari, check_su3_max; +int arireturn(); +int check_su3() { + check_su3_ar = check_su3_c.e[0][0].real * check_su3_c.e[1][0].real + + check_su3_c.e[0][0].imag * check_su3_c.e[1][0].imag + + check_su3_c.e[0][1].real * check_su3_c.e[1][1].real + + check_su3_c.e[0][1].imag * check_su3_c.e[1][1].imag + + check_su3_c.e[0][2].real * check_su3_c.e[1][2].real + + check_su3_c.e[0][2].imag * check_su3_c.e[1][2].imag; + check_su3_max = check_su3_c.e[0][0].real * check_su3_c.e[2][0].real + + check_su3_c.e[0][0].imag * check_su3_c.e[2][0].imag + + check_su3_c.e[0][1].real * check_su3_c.e[2][1].real + + check_su3_c.e[0][1].imag * check_su3_c.e[2][1].imag + + check_su3_c.e[0][2].real * check_su3_c.e[2][2].real + + check_su3_c.e[0][2].imag * check_su3_c.e[2][2].imag; + 
check_su3_ari = check_su3_ar; + if (check_su3_ari) + check_su3_max = check_su3_c.e[1][0].real * check_su3_c.e[2][0].real + + check_su3_c.e[1][0].imag * check_su3_c.e[2][0].imag + + check_su3_c.e[1][1].real * check_su3_c.e[2][1].real + + check_su3_c.e[1][1].imag * check_su3_c.e[2][1].imag + + check_su3_c.e[1][2].real * check_su3_c.e[2][2].real + + check_su3_c.e[1][2].imag * check_su3_c.e[2][2].imag; + if (check_su3_max) + arireturn(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/vect/pr67790.c b/gcc/testsuite/gcc.dg/vect/pr67790.c index 32eacd91fda..0555d41abf7 100644 --- a/gcc/testsuite/gcc.dg/vect/pr67790.c +++ b/gcc/testsuite/gcc.dg/vect/pr67790.c @@ -38,4 +38,4 @@ int main() } /* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ -/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" { xfail *-*-* } } } */ diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 5401dbe4d5e..63b6e6a24b9 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3499,13 +3499,21 @@ vect_slp_build_vertices (hash_set<slp_tree> &visited, slp_tree node, vertices.safe_push (slpg_vertex (node)); bool leaf = true; + bool force_leaf = false; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (child) { leaf = false; vect_slp_build_vertices (visited, child, vertices, leafs); } - if (leaf) + else + force_leaf = true; + /* Since SLP discovery works along use-def edges all cycles have an + entry - but there's the exception of cycles where we do not handle + the entry explicitely (but with a NULL SLP node), like some reductions + and inductions. Force those SLP PHIs to act as leafs to make them + backwards reachable. 
*/ + if (leaf || force_leaf) leafs.safe_push (node->vertex); } @@ -3519,18 +3527,8 @@ vect_slp_build_vertices (vec_info *info, vec<slpg_vertex> &vertices, unsigned i; slp_instance instance; FOR_EACH_VEC_ELT (info->slp_instances, i, instance) - { - unsigned n_v = vertices.length (); - unsigned n_l = leafs.length (); - vect_slp_build_vertices (visited, SLP_INSTANCE_TREE (instance), vertices, - leafs); - /* If we added vertices but no entries to the reverse graph we've - added a cycle that is not backwards-reachable. Push the entry - to mimic as leaf then. */ - if (vertices.length () > n_v - && leafs.length () == n_l) - leafs.safe_push (SLP_INSTANCE_TREE (instance)->vertex); - } + vect_slp_build_vertices (visited, SLP_INSTANCE_TREE (instance), vertices, + leafs); } /* Apply (reverse) bijectite PERM to VEC. */ -- 2.26.2