This reverts the reversion of r10-5979, amending the CTOR case with a comment explaining why the conversion is not necessary there. It also adds a testcase (though not one that exercises the CTOR case).
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2022-03-07 Richard Biener <rguent...@suse.de> PR tree-optimization/104782 * tree-vect-slp.cc (vectorize_slp_instance_root_stmt): Re-instantiate r10-5979 fix, add comment. * gcc.dg/vect/pr104782.c: New testcase. --- gcc/testsuite/gcc.dg/vect/pr104782.c | 18 ++++++++++++++++++ gcc/tree-vect-slp.cc | 7 +++++++ 2 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/vect/pr104782.c diff --git a/gcc/testsuite/gcc.dg/vect/pr104782.c b/gcc/testsuite/gcc.dg/vect/pr104782.c new file mode 100644 index 00000000000..7b8ca6ca25b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr104782.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=128" { target aarch64-*-* } } */ + +int a, b, c; +static int d; +short *q; +void f() { + int *p = &d; + b = 9; + for (b = 9; b; b--) { + a = 2; + for (c = 2; c <= 9; c++) { + for (int i = 0; i < 3; i++) + *p |= (*q)++; + } + } +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index f9bbc872a99..4ac2b70303c 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -7382,6 +7382,10 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) gimple *child_stmt = SLP_TREE_VEC_STMTS (node)[0]; tree vect_lhs = gimple_get_lhs (child_stmt); tree root_lhs = gimple_get_lhs (instance->root_stmts[0]->stmt); + if (!useless_type_conversion_p (TREE_TYPE (root_lhs), + TREE_TYPE (vect_lhs))) + vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs), + vect_lhs); rstmt = gimple_build_assign (root_lhs, vect_lhs); } else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1) @@ -7392,6 +7396,9 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) vec<constructor_elt, va_gc> *v; vec_alloc (v, nelts); + /* A CTOR can handle V16HI composition from VNx8HI so we + do not need to convert vector elements if the types + do not match. 
*/ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt) CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, gimple_get_lhs (child_stmt)); -- 2.34.1