On Mon, Dec 2, 2024 at 9:05 PM Richard Biener <rguent...@suse.de> wrote: > > The PR uncovers unchecked constraints on the ability to code-generate > with SLP but also latent issues with regard to stmt order checking > since loop (early-break) and BB (for quite some time) vectorization > are no longer constraint to single-BBs. In particular get_later_stmt > simply compares UIDs of stmts, but that's only reliable when they > are in the same BB. > > For the PR in question the problematical case is demoting a SLP node > to external which fails to check we can actually code generate this > in the way we do (using get_later_stmt). The following thus adds > checking that we demote to external only when all defs are from > the same BB. > > We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was > for a wrong-code issue and the vectorization done is a no-op. > > Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.
This caused: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118266 > PR tree-optimization/116352 > PR tree-optimization/117876 > * tree-vect-slp.cc (vect_slp_can_convert_to_external): New. > (vect_slp_convert_to_external): Call it. > (vect_build_slp_tree_2): Likewise. > > * gcc.dg/vect/pr116352.c: New testcase. > * gcc.dg/vect/bb-slp-49.c: Remove vectorization check. > --- > gcc/testsuite/gcc.dg/vect/bb-slp-49.c | 3 +-- > gcc/testsuite/gcc.dg/vect/pr116352.c | 34 +++++++++++++++++++++++++++ > gcc/tree-vect-slp.cc | 29 ++++++++++++++++++----- > 3 files changed, 58 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/vect/pr116352.c > > diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c > b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c > index e7101fcff46..c0ad5d70a9a 100644 > --- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c > +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c > @@ -23,6 +23,5 @@ main () > return 0; > } > > -/* See that we vectorize an SLP instance. */ > +/* See that we try to vectorize an SLP instance. */ > /* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } > } */ > -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */ > diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c > b/gcc/testsuite/gcc.dg/vect/pr116352.c > new file mode 100644 > index 00000000000..3fe537c34ff > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/pr116352.c > @@ -0,0 +1,34 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3" } */ > + > +static void addPrior(float center_x, float center_y, float width, float > height, > + bool normalized, float *dst) > +{ > + if (normalized) > + { > + dst[0] = (center_x - width * 0.5f); > + dst[1] = (center_y - height * 0.5f); > + dst[2] = (center_x + width * 0.5f); > + dst[3] = (center_y + height * 0.5f); > + } > + else > + { > + dst[0] = center_x - width * 0.5f; > + dst[1] = center_y - height * 0.5f; > + dst[2] = center_x + width * 0.5f - 1.0f; > + dst[3] = center_y + height * 0.5f - 1.0f; > + } > +} > +void forward(float *outputPtr, int _offsetsXs, float *_offsetsX, > + float *_offsetsY, float _stepX, float _stepY, > + bool _bboxesNormalized, float _boxWidth, float _boxHeight) > +{ > + for (int j = 0; j < _offsetsXs; ++j) > + { > + float center_x = (_offsetsX[j]) * _stepX; > + float center_y = (_offsetsY[j]) * _stepY; > + addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized, > + outputPtr); > + outputPtr += 4; > + } > +} > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index ec986cc3f68..1799d5a619b 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, > gimple_stmt_iterator *, > static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, > slp_tree, stmt_vector_for_cost *); > static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree); > +static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &); > > static object_allocator<_slp_tree> *slp_tree_pool; > static slp_tree slp_first_node; > @@ -2887,7 +2888,8 @@ fail: > for (j = 0; j < group_size; ++j) > if (!matches[j]) > break; > - if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))) > + if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)) > + && vect_slp_can_convert_to_external (oprnd_info->def_stmts)) > { > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > @@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, > slp_tree node, > node, node_instance, cost_vec); > } > > +/* Verify if we can externalize a set of internal defs. */ > + > +static bool > +vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts) > +{ > + basic_block bb = NULL; > + for (stmt_vec_info stmt : stmts) > + if (!stmt) > + return false; > + /* Constant generation uses get_later_stmt which can only handle > + defs from the same BB. */ > + else if (!bb) > + bb = gimple_bb (stmt->stmt); > + else if (gimple_bb (stmt->stmt) != bb) > + return false; > + return true; > +} > + > /* Try to build NODE from scalars, returning true on success. > NODE_INSTANCE is the SLP instance that contains NODE. */ > > @@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, > slp_tree node, > || !SLP_TREE_SCALAR_STMTS (node).exists () > || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)) > /* Force the mask use to be built from scalars instead. */ > - || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))) > + || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)) > + || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node))) > return false; > > - FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) > - if (!stmt_info) > - return false; > - > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, > "Building vector operands of %p from scalars instead\n", > -- > 2.43.0 -- H.J.