On Mon, Dec 2, 2024 at 9:05 PM Richard Biener <rguent...@suse.de> wrote:
>
> The PR uncovers unchecked constraints on the ability to code-generate
> with SLP but also latent issues with regard to stmt order checking
> since loop (early-break) and BB (for quite some time) vectorization
> are no longer constraint to single-BBs.  In particular get_later_stmt
> simply compares UIDs of stmts, but that's only reliable when they
> are in the same BB.
>
> For the PR in question the problematical case is demoting a SLP node
> to external which fails to check we can actually code generate this
> in the way we do (using get_later_stmt).  The following thus adds
> checking that we demote to external only when all defs are from
> the same BB.
>
> We no longer vectorize gcc.dg/vect/bb-slp-49.c but the testcase was
> for a wrong-code issue and the vectorization done is a no-op.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118266

>         PR tree-optimization/116352
>         PR tree-optimization/117876
>         * tree-vect-slp.cc (vect_slp_can_convert_to_external): New.
>         (vect_slp_convert_to_external): Call it.
>         (vect_build_slp_tree_2): Likewise.
>
>         * gcc.dg/vect/pr116352.c: New testcase.
>         * gcc.dg/vect/bb-slp-49.c: Remove vectorization check.
> ---
>  gcc/testsuite/gcc.dg/vect/bb-slp-49.c |  3 +--
>  gcc/testsuite/gcc.dg/vect/pr116352.c  | 34 +++++++++++++++++++++++++++
>  gcc/tree-vect-slp.cc                  | 29 ++++++++++++++++++-----
>  3 files changed, 58 insertions(+), 8 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr116352.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 
> b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
> index e7101fcff46..c0ad5d70a9a 100644
> --- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
> +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
> @@ -23,6 +23,5 @@ main ()
>    return 0;
>  }
>
> -/* See that we vectorize an SLP instance.  */
> +/* See that we try to vectorize an SLP instance.  */
>  /* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } 
> } */
> -/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/pr116352.c 
> b/gcc/testsuite/gcc.dg/vect/pr116352.c
> new file mode 100644
> index 00000000000..3fe537c34ff
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr116352.c
> @@ -0,0 +1,34 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3" } */
> +
> +static void addPrior(float center_x, float center_y, float width, float 
> height,
> +                     bool normalized, float *dst)
> +{
> +  if (normalized)
> +    {
> +      dst[0] = (center_x - width * 0.5f);
> +      dst[1] = (center_y - height * 0.5f);
> +      dst[2] = (center_x + width * 0.5f);
> +      dst[3] = (center_y + height * 0.5f);
> +    }
> +  else
> +    {
> +      dst[0] = center_x - width * 0.5f;
> +      dst[1] = center_y - height * 0.5f;
> +      dst[2] = center_x + width * 0.5f - 1.0f;
> +      dst[3] = center_y + height * 0.5f - 1.0f;
> +    }
> +}
> +void forward(float *outputPtr, int _offsetsXs, float *_offsetsX,
> +            float *_offsetsY, float _stepX, float _stepY,
> +            bool _bboxesNormalized, float _boxWidth, float _boxHeight)
> +{
> +  for (int j = 0; j < _offsetsXs; ++j)
> +    {
> +      float center_x = (_offsetsX[j]) * _stepX;
> +      float center_y = (_offsetsY[j]) * _stepY;
> +      addPrior(center_x, center_y, _boxWidth, _boxHeight, _bboxesNormalized,
> +              outputPtr);
> +      outputPtr += 4;
> +    }
> +}
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index ec986cc3f68..1799d5a619b 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -67,6 +67,7 @@ static int vectorizable_slp_permutation_1 (vec_info *, 
> gimple_stmt_iterator *,
>  static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
>                                           slp_tree, stmt_vector_for_cost *);
>  static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree);
> +static bool vect_slp_can_convert_to_external (const vec<stmt_vec_info> &);
>
>  static object_allocator<_slp_tree> *slp_tree_pool;
>  static slp_tree slp_first_node;
> @@ -2887,7 +2888,8 @@ fail:
>           for (j = 0; j < group_size; ++j)
>             if (!matches[j])
>               break;
> -         if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype)))
> +         if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))
> +             && vect_slp_can_convert_to_external (oprnd_info->def_stmts))
>             {
>               if (dump_enabled_p ())
>                 dump_printf_loc (MSG_NOTE, vect_location,
> @@ -7764,6 +7766,24 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, 
> slp_tree node,
>                             node, node_instance, cost_vec);
>  }
>
> +/* Verify if we can externalize a set of internal defs.  */
> +
> +static bool
> +vect_slp_can_convert_to_external (const vec<stmt_vec_info> &stmts)
> +{
> +  basic_block bb = NULL;
> +  for (stmt_vec_info stmt : stmts)
> +    if (!stmt)
> +      return false;
> +    /* Constant generation uses get_later_stmt which can only handle
> +       defs from the same BB.  */
> +    else if (!bb)
> +      bb = gimple_bb (stmt->stmt);
> +    else if (gimple_bb (stmt->stmt) != bb)
> +      return false;
> +  return true;
> +}
> +
>  /* Try to build NODE from scalars, returning true on success.
>     NODE_INSTANCE is the SLP instance that contains NODE.  */
>
> @@ -7779,13 +7799,10 @@ vect_slp_convert_to_external (vec_info *vinfo, 
> slp_tree node,
>        || !SLP_TREE_SCALAR_STMTS (node).exists ()
>        || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node))
>        /* Force the mask use to be built from scalars instead.  */
> -      || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node)))
> +      || VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))
> +      || !vect_slp_can_convert_to_external (SLP_TREE_SCALAR_STMTS (node)))
>      return false;
>
> -  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
> -    if (!stmt_info)
> -      return false;
> -
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
>                      "Building vector operands of %p from scalars instead\n",
> --
> 2.43.0



-- 
H.J.

Reply via email to