On Fri, Jun 17, 2022 at 12:53 PM Kewen.Lin <li...@linux.ibm.com> wrote:
>
> Hi,
>
> This follows Richi's suggestion in PR105940, it aims to avoid
> inconsistent slp decision between when the suggested unroll
> factor is worked out and when the suggested unroll factor is
> applied.
>
> If the previous slp decision is true when the suggested unroll
> factor is worked out, when we are applying unroll factor we
> don't need to start over with slp off if the analysis with slp
> on fails.  On the other hand, if the previous slp decision is
> false when the suggested unroll factor is worked out, when we
> are applying unroll factor we can skip the slp handlings.
>
> Function vect_is_simple_reduction saves reduction chains for
> subsequent slp analyses, we have to disable this early otherwise
> there is an ICE in vectorizable_reduction for below:
>
>   if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
>     gcc_assert (slp_node
>                 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
>                    == stmt_info);

We ensure this by either decomposing the group in vect_analyze_slp
if the reduction chain doesn't SLP or when we re-try without SLP
by not re-trying:

  /* If there are reduction chains re-trying will fail anyway.  */
  if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
    return ok;

> Bootstrapped and regtested on x86_64-redhat-linux,
> powerpc64{,le}-linux-gnu and aarch64-linux-gnu.
>
> Also tested with SPEC2017 build with some rs6000 hacking.
>
> Is it ok for trunk?

OK.

Thanks,
Richard.

> BR,
> Kewen
> -----
>
>         PR tree-optimization/105940
>
> gcc/ChangeLog:
>
>         * tree-vect-loop.cc (vect_analyze_loop_2): Add new parameter
>         slp_done_for_suggested_uf and adjust with it accordingly.
>         (vect_analyze_loop_1): Add new variable slp_done_for_suggested_uf,
>         pass it down to vect_analyze_loop_2 for the initial analysis and
>         applying suggested unroll factor.
>         (vect_is_simple_reduction): Add parameter slp and adjust with it.
>         (vect_analyze_scalar_cycles_1): Add parameter slp and pass down.
>         (vect_analyze_scalar_cycles): Likewise.
> ---
>  gcc/tree-vect-loop.cc | 101 ++++++++++++++++++++++++++++--------------
>  1 file changed, 67 insertions(+), 34 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index e05f8e87f7d..ccab68caf9a 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -157,7 +157,7 @@ along with GCC; see the file COPYING3.  If not see
>  static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *,
>                                                 unsigned *);
>  static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
> -                                              bool *, bool *);
> +                                              bool *, bool *, bool);
>
>  /* Subroutine of vect_determine_vf_for_stmt that handles only one
>     statement.  VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
> @@ -463,10 +463,12 @@ vect_inner_phi_in_double_reduction_p (loop_vec_info 
> loop_vinfo, gphi *phi)
>     Examine the cross iteration def-use cycles of scalar variables
>     in LOOP.  LOOP_VINFO represents the loop that is now being
>     considered for vectorization (can be LOOP, or an outer-loop
> -   enclosing LOOP).  */
> +   enclosing LOOP).  SLP indicates there will be some subsequent
> +   slp analyses or not.  */
>
>  static void
> -vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop)
> +vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, class loop *loop,
> +                             bool slp)
>  {
>    basic_block bb = loop->header;
>    tree init, step;
> @@ -545,7 +547,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, 
> class loop *loop)
>
>        stmt_vec_info reduc_stmt_info
>         = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc,
> -                                   &reduc_chain);
> +                                   &reduc_chain, slp);
>        if (reduc_stmt_info)
>          {
>           STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
> @@ -616,11 +618,11 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, 
> class loop *loop)
>                   a[i] = i;  */
>
>  static void
> -vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
> +vect_analyze_scalar_cycles (loop_vec_info loop_vinfo, bool slp)
>  {
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>
> -  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
> +  vect_analyze_scalar_cycles_1 (loop_vinfo, loop, slp);
>
>    /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
>       Reductions in such inner-loop therefore have different properties than
> @@ -632,7 +634,7 @@ vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
>          current checks are too strict.  */
>
>    if (loop->inner)
> -    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
> +    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner, slp);
>  }
>
>  /* Transfer group and reduction information from STMT_INFO to its
> @@ -2223,12 +2225,18 @@ vect_determine_partial_vectors_and_peeling 
> (loop_vec_info loop_vinfo,
>
>  /* Function vect_analyze_loop_2.
>
> -   Apply a set of analyses on LOOP, and create a loop_vec_info struct
> -   for it.  The different analyses will record information in the
> -   loop_vec_info struct.  */
> +   Apply a set of analyses on LOOP specified by LOOP_VINFO, the different
> +   analyses will record information in some members of LOOP_VINFO.  FATAL
> +   indicates if some analysis meets fatal error.  If one non-NULL pointer
> +   SUGGESTED_UNROLL_FACTOR is provided, it's intent to be filled with one
> +   worked out suggested unroll factor, while one NULL pointer shows it's
> +   going to apply the suggested unroll factor.  SLP_DONE_FOR_SUGGESTED_UF
> +   is to hold the slp decision when the suggested unroll factor is worked
> +   out.  */
>  static opt_result
>  vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal,
> -                    unsigned *suggested_unroll_factor)
> +                    unsigned *suggested_unroll_factor,
> +                    bool& slp_done_for_suggested_uf)
>  {
>    opt_result ok = opt_result::success ();
>    int res;
> @@ -2290,9 +2298,18 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal,
>        return ok;
>      }
>
> +  /* Check if we are applying unroll factor now.  */
> +  bool applying_suggested_uf = loop_vinfo->suggested_unroll_factor > 1;
> +  gcc_assert (!applying_suggested_uf || !suggested_unroll_factor);
> +
> +  /* If the slp decision is false when suggested unroll factor is worked
> +     out, and we are applying suggested unroll factor, we can simply skip
> +     all slp related analyses this time.  */
> +  bool slp = !applying_suggested_uf || slp_done_for_suggested_uf;
> +
>    /* Classify all cross-iteration scalar data-flow cycles.
>       Cross-iteration cycles caused by virtual phis are analyzed separately.  
> */
> -  vect_analyze_scalar_cycles (loop_vinfo);
> +  vect_analyze_scalar_cycles (loop_vinfo, slp);
>
>    vect_pattern_recog (loop_vinfo);
>
> @@ -2359,26 +2376,30 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal,
>
>    poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR 
> (loop_vinfo);
>
> -  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  
> */
> -  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
> -  if (!ok)
> -    return ok;
> -
> -  /* If there are any SLP instances mark them as pure_slp.  */
> -  bool slp = vect_make_slp_decision (loop_vinfo);
>    if (slp)
>      {
> -      /* Find stmts that need to be both vectorized and SLPed.  */
> -      vect_detect_hybrid_slp (loop_vinfo);
> +      /* Check the SLP opportunities in the loop, analyze and build
> +        SLP trees.  */
> +      ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
> +      if (!ok)
> +       return ok;
> +
> +      /* If there are any SLP instances mark them as pure_slp.  */
> +      slp = vect_make_slp_decision (loop_vinfo);
> +      if (slp)
> +       {
> +         /* Find stmts that need to be both vectorized and SLPed.  */
> +         vect_detect_hybrid_slp (loop_vinfo);
>
> -      /* Update the vectorization factor based on the SLP decision.  */
> -      vect_update_vf_for_slp (loop_vinfo);
> +         /* Update the vectorization factor based on the SLP decision.  */
> +         vect_update_vf_for_slp (loop_vinfo);
>
> -      /* Optimize the SLP graph with the vectorization factor fixed.  */
> -      vect_optimize_slp (loop_vinfo);
> +         /* Optimize the SLP graph with the vectorization factor fixed.  */
> +         vect_optimize_slp (loop_vinfo);
>
> -      /* Gather the loads reachable from the SLP graph entries.  */
> -      vect_gather_slp_loads (loop_vinfo);
> +         /* Gather the loads reachable from the SLP graph entries.  */
> +         vect_gather_slp_loads (loop_vinfo);
> +       }
>      }
>
>    bool saved_can_use_partial_vectors_p
> @@ -2394,7 +2415,7 @@ start_over:
>    /* Apply the suggested unrolling factor, this was determined by the backend
>       during finish_cost the first time we ran the analyzis for this
>       vector mode.  */
> -  if (loop_vinfo->suggested_unroll_factor > 1)
> +  if (applying_suggested_uf)
>      LOOP_VINFO_VECT_FACTOR (loop_vinfo) *= 
> loop_vinfo->suggested_unroll_factor;
>
>    /* Now the vectorization factor is final.  */
> @@ -2665,6 +2686,8 @@ start_over:
>    gcc_assert (known_eq (vectorization_factor,
>                         LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
>
> +  slp_done_for_suggested_uf = slp;
> +
>    /* Ok to vectorize!  */
>    LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>    return opt_result::success ();
> @@ -2678,6 +2701,12 @@ again:
>    if (!slp)
>      return ok;
>
> +  /* If the slp decision is true when suggested unroll factor is worked
> +     out, and we are applying suggested unroll factor, we don't need to
> +     re-try any more.  */
> +  if (applying_suggested_uf && slp_done_for_suggested_uf)
> +    return ok;
> +
>    /* If there are reduction chains re-trying will fail anyway.  */
>    if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
>      return ok;
> @@ -2864,10 +2893,12 @@ vect_analyze_loop_1 (class loop *loop, 
> vec_info_shared *shared,
>    machine_mode vector_mode = vector_modes[mode_i];
>    loop_vinfo->vector_mode = vector_mode;
>    unsigned int suggested_unroll_factor = 1;
> +  bool slp_done_for_suggested_uf;
>
>    /* Run the main analysis.  */
>    opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
> -                                       &suggested_unroll_factor);
> +                                       &suggested_unroll_factor,
> +                                       slp_done_for_suggested_uf);
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
>                      "***** Analysis %s with vector mode %s\n",
> @@ -2879,13 +2910,15 @@ vect_analyze_loop_1 (class loop *loop, 
> vec_info_shared *shared,
>        if (dump_enabled_p ())
>         dump_printf_loc (MSG_NOTE, vect_location,
>                          "***** Re-trying analysis for unrolling"
> -                        " with unroll factor %d.\n",
> -                        suggested_unroll_factor);
> +                        " with unroll factor %d and slp %s.\n",
> +                        suggested_unroll_factor,
> +                        slp_done_for_suggested_uf ? "on" : "off");
>        loop_vec_info unroll_vinfo
>         = vect_create_loop_vinfo (loop, shared, loop_form_info, 
> main_loop_vinfo);
>        unroll_vinfo->vector_mode = vector_mode;
>        unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
> -      opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL);
> +      opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
> +                                               slp_done_for_suggested_uf);
>        if (new_res)
>         {
>           delete loop_vinfo;
> @@ -3598,7 +3631,7 @@ check_reduction_path (dump_user_location_t loc, loop_p 
> loop, gphi *phi,
>
>  static stmt_vec_info
>  vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
> -                         bool *double_reduc, bool *reduc_chain_p)
> +                         bool *double_reduc, bool *reduc_chain_p, bool slp)
>  {
>    gphi *phi = as_a <gphi *> (phi_info->stmt);
>    gimple *phi_use_stmt = NULL;
> @@ -3787,7 +3820,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, 
> stmt_vec_info phi_info,
>             continue;
>           reduc_chain.safe_push (stmt_info);
>         }
> -      if (is_slp_reduc && reduc_chain.length () > 1)
> +      if (slp && is_slp_reduc && reduc_chain.length () > 1)
>         {
>           for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
>             {
> --
> 2.27.0

Reply via email to