On Fri, 24 Nov 2023, Tamar Christina wrote:

> Hi,
> 
> Having simplified peeling, this patch becomes smaller as well:
> 
> This changes the PHI node updates to support early breaks.
> It has to support both the case where the loop's exit matches the normal loop
> exit and one where the early exit is "inverted", i.e. it's an early exit edge.
> 
> In the latter case we must always restart the loop for VF iterations.  For an
> early exit the reason is obvious, but there are cases where the "normal" exit
> is located before the early one.  This exit then does a check on ivtmp,
> resulting in us leaving the loop since it thinks we're done.
> 
> In these cases we may still have side-effects to perform so we also go to the
> scalar loop.
> 
> For the "normal" exit niters has already been adjusted for peeling, for the
> early exits we must find out how many iterations we actually did.  So we have
> to recalculate the new position for each exit.
> 
> For the "inverse" case we essentially peel a vector iteration *after* the
> vector loop has finished, i.e. conceptually it's the same as vect epilogue
> peeling but without generating code for the peeled iteration.  That'll be
> handled by the scalar loop.
> 
> To do this we just adjust niters_vector_mult_vf and remove one VF, and for
> masked cases we do the same with final_iv.
> 
> The normal IV update code will then generate the correct values for us.
> Eventually VRP will simplify the constant bounds and we get the proper scalar
> unrolling.  This means we don't have to make any changes at all to
> vect_update_ivs_after_vectorizer other than dropping some asserts.
> 
> Ok for master?

Nice.  OK.

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       * tree-vect-loop-manip.cc (vect_set_loop_condition_partial_vectors,
>       vect_set_loop_condition_partial_vectors_avx512,
>       vect_gen_vector_loop_niters_mult_vf): Support peeling a vector
>       iteration.
>       (vect_update_ivs_after_vectorizer): Drop asserts.
>       (vect_do_peeling): Skip forwarder edge.
>       (vect_is_loop_exit_latch_pred): New.
>       * tree-vectorizer.h (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED): New.
> 
> --- inline copy of patch ---
> 
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 
> d61d7c3a189b279fc3bcbb58c3c0e32521db3cf8..476be8a0bb6da2d06c4ca7052cb07bacecca60b1
>  100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -951,7 +951,18 @@ vect_set_loop_condition_partial_vectors (class loop 
> *loop, edge exit_edge,
>  
>    if (final_iv)
>      {
> -      gassign *assign = gimple_build_assign (final_iv, orig_niters);
> +      gassign *assign;
> +      /* If vectorizing an inverted early break loop we have to restart the
> +      scalar loop at niters - vf.  This matches what we do in
> +      vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
> +      if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> +     {
> +       tree ftype = TREE_TYPE (orig_niters);
> +       tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
> +       assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
> +     }
> +       else
> +     assign = gimple_build_assign (final_iv, orig_niters);
>        gsi_insert_on_edge_immediate (exit_edge, assign);
>      }
>  
> @@ -1188,8 +1199,19 @@ vect_set_loop_condition_partial_vectors_avx512 (class 
> loop *loop,
>  
>    if (final_iv)
>      {
> -      gassign *assign = gimple_build_assign (final_iv, orig_niters);
> -      gsi_insert_on_edge_immediate (single_exit (loop), assign);
> +      gassign *assign;
> +      /* If vectorizing an inverted early break loop we have to restart the
> +      scalar loop at niters - vf.  This matches what we do in
> +      vect_gen_vector_loop_niters_mult_vf for non-masked loops.  */
> +      if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> +     {
> +       tree ftype = TREE_TYPE (orig_niters);
> +       tree vf = build_int_cst (ftype, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
> +       assign = gimple_build_assign (final_iv, MINUS_EXPR, orig_niters, vf);
> +     }
> +       else
> +     assign = gimple_build_assign (final_iv, orig_niters);
> +      gsi_insert_on_edge_immediate (exit_edge, assign);
>      }
>  
>    return cond_stmt;
> @@ -2157,11 +2179,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>    gphi_iterator gsi, gsi1;
>    class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>    basic_block update_bb = update_e->dest;
> -
>    basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
> -
> -  /* Make sure there exists a single-predecessor exit bb:  */
> -  gcc_assert (single_pred_p (exit_bb));
> +  gimple_stmt_iterator last_gsi = gsi_last_bb (exit_bb);
>  
>    for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis 
> (update_bb);
>         !gsi_end_p (gsi) && !gsi_end_p (gsi1);
> @@ -2171,7 +2190,6 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>        tree step_expr, off;
>        tree type;
>        tree var, ni, ni_name;
> -      gimple_stmt_iterator last_gsi;
>  
>        gphi *phi = gsi.phi ();
>        gphi *phi1 = gsi1.phi ();
> @@ -2207,7 +2225,8 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>       {
>         tree stype = TREE_TYPE (step_expr);
>         off = fold_build2 (MULT_EXPR, stype,
> -                          fold_convert (stype, niters), step_expr);
> +                            fold_convert (stype, niters), step_expr);
> +
>         if (POINTER_TYPE_P (type))
>           ni = fold_build_pointer_plus (init_expr, off);
>         else
> @@ -2226,9 +2245,9 @@ vect_update_ivs_after_vectorizer (loop_vec_info 
> loop_vinfo,
>  
>        var = create_tmp_var (type, "tmp");
>  
> -      last_gsi = gsi_last_bb (exit_bb);
>        gimple_seq new_stmts = NULL;
>        ni_name = force_gimple_operand (ni, &new_stmts, false, var);
> +
>        /* Exit_bb shouldn't be empty.  */
>        if (!gsi_end_p (last_gsi))
>       {
> @@ -2726,11 +2745,19 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info 
> loop_vinfo,
>    int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
>    tree type = TREE_TYPE (niters_vector);
>    tree log_vf = build_int_cst (type, exact_log2 (vf));
> +  tree tree_vf = build_int_cst (type, vf);
>    basic_block exit_bb = LOOP_VINFO_IV_EXIT (loop_vinfo)->dest;
>  
>    gcc_assert (niters_vector_mult_vf_ptr != NULL);
>    tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
>                                           niters_vector, log_vf);
> +
> +  /* If we've peeled a vector iteration then subtract one full vector
> +     iteration.  */
> +  if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo))
> +    niters_vector_mult_vf = fold_build2 (MINUS_EXPR, type,
> +                                      niters_vector_mult_vf, tree_vf);
> +
>    if (!is_gimple_val (niters_vector_mult_vf))
>      {
>        tree var = create_tmp_var (type, "niters_vector_mult_vf");
> @@ -3328,6 +3355,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree 
> niters, tree nitersm1,
>        niters_vector_mult_vf steps.  */
>        gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
>        update_e = skip_vector ? e : loop_preheader_edge (epilog);
> +      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +     update_e = single_succ_edge (e->dest);
> +
> +      /* Update the main exit.  */
>        vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
>                                       update_e);
>  
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 
> 39aa4d1250efe308acccf484d370f8adfd1ba843..de60da31e2a3030a7fbc302d3f676af9683fd019
>  100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -1016,6 +1016,8 @@ public:
>  #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
>  #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
>  #define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
> +#define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L)  \
> +  (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
>  #define LOOP_VINFO_EARLY_BRK_CONFLICT_STMTS(L) (L)->early_break_conflict
>  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
>  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
> @@ -2224,6 +2226,7 @@ extern dump_user_location_t find_loop_location (class 
> loop *);
>  extern bool vect_can_advance_ivs_p (loop_vec_info);
>  extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code);
>  extern edge vec_init_loop_exit_info (class loop *);
> +extern void vect_iv_increment_position (edge, gimple_stmt_iterator *, bool 
> *);
>  
>  /* In tree-vect-stmts.cc.  */
>  extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to