https://gcc.gnu.org/g:b423891ad43d003a565e7b5c6ed648e446bd3c7c

commit b423891ad43d003a565e7b5c6ed648e446bd3c7c
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Feb 23 11:45:50 2024 +0100

    Do single-lane SLP discovery for reductions
    
    The following performs single-lane SLP discovery for reductions.
    This exposes a latent issue with reduction SLP in outer loop
    vectorization and makes gcc.dg/vect/vect-outer-4[fgkl].c FAIL
    execution.
    
            * tree-vect-slp.cc (vect_build_slp_tree_2): Only multi-lane
            discoveries are reduction chains and need special backedge
            treatment.
            (vect_analyze_slp): Fall back to single-lane SLP discovery
            for reductions. Make sure to try single-lane SLP reduction
            for all reductions as fallback.

Diff:
---
 gcc/tree-vect-slp.cc | 58 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ecc185aae885..f39cde3a8d50 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1918,7 +1918,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
            /* Reduction chain backedge defs are filled manually.
               ???  Need a better way to identify a SLP reduction chain PHI.
               Or a better overall way to SLP match those.  */
-           if (all_same && def_type == vect_reduction_def)
+           if (stmts.length () > 1
+               && all_same && def_type == vect_reduction_def)
              skip_args[loop_latch_edge (loop)->dest_idx] = true;
          }
        else if (def_type != vect_internal_def)
@@ -3911,7 +3912,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
          }
 
       /* Find SLP sequences starting from groups of reductions.  */
-      if (loop_vinfo->reductions.length () > 1)
+      if (loop_vinfo->reductions.length () > 0)
        {
          /* Collect reduction statements.  */
          vec<stmt_vec_info> scalar_stmts;
@@ -3934,17 +3935,54 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
                          && gimple_assign_rhs_code (g) != WIDEN_SUM_EXPR
                          && gimple_assign_rhs_code (g) != SAD_EXPR)))
                scalar_stmts.quick_push (next_info);
+             else if (param_vect_single_lane_slp != 0)
+               {
+                 vec<stmt_vec_info> stmts;
+                 vec<stmt_vec_info> roots = vNULL;
+                 vec<tree> remain = vNULL;
+                 stmts.create (1);
+                 stmts.quick_push (next_info);
+                 bool res = vect_build_slp_instance (vinfo,
+                                                     slp_inst_kind_reduc_group,
+                                                     stmts, roots, remain,
+                                                     max_tree_size, &limit,
+                                                     bst_map, NULL);
+                 gcc_assert (res);
+               }
            }
-         if (scalar_stmts.length () > 1)
+         vec<stmt_vec_info> roots = vNULL;
+         vec<tree> remain = vNULL;
+         vec<stmt_vec_info> saved_stmts = vNULL;
+         if (param_vect_single_lane_slp != 0)
+           /* ???  scalar_stmts ownership and arg passing sucks.  */
+           saved_stmts = scalar_stmts.copy ();
+         if ((scalar_stmts.length () <= 1
+              || !vect_build_slp_instance (loop_vinfo,
+                                           slp_inst_kind_reduc_group,
+                                           scalar_stmts, roots, remain,
+                                           max_tree_size, &limit, bst_map,
+                                           NULL))
+             && param_vect_single_lane_slp != 0)
            {
-             vec<stmt_vec_info> roots = vNULL;
-             vec<tree> remain = vNULL;
-             vect_build_slp_instance (loop_vinfo, slp_inst_kind_reduc_group,
-                                      scalar_stmts, roots, remain,
-                                      max_tree_size, &limit, bst_map, NULL);
+             if (scalar_stmts.length () <= 1)
+               scalar_stmts.release ();
+             /* Do SLP discovery for single-lane reductions.  */
+             for (auto stmt_info : saved_stmts)
+               {
+                 vec<stmt_vec_info> stmts;
+                 vec<stmt_vec_info> roots = vNULL;
+                 vec<tree> remain = vNULL;
+                 stmts.create (1);
+                 stmts.quick_push (vect_stmt_to_vectorize (stmt_info));
+                 bool res = vect_build_slp_instance (vinfo,
+                                                     slp_inst_kind_reduc_group,
+                                                     stmts, roots, remain,
+                                                     max_tree_size, &limit,
+                                                     bst_map, NULL);
+                 gcc_assert (res);
+               }
+             saved_stmts.release ();
            }
-         else
-           scalar_stmts.release ();
        }
     }

Reply via email to