https://gcc.gnu.org/g:e2d9709cdc50d68ed0e662634d8608c6f8491888

commit e2d9709cdc50d68ed0e662634d8608c6f8491888
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Sep 29 12:54:17 2023 +0200

    Add --param vect-single-lane-slp
    
    The following adds --param vect-single-lane-slp to guard single-lane
    loop SLP discovery.  As a first client we look at non-grouped stores,
    asserting that SLP discovery succeeds so that gaps in it are exposed.
    
            * params.opt (-param=vect-single-lane-slp=): New.
            * tree-vect-slp.cc (vect_analyze_slp): Perform single-lane
            loop SLP discovery for non-grouped stores if requested.

Diff:
---
 gcc/params.opt       |  4 ++++
 gcc/tree-vect-slp.cc | 26 ++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/gcc/params.opt b/gcc/params.opt
index 74ea9c6f8d93..4cde5c3015ae 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1198,6 +1198,10 @@ The maximum factor which the loop vectorizer applies to the cost of statements i
 Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 1) Param Optimization
 Enable loop vectorization of floating point inductions.
 
+-param=vect-single-lane-slp=
+Common Joined UInteger Var(param_vect_single_lane_slp) Init(0) IntegerRange(0, 1) Param Optimization
+Enable single lane SLP discovery.
+
 -param=vect-force-slp=
 Common Joined UInteger Var(param_vect_force_slp) Init(0) IntegerRange(0, 1) Param Optimization
 Fail vectorization when falling back to non-SLP.
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f34ed54a70b0..66c8fa38979f 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3643,6 +3643,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
 opt_result
 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
 {
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   unsigned int i;
   stmt_vec_info first_element;
   slp_instance instance;
@@ -3658,6 +3659,31 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
   FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
     vect_analyze_slp_instance (vinfo, bst_map, first_element,
                               slp_inst_kind_store, max_tree_size, &limit);
+  if (loop_vinfo && param_vect_single_lane_slp != 0)
+    {
+      data_reference_p dr;
+      FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
+       if (DR_IS_WRITE (dr))
+         {
+           stmt_vec_info stmt_info = vinfo->lookup_dr (dr)->stmt;
+           /* It works a bit to dissolve the group but that's
+              not really what we want to do.  Instead group analysis
+              above starts discovery for each lane and pieces them together
+              to a single store to the whole group.  */
+           if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+             continue;
+           vec<stmt_vec_info> stmts;
+           vec<stmt_vec_info> roots = vNULL;
+           vec<tree> remain = vNULL;
+           stmts.create (1);
+           stmts.quick_push (stmt_info);
+           bool res = vect_build_slp_instance (vinfo, slp_inst_kind_store,
+                                               stmts, roots, remain,
+                                               max_tree_size, &limit,
+                                               bst_map, NULL);
+           gcc_assert (res);
+         }
+    }
 
   if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
     {

Reply via email to