https://gcc.gnu.org/g:39e763ca876fba4449c0781f61115e063f481add

commit r15-5339-g39e763ca876fba4449c0781f61115e063f481add
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Nov 15 10:04:23 2024 +0100

    tree-optimization/117606 - SLP and single element interleaving
    
    The following tries to reduce the amount of difference between
    SLP and non-SLP for single-element interleaving load classification.
    
    This fixes another piece of fallout from --param vect-force-slp=1.
    
            PR tree-optimization/117606
            * tree-vect-stmts.cc (get_group_load_store_type): For single
            element interleaving also fall back to VMAT_ELEMENTWISE if
            a left-over permutation isn't supported.

Diff:
---
 gcc/tree-vect-stmts.cc | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index ab5ea038d1d4..7ffee2c4cc8b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2082,8 +2082,9 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
            *memory_access_type = VMAT_CONTIGUOUS;
 
          /* If this is single-element interleaving with an element
-            distance that leaves unused vector loads around punt - we
-            at least create very sub-optimal code in that case (and
+            distance that leaves unused vector loads around fall back
+            to elementwise access if possible - we otherwise at least
+            create very sub-optimal code in that case (and
             blow up memory, see PR65518).  */
          if (loop_vinfo
              && single_element_p
@@ -2110,6 +2111,28 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                }
            }
 
+         /* For single-element interleaving also fall back to elementwise
+            access in case we did not lower a permutation and cannot
+            code generate it.  */
+         auto_vec<tree> temv;
+         unsigned n_perms;
+         if (loop_vinfo
+             && single_element_p
+             && SLP_TREE_LANES (slp_node) == 1
+             && (*memory_access_type == VMAT_CONTIGUOUS
+                 || *memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+             && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
+             && !vect_transform_slp_perm_load
+                   (loop_vinfo, slp_node, temv, NULL,
+                    LOOP_VINFO_VECT_FACTOR (loop_vinfo), true, &n_perms))
+           {
+             *memory_access_type = VMAT_ELEMENTWISE;
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "single-element interleaving permutation not "
+                                "supported, using elementwise access\n");
+           }
+
          overrun_p = (loop_vinfo && gap != 0
                       && *memory_access_type != VMAT_ELEMENTWISE);
          if (overrun_p && vls_type != VLS_LOAD)

Reply via email to