[PATCH] tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP

Richard Biener Tue, 06 May 2025 05:52:24 -0700

The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
code, which for the case of VF > 1 currently relies on dataref analysis
that assumes consecutive accesses.  But code generation advances by
DR_STEP between each iteration, which requires us to verify that each
individual DR_STEP advance preserves the alignment, rather than only
the combined advance of VF * DR_STEP.  With that check in place we can
use vector aligned accesses in some cases.


Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Richard.

        PR tree-optimization/119589
        PR tree-optimization/119586
        PR tree-optimization/119155
        * tree-vect-stmts.cc (vectorizable_store): Verify
        DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
        VF > 1 and VMAT_STRIDED_SLP.  Use vector aligned accesses when
        we can.
        (vectorizable_load): Likewise.
---
 gcc/tree-vect-stmts.cc | 47 ++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 0ec3b4fb606..93ab9f9a284 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8845,6 +8845,15 @@ vectorizable_store (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by step, if that is constant
+                and only aligned when performed VF times, DR alignment
+                analysis can analyze this as aligned since it assumes
+                contiguous accesses.  But that is not how we code generate
+                here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -8866,6 +8875,10 @@ vectorizable_store (vec_info *vinfo,
              ltype = build_vector_type (elem_type, n);
              lvectype = vectype;
              int mis_align = dr_misalignment (first_dr_info, ltype);
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
                                                 mis_align);
@@ -8926,17 +8939,10 @@ vectorizable_store (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  We'd want to use
-              if (alignment_support_scheme == dr_aligned)
-                align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-            since doing that is what we assume we can in the above checks.
-            But this interferes with groups with gaps where for example
-            VF == 2 makes the group in the unrolled loop aligned but the
-            fact that we advance with step between the two subgroups
-            makes the access to the second unaligned.  See PR119586.
-            We have to anticipate that here or adjust code generation to
-            avoid the misaligned loads by means of permutations.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple stores.  */
          if (nstores > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10864,6 +10870,15 @@ vectorizable_load (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by step, if that is constant
+                and only aligned when performed VF times, DR alignment
+                analysis can analyze this as aligned since it assumes
+                contiguous accesses.  But that is not how we code generate
+                here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -10892,6 +10907,10 @@ vectorizable_load (vec_info *vinfo,
                  if (VECTOR_TYPE_P (ptype))
                    {
                      mis_align = dr_misalignment (first_dr_info, ptype);
+                     if (maybe_gt (vf, 1u)
+                         && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                         DR_TARGET_ALIGNMENT (first_dr_info)))
+                       mis_align = -1;
                      dr_align
                        = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
                                                         mis_align);
@@ -10911,8 +10930,10 @@ vectorizable_load (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  The above is still wrong, see vectorizable_store.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple loads.  */
          if (nloads > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
-- 
2.43.0

[PATCH] tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP

Reply via email to