[gcc r16-462] tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP

Richard Biener via Gcc-cvs Wed, 07 May 2025 23:37:47 -0700

https://gcc.gnu.org/g:9e85d056cd15befffb39d2f84902d21eda4d98eb


commit r16-462-g9e85d056cd15befffb39d2f84902d21eda4d98eb
Author: Richard Biener <rguent...@suse.de>
Date:   Tue May 6 13:29:42 2025 +0200

    tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP
    
    The following fixes the alignment analysis done by the VMAT_STRIDED_SLP
    code which, for the case of VF > 1, currently relies on dataref analysis
    that assumes consecutive accesses.  But code generation advances by
    DR_STEP between iterations, which requires us to verify that each
    individual DR_STEP preserves the alignment, rather than only
    VF * DR_STEP.  This allows us to use vector aligned accesses in some cases.
    
            PR tree-optimization/119589
            PR tree-optimization/119586
            PR tree-optimization/119155
            * tree-vect-stmts.cc (vectorizable_store): Verify
            DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when
            VF > 1 and VMAT_STRIDED_SLP.  Use vector aligned accesses when
            we can.
            (vectorizable_load): Likewise.

Diff:
---
 gcc/tree-vect-stmts.cc | 47 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index af7114d41923..a8762baa076c 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by step, if that is constant
+                and only aligned when performed VF times, DR alignment
+                analysis can analyze this as aligned since it assumes
+                contiguous accesses.  But that is not how we code generate
+                here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo,
              ltype = build_vector_type (elem_type, n);
              lvectype = vectype;
              int mis_align = dr_misalignment (first_dr_info, ltype);
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, ltype,
                                                 mis_align);
@@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  We'd want to use
-              if (alignment_support_scheme == dr_aligned)
-                align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
-            since doing that is what we assume we can in the above checks.
-            But this interferes with groups with gaps where for example
-            VF == 2 makes the group in the unrolled loop aligned but the
-            fact that we advance with step between the two subgroups
-            makes the access to the second unaligned.  See PR119586.
-            We have to anticipate that here or adjust code generation to
-            avoid the misaligned loads by means of permutations.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple stores.  */
          if (nstores > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
@@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo,
          if (n == const_nunits)
            {
              int mis_align = dr_misalignment (first_dr_info, vectype);
+             /* With VF > 1 we advance the DR by step, if that is constant
+                and only aligned when performed VF times, DR alignment
+                analysis can analyze this as aligned since it assumes
+                contiguous accesses.  But that is not how we code generate
+                here, so adjust for this.  */
+             if (maybe_gt (vf, 1u)
+                 && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                 DR_TARGET_ALIGNMENT (first_dr_info)))
+               mis_align = -1;
              dr_alignment_support dr_align
                = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
                                                 mis_align);
@@ -10838,6 +10853,10 @@ vectorizable_load (vec_info *vinfo,
                  if (VECTOR_TYPE_P (ptype))
                    {
                      mis_align = dr_misalignment (first_dr_info, ptype);
+                     if (maybe_gt (vf, 1u)
+                         && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr),
+                                         DR_TARGET_ALIGNMENT (first_dr_info)))
+                       mis_align = -1;
                      dr_align
                        = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
                                                         mis_align);
@@ -10857,8 +10876,10 @@ vectorizable_load (vec_info *vinfo,
                }
            }
          unsigned align;
-         /* ???  The above is still wrong, see vectorizable_store.  */
-         align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
          /* Alignment is at most the access size if we do multiple loads.  */
          if (nloads > 1)
            align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);

[gcc r16-462] tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP

Reply via email to