https://gcc.gnu.org/g:9e85d056cd15befffb39d2f84902d21eda4d98eb
commit r16-462-g9e85d056cd15befffb39d2f84902d21eda4d98eb Author: Richard Biener <rguent...@suse.de> Date: Tue May 6 13:29:42 2025 +0200 tree-optimization/119589 - alignment analysis for VF > 1 and VMAT_STRIDED_SLP The following fixes the alignment analysis done by the VMAT_STRIDED_SLP code which for the case of VF > 1 currently relies on dataref analysis which assumes consecutive accesses. But the code generation advances by DR_STEP between each iteration which requires us to assess that individual DR_STEP preserve the alignment rather than only VF * DR_STEP. This allows us to use vector aligned accesses in some cases. PR tree-optimization/119589 PR tree-optimization/119586 PR tree-optimization/119155 * tree-vect-stmts.cc (vectorizable_store): Verify DR_STEP_ALIGNMENT preserves DR_TARGET_ALIGNMENT when VF > 1 and VMAT_STRIDED_SLP. Use vector aligned accesses when we can. (vectorizable_load): Likewise. Diff: --- gcc/tree-vect-stmts.cc | 47 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index af7114d41923..a8762baa076c 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -8791,6 +8791,15 @@ vectorizable_store (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. */ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -8812,6 +8821,10 @@ vectorizable_store (vec_info *vinfo, ltype = build_vector_type (elem_type, n); lvectype = vectype; int mis_align = dr_misalignment (first_dr_info, ltype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ltype, mis_align); @@ -8872,17 +8885,10 @@ vectorizable_store (vec_info *vinfo, } } unsigned align; - /* ??? We'd want to use - if (alignment_support_scheme == dr_aligned) - align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); - since doing that is what we assume we can in the above checks. - But this interferes with groups with gaps where for example - VF == 2 makes the group in the unrolled loop aligned but the - fact that we advance with step between the two subgroups - makes the access to the second unaligned. See PR119586. - We have to anticipate that here or adjust code generation to - avoid the misaligned loads by means of permutations. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple stores. */ if (nstores > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align); @@ -10810,6 +10816,15 @@ vectorizable_load (vec_info *vinfo, if (n == const_nunits) { int mis_align = dr_misalignment (first_dr_info, vectype); + /* With VF > 1 we advance the DR by step, if that is constant + and only aligned when performed VF times, DR alignment + analysis can analyze this as aligned since it assumes + contiguous accesses. But that is not how we code generate + here, so adjust for this. */ + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_alignment_support dr_align = vect_supportable_dr_alignment (vinfo, dr_info, vectype, mis_align); @@ -10838,6 +10853,10 @@ vectorizable_load (vec_info *vinfo, if (VECTOR_TYPE_P (ptype)) { mis_align = dr_misalignment (first_dr_info, ptype); + if (maybe_gt (vf, 1u) + && !multiple_p (DR_STEP_ALIGNMENT (first_dr_info->dr), + DR_TARGET_ALIGNMENT (first_dr_info))) + mis_align = -1; dr_align = vect_supportable_dr_alignment (vinfo, dr_info, ptype, mis_align); @@ -10857,8 +10876,10 @@ vectorizable_load (vec_info *vinfo, } } unsigned align; - /* ??? The above is still wrong, see vectorizable_store. */ - align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); + if (alignment_support_scheme == dr_aligned) + align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info)); + else + align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info)); /* Alignment is at most the access size if we do multiple loads. */ if (nloads > 1) align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);