https://gcc.gnu.org/g:adb14c7625178b501389c2d7d7c2feec37da7a19

commit r15-8047-gadb14c7625178b501389c2d7d7c2feec37da7a19
Author: Richard Biener <rguent...@suse.de>
Date:   Fri Mar 7 12:57:42 2025 +0100

    tree-optimization/119155 - wrong aligned access for vectorized packed access
    
    When doing strided SLP vectorization we use the wrong alignment for
    the possibly piecewise accesses to the vector elements, for both
    loads and stores.  We carefully use element-aligned loads and stores,
    but that is not enough when the original scalar accesses are packed.
    The following change instead honors a larger alignment when one is
    present and otherwise correctly falls back to the alignment of the
    original scalar access.
    
            PR tree-optimization/119155
            * tree-vect-stmts.cc (vectorizable_store): Do not always
            use vector element alignment for VMAT_STRIDED_SLP but
            a more correct alignment towards both ends.
            (vectorizable_load): Likewise.
    
            * gcc.dg/vect/pr119155.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr119155.c | 26 ++++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc               | 25 +++++++++++++++++++++----
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr119155.c b/gcc/testsuite/gcc.dg/vect/pr119155.c
new file mode 100644
index 000000000000..b860cf24b0fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr119155.c
@@ -0,0 +1,26 @@
+#include <stdlib.h>
+#include "tree-vect.h"
+
+struct s { int x; } __attribute__((packed));
+
+void __attribute__((noipa))
+f (char *xc, char *yc, int z)
+{
+  for (int i = 0; i < 100; ++i)
+    {
+      struct s *x = (struct s *) xc;
+      struct s *y = (struct s *) yc;
+      x->x += y->x;
+      xc += z;
+      yc += z;
+    }
+}
+
+int main ()
+{
+  check_vect ();
+  char *x = malloc (100 * sizeof (struct s) + 1);
+  char *y = malloc (100 * sizeof (struct s) + 1);
+  f (x + 1, y + 1, sizeof (struct s));
+  return 0;
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index f894787f7bfb..17e3b1db894a 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8904,7 +8904,15 @@ vectorizable_store (vec_info *vinfo,
                    }
                }
            }
-         ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
+         unsigned align;
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         /* Alignment is at most the access size if we do multiple stores.  */
+         if (nstores > 1)
+           align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+         ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
          ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
        }
 
@@ -10851,7 +10859,7 @@ vectorizable_load (vec_info *vinfo,
                                                  &ptype);
              if (vtype != NULL_TREE)
                {
-                 dr_alignment_support dr_align = dr_aligned;
+                 dr_alignment_support dr_align;
                  int mis_align = 0;
                  if (VECTOR_TYPE_P (ptype))
                    {
@@ -10860,6 +10868,8 @@ vectorizable_load (vec_info *vinfo,
                        = vect_supportable_dr_alignment (vinfo, dr_info, ptype,
                                                         mis_align);
                    }
+                 else
+                   dr_align = dr_unaligned_supported;
                  if (dr_align == dr_aligned
                      || dr_align == dr_unaligned_supported)
                    {
@@ -10872,8 +10882,15 @@ vectorizable_load (vec_info *vinfo,
                    }
                }
            }
-         /* Else fall back to the default element-wise access.  */
-         ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
+         unsigned align;
+         if (alignment_support_scheme == dr_aligned)
+           align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
+         else
+           align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
+         /* Alignment is at most the access size if we do multiple loads.  */
+         if (nloads > 1)
+           align = MIN (tree_to_uhwi (TYPE_SIZE_UNIT (ltype)), align);
+         ltype = build_aligned_type (ltype, align * BITS_PER_UNIT);
        }
 
       if (slp)

Reply via email to