https://gcc.gnu.org/g:7fffff1deb47a70ff804f0b2cce7be7e5fe8ba13

commit r15-7149-g7fffff1deb47a70ff804f0b2cce7be7e5fe8ba13
Author: Richard Biener <rguent...@suse.de>
Date:   Tue Jan 21 14:58:43 2025 +0100

    tree-optimization/118558 - fix alignment compute with VMAT_CONTIGUOUS_REVERSE
    
    There are calls to dr_misalignment left that do not correct for the
    offset (which is vector type dependent) when the stride is negative.
    Notably vect_known_alignment_in_bytes doesn't allow passing through
    such an offset, which the following adds (computing the offset in
    vect_known_alignment_in_bytes would be possible as well, but the
    offset can be shared as seen).  Eventually this function could go away.
    
    This leads to peeling for gaps not considered, nor shortening of the
    access applied which is what fixes the testcase on x86_64.
    
            PR tree-optimization/118558
            * tree-vectorizer.h (vect_known_alignment_in_bytes): Pass
            through offset to dr_misalignment.
            * tree-vect-stmts.cc (get_group_load_store_type): Compute
            offset applied for negative stride and use it when querying
            alignment of accesses.
            (vectorizable_load): Likewise.
    
            * gcc.dg/vect/pr118558.c: New testcase.

Diff:
---
 gcc/testsuite/gcc.dg/vect/pr118558.c | 15 +++++++++++++++
 gcc/tree-vect-stmts.cc               | 24 +++++++++++++++++-------
 gcc/tree-vectorizer.h                |  5 +++--
 3 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/pr118558.c b/gcc/testsuite/gcc.dg/vect/pr118558.c
new file mode 100644
index 000000000000..5483328d686b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr118558.c
@@ -0,0 +1,15 @@
+#include "tree-vect.h"
+
+static unsigned long g_270[5][2] = {{123}};
+static short g_2312 = 0;
+int main()
+{
+  check_vect ();
+  int g_1168 = 0;
+  unsigned t = 4;
+  for (g_1168 = 3; g_1168 >= 0; g_1168 -= 1)
+    for (g_2312 = 0; g_2312 <= 1; g_2312 += 1)
+      t = g_270[g_1168][0];
+  if (t != 123) __builtin_abort();
+}
+
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 21fb5cf5bd47..c0550acf6b2b 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -2198,14 +2198,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                               " non-consecutive accesses\n");
              return false;
            }
+
+         unsigned HOST_WIDE_INT dr_size
+           = vect_get_scalar_dr_size (first_dr_info);
+         poly_int64 off = 0;
+         if (*memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+           off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
+
          /* An overrun is fine if the trailing elements are smaller
             than the alignment boundary B.  Every vector access will
             be a multiple of B and so we are guaranteed to access a
             non-gap element in the same B-sized block.  */
          if (overrun_p
              && gap < (vect_known_alignment_in_bytes (first_dr_info,
-                                                      vectype)
-                       / vect_get_scalar_dr_size (first_dr_info)))
+                                                      vectype, off) / dr_size))
            overrun_p = false;
 
          /* When we have a contiguous access across loop iterations
@@ -2230,7 +2236,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
             by simply loading half of the vector only.  Usually
             the construction with an upper zero half will be elided.  */
          dr_alignment_support alss;
-         int misalign = dr_misalignment (first_dr_info, vectype);
+         int misalign = dr_misalignment (first_dr_info, vectype, off);
          tree half_vtype;
          poly_uint64 remain;
          unsigned HOST_WIDE_INT tem, num;
@@ -11991,8 +11997,14 @@ vectorizable_load (vec_info *vinfo,
                    tree ltype = vectype;
                    tree new_vtype = NULL_TREE;
                    unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
+                   unsigned HOST_WIDE_INT dr_size
+                     = vect_get_scalar_dr_size (first_dr_info);
+                   poly_int64 off = 0;
+                   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
+                     off = (TYPE_VECTOR_SUBPARTS (vectype) - 1) * -dr_size;
                    unsigned int vect_align
-                     = vect_known_alignment_in_bytes (first_dr_info, vectype);
+                     = vect_known_alignment_in_bytes (first_dr_info, vectype,
+                                                      off);
                    /* Try to use a single smaller load when we are about
                       to load excess elements compared to the unrolled
                       scalar loop.  */
@@ -12013,9 +12025,7 @@ vectorizable_load (vec_info *vinfo,
                             scalar loop.  */
                          ;
                        else if (known_gt (vect_align,
-                                          ((nunits - remain)
-                                           * vect_get_scalar_dr_size
-                                               (first_dr_info))))
+                                          ((nunits - remain) * dr_size)))
                          /* Aligned access to the gap area when there's
                             at least one element in it is OK.  */
                          ;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 79db02a39a8f..44d3a1d46c40 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2028,9 +2028,10 @@ known_alignment_for_access_p (dr_vec_info *dr_info, tree vectype)
    of DR_INFO is guaranteed to have.  */
 
 inline unsigned int
-vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype)
+vect_known_alignment_in_bytes (dr_vec_info *dr_info, tree vectype,
+                              poly_int64 offset = 0)
 {
-  int misalignment = dr_misalignment (dr_info, vectype);
+  int misalignment = dr_misalignment (dr_info, vectype, offset);
   if (misalignment == DR_MISALIGNMENT_UNKNOWN)
     return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
   else if (misalignment == 0)

Reply via email to