https://gcc.gnu.org/g:0d52a736a3856871761c89fefa59c2ff39f3a5f8

commit r16-2426-g0d52a736a3856871761c89fefa59c2ff39f3a5f8
Author: Karl Meakin <karl.mea...@arm.com>
Date:   Tue Jul 15 14:49:58 2025 +0000

    middle-end: Enable masked load with non-constant offset
    
    The function `vect_check_gather_scatter` requires the `base` of the load
    to be loop-invariant and the `off`set to be not loop-invariant. When faced
    with a scenario where `base` is not loop-invariant, instead of giving up
    immediately we can try swapping the `base` and `off`, if `off` is
    actually loop-invariant.
    
    Previously, it would only swap if `off` was the constant zero (and so
    trivially loop-invariant). This is too conservative: we can still
    perform the swap if `off` is a more complex but still loop-invariant
    expression, such as a variable defined outside of the loop.
    
    This allows loops like the function below to be vectorised, if the
    target has masked loads and sufficiently large vector registers (eg
    `-march=armv8-a+sve -msve-vector-bits=128`):
    
    ```c
    typedef struct Array {
        int elems[3];
    } Array;
    
    int loop(Array **pp, int len, int idx) {
        int nRet = 0;
    
        for (int i = 0; i < len; i++) {
            Array *p = pp[i];
            if (p) {
                nRet += p->elems[idx];
            }
        }
    
        return nRet;
    }
    ```
    
    gcc/ChangeLog:
            * tree-vect-data-refs.cc (vect_check_gather_scatter): Swap
            `base` and `off` in more scenarios. Also assert at the end of
            the function that `base` and `off` are loop-invariant and not
            loop-invariant respectively.
    
    gcc/testsuite/ChangeLog:
            * gcc.target/aarch64/sve/mask_load_2.c: Update tests.

Diff:
---
 gcc/testsuite/gcc.target/aarch64/sve/mask_load_2.c |  4 ++--
 gcc/tree-vect-data-refs.cc                         | 26 +++++++++-------------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_2.c
index 38fcf4f72060..66d95101a146 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_2.c
@@ -19,5 +19,5 @@ int loop(Array **pp, int len, int idx) {
     return nRet;
 }
 
-// { dg-final { scan-assembler-times {ld1w\tz[0-9]+\.d, p[0-7]/z} 0 } }
-// { dg-final { scan-assembler-times {add\tz[0-9]+\.s, p[0-7]/m}  0 } }
+// { dg-final { scan-assembler-times {ld1w\tz[0-9]+\.d, p[0-7]/z} 1 } }
+// { dg-final { scan-assembler-times {add\tz[0-9]+\.s, p[0-7]/m}  1 } }
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a24ddfbc3841..460a48db2e60 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -4667,26 +4667,19 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
   if (off == NULL_TREE)
     off = size_zero_node;
 
-  /* If base is not loop invariant, either off is 0, then we start with just
-     the constant offset in the loop invariant BASE and continue with base
-     as OFF, otherwise give up.
-     We could handle that case by gimplifying the addition of base + off
-     into some SSA_NAME and use that as off, but for now punt.  */
+  /* BASE must be loop invariant.  If it is not invariant, but OFF is, then we
+   * can fix that by swapping BASE and OFF.  */
   if (!expr_invariant_in_loop_p (loop, base))
     {
-      if (!integer_zerop (off))
+      if (!expr_invariant_in_loop_p (loop, off))
        return false;
-      off = base;
-      base = size_int (pbytepos);
-    }
-  /* Otherwise put base + constant offset into the loop invariant BASE
-     and continue with OFF.  */
-  else
-    {
-      base = fold_convert (sizetype, base);
-      base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+
+      std::swap (base, off);
     }
 
+  base = fold_convert (sizetype, base);
+  base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
+
   /* OFF at this point may be either a SSA_NAME or some tree expression
      from get_inner_reference.  Try to peel off loop invariants from it
      into BASE as long as possible.  */
@@ -4864,6 +4857,9 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
loop_vec_info loop_vinfo,
       offset_vectype = NULL_TREE;
     }
 
+  gcc_checking_assert (expr_invariant_in_loop_p (loop, base));
+  gcc_checking_assert (!expr_invariant_in_loop_p (loop, off));
+
   info->ifn = ifn;
   info->decl = decl;
   info->base = base;

Reply via email to