Hi Richard,

I'm hitting a couple of testcase ICEs for RISC-V while testing with the latest trunk code, it looks like these two patches are causing it, can you help me look at it? ICE log like bellow:

➜ vsetvl git:(tintin-dev) ~/open-source/riscv-gnu-toolchain-golden/build/dev-rv64gcv_zfh-lp64d-medany-newlib-spike-debug-4/install/bin/riscv64-unknown-elf-gcc -march=rv64gcv_zfh -mabi=lp64d -mcmodel=medany -fdiagnostics-plain-output -ftree-vectorize -O3 --param riscv-autovec-lmul=m1 --param=riscv-autovec-preference=scalable -fno-vect-cost-model -ffast-math ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c -march=rv64gcv_zfh -mabi=lp64d -mcmodel=medany -fdiagnostics-plain-output -ftree-vectorize -O2 --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m1 -fno-vect-cost-model -ffast-math -mcmodel=medany -lm
during GIMPLE pass: vect
In file included from ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load_run-1.c:4: ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c: In function 'f_int16_t_8': ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:15:3: internal compiler error: in update_epilogue_loop_vinfo, at tree-vect-loop.cc:11376 ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:23:3: note: in expansion of macro 'TEST_LOOP' ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:31:3: note: in expansion of macro 'TEST_TYPE' ../../../riscv-gnu-toolchain-golden/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/gather-scatter/strided_load-1.c:41:1: note: in expansion of macro 'TEST_ALL'
0x1bf5838 update_epilogue_loop_vinfo
        ../../../../gcc/gcc/tree-vect-loop.cc:11375
0x1bf746f vect_transform_loop(_loop_vec_info*, gimple*)
        ../../../../gcc/gcc/tree-vect-loop.cc:11826
0x1c4e8a7 vect_transform_loops
        ../../../../gcc/gcc/tree-vectorizer.cc:1007
0x1c4eff9 try_vectorize_loop_1
        ../../../../gcc/gcc/tree-vectorizer.cc:1152
0x1c4f135 try_vectorize_loop
        ../../../../gcc/gcc/tree-vectorizer.cc:1184
0x1c4f3e5 execute
        ../../../../gcc/gcc/tree-vectorizer.cc:1298
Please submit a full bug report, with preprocessed source (by using -freport-bug).
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.


On 2023/10/19 19:47, Richard Biener wrote:
The following implements SLP vectorization support for gathers
without relying on IFNs being pattern detected (and supported by
the target).  That includes support for emulated gathers but also
the legacy x86 builtin path.

Bootstrapped and tested on x86_64-unknown-linux-gnu, will push.

Richard.

        PR tree-optimization/111131
        * tree-vect-loop.cc (update_epilogue_loop_vinfo): Make
        sure to update all gather/scatter stmt DRs, not only those
        that eventually got VMAT_GATHER_SCATTER set.
        * tree-vect-slp.cc (_slp_oprnd_info::first_gs_info): Add.
        (vect_get_and_check_slp_defs): Handle gathers/scatters,
        adding the offset as SLP operand and comparing base and scale.
        (vect_build_slp_tree_1): Handle gathers.
        (vect_build_slp_tree_2): Likewise.

        * gcc.dg/vect/vect-gather-1.c: Now expected to vectorize
        everywhere.
        * gcc.dg/vect/vect-gather-2.c: Expected to not SLP anywhere.
        Massage the scale case to more reliably produce a different
        one.  Scan for the specific messages.
        * gcc.dg/vect/vect-gather-3.c: Masked gather is also supported
        for AVX2, but not emulated.
        * gcc.dg/vect/vect-gather-4.c: Expected to not SLP anywhere.
        Massage to more properly ensure this.
        * gcc.dg/vect/tsvc/vect-tsvc-s353.c: Expect to vectorize
        everywhere.
---
  .../gcc.dg/vect/tsvc/vect-tsvc-s353.c         |  2 +-
  gcc/testsuite/gcc.dg/vect/vect-gather-1.c     |  2 +-
  gcc/testsuite/gcc.dg/vect/vect-gather-2.c     | 13 ++++--
  gcc/testsuite/gcc.dg/vect/vect-gather-3.c     |  2 +-
  gcc/testsuite/gcc.dg/vect/vect-gather-4.c     |  6 +--
  gcc/tree-vect-loop.cc                         |  6 ++-
  gcc/tree-vect-slp.cc                          | 45 +++++++++++++++++--
  7 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c 
b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
index 98ba7522471..2c4fa3f5991 100644
--- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
+++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s353.c
@@ -44,4 +44,4 @@ int main (int argc, char **argv)
    return 0;
  }
-/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! riscv_v } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
index e3bbf5c0bf8..5f6640d9ab6 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-1.c
@@ -58,4 +58,4 @@ main (void)
    return 0;
  }
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
index a1f6ba458a9..4c23b808333 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-2.c
@@ -8,6 +8,7 @@ f1 (int *restrict y, int *restrict x1, int *restrict x2,
  {
    for (int i = 0; i < N; ++i)
      {
+      /* Different base.  */
        y[i * 2] = x1[indices[i * 2]] + 1;
        y[i * 2 + 1] = x2[indices[i * 2 + 1]] + 2;
      }
@@ -18,8 +19,9 @@ f2 (int *restrict y, int *restrict x, int *restrict indices)
  {
    for (int i = 0; i < N; ++i)
      {
-      y[i * 2] = x[indices[i * 2]] + 1;
-      y[i * 2 + 1] = x[indices[i * 2 + 1] * 2] + 2;
+      /* Different scale.  */
+      y[i * 2] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2] * 4) + 
1;
+      y[i * 2 + 1] = *(int *)((char *)x + (__UINTPTR_TYPE__)indices[i * 2 + 1] 
* 2) + 2;
      }
  }
@@ -28,9 +30,12 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
  {
    for (int i = 0; i < N; ++i)
      {
+      /* Different type.  */
        y[i * 2] = x[indices[i * 2]] + 1;
-      y[i * 2 + 1] = x[(unsigned int) indices[i * 2 + 1]] + 2;
+      y[i * 2 + 1] = x[((unsigned int *) indices)[i * 2 + 1]] + 2;
      }
  }
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
+/* { dg-final { scan-tree-dump "different gather base" vect { target { ! 
vect_gather_load_ifn } } } } */
+/* { dg-final { scan-tree-dump "different gather scale" vect { target { ! 
vect_gather_load_ifn } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
index adfef3bf407..30ba6789e03 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-3.c
@@ -62,4 +62,4 @@ main (void)
    return 0;
  }
-/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { vect_gather_load_ifn && vect_masked_load } } } } */
+/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" vect { target { { 
vect_gather_load_ifn || avx2 } && vect_masked_load } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
index ee2e4e4999a..1ce63e69199 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-gather-4.c
@@ -39,10 +39,10 @@ f3 (int *restrict y, int *restrict x, int *restrict indices)
        y[i * 2] = (indices[i * 2] < N * 2
                  ? x[indices[i * 2]] + 1
                  : 1);
-      y[i * 2 + 1] = (indices[i * 2 + 1] < N * 2
-                     ? x[(unsigned int) indices[i * 2 + 1]] + 2
+      y[i * 2 + 1] = (((unsigned int *)indices)[i * 2 + 1] < N * 2
+                     ? x[((unsigned int *) indices)[i * 2 + 1]] + 2
                      : 2);
      }
  }
-/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect { target vect_gather_load_ifn } } } */
+/* { dg-final { scan-tree-dump-not "Loop contains only SLP stmts" vect } } */
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ebab1953b9c..8877ebde246 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11362,8 +11362,7 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree 
advance)
         updated offset we set using ADVANCE.  Instead we have to make sure the
         reference in the data references point to the corresponding copy of
         the original in the epilogue.  */
-      if (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize (stmt_vinfo))
-         == VMAT_GATHER_SCATTER)
+      if (STMT_VINFO_GATHER_SCATTER_P (vect_stmt_to_vectorize (stmt_vinfo)))
        {
          DR_REF (dr)
            = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE,
@@ -11372,6 +11371,9 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree 
advance)
            = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE,
                                     &find_in_mapping, &mapping);
        }
+      else
+       gcc_assert (STMT_VINFO_MEMORY_ACCESS_TYPE (vect_stmt_to_vectorize 
(stmt_vinfo))
+                   != VMAT_GATHER_SCATTER);
        DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo);
        stmt_vinfo->dr_aux.stmt = stmt_vinfo;
        /* The vector size of the epilogue is smaller than that of the main loop
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index d081999a763..8efff2e912d 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -283,10 +283,11 @@ typedef struct _slp_oprnd_info
    vec<tree> ops;
    /* Information about the first statement, its vector def-type, type, the
       operand itself in case it's constant, and an indication if it's a pattern
-     stmt.  */
+     stmt and gather/scatter info.  */
    tree first_op_type;
    enum vect_def_type first_dt;
    bool any_pattern;
+  gather_scatter_info first_gs_info;
  } *slp_oprnd_info;
@@ -609,6 +610,7 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
    unsigned int i, number_of_oprnds;
    enum vect_def_type dt = vect_uninitialized_def;
    slp_oprnd_info oprnd_info;
+  gather_scatter_info gs_info;
    unsigned int commutative_op = -1U;
    bool first = stmt_num == 0;
@@ -660,6 +662,19 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, oprnd_info = (*oprnds_info)[i]; + if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+       {
+         gcc_assert (number_of_oprnds == 1);
+         if (!is_a <loop_vec_info> (vinfo)
+             || !vect_check_gather_scatter (stmt_info,
+                                            as_a <loop_vec_info> (vinfo),
+                                            first ? &oprnd_info->first_gs_info
+                                            : &gs_info))
+           return -1;
+
+         oprnd = first ? oprnd_info->first_gs_info.offset : gs_info.offset;
+       }
+
        stmt_vec_info def_stmt_info;
        if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info))
        {
@@ -792,6 +807,25 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned 
char swap,
          return 1;
        }
+ if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+       {
+         if (!operand_equal_p (oprnd_info->first_gs_info.base,
+                               gs_info.base))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Build SLP failed: different gather base\n");
+             return 1;
+           }
+         if (oprnd_info->first_gs_info.scale != gs_info.scale)
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "Build SLP failed: different gather scale\n");
+             return 1;
+           }
+       }
+
        /* Not first stmt of the group, check that the def-stmt/s match
         the def-stmt/s of the first stmt.  Allow different definition
         types for reduction chains: the first stmt must be a
@@ -1235,6 +1269,9 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
                        || rhs_code == INDIRECT_REF
                        || rhs_code == COMPONENT_REF
                        || rhs_code == MEM_REF)))
+             || (ldst_p
+                 && (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
+                     != STMT_VINFO_GATHER_SCATTER_P (first_stmt_info)))
              || first_stmt_ldst_p != ldst_p
              || first_stmt_phi_p != phi_p)
            {
@@ -1357,12 +1394,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char 
*swap,
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))
              && rhs_code != CFN_GATHER_LOAD
              && rhs_code != CFN_MASK_GATHER_LOAD
+             && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)
              /* Not grouped loads are handled as externals for BB
                 vectorization.  For loop vectorization we can handle
                 splats the same we handle single element interleaving.  */
              && (is_a <bb_vec_info> (vinfo)
-                 || stmt_info != first_stmt_info
-                 || STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
+                 || stmt_info != first_stmt_info))
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
@@ -1858,6 +1895,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
        gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
                    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
                    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+      else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+       gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
        else
        {
          *max_nunits = this_max_nunits;

--
Best,
Lehua (RiVAI)
lehua.d...@rivai.ai

Reply via email to