The following enables single-lane loop SLP discovery for non-grouped stores
and adjusts vectorizable_store to properly handle those.
For gfortran.dg/vect/vect-8.f90 we now vectorize one additional loop,
as we no longer run into the "not falling back to strided accesses"
bail-out; I have not investigated this in detail. The same applies to
gcc.dg/vect/slp-19c.c.
The gcc.dg/vect/O3-pr39675-2.c and gcc.dg/vect/slp-19[abc].c SLPs
depend on the load permute lowering, as the single-lane store we
now want to handle is fed from a single lane of groups of size four.
I've updated the expected number of SLPs, but the tests currently FAIL.
For gfortran.dg/vect/fast-math-mgrid-resid.f predictive commoning
now unrolls the loop; the vectorization factor is unchanged. I think
association during SLP build might be the reason for the difference.
There is a set of i386 target assembler test FAILs.
gcc.target/i386/pr88531-2[bc].c in particular fail because the
target cannot identify SLP emulated gathers; see another mail from me.
Others need adjustment; with this patch I've adjusted only one of
them (gcc.target/i386/vectorize1.c).
I will probably delay this patch a bit, until the load permute
lowering is good enough to be pushed.
* tree-vect-slp.cc (vect_analyze_slp): Perform single-lane
loop SLP discovery for non-grouped stores.
* tree-vect-stmts.cc (vectorizable_store): Always set
vec_num for SLP.
* gcc.dg/vect/O3-pr39675-2.c: Adjust expected number of SLP.
* gcc.dg/vect/fast-math-vect-call-1.c: Likewise.
* gcc.dg/vect/no-scevccp-slp-31.c: Likewise.
* gcc.dg/vect/slp-12b.c: Likewise.
* gcc.dg/vect/slp-12c.c: Likewise.
* gcc.dg/vect/slp-19a.c: Likewise.
* gcc.dg/vect/slp-19b.c: Likewise.
* gcc.dg/vect/slp-19c.c: Likewise.
* gcc.dg/vect/slp-4-big-array.c: Likewise.
* gcc.dg/vect/slp-4.c: Likewise.
* gcc.dg/vect/slp-5.c: Likewise.
* gcc.dg/vect/slp-7.c: Likewise.
* gcc.dg/vect/slp-perm-7.c: Likewise.
* gcc.dg/vect/slp-37.c: Likewise.
* gcc.dg/vect/vect-outer-slp-3.c: Disable vectorization of
initialization loop.
* gcc.dg/vect/slp-reduc-5.c: Likewise.
* gcc.dg/vect/no-scevccp-outer-12.c: Un-XFAIL. SLP can handle
inner loop inductions with multiple vector stmt copies.
* gfortran.dg/vect/vect-8.f90: Adjust expected number of
vectorized loops.
* gfortran.dg/vect/fast-math-mgrid-resid.f: Expect predictive
commoning with unrolling.
* gcc.target/i386/vectorize1.c: Adjust what we scan for.
---
gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c | 2 +-
.../gcc.dg/vect/fast-math-vect-call-1.c | 2 +-
.../gcc.dg/vect/no-scevccp-outer-12.c | 3 +--
gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c | 5 ++--
gcc/testsuite/gcc.dg/vect/slp-12b.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-12c.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-19a.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-19b.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-19c.c | 4 ++--
gcc/testsuite/gcc.dg/vect/slp-37.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-4-big-array.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-4.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-5.c | 2 +-
gcc/testsuite/gcc.dg/vect/slp-7.c | 4 ++--
gcc/testsuite/gcc.dg/vect/slp-perm-7.c | 4 ++--
gcc/testsuite/gcc.dg/vect/slp-reduc-5.c | 3 ++-
gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c | 1 +
gcc/testsuite/gcc.target/i386/vectorize1.c | 4 ++--
.../gfortran.dg/vect/fast-math-mgrid-resid.f | 2 +-
gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 2 +-
gcc/tree-vect-slp.cc | 23 +++++++++++++++++++
gcc/tree-vect-stmts.cc | 11 +++++----
22 files changed, 57 insertions(+), 29 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
b/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
index c3f0f6dc1be..ddaac56cc0b 100644
--- a/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
+++ b/gcc/testsuite/gcc.dg/vect/O3-pr39675-2.c
@@ -27,5 +27,5 @@ foo ()
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
vect_strided4 } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target vect_strided4 } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
index ad22f6e82b3..6c9b7c37b6e 100644
--- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
+++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c
@@ -101,4 +101,4 @@ main ()
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" { target {
vect_call_copysignf && vect_call_sqrtf } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {
target { { vect_call_copysignf && vect_call_sqrtf } && vect_perm3_int } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" {
target { { vect_call_copysignf && vect_call_sqrtf } && vect_perm3_int } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
index c2d3031bc0c..6ace6ad022e 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
@@ -46,5 +46,4 @@ int main (void)
return 0;
}
-/* Until we support multiple types in the inner loop */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail
{ ! { aarch64*-*-* riscv*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c
b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c
index 22817a57ef8..f6ac5f60298 100644
--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c
+++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-slp-31.c
@@ -53,6 +53,7 @@ int main (void)
return 0;
}
+/* We cannot handle grouped accesses in outer loops. */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" }
} */
-
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12b.c
b/gcc/testsuite/gcc.dg/vect/slp-12b.c
index e2ea24d6c53..8e06e3bfa93 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12b.c
@@ -47,6 +47,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {
vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target {
! { vect_strided2 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target { vect_strided2 && vect_int_mult } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target { vect_strided2 && vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {
target { ! { vect_strided2 && vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-12c.c
b/gcc/testsuite/gcc.dg/vect/slp-12c.c
index 9c48dff3bf4..a3536e3053b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-12c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-12c.c
@@ -49,5 +49,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {
vect_int_mult } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target {
! vect_int_mult } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_int_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {
target { ! vect_int_mult } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-19a.c
b/gcc/testsuite/gcc.dg/vect/slp-19a.c
index ca7a0a8e456..6c21416046d 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-19a.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19a.c
@@ -57,5 +57,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
vect_strided8 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target {
! vect_strided8 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_strided8 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target vect_strided8 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {
target { ! vect_strided8} } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-19b.c
b/gcc/testsuite/gcc.dg/vect/slp-19b.c
index 4d53ac698db..10b84aab3b5 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-19b.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19b.c
@@ -54,5 +54,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target {
! vect_strided4 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_strided4 } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target vect_strided4 } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {
target { ! vect_strided4 } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-19c.c
b/gcc/testsuite/gcc.dg/vect/slp-19c.c
index 188ab37a0b6..84869cadc89 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-19c.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-19c.c
@@ -105,5 +105,5 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" }
} */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-37.c
b/gcc/testsuite/gcc.dg/vect/slp-37.c
index caee2bb508f..8a430e63847 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-37.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-37.c
@@ -60,4 +60,4 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
vect_hw_misalign } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_hw_misalign } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target vect_hw_misalign } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c
b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c
index fcda45ff368..f738a613324 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-4-big-array.c
@@ -131,5 +131,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" }
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-4.c
b/gcc/testsuite/gcc.dg/vect/slp-4.c
index 29e741df02b..1ecad7415ef 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-4.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-4.c
@@ -125,5 +125,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" }
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 6 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-5.c
b/gcc/testsuite/gcc.dg/vect/slp-5.c
index 6d51f6a7323..484898c2afd 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-5.c
@@ -124,5 +124,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" }
} */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 5 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-7.c
b/gcc/testsuite/gcc.dg/vect/slp-7.c
index 2845a99dedf..f83fdc96d16 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-7.c
@@ -125,6 +125,6 @@ int main (void)
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target
vect_short_mult } } }*/
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target {
! { vect_short_mult } } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" {
target vect_short_mult } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target { ! { vect_short_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 5 "vect" {
target vect_short_mult } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" {
target { ! { vect_short_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
index df13c37bc75..c3d903e5b11 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
@@ -97,8 +97,8 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target
vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target { vect_perm3_int && { ! vect_load_lanes } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" {
target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" {
target { vect_perm3_int && { ! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "Built SLP cancelled: can use load/store-lanes"
"vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes }
} } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes }
} } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
index 11f5a7414cf..0cde79d9e49 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c
@@ -36,6 +36,7 @@ int main (void)
check_vect ();
+#pragma GCC novector
for (i = 0; i < N; i++)
c[i] = (i+3) * -1;
@@ -44,6 +45,6 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail
vect_no_int_min_max } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail
vect_no_int_min_max } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" {
xfail vect_no_int_min_max } } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 0 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c
b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c
index 3dce51426b5..d315db5632b 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-outer-slp-3.c
@@ -30,6 +30,7 @@ int main ()
{
check_vect ();
+#pragma GCC novector
for (int i = 0; i < 40; ++i)
image[i] = 1.;
diff --git a/gcc/testsuite/gcc.target/i386/vectorize1.c
b/gcc/testsuite/gcc.target/i386/vectorize1.c
index f3b9bfba382..14a8c5f28b3 100644
--- a/gcc/testsuite/gcc.target/i386/vectorize1.c
+++ b/gcc/testsuite/gcc.target/i386/vectorize1.c
@@ -1,6 +1,6 @@
/* PR middle-end/28915 */
/* { dg-do compile } */
-/* { dg-options "-msse -O2 -ftree-vectorize -fdump-tree-vect" } */
+/* { dg-options "-msse -O2 -ftree-vectorize -fdump-tree-vect-optimized" } */
extern char lanip[3][40];
typedef struct
@@ -17,4 +17,4 @@ int set_names (void)
tt1.t[ln] = lanip[1];
}
-/* { dg-final { scan-tree-dump "vect_cst" "vect" } } */
+/* { dg-final { scan-tree-dump "optimized: loop vectorized" "vect" } } */
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
index 2e548748296..9dda5087551 100644
--- a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
+++ b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
@@ -43,5 +43,5 @@ C
! vectorized loop. If vector factor is 2, the vectorized loop can
! be predictive commoned, we check if predictive commoning PHI node
! is created with vector(2) type.
-! { dg-final { scan-tree-dump "Executing predictive commoning without
unrolling" "pcom" { xfail vect_variable_length } } }
+! { dg-final { scan-tree-dump "Unrolling 2 times" "pcom" { xfail
vect_variable_length } } }
! { dg-final { scan-tree-dump "vectp_u.*__lsm.* = PHI <.*vectp_u.*__lsm"
"pcom" { xfail vect_variable_length } } }
diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
index f77ec9fb87a..283c36e0ebe 100644
--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
@@ -708,5 +708,5 @@ END SUBROUTINE kernel
! { dg-final { scan-tree-dump-times "vectorized 2\[56\] loops" 1 "vect" {
target aarch64_sve } } }
! { dg-final { scan-tree-dump-times "vectorized 2\[45\] loops" 1 "vect" {
target { aarch64*-*-* && { ! aarch64_sve } } } } }
-! { dg-final { scan-tree-dump-times "vectorized 2\[234\] loops" 1 "vect" {
target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
+! { dg-final { scan-tree-dump-times "vectorized 2\[345\] loops" 1 "vect" {
target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target {
{ ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 7e3d0107b4e..e1e47b786c2 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3845,6 +3845,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
+ loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
unsigned int i;
stmt_vec_info first_element;
slp_instance instance;
@@ -3861,6 +3862,28 @@ vect_analyze_slp (vec_info *vinfo, unsigned
max_tree_size)
vect_analyze_slp_instance (vinfo, bst_map, first_element,
slp_inst_kind_store, max_tree_size, &limit);
+ /* For loops also start SLP discovery from non-grouped stores. */
+ if (loop_vinfo)
+ {
+ data_reference_p dr;
+ FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
+ if (DR_IS_WRITE (dr))
+ {
+ stmt_vec_info stmt_info = vinfo->lookup_dr (dr)->stmt;
+ /* Grouped stores are already handled above. */
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ continue;
+ vec<stmt_vec_info> stmts;
+ vec<stmt_vec_info> roots = vNULL;
+ vec<tree> remain = vNULL;
+ stmts.create (1);
+ stmts.quick_push (stmt_info);
+ vect_build_slp_instance (vinfo, slp_inst_kind_store,
+ stmts, roots, remain, max_tree_size,
+ &limit, bst_map, NULL);
+ }
+ }
+
if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
{
for (unsigned i = 0; i < bb_vinfo->roots.length (); ++i)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5098b7fab6a..bd7dd149d11 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8387,10 +8387,12 @@ vectorizable_store (vec_info *vinfo,
return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt,
ncopies);
}
- if (grouped_store)
+ if (grouped_store || slp)
{
/* FORNOW */
- gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
+ gcc_assert (!grouped_store
+ || !loop
+ || !nested_in_vect_loop_p (loop, stmt_info));
if (slp)
{
@@ -8399,8 +8401,9 @@ vectorizable_store (vec_info *vinfo,
group. */
vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
- gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
- == first_stmt_info);
+ gcc_assert (!STMT_VINFO_GROUPED_ACCESS (first_stmt_info)
+ || (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
+ == first_stmt_info));
first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
op = vect_get_store_rhs (first_stmt_info);
}
--
2.35.3