https://gcc.gnu.org/g:bcccc3221b838ee7ae7848e7194603acb18294b3
commit r15-4235-gbcccc3221b838ee7ae7848e7194603acb18294b3
Author: Richard Biener <rguent...@suse.de>
Date:   Wed Oct 9 15:31:59 2024 +0200

    Allow SLP store of mixed external and constant

    vect_build_slp_tree_1 rejected this during SLP discovery because it
    ran into the rhs code comparison code for stores.  The following
    skips that completely for loads and stores as those are handled
    later anyway.

    This needs a heuristic adjustment in vect_get_and_check_slp_defs
    to avoid fallout with regard to BB vectorization and splitting
    of a store group vs. demoting one operand to external.

    gcc.dg/Wstringop-overflow-47.c needs adjustment given we now have
    vast improvements for code generation.  gcc.dg/strlenopt-32.c
    needs adjustment because the strlen pass doesn't handle

      _11 = {0, b_6(D)};
      __builtin_memcpy (&a, "foo.bar", 8);
      MEM <vector(2) char> [(char *)&a + 3B] = _11;
      _9 = strlen (&a);

    I have opened PR117057 for this.

            * tree-vect-slp.cc (vect_build_slp_tree_1): Do not compare
            RHS codes for loads or stores.
            (vect_get_and_check_slp_defs): Only demote operand to external
            in case there is more than one operand.

            * gcc.dg/vect/slp-57.c: New testcase.
            * gcc.dg/Wstringop-overflow-47.c: Adjust.
            * gcc.dg/strlenopt-32.c: XFAIL parts.
Diff: --- gcc/testsuite/gcc.dg/Wstringop-overflow-47.c | 6 +++--- gcc/testsuite/gcc.dg/strlenopt-32.c | 3 ++- gcc/testsuite/gcc.dg/vect/slp-57.c | 14 ++++++++++++++ gcc/tree-vect-slp.cc | 24 +++++++----------------- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c index 9fb78e55046f..aa5402a060f3 100644 --- a/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c +++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-47.c @@ -31,15 +31,15 @@ void nowarn_c32 (char c) void warn_c32 (char c) { - extern char warn_a32[32]; // { dg-message "at offset (32|1) into destination object 'warn_a32' of size 32" "pr97027" } + extern char warn_a32[32]; // { dg-message "at offset (32|1|17) into destination object 'warn_a32' of size 32" "pr97027" } void *p = warn_a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|16 bytes|32 bytes) into a region of size (0|15|31)" "pr97027" } /* Verify a local variable too. 
*/ char a32[32]; p = a32 + 1; - *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|32 bytes) into a region of size (0|31)" "pr97027" } + *(C32*)p = (C32){ c }; // { dg-warning "writing (1 byte|16 bytes|32 bytes) into a region of size (0|15|31)" "pr97027" } sink (p); } diff --git a/gcc/testsuite/gcc.dg/strlenopt-32.c b/gcc/testsuite/gcc.dg/strlenopt-32.c index 4220314fb3f0..c53168570fdb 100644 --- a/gcc/testsuite/gcc.dg/strlenopt-32.c +++ b/gcc/testsuite/gcc.dg/strlenopt-32.c @@ -190,4 +190,5 @@ main () return 0; } -/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen1" { xfail vect_slp_v2qi_store_unalign } } } */ +/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen1" { target vect_slp_v2qi_store_unalign } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/slp-57.c b/gcc/testsuite/gcc.dg/vect/slp-57.c new file mode 100644 index 000000000000..a35c4ef62030 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-57.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ + +int a[1024]; +void foo (int x) +{ + for (int i = 0; i < 1024; i += 2) + { + a[i] = x; + a[i+1] = 1; + } +} + +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 9bb765e2cbac..8b53b0fdb16d 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -905,7 +905,8 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, } if (is_a <bb_vec_info> (vinfo) - && !oprnd_info->any_pattern) + && !oprnd_info->any_pattern + && number_of_oprnds > 1) { /* Now for commutative ops we should see whether we can make the other operand matching. */ @@ -1305,10 +1306,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, /* Mismatch. 
*/ continue; } - if (first_stmt_code != rhs_code + if (!ldst_p + && first_stmt_code != rhs_code && alt_stmt_code == ERROR_MARK) alt_stmt_code = rhs_code; - if ((first_stmt_code != rhs_code + if ((!ldst_p + && first_stmt_code != rhs_code && (first_stmt_code != IMAGPART_EXPR || rhs_code != REALPART_EXPR) && (first_stmt_code != REALPART_EXPR @@ -1325,20 +1328,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, && (TREE_CODE_CLASS (tree_code (first_stmt_code)) == tcc_comparison) && (swap_tree_comparison (tree_code (first_stmt_code)) - == tree_code (rhs_code))) - && !(STMT_VINFO_GROUPED_ACCESS (stmt_info) - && (first_stmt_code == ARRAY_REF - || first_stmt_code == BIT_FIELD_REF - || first_stmt_code == COMPONENT_REF - || first_stmt_code == REALPART_EXPR - || first_stmt_code == IMAGPART_EXPR - || first_stmt_code == MEM_REF) - && (rhs_code == ARRAY_REF - || rhs_code == BIT_FIELD_REF - || rhs_code == COMPONENT_REF - || rhs_code == REALPART_EXPR - || rhs_code == IMAGPART_EXPR - || rhs_code == MEM_REF))) + == tree_code (rhs_code)))) || (ldst_p && (STMT_VINFO_GROUPED_ACCESS (stmt_info) != STMT_VINFO_GROUPED_ACCESS (first_stmt_info)))