https://gcc.gnu.org/g:664e0ce580a8f20a78aa355c42222e4647841f77
commit r15-3735-g664e0ce580a8f20a78aa355c42222e4647841f77 Author: Richard Biener <rguent...@suse.de> Date: Fri Sep 20 12:17:22 2024 +0200 Fall back to elementwise access for too spaced SLP single element interleaving gcc.dg/vect/vect-pr111779.c is a case where non-SLP manages to vectorize using VMAT_ELEMENTWISE but SLP currently refuses because doing a regular access with permutes would cause excess vector loads with at most one element used. The following makes us fall back to elementwise accesses for that, too. * tree-vect-stmts.cc (get_group_load_store_type): Fall back to VMAT_ELEMENTWISE when single element interleaving of a too large group. (vectorizable_load): Do not try to verify load permutations when using VMAT_ELEMENTWISE for single-lane SLP and fix code generation for this case. * gfortran.dg/vect/vect-8.f90: Allow one more vectorized loop. Diff: --- gcc/testsuite/gfortran.dg/vect/vect-8.f90 | 2 +- gcc/tree-vect-stmts.cc | 37 ++++++++++++++++++------------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 index 2a3fa90740e3..918eddee292f 100644 --- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 @@ -708,5 +708,5 @@ END SUBROUTINE kernel ! { dg-final { scan-tree-dump-times "vectorized 2\[56\] loops" 1 "vect" { target aarch64_sve } } } ! { dg-final { scan-tree-dump-times "vectorized 2\[45\] loops" 1 "vect" { target { aarch64*-*-* && { ! aarch64_sve } } } } } -! { dg-final { scan-tree-dump-times "vectorized 2\[345\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } +! { dg-final { scan-tree-dump-times "vectorized 2\[3456\] loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } } diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 33cdccae7849..45003f762ddf 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -2190,11 +2190,12 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, && single_element_p && maybe_gt (group_size, TYPE_VECTOR_SUBPARTS (vectype))) { + *memory_access_type = VMAT_ELEMENTWISE; if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "single-element interleaving not supported " - "for not adjacent vector loads\n"); - return false; + "for not adjacent vector loads, using " + "elementwise access\n"); } } } @@ -10039,7 +10040,23 @@ vectorizable_load (vec_info *vinfo, else group_size = 1; - if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) + vect_memory_access_type memory_access_type; + enum dr_alignment_support alignment_support_scheme; + int misalignment; + poly_int64 poffset; + internal_fn lanes_ifn; + if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, + ncopies, &memory_access_type, &poffset, + &alignment_support_scheme, &misalignment, &gs_info, + &lanes_ifn)) + return false; + + /* ??? The following checks should really be part of + get_group_load_store_type. */ + if (slp + && SLP_TREE_LOAD_PERMUTATION (slp_node).exists () + && !(memory_access_type == VMAT_ELEMENTWISE + && SLP_TREE_LANES (slp_node) == 1)) { slp_perm = true; @@ -10079,17 +10096,6 @@ vectorizable_load (vec_info *vinfo, } } - vect_memory_access_type memory_access_type; - enum dr_alignment_support alignment_support_scheme; - int misalignment; - poly_int64 poffset; - internal_fn lanes_ifn; - if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD, - ncopies, &memory_access_type, &poffset, - &alignment_support_scheme, &misalignment, &gs_info, - &lanes_ifn)) - return false; - if (slp_node && slp_node->ldst_lanes && memory_access_type != VMAT_LOAD_STORE_LANES) @@ -10292,7 +10298,8 @@ vectorizable_load (vec_info *vinfo, first_dr_info = dr_info; } - if (slp && grouped_load) + if (slp && grouped_load + && memory_access_type == VMAT_STRIDED_SLP) { group_size = DR_GROUP_SIZE (first_stmt_info); ref_type = get_group_alias_ptr_type (first_stmt_info);