Not recording costs for a zero count of statements avoids confusing the backends. Bootstrapped on x86_64-unknown-linux-gnu; testing is in progress.
Richard.

	* tree-vect-slp.cc (vectorizable_bb_reduc_epilogue): Do not cost
	zero remaining scalar stmts.
	(vectorizable_slp_permutation): Do not cost zero actual permutations.
	* tree-vect-stmts.cc (vectorizable_load): Likewise.
---
 gcc/tree-vect-slp.cc   | 7 ++++---
 gcc/tree-vect-stmts.cc | 5 +++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 6258a8eb53d..59bca1dfce7 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -8701,8 +8701,9 @@ vectorizable_bb_reduc_epilogue (slp_instance instance,
 
   /* Since we replace all stmts of a possibly longer scalar reduction
      chain account for the extra scalar stmts for that. */
-  record_stmt_cost (cost_vec, instance->remain_defs.length (), scalar_stmt,
-		    instance->root_stmts[0], 0, vect_body);
+  if (!instance->remain_defs.is_empty ())
+    record_stmt_cost (cost_vec, instance->remain_defs.length (), scalar_stmt,
+		      instance->root_stmts[0], 0, vect_body);
   return true;
 }
 
@@ -11370,7 +11371,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
   if (nperms < 0)
     return false;
 
-  if (!gsi)
+  if (!gsi && nperms != 0)
     record_stmt_cost (cost_vec, nperms, vec_perm, node, vectype, 0, vect_body);
 
   return true;
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index edc669ba3e4..9fcc2fd0849 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -11387,8 +11387,9 @@ vectorizable_load (vec_info *vinfo,
 	    {
 	      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
 					    true, &n_perms, nullptr);
-	      inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
-					      slp_node, 0, vect_body);
+	      if (n_perms != 0)
+		inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
+						slp_node, 0, vect_body);
 	    }
 	  else
 	    {
-- 
2.51.0