https://gcc.gnu.org/g:36b9c5e6f3301d3d0165f578d020dcd350cd516d
commit r15-4246-g36b9c5e6f3301d3d0165f578d020dcd350cd516d
Author: Richard Biener <rguent...@suse.de>
Date:   Thu Oct 10 14:00:11 2024 +0200

    Fix possible wrong-code with masked store-lanes

    When we're doing masked store-lanes one mask element applies to all
    stores of one struct element.  This requires uniform masks for all
    of the SLP lanes, something we already compute into
    STMT_VINFO_SLP_VECT_ONLY but fail to check when doing SLP store-lanes.
    The following corrects this.  The following also adjusts the
    store-lane heuristic to properly check for masked or non-masked
    optab support.

            * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Allow
            passing in of vectype, pass in whether the stores are masked
            and query the correct optab.
            (vect_build_slp_instance): Guard store-lanes query with
            ! STMT_VINFO_SLP_VECT_ONLY, guaranteeing a uniform mask.

Diff:
---
 gcc/tree-vect-slp.cc | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 96f1992cfbff..3024b87a1f83 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3504,17 +3504,22 @@ vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
 }
 
 /* STMT_INFO is a store group of size GROUP_SIZE that we are considering
-   splitting into two, with the first split group having size NEW_GROUP_SIZE.
+   vectorizing with VECTYPE that might be NULL.  MASKED_P indicates whether
+   the stores are masked.
    Return true if we could use IFN_STORE_LANES instead and if that appears
    to be the better approach.  */
 
 static bool
 vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
+			       tree vectype, bool masked_p,
 			       unsigned int group_size,
 			       unsigned int new_group_size)
 {
-  tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
-  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (!vectype)
+    {
+      tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
+      vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+    }
   if (!vectype)
     return false;
   /* Allow the split if one of the two new groups would operate on full
@@ -3528,7 +3533,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
   if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
       || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
     return false;
-  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
+  return vect_store_lanes_supported (vectype, group_size, masked_p) != IFN_LAST;
 }
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
@@ -3973,6 +3978,10 @@ vect_build_slp_instance (vec_info *vinfo,
 	  else if (is_a <loop_vec_info> (vinfo)
 		   && (group_size != 1 && i < group_size))
 	    {
+	      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
+	      bool masked_p = call
+		  && gimple_call_internal_p (call)
+		  && internal_fn_mask_index (gimple_call_internal_fn (call)) != -1;
 	      /* There are targets that cannot do even/odd interleaving schemes
 		 so they absolutely need to use load/store-lanes.  For now
 		 force single-lane SLP for them - they would be happy with
@@ -3987,9 +3996,10 @@ vect_build_slp_instance (vec_info *vinfo,
 	      bool want_store_lanes
 		= (! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
 		   && ! STMT_VINFO_STRIDED_P (stmt_info)
+		   && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
 		   && compare_step_with_zero (vinfo, stmt_info) > 0
-		   && vect_slp_prefer_store_lanes_p (vinfo, stmt_info,
-						     group_size, 1));
+		   && vect_slp_prefer_store_lanes_p (vinfo, stmt_info, NULL_TREE,
+						     masked_p, group_size, 1));
 	      if (want_store_lanes || force_single_lane)
 		i = 1;
 
@@ -4074,14 +4084,14 @@ vect_build_slp_instance (vec_info *vinfo,
 
 	      /* Now re-assess whether we want store lanes in case the
 		 discovery ended up producing all single-lane RHSs.  */
-	      if (rhs_common_nlanes == 1
+	      if (! want_store_lanes
+		  && rhs_common_nlanes == 1
 		  && ! STMT_VINFO_GATHER_SCATTER_P (stmt_info)
 		  && ! STMT_VINFO_STRIDED_P (stmt_info)
+		  && ! STMT_VINFO_SLP_VECT_ONLY (stmt_info)
 		  && compare_step_with_zero (vinfo, stmt_info) > 0
 		  && (vect_store_lanes_supported (SLP_TREE_VECTYPE (rhs_nodes[0]),
-						  group_size,
-						  SLP_TREE_CHILDREN
-						    (rhs_nodes[0]).length () != 1)
+						  group_size, masked_p)
 		      != IFN_LAST))
 		want_store_lanes = true;
 