https://gcc.gnu.org/g:c1b21855e131bb818aedc953f403812b494917fc
commit r12-10449-gc1b21855e131bb818aedc953f403812b494917fc
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Mar 18 12:39:03 2024 +0100

    tree-optimization/114375 - disallow SLP discovery of permuted mask loads

    We cannot currently handle permutations of mask loads in code
    generation or permute optimization. But we simply drop any
    permutation on the floor, so the following instead rejects the
    SLP build rather than producing wrong-code. I've also made sure
    to reject them in vectorizable_load for completeness.

            PR tree-optimization/114375
            * tree-vect-slp.cc (vect_build_slp_tree_2): Compute the load
            permutation for masked loads but reject it when any such is
            necessary.
            * tree-vect-stmts.cc (vectorizable_load): Reject masked
            VMAT_ELEMENTWISE and VMAT_STRIDED_SLP as those are not
            supported.

            * gcc.dg/vect/vect-pr114375.c: New testcase.

    (cherry picked from commit 4f2a35a76cca503749c696e7772d2e8eadc77ba5)

Diff:
---
 gcc/testsuite/gcc.dg/vect/vect-pr114375.c | 44 +++++++++++++++++++++++++++++++
 gcc/tree-vect-slp.cc                      | 34 +++++++++++++++++++-----
 gcc/tree-vect-stmts.cc                    |  8 ++++++
 3 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-pr114375.c b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
new file mode 100644
index 000000000000..1e1cb0123d07
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-pr114375.c
@@ -0,0 +1,44 @@
+/* { dg-additional-options "-mavx2" { target avx2_runtime } } */
+
+#include "tree-vect.h"
+
+int a[512];
+int b[512];
+int c[512];
+
+void __attribute__((noipa))
+foo(int * __restrict p)
+{
+  for (int i = 0; i < 64; ++i)
+    {
+      int tem = 2, tem2 = 2;
+      if (a[4*i + 1])
+        tem = p[4*i];
+      if (a[4*i])
+        tem2 = p[4*i + 2];
+      b[2*i] = tem2;
+      b[2*i+1] = tem;
+      if (a[4*i + 2])
+        tem = p[4*i + 1];
+      if (a[4*i + 3])
+        tem2 = p[4*i + 3];
+      c[2*i] = tem2;
+      c[2*i+1] = tem;
+    }
+}
+int main()
+{
+  check_vect ();
+
+  for (int i = 0; i < 512; ++i)
+    a[i] = (i >> 1) & 1;
+
+  foo (a);
+
+  if (c[0] != 1 || c[1] != 0 || c[2] != 1 || c[3] != 0
+      || b[0] != 2 || b[1] != 2 || b[2] != 2 || b[3] != 2)
+    abort ();
+
+  return 0;
+}
+
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f33e85337abd..26c989cbff9a 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1722,10 +1722,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
       && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
     {
-      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
-        gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
-                    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
-                    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+        gcc_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
       else
         {
           *max_nunits = this_max_nunits;
@@ -1741,15 +1739,37 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
           load_permutation.create (group_size);
           stmt_vec_info first_stmt_info
             = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
+          bool any_permute = false;
           FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
             {
               int load_place = vect_get_place_in_interleaving_chain
                   (load_info, first_stmt_info);
               gcc_assert (load_place != -1);
-              load_permutation.safe_push (load_place);
+              any_permute |= load_place != j;
+              load_permutation.quick_push (load_place);
+            }
+
+          if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
+            {
+              gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+                          || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
+                          || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
+              load_permutation.release ();
+              /* We cannot handle permuted masked loads, see PR114375. */
+              if (any_permute
+                  || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+                      && DR_GROUP_SIZE (first_stmt_info) != group_size)
+                  || STMT_VINFO_STRIDED_P (stmt_info))
+                {
+                  matches[0] = false;
+                  return NULL;
+                }
+            }
+          else
+            {
+              SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
+              return node;
             }
-          SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
-          return node;
         }
     }
   else if (gimple_assign_single_p (stmt_info->stmt)
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 34920041116b..b1ab4bce7d28 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -8994,6 +8994,14 @@ vectorizable_load (vec_info *vinfo,
                              "unsupported masked emulated gather.\n");
           return false;
         }
+      else if (memory_access_type == VMAT_ELEMENTWISE
+               || memory_access_type == VMAT_STRIDED_SLP)
+        {
+          if (dump_enabled_p ())
+            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                             "unsupported masked strided access.\n");
+          return false;
+        }
     }
 
   if (!vec_stmt) /* transformation not required. */