On Mon, Jan 4, 2021 at 12:50 PM Richard Sandiford via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> The IFN_MASK* functions take two leading arguments: a load or
> store pointer and a “cookie”. The type of the cookie is the
> type of the access for TBAA purposes (like for MEM_REFs)
> while the value of the cookie is the alignment of the access.
> This PR was caused by a disagreement about whether the alignment
> is measured in bits or bytes.
>
> It looks like this goes back to PR68786, which made the
> vectoriser create its own cookie argument rather than reusing
> the one created by ifcvt. The alignment value of the new cookie
> was measured in bytes (as needed by set_ptr_info_alignment)
> while the existing code expected it to be measured in bits.
> The folds I added for IFN_MASK_LOAD and STORE then made
> things worse.
>
> Tested on aarch64-linux-gnu, aarch64_be-elf and x86_64-linux-gnu.
> OK for the explow bits?
OK. Richard. > Richard > > > gcc/ > PR tree-vectorization/95401 > * config/aarch64/aarch64-sve-builtins.cc > (gimple_folder::load_store_cookie): Use bits rather than bytes > for the alignment argument to IFN_MASK_LOAD and IFN_MASK_STORE. > * gimple-fold.c (gimple_fold_mask_load_store_mem_ref): Likewise. > * tree-vect-stmts.c (vectorizable_store): Likewise. > (vectorizable_load): Likewise. > > gcc/testsuite/ > PR tree-vectorization/95401 > * g++.dg/vect/pr95401.cc: New test. > * g++.dg/vect/pr95401a.cc: Likewise. > --- > gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +- > gcc/gimple-fold.c | 2 +- > gcc/testsuite/g++.dg/vect/pr95401.cc | 13 +++++++++++++ > gcc/testsuite/g++.dg/vect/pr95401a.cc | 13 +++++++++++++ > gcc/tree-vect-stmts.c | 14 ++++++++------ > 5 files changed, 36 insertions(+), 8 deletions(-) > create mode 100644 gcc/testsuite/g++.dg/vect/pr95401.cc > create mode 100644 gcc/testsuite/g++.dg/vect/pr95401a.cc > > diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc > b/gcc/config/aarch64/aarch64-sve-builtins.cc > index e73aa9ad8a9..6589438855a 100644 > --- a/gcc/config/aarch64/aarch64-sve-builtins.cc > +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc > @@ -2580,7 +2580,7 @@ gimple_folder::fold_contiguous_base (gimple_seq &stmts, > tree vectype) > tree > gimple_folder::load_store_cookie (tree type) > { > - return build_int_cst (build_pointer_type (type), TYPE_ALIGN_UNIT (type)); > + return build_int_cst (build_pointer_type (type), TYPE_ALIGN (type)); > } > > /* Fold the call to a call to INSTANCE, with the same arguments. 
*/ > diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c > index 3148c6b84d9..1f3b7d0818d 100644 > --- a/gcc/gimple-fold.c > +++ b/gcc/gimple-fold.c > @@ -5201,7 +5201,7 @@ gimple_fold_mask_load_store_mem_ref (gcall *call, tree > vectype) > if (!tree_fits_uhwi_p (alias_align) || !integer_all_onesp (mask)) > return NULL_TREE; > > - unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align) * BITS_PER_UNIT; > + unsigned HOST_WIDE_INT align = tree_to_uhwi (alias_align); > if (TYPE_ALIGN (vectype) != align) > vectype = build_aligned_type (vectype, align); > tree offset = build_zero_cst (TREE_TYPE (alias_align)); > diff --git a/gcc/testsuite/g++.dg/vect/pr95401.cc > b/gcc/testsuite/g++.dg/vect/pr95401.cc > new file mode 100644 > index 00000000000..6a56dab0957 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/vect/pr95401.cc > @@ -0,0 +1,13 @@ > +// { dg-additional-options "-mavx2 -O3" { target avx2_runtime } } > +// { dg-additional-sources pr95401a.cc } > + > +extern int var_9; > +extern unsigned var_14; > +extern int arr_16[]; > +#include <algorithm> > +void test() { > + for (short a = 0; a < (short)var_9; a += 12140) > + for (short b = 0; b < 8; b++) > + if (std::max(var_14, 1U)) > + arr_16[a + b] = 0; > +} > diff --git a/gcc/testsuite/g++.dg/vect/pr95401a.cc > b/gcc/testsuite/g++.dg/vect/pr95401a.cc > new file mode 100644 > index 00000000000..71b054c7621 > --- /dev/null > +++ b/gcc/testsuite/g++.dg/vect/pr95401a.cc > @@ -0,0 +1,13 @@ > +// { dg-do compile } > + > +#include "../../gcc.dg/vect/tree-vect.h" > + > +int var_9 = 1693986256, var_14; > +int arr_16[11]; > +void test(); > +int main() > +{ > + check_vect(); > + test(); > + return 0; > +} > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 11737a38a56..90822aba0ee 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -7964,7 +7964,7 @@ vectorizable_store (vec_info *vinfo, > /* Emit: > MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, > VEC_ARRAY). 
*/ > - unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); > + unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, > dataref_ptr, alias_ptr, > @@ -8079,7 +8079,7 @@ vectorizable_store (vec_info *vinfo, > if (final_mask) > { > align = least_bit_hwi (misalign | align); > - tree ptr = build_int_cst (ref_type, align); > + tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > gcall *call > = gimple_build_call_internal (IFN_MASK_STORE, 4, > dataref_ptr, ptr, > @@ -8094,7 +8094,7 @@ vectorizable_store (vec_info *vinfo, > = vect_get_loop_len (loop_vinfo, loop_lens, > vec_num * ncopies, vec_num * j + i); > align = least_bit_hwi (misalign | align); > - tree ptr = build_int_cst (ref_type, align); > + tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT); > machine_mode vmode = TYPE_MODE (vectype); > opt_machine_mode new_ovmode > = get_len_load_store_mode (vmode, false); > @@ -9235,7 +9235,7 @@ vectorizable_load (vec_info *vinfo, > /* Emit: > VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, > VEC_MASK). 
*/ > - unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype)); > + unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype)); > tree alias_ptr = build_int_cst (ref_type, align); > call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3, > dataref_ptr, alias_ptr, > @@ -9336,7 +9336,8 @@ vectorizable_load (vec_info *vinfo, > if (final_mask) > { > align = least_bit_hwi (misalign | align); > - tree ptr = build_int_cst (ref_type, align); > + tree ptr = build_int_cst (ref_type, > + align * BITS_PER_UNIT); > gcall *call > = gimple_build_call_internal (IFN_MASK_LOAD, 3, > dataref_ptr, ptr, > @@ -9352,7 +9353,8 @@ vectorizable_load (vec_info *vinfo, > vec_num * ncopies, > vec_num * j + i); > align = least_bit_hwi (misalign | align); > - tree ptr = build_int_cst (ref_type, align); > + tree ptr = build_int_cst (ref_type, > + align * BITS_PER_UNIT); > gcall *call > = gimple_build_call_internal (IFN_LEN_LOAD, 3, > dataref_ptr, ptr,