This patch adds simple misalignment checks for gather/scatter
operations.  Previously, we assumed that those perform element accesses
internally, so alignment does not matter.  The RISC-V vector spec,
however, explicitly states that vector operations are allowed to fault
on element-misaligned accesses.  Reasonable uarchs won't, but...
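For illustration (a hypothetical testcase, not part of this patch), a
loop like the following is vectorized as a gather load whose unsigned
int elements are only guaranteed 2-byte alignment because of the packed
layout, so a uarch that faults on element-misaligned vector accesses
could trap on it:

  /* Hypothetical example: the gathered 4-byte members are only
     2-byte aligned.  */
  struct __attribute__ ((packed)) s { unsigned short pad; unsigned int val; };

  unsigned int
  sum (struct s *restrict a, int *restrict idx, int n)
  {
    unsigned int res = 0;
    for (int i = 0; i < n; i++)
      res += a[idx[i]].val;  /* Indexed access -> gather load.  */
    return res;
  }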
For gather/scatter we have two paths in the vectorizer:

 (1) Regular analysis based on datarefs.  Here we can also create
     strided loads.
 (2) Non-affine access where each gather index is relative to the
     initial address.

The assumption this patch works off is that once the alignment for the
first scalar is correct, all others will fall in line, as the index is
always a multiple of the first element's size.

For (1) we have a dataref and can check it for alignment as in other
cases.  For (2) this patch checks the object alignment of BASE and
compares it against the natural alignment of the current vectype's
unit.

The patch also adds a pointer argument to the gather/scatter IFNs that
contains the necessary alignment.  Most of the patch is thus mechanical
in that it merely adjusts indices; see the sketch below the ChangeLog
for the resulting operand layout.

I tested the riscv version with a custom qemu version that faults on
element-misaligned vector accesses.  With this patch applied, there is
just a single fault left, which is due to PR120782 and will be
addressed separately.

Bootstrapped and regtested on x86 and aarch64.
Regtested on rv64gcv_zvl512b with and without unaligned vector support.

gcc/ChangeLog:

	* internal-fn.cc (internal_fn_len_index): Adjust indices for new
	alias_ptr param.
	(internal_fn_else_index): Ditto.
	(internal_fn_mask_index): Ditto.
	(internal_fn_stored_value_index): Ditto.
	(internal_fn_alias_ptr_index): New function.
	(internal_fn_offset_index): Adjust indices for new alias_ptr
	param.
	(internal_fn_scale_index): Ditto.
	(internal_gather_scatter_fn_supported_p): Ditto.
	* internal-fn.h (internal_fn_alias_ptr_index): Declare.
	* optabs-query.cc (supports_vec_gather_load_p): Adjust indices
	for new alias_ptr param.
	* tree-vect-data-refs.cc (vect_describe_gather_scatter_call):
	Add alias pointer.
	(vect_check_gather_scatter): Ditto.
	* tree-vect-patterns.cc (vect_recog_gather_scatter_pattern):
	Add alias pointer.
	* tree-vect-slp.cc (vect_get_operand_map): Adjust for alias
	pointer.
	* tree-vect-stmts.cc (vect_truncate_gather_scatter_offset): Add
	alias pointer and misalignment handling.
	(get_load_store_type): Move gather/scatter handling from here...
	(get_group_load_store_type): ...to here.
	(vectorizable_store): Add alias pointer.
	(vectorizable_load): Ditto.
	* tree-vectorizer.h (struct gather_scatter_info): Ditto.
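As a sketch of the resulting operand layout (illustrative only; the
operand names simply mirror the variables used in the patch), the
masked gather pattern changes from

  .MASK_GATHER_LOAD (base, offset, scale, zero, mask, vec_els)

to

  .MASK_GATHER_LOAD (base, alias_ptr, offset, scale, zero, mask, vec_els)

i.e. the alias pointer becomes operand 1 and all later operand indices
shift by one, which is what the internal-fn.cc index adjustments
reflect.  The alias pointer itself is an INTEGER_CST of the access's
reference alias pointer type whose value is the object alignment of the
scalar access.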
--- gcc/internal-fn.cc | 43 ++++++-- gcc/internal-fn.h | 1 + gcc/optabs-query.cc | 6 +- gcc/tree-vect-data-refs.cc | 7 ++ gcc/tree-vect-patterns.cc | 17 +-- gcc/tree-vect-slp.cc | 16 +-- gcc/tree-vect-stmts.cc | 214 +++++++++++++++++++++++-------------- gcc/tree-vectorizer.h | 4 + 8 files changed, 198 insertions(+), 110 deletions(-) diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 4a9dc26e836..6c0155e4c63 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4940,11 +4940,13 @@ internal_fn_len_index (internal_fn fn) return 2; case IFN_MASK_LEN_SCATTER_STORE: + return 6; + case IFN_MASK_LEN_STRIDED_LOAD: return 5; case IFN_MASK_LEN_GATHER_LOAD: - return 6; + return 7; case IFN_COND_LEN_FMA: case IFN_COND_LEN_FMS: @@ -5048,7 +5050,7 @@ internal_fn_else_index (internal_fn fn) case IFN_MASK_GATHER_LOAD: case IFN_MASK_LEN_GATHER_LOAD: - return 5; + return 6; default: return -1; @@ -5083,7 +5085,7 @@ internal_fn_mask_index (internal_fn fn) case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_GATHER_LOAD: case IFN_MASK_LEN_SCATTER_STORE: - return 4; + return 5; case IFN_VCOND_MASK: case IFN_VCOND_MASK_LEN: @@ -5108,10 +5110,11 @@ internal_fn_stored_value_index (internal_fn fn) case IFN_MASK_STORE: case IFN_MASK_STORE_LANES: + return 3; case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_SCATTER_STORE: - return 3; + return 4; case IFN_LEN_STORE: return 4; @@ -5125,6 +5128,28 @@ internal_fn_stored_value_index (internal_fn fn) } } +/* If FN has an alias pointer return its index, otherwise return -1. */ + +int +internal_fn_alias_ptr_index (internal_fn fn) +{ + switch (fn) + { + case IFN_MASK_LOAD: + case IFN_MASK_LEN_LOAD: + case IFN_GATHER_LOAD: + case IFN_MASK_GATHER_LOAD: + case IFN_MASK_LEN_GATHER_LOAD: + case IFN_SCATTER_STORE: + case IFN_MASK_SCATTER_STORE: + case IFN_MASK_LEN_SCATTER_STORE: + return 1; + + default: + return -1; + } +} + /* If FN is a gather/scatter return the index of its offset argument, otherwise return -1. */ @@ -5142,7 +5167,7 @@ internal_fn_offset_index (internal_fn fn) case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_SCATTER_STORE: - return 1; + return 2; default: return -1; @@ -5166,7 +5191,7 @@ internal_fn_scale_index (internal_fn fn) case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_SCATTER_STORE: - return 2; + return 3; default: return -1; @@ -5250,13 +5275,9 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type, && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p)) && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)); - /* For gather the optab's operand indices do not match the IFN's because - the latter does not have the extension operand (operand 3). It is - implicitly added during expansion so we use the IFN's else index + 1. 
- */ if (ok && elsvals) get_supported_else_vals - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals); + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals); return ok; } diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index c5b533c0abd..d190d718240 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -241,6 +241,7 @@ extern int internal_fn_else_index (internal_fn); extern int internal_fn_stored_value_index (internal_fn); extern int internal_fn_offset_index (internal_fn fn); extern int internal_fn_scale_index (internal_fn fn); +extern int internal_fn_alias_ptr_index (internal_fn fn); extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree, tree, tree, int, vec<int> * = nullptr); diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index f5ca98da818..5335d0d8401 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -719,13 +719,9 @@ supports_vec_gather_load_p (machine_mode mode, vec<int> *elsvals) = (icode != CODE_FOR_nothing) ? 1 : -1; } - /* For gather the optab's operand indices do not match the IFN's because - the latter does not have the extension operand (operand 3). It is - implicitly added during expansion so we use the IFN's else index + 1. - */ if (elsvals && icode != CODE_FOR_nothing) get_supported_else_vals - (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD) + 1, *elsvals); + (icode, internal_fn_else_index (IFN_MASK_GATHER_LOAD), *elsvals); return this_fn_optabs->supports_vec_gather_load[mode] > 0; } diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 019f0b6ca36..3414d518e17 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -4539,6 +4539,8 @@ vect_describe_gather_scatter_call (stmt_vec_info stmt_info, info->ifn = gimple_call_internal_fn (call); info->decl = NULL_TREE; info->base = gimple_call_arg (call, 0); + info->alias_ptr = gimple_call_arg + (call, internal_fn_alias_ptr_index (info->ifn)); info->offset = gimple_call_arg (call, internal_fn_offset_index (info->ifn)); info->offset_dt = vect_unknown_def_type; @@ -4869,6 +4871,11 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, info->ifn = ifn; info->decl = decl; info->base = base; + + info->alias_ptr = build_int_cst + (reference_alias_ptr_type (DR_REF (dr)), + get_object_alignment (DR_REF (dr))); + info->offset = off; info->offset_dt = vect_unknown_def_type; info->offset_vectype = offset_vectype; diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 0f6d6b77ea1..f0ddbf9660c 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -6042,12 +6042,14 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, tree vec_els = vect_get_mask_load_else (elsval, TREE_TYPE (gs_vectype)); - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, base, + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 7, base, + gs_info.alias_ptr, offset, scale, zero, mask, vec_els); } else - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base, + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base, + gs_info.alias_ptr, offset, scale, zero); tree lhs = gimple_get_lhs (stmt_info->stmt); tree load_lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); @@ -6057,12 +6059,13 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo, { tree rhs = vect_get_store_rhs (stmt_info); if (mask != NULL) - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, - base, offset, scale, rhs, - mask); + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 6, + base, 
gs_info.alias_ptr, + offset, scale, rhs, mask); else - pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, - base, offset, scale, rhs); + pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, + base, gs_info.alias_ptr, + offset, scale, rhs); } gimple_call_set_nothrow (pattern_stmt, true); diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 0c95ed946bb..30dd886d305 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -511,11 +511,11 @@ vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb) static const int no_arg_map[] = { 0 }; static const int arg0_map[] = { 1, 0 }; -static const int arg1_map[] = { 1, 1 }; +static const int arg2_map[] = { 1, 2 }; static const int arg2_arg3_map[] = { 2, 2, 3 }; -static const int arg1_arg3_map[] = { 2, 1, 3 }; -static const int arg1_arg4_arg5_map[] = { 3, 1, 4, 5 }; -static const int arg1_arg3_arg4_map[] = { 3, 1, 3, 4 }; +static const int arg2_arg4_map[] = { 2, 2, 4 }; +static const int arg2_arg5_arg6_map[] = { 3, 2, 5, 6 }; +static const int arg2_arg4_arg5_map[] = { 3, 2, 4, 5 }; static const int arg3_arg2_map[] = { 2, 3, 2 }; static const int op1_op0_map[] = { 2, 1, 0 }; static const int off_map[] = { 1, GATHER_SCATTER_OFFSET }; @@ -570,18 +570,18 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false, return gather_scatter_p ? off_arg2_arg3_map : arg2_arg3_map; case IFN_GATHER_LOAD: - return arg1_map; + return arg2_map; case IFN_MASK_GATHER_LOAD: case IFN_MASK_LEN_GATHER_LOAD: - return arg1_arg4_arg5_map; + return arg2_arg5_arg6_map; case IFN_SCATTER_STORE: - return arg1_arg3_map; + return arg2_arg4_map; case IFN_MASK_SCATTER_STORE: case IFN_MASK_LEN_SCATTER_STORE: - return arg1_arg3_arg4_map; + return arg2_arg4_arg5_map; case IFN_MASK_STORE: return gather_scatter_p ? off_arg3_arg2_map : arg3_arg2_map; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 57942f43c3b..9b524becb88 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -1803,6 +1803,9 @@ vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info, /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET, but we don't need to store that here. */ gs_info->base = NULL_TREE; + gs_info->alias_ptr = build_int_cst + (reference_alias_ptr_type (DR_REF (dr)), + get_object_alignment (DR_REF (dr))); gs_info->element_type = TREE_TYPE (vectype); gs_info->offset = fold_convert (offset_type, step); gs_info->offset_dt = vect_constant_def; @@ -2106,7 +2109,7 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, separated by the stride, until we have a complete vector. Fall back to scalar accesses if that isn't possible. */ *memory_access_type = VMAT_STRIDED_SLP; - else + else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info)) { int cmp = compare_step_with_zero (vinfo, stmt_info); if (cmp < 0) @@ -2349,19 +2352,71 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, allows us to use contiguous accesses. 
*/ if ((*memory_access_type == VMAT_ELEMENTWISE || *memory_access_type == VMAT_STRIDED_SLP) + && !STMT_VINFO_GATHER_SCATTER_P (stmt_info) && single_element_p && SLP_TREE_LANES (slp_node) == 1 && loop_vinfo && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo, masked_p, gs_info, elsvals)) *memory_access_type = VMAT_GATHER_SCATTER; + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + { + *memory_access_type = VMAT_GATHER_SCATTER; + if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, + elsvals)) + gcc_unreachable (); + /* When using internal functions, we rely on pattern recognition + to convert the type of the offset to the type that the target + requires, with the result being a call to an internal function. + If that failed for some reason (e.g. because another pattern + took priority), just handle cases in which the offset already + has the right type. */ + else if (GATHER_SCATTER_IFN_P (*gs_info) + && !is_gimple_call (stmt_info->stmt) + && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), + TREE_TYPE (gs_info->offset_vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "%s offset requires a conversion\n", + vls_type == VLS_LOAD ? "gather" : "scatter"); + return false; + } + else if (!vect_is_simple_use (gs_info->offset, vinfo, + &gs_info->offset_dt, + &gs_info->offset_vectype)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "%s index use not simple.\n", + vls_type == VLS_LOAD ? "gather" : "scatter"); + return false; + } + else if (GATHER_SCATTER_EMULATED_P (*gs_info)) + { + if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () + || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () + || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) + || !constant_multiple_p (TYPE_VECTOR_SUBPARTS + (gs_info->offset_vectype), + TYPE_VECTOR_SUBPARTS (vectype))) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "unsupported vector types for emulated " + "gather.\n"); + return false; + } + } + } if (*memory_access_type == VMAT_CONTIGUOUS_DOWN || *memory_access_type == VMAT_CONTIGUOUS_REVERSE) *poffset = neg_ldst_offset; - if (*memory_access_type == VMAT_GATHER_SCATTER - || *memory_access_type == VMAT_ELEMENTWISE + if (*memory_access_type == VMAT_ELEMENTWISE + || (*memory_access_type == VMAT_GATHER_SCATTER + && GATHER_SCATTER_LEGACY_P (*gs_info)) || *memory_access_type == VMAT_STRIDED_SLP || *memory_access_type == VMAT_INVARIANT) { @@ -2370,10 +2425,48 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, } else { - *misalignment = dr_misalignment (first_dr_info, vectype, *poffset); - *alignment_support_scheme - = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, - *misalignment); + /* Non dataref-based gather/scatter. */ + if (*memory_access_type == VMAT_GATHER_SCATTER + && !first_dr_info) + { + /* Gather-scatter accesses normally perform only component accesses + so alignment is irrelevant for them. Targets like riscv do care + about scalar alignment in vector accesses, though, so check scalar + alignment here. We determined the alias pointer as well as the + base alignment during pattern recognition and can re-use it here. + + As we do not have a dataref we only know the alignment of the + base. For now don't try harder to determine misalignment and + just assume it is unknown. We consider the type packed if its + scalar alignment is lower than the natural alignment of a vector + element's type. 
*/ + + tree inner_vectype = TREE_TYPE (vectype); + + unsigned HOST_WIDE_INT scalar_align + = tree_to_uhwi (gs_info->alias_ptr); + unsigned HOST_WIDE_INT inner_vectype_sz + = tree_to_uhwi (TYPE_SIZE (inner_vectype)); + + bool is_misaligned = scalar_align < inner_vectype_sz; + bool is_packed = scalar_align > 1 && is_misaligned; + + *misalignment = DR_MISALIGNMENT_UNKNOWN; + + if (targetm.vectorize.support_vector_misalignment + (TYPE_MODE (vectype), inner_vectype, *misalignment, is_packed, + /*is_gather_scatter=*/ true)) + *alignment_support_scheme = dr_unaligned_supported; + else + *alignment_support_scheme = dr_unaligned_unsupported; + } + else + { + *misalignment = dr_misalignment (first_dr_info, vectype, *poffset); + *alignment_support_scheme + = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype, + *misalignment); + } } if (vls_type != VLS_LOAD && first_stmt_info == stmt_info) @@ -2443,58 +2536,12 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); *misalignment = DR_MISALIGNMENT_UNKNOWN; *poffset = 0; - if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) - { - *memory_access_type = VMAT_GATHER_SCATTER; - if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info, - elsvals)) - gcc_unreachable (); - /* When using internal functions, we rely on pattern recognition - to convert the type of the offset to the type that the target - requires, with the result being a call to an internal function. - If that failed for some reason (e.g. because another pattern - took priority), just handle cases in which the offset already - has the right type. */ - else if (GATHER_SCATTER_IFN_P (*gs_info) - && !is_gimple_call (stmt_info->stmt) - && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset), - TREE_TYPE (gs_info->offset_vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "%s offset requires a conversion\n", - vls_type == VLS_LOAD ? "gather" : "scatter"); - return false; - } - slp_tree offset_node = SLP_TREE_CHILDREN (slp_node)[0]; - gs_info->offset_dt = SLP_TREE_DEF_TYPE (offset_node); - gs_info->offset_vectype = SLP_TREE_VECTYPE (offset_node); - if (gs_info->ifn == IFN_LAST && !gs_info->decl) - { - if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant () - || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant () - || VECTOR_BOOLEAN_TYPE_P (gs_info->offset_vectype) - || !constant_multiple_p (TYPE_VECTOR_SUBPARTS - (gs_info->offset_vectype), - TYPE_VECTOR_SUBPARTS (vectype))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "unsupported vector types for emulated " - "gather.\n"); - return false; - } - } - /* Gather-scatter accesses perform only component accesses, alignment - is irrelevant for them. */ - *alignment_support_scheme = dr_unaligned_supported; - } - else if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, - masked_p, - vls_type, memory_access_type, poffset, - alignment_support_scheme, - misalignment, gs_info, lanes_ifn, - elsvals)) + if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node, + masked_p, + vls_type, memory_access_type, poffset, + alignment_support_scheme, + misalignment, gs_info, lanes_ifn, + elsvals)) return false; if ((*memory_access_type == VMAT_ELEMENTWISE @@ -2528,17 +2575,18 @@ get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, "alignment. 
With non-contiguous memory vectorization" " could read out of bounds at %G ", STMT_VINFO_STMT (stmt_info)); - if (inbounds) - LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; - else - return false; + if (inbounds) + LOOP_VINFO_MUST_USE_PARTIAL_VECTORS_P (loop_vinfo) = true; + else + return false; } /* If this DR needs alignment for correctness, we must ensure the target alignment is a constant power-of-two multiple of the amount read per vector iteration or force masking. */ if (dr_safe_speculative_read_required (stmt_info) - && *alignment_support_scheme == dr_aligned) + && (*alignment_support_scheme == dr_aligned + && *memory_access_type != VMAT_GATHER_SCATTER)) { /* We can only peel for loops, of course. */ gcc_checking_assert (loop_vinfo); @@ -8442,7 +8490,6 @@ vectorizable_store (vec_info *vinfo, if (dump_enabled_p () && memory_access_type != VMAT_ELEMENTWISE - && memory_access_type != VMAT_GATHER_SCATTER && memory_access_type != VMAT_STRIDED_SLP && memory_access_type != VMAT_INVARIANT && alignment_support_scheme != dr_aligned) @@ -9143,24 +9190,31 @@ vectorizable_store (vec_info *vinfo, { if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) call = gimple_build_call_internal ( - IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr, + IFN_MASK_LEN_SCATTER_STORE, 8, dataref_ptr, + gs_info.alias_ptr, vec_offset, scale, vec_oprnd, final_mask, final_len, bias); else /* Non-vector offset indicates that prefer to take MASK_LEN_STRIDED_STORE instead of the - IFN_MASK_SCATTER_STORE with direct stride arg. */ + IFN_MASK_SCATTER_STORE with direct stride arg. + Similar to the gather case we have checked the + alignment for a scatter already and assume + that the strided store has the same requirements. */ call = gimple_build_call_internal ( IFN_MASK_LEN_STRIDED_STORE, 6, dataref_ptr, vec_offset, vec_oprnd, final_mask, final_len, bias); } else if (final_mask) call = gimple_build_call_internal - (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, + (IFN_MASK_SCATTER_STORE, 6, dataref_ptr, + gs_info.alias_ptr, vec_offset, scale, vec_oprnd, final_mask); else - call = gimple_build_call_internal (IFN_SCATTER_STORE, 4, - dataref_ptr, vec_offset, + call = gimple_build_call_internal (IFN_SCATTER_STORE, 5, + dataref_ptr, + gs_info.alias_ptr, + vec_offset, scale, vec_oprnd); gimple_call_set_nothrow (call, true); vect_finish_stmt_generation (vinfo, stmt_info, call, gsi); @@ -10627,7 +10681,6 @@ vectorizable_load (vec_info *vinfo, vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); } - gcc_assert (alignment_support_scheme); vec_loop_masks *loop_masks = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) ? &LOOP_VINFO_MASKS (loop_vinfo) @@ -10647,10 +10700,12 @@ vectorizable_load (vec_info *vinfo, /* Targets with store-lane instructions must not require explicit realignment. vect_supportable_dr_alignment always returns either - dr_aligned or dr_unaligned_supported for masked operations. */ + dr_aligned or dr_unaligned_supported for (non-length) masked + operations. 
*/ gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask && !loop_masks) + || memory_access_type == VMAT_GATHER_SCATTER || alignment_support_scheme == dr_aligned || alignment_support_scheme == dr_unaligned_supported); @@ -10995,8 +11050,6 @@ vectorizable_load (vec_info *vinfo, if (memory_access_type == VMAT_GATHER_SCATTER) { - gcc_assert (alignment_support_scheme == dr_aligned - || alignment_support_scheme == dr_unaligned_supported); gcc_assert (!grouped_load && !slp_perm); unsigned int inside_cost = 0, prologue_cost = 0; @@ -11085,7 +11138,8 @@ vectorizable_load (vec_info *vinfo, { if (VECTOR_TYPE_P (TREE_TYPE (vec_offset))) call = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, - 8, dataref_ptr, + 9, dataref_ptr, + gs_info.alias_ptr, vec_offset, scale, zero, final_mask, vec_els, final_len, bias); @@ -11100,13 +11154,15 @@ vectorizable_load (vec_info *vinfo, } else if (final_mask) call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, - 6, dataref_ptr, + 7, dataref_ptr, + gs_info.alias_ptr, vec_offset, scale, zero, final_mask, vec_els); else - call = gimple_build_call_internal (IFN_GATHER_LOAD, 4, - dataref_ptr, vec_offset, - scale, zero); + call = gimple_build_call_internal (IFN_GATHER_LOAD, 5, + dataref_ptr, + gs_info.alias_ptr, + vec_offset, scale, zero); gimple_call_set_nothrow (call, true); new_stmt = call; data_ref = NULL_TREE; diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 7b927491b1c..4511527647a 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1557,6 +1557,10 @@ struct gather_scatter_info { /* The loop-invariant base value. */ tree base; + /* The TBAA alias pointer, the value of which determines the alignment + of the scalar accesses. */ + tree alias_ptr; + /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */ tree offset; -- 2.50.0