https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94398
--- Comment #1 from z.zhanghaijian at huawei dot com <z.zhanghaijian at huawei dot com> --- (gdb) bt #0 aarch64_builtin_support_vector_misalignment (mode=E_VNx4SFmode, type=0xffffb79ec2a0, misalignment=-1, is_packed=false) at ../../gcc-git/gcc/config/aarch64/aarch64.c:17510 #1 0x000000000220631c in vect_supportable_dr_alignment (dr_info=0x2ef3798, check_aligned_accesses=false) at ../../gcc-git/gcc/tree-vect-data-refs.c:6618 #2 0x000000000162fc9c in vectorizable_load (stmt_info=0x2ef3770, gsi=0xffffffffe0b0, vec_stmt=0xffffffffdde0, slp_node=0x0, slp_node_instance=0x0, cost_vec=0x0) at ../../gcc-git/gcc/tree-vect-stmts.c:9172 #3 0x0000000001635174 in vect_transform_stmt (stmt_info=0x2ef3770, gsi=0xffffffffe0b0, slp_node=0x0, slp_node_instance=0x0) at ../../gcc-git/gcc/tree-vect-stmts.c:11034 #4 0x000000000165a340 in vect_transform_loop_stmt (loop_vinfo=0x2ed0ad0, stmt_info=0x2ef3770, gsi=0xffffffffe0b0, seen_store=0xffffffffe0a8) at ../../gcc-git/gcc/tree-vect-loop.c:8307 #5 0x000000000165b5c4 in vect_transform_loop (loop_vinfo=0x2ed0ad0, loop_vectorized_call=0x0) at ../../gcc-git/gcc/tree-vect-loop.c:8708 #6 0x0000000001689f08 in try_vectorize_loop_1 (simduid_to_vf_htab=@0xffffffffed68: 0x0, num_vectorized_loops=0xffffffffed7c, loop=0xffffb7820000, loop_vectorized_call=0x0, loop_dist_alias_call=0x0) at ../../gcc-git/gcc/tree-vectorizer.c:990 #7 0x000000000168a184 in try_vectorize_loop (simduid_to_vf_htab=@0xffffffffed68: 0x0, num_vectorized_loops=0xffffffffed7c, loop=0xffffb7820000) at ../../gcc-git/gcc/tree-vectorizer.c:1047 #8 0x000000000168a330 in vectorize_loops () at ../../gcc-git/gcc/tree-vectorizer.c:1127 #9 0x00000000014e55e4 in (anonymous namespace)::pass_vectorize::execute (this=0x2d6f860, fun=0xffffb7817000) at ../../gcc-git/gcc/tree-ssa-loop.c:414 #10 0x000000000113dec0 in execute_one_pass (pass=0x2d6f860) at ../../gcc-git/gcc/passes.c:2502 #11 0x000000000113e284 in execute_pass_list_1 (pass=0x2d6f860) at ../../gcc-git/gcc/passes.c:2590 #12 
0x000000000113e2c0 in execute_pass_list_1 (pass=0x2d6f070) at ../../gcc-git/gcc/passes.c:2591 #13 0x000000000113e2c0 in execute_pass_list_1 (pass=0x2d6dd00) at ../../gcc-git/gcc/passes.c:2591 #14 0x000000000113e32c in execute_pass_list (fn=0xffffb7817000, pass=0x2d6db20) at ../../gcc-git/gcc/passes.c:2601 #15 0x0000000000be2f50 in cgraph_node::expand (this=0xffffb79dc870) at ../../gcc-git/gcc/cgraphunit.c:2299 #16 0x0000000000be3814 in expand_all_functions () at ../../gcc-git/gcc/cgraphunit.c:2470 #17 0x0000000000be45c4 in symbol_table::compile (this=0xffffb79ce000) at ../../gcc-git/gcc/cgraphunit.c:2820 #18 0x0000000000be4b14 in symbol_table::finalize_compilation_unit (this=0xffffb79ce000) at ../../gcc-git/gcc/cgraphunit.c:3000 #19 0x000000000129f7dc in compile_file () at ../../gcc-git/gcc/toplev.c:483 #20 0x00000000012a3a14 in do_compile () at ../../gcc-git/gcc/toplev.c:2273 #21 0x00000000012a3de0 in toplev::main (this=0xfffffffff148, argc=21, argv=0xfffffffff298) at ../../gcc-git/gcc/toplev.c:2412 #22 0x000000000224a038 in main (argc=21, argv=0xfffffffff298) at ../../gcc-git/gcc/main.c:39 (gdb) p misalignment $3 = -1 (gdb) p mode $4 = E_VNx4SFmode vect_supportable_dr_alignment is expected to return either dr_aligned or dr_unaligned_supported for masked operations. However, it seems that this function only handles the internal functions IFN_MASK_LOAD and IFN_MASK_STORE. We are emitting a masked gather load here for this test case. As backends have their own vector misalignment support policy, I suppose this would be better handled in the shared auto-vectorizer code. Proposed fix by felix.y...@huawei.com: diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c index 0192aa6..67d3345 100644 --- a/gcc/tree-vect-data-refs.c +++ b/gcc/tree-vect-data-refs.c @@ -6509,11 +6509,26 @@ vect_supportable_dr_alignment (dr_vec_info *dr_info, /* For now assume all conditional loads/stores support unaligned access without any special code. 
*/ - if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) - if (gimple_call_internal_p (stmt) - && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD - || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) - return dr_unaligned_supported; + gcall *call = dyn_cast <gcall *> (stmt_info->stmt); + if (call && gimple_call_internal_p (call)) + { + internal_fn ifn = gimple_call_internal_fn (call); + switch (ifn) + { + case IFN_MASK_LOAD: + case IFN_MASK_LOAD_LANES: + case IFN_MASK_GATHER_LOAD: + case IFN_MASK_STORE: + case IFN_MASK_STORE_LANES: + case IFN_MASK_SCATTER_STORE: + return dr_unaligned_supported; + default: + break; + } + } + + if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) + return dr_unaligned_supported; if (loop_vinfo) {