https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98535
--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---

So the issue is that hi_start isn't halved, I think, or hi_start isn't added when (i & 1) != 0. So probably all non-single-round duplicate&interleaves are broken right now.

@@ -5091,13 +5091,14 @@ duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
      a multiple of N * 2, the HI result is the same as the LO. */
   unsigned int in_start = 0;
   unsigned int out_start = nvectors;
-  unsigned int hi_start = nvectors / 2;
+  unsigned int hi_start = nvectors;
   /* A bound on the number of outputs needed to produce NRESULTS results
      in the final iteration. */
   unsigned int noutputs_bound = nvectors * nresults;
   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
     {
       noutputs_bound /= 2;
+      hi_start /= 2;
       unsigned int limit = MIN (noutputs_bound, nvectors);
       for (unsigned int i = 0; i < limit; ++i)
         {

fixes the crash, but the generated code seemingly lacks a permute anyway, since seq is initially

  _99 = {j$b_2(D)};
  _100 = VIEW_CONVERT_EXPR<unsigned short>(_99);
  _101 = [vec_duplicate_expr] _100;
  _102 = {j$c_10(D)};
  _103 = VIEW_CONVERT_EXPR<unsigned short>(_102);
  _104 = [vec_duplicate_expr] _103;
  _105 = {j$d_11(D)};
  _106 = VIEW_CONVERT_EXPR<unsigned short>(_105);
  _107 = [vec_duplicate_expr] _106;
  _108 = {j$e_12(D)};
  _109 = VIEW_CONVERT_EXPR<unsigned short>(_108);
  _110 = [vec_duplicate_expr] _109;

but we re-use the interleaving of _101 and _107 for the other part due to

          if ((i & 1) != 0
              && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
                             2 * in_repeat))
            {
              pieces[out_start + i] = pieces[out_start + i - 1];
              continue;
            }

that is, we end up with

  <bb 26> [local count: 73320728]:
  _99 = {j$b_2(D)};
  _100 = VIEW_CONVERT_EXPR<unsigned short>(_99);
  _101 = [vec_duplicate_expr] _100;
  _102 = {j$c_10(D)};
  _103 = VIEW_CONVERT_EXPR<unsigned short>(_102);
  _104 = [vec_duplicate_expr] _103;
  _105 = {j$d_11(D)};
  _106 = VIEW_CONVERT_EXPR<unsigned short>(_105);
  _107 = [vec_duplicate_expr] _106;
  _108 = {j$e_12(D)};
  _109 = VIEW_CONVERT_EXPR<unsigned short>(_108);
  _110 = [vec_duplicate_expr] _109;
  _111 = VEC_PERM_EXPR <_101, _107, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;
  _112 = VEC_PERM_EXPR <_111, _111, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;
  _113 = VIEW_CONVERT_EXPR<vector([4,4]) short int>(_112);

seemingly ignoring _104 and _108 entirely ...