https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117709
--- Comment #2 from Robin Dapp <rdapp at gcc dot gnu.org> --- This is the code: vect__23.27_8 = .MASK_GATHER_LOAD (&MEM <int[11][101]> [(void *)&k + -88B], { 0, -15, -30, -45, -60, -75, -90, -105, -120, -135, -150, -165, -180, -195, -210, -225, -240, -255, -270, -285, -300, -315, -330, -345, -360, -375, -390, -405, -420, -435, -450, -465, -480, -495, -510, -525, -540, -555, -570, -585, -600, -615, -630, -645, -660, -675, -690, -705, -720, -735, -750, -765, -780, -795, -810, -825, -840, -855, -870, -885, -900, -915, -930, -945 }, 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, _50(D)); mask__24.28_4 = vect__23.27_8 != { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; vect_patt_75.29_13 = .VCOND_MASK (mask__24.28_4, { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }); vect__26.30_5 = .MASK_LOAD (&D.2001, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, _9(D)); vect__27.31_12 = vect__26.30_5 | vect_patt_75.29_13; .MASK_STORE (&D.2001, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, vect__27.31_12);

vect__23 and vect__26 have undefined values for all but the first three elements, but right now I don't see what we're doing wrong. The IOR obviously has inherited stale values, but they don't seem to be used?

The corresponding assembly sequence is (before):

        v_mov_b32 v3, 0
        flat_load_dword v3, v[4:5] offset:0
        v_cmp_ne_u32 s[12:13], v3, 1
        v_cndmask_b32 v3, 0, 1, s[12:13]
        v_mov_b32 v0, 0
        flat_load_dword v0, v[4:5] offset:0
        v_or_b32 v0, v0, v3
        flat_store_dword v[4:5], v0 offset:0

With the patch, just the zero moves are missing. Where is the mask in the assembly?