https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117709

--- Comment #2 from Robin Dapp <rdapp at gcc dot gnu.org> ---
This is the code:

  vect__23.27_8 = .MASK_GATHER_LOAD (&MEM <int[11][101]> [(void *)&k + -88B], {
0, -15, -30, -45, -60, -75, -90, -105, -120, -135, -150, -165, -180, -195,
-210, -225, -240, -255, -270, -285, -300, -315, -330, -345, -360, -375, -390,
-405, -420, -435, -450, -465, -480, -495, -510, -525, -540, -555, -570, -585,
-600, -615, -630, -645, -660, -675, -690, -705, -720, -735, -750, -765, -780,
-795, -810, -825, -840, -855, -870, -885, -900, -915, -930, -945 }, 4, { 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
_50(D));
  mask__24.28_4 = vect__23.27_8 != { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
  vect_patt_75.29_13 = .VCOND_MASK (mask__24.28_4, { 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 });
  vect__26.30_5 = .MASK_LOAD (&D.2001, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 }, _9(D));
  vect__27.31_12 = vect__26.30_5 | vect_patt_75.29_13;
  .MASK_STORE (&D.2001, 32B, { -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
vect__27.31_12);

vect__23 and vect__26 have undefined values for all but the first three
elements, but right now I don't see what we're doing wrong.  The IOR
obviously inherits those stale values, but they don't seem to be used (the
.MASK_STORE applies the same three-lane mask)?

The corresponding assembly sequence (before the patch) is:

  v_mov_b32       v3, 0
  flat_load_dword v3, v[4:5] offset:0
  v_cmp_ne_u32    s[12:13], v3, 1 
  v_cndmask_b32   v3, 0, 1, s[12:13]

  v_mov_b32       v0, 0
  flat_load_dword v0, v[4:5] offset:0

  v_or_b32        v0, v0, v3
  flat_store_dword        v[4:5], v0 offset:0

With the patch, only the zero moves (the two "v_mov_b32 vN, 0" instructions)
are missing.  Where is the mask in the assembly?

Reply via email to