================
@@ -446,8 +474,10 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, ptr addrspace(
 ; GFX11W64-NEXT: ; implicit-def: $vgpr1
 ; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11W64-NEXT: v_mbcnt_hi_u32_b32 v0, s5, v0
-; GFX11W64-NEXT: v_cmpx_eq_u32_e32 0, v0
-; GFX11W64-NEXT: s_cbranch_execz .LBB1_2
+; GFX11W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX11W64-NEXT: s_cmp_lg_u64 vcc, 0
+; GFX11W64-NEXT: s_cmov_b64 exec, vcc
+; GFX11W64-NEXT: s_cbranch_scc0 .LBB1_2
----------------
ruiling wrote:
We can merge the `s_cmp_lg` that updates SCC and the save of exec into a single `s_and_saveexec`:

```
s_and_saveexec_b64 s[0:1], vcc
s_cselect_b64 exec, vcc, s[0:1]
s_cbranch_scc0 .LBB1_2
```

so we don't need the separate `s_mov_b64 s[0:1], exec` at line 472 to save exec.

https://github.com/llvm/llvm-project/pull/108596
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits