================ @@ -1391,24 +1419,28 @@ define amdgpu_ps float @test_control_flow_0(<8 x i32> inreg %rsrc, <4 x i32> inr ; GFX10-W32: ; %bb.0: ; %main_body ; GFX10-W32-NEXT: s_mov_b32 s12, exec_lo ; GFX10-W32-NEXT: s_wqm_b32 exec_lo, exec_lo -; GFX10-W32-NEXT: s_mov_b32 s13, exec_lo -; GFX10-W32-NEXT: v_cmpx_ne_u32_e32 0, v1 -; GFX10-W32-NEXT: s_xor_b32 s13, exec_lo, s13 -; GFX10-W32-NEXT: s_cbranch_execz .LBB27_2 +; GFX10-W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 +; GFX10-W32-NEXT: s_xor_b32 s14, vcc_lo, exec_lo +; GFX10-W32-NEXT: s_cmp_lg_u32 vcc_lo, 0 +; GFX10-W32-NEXT: s_cmov_b32 exec_lo, vcc_lo +; GFX10-W32-NEXT: s_cbranch_scc0 .LBB27_2 ; GFX10-W32-NEXT: ; %bb.1: ; %ELSE -; GFX10-W32-NEXT: s_and_saveexec_b32 s14, s12 +; GFX10-W32-NEXT: s_and_saveexec_b32 s13, s12 ; GFX10-W32-NEXT: buffer_store_dword v2, v0, s[0:3], 0 idxen ; GFX10-W32-NEXT: ; implicit-def: $vgpr0 -; GFX10-W32-NEXT: s_mov_b32 exec_lo, s14 +; GFX10-W32-NEXT: s_mov_b32 exec_lo, s13 +; GFX10-W32-NEXT: s_or_b32 exec_lo, exec_lo, s14 ; GFX10-W32-NEXT: .LBB27_2: ; %Flow -; GFX10-W32-NEXT: s_andn2_saveexec_b32 s13, s13 -; GFX10-W32-NEXT: s_cbranch_execz .LBB27_4 +; GFX10-W32-NEXT: s_xor_b32 s13, s14, exec_lo +; GFX10-W32-NEXT: s_cmp_lg_u32 s14, 0 +; GFX10-W32-NEXT: s_cmov_b32 exec_lo, s14 ---------------- ruiling wrote:
We are safe to use the following sequence here (there is no kill in the else region): `s_and_saveexec s13, s14`; `s_cselect s14, s13`; `s_cbranch_scc0`. https://github.com/llvm/llvm-project/pull/108596 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits