llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-analysis Author: Aaditya (easyonaadit) <details> <summary>Changes</summary> --- Patch is 169.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150395.diff 9 Files Affected: - (modified) llvm/lib/Analysis/ConstantFolding.cpp (+14) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll (+88-532) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll (+8-80) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll (+8-80) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll (+8-80) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll (+8-80) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll (+88-658) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll (+88-589) - (modified) llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll (+445-10) ``````````diff diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp old mode 100644 new mode 100755 index e71ba5ea5521e..11d22c75831d0 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_add: + case Intrinsic::amdgcn_wave_reduce_sub: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: + case Intrinsic::amdgcn_wave_reduce_xor: case Intrinsic::amdgcn_s_wqm: case Intrinsic::amdgcn_s_quadmask: case Intrinsic::amdgcn_s_bitreplicate: @@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, return ConstantInt::get(Ty, C0->abs()); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: + case Intrinsic::amdgcn_wave_reduce_max: + case Intrinsic::amdgcn_wave_reduce_min: + case Intrinsic::amdgcn_wave_reduce_add: + case Intrinsic::amdgcn_wave_reduce_sub: + case Intrinsic::amdgcn_wave_reduce_and: + case Intrinsic::amdgcn_wave_reduce_or: + case Intrinsic::amdgcn_wave_reduce_xor: return dyn_cast<Constant>(Operands[0]); } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll index b6af8b4bb798d..d3d9275a566e2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll @@ -191,10 +191,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: const_value: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 @@ -204,10 +201,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX8GISEL-LABEL: const_value: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 ; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 @@ -217,11 +211,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX9DAGISEL-LABEL: const_value: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b ; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm @@ -229,72 +220,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX9GISEL-LABEL: const_value: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: const_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: const_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: const_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: const_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm +; GFX10DAGISEL-LABEL: const_value: +; GFX10DAGISEL: ; %bb.0: ; %entry +; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b +; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] +; GFX10DAGISEL-NEXT: s_endpgm +; +; GFX10GISEL-LABEL: const_value: +; GFX10GISEL: ; %bb.0: ; %entry +; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] +; GFX10GISEL-NEXT: s_endpgm ; ; GFX1164DAGISEL-LABEL: const_value: ; GFX1164DAGISEL: ; %bb.0: ; %entry ; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec ; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 +; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b ; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1164DAGISEL-NEXT: s_endpgm @@ -302,12 +256,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX1164GISEL-LABEL: const_value: ; GFX1164GISEL: ; %bb.0: ; %entry ; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec +; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1164GISEL-NEXT: s_endpgm @@ -315,12 +265,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX1132DAGISEL-LABEL: const_value: ; GFX1132DAGISEL: ; %bb.0: ; %entry ; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 +; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b ; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] ; GFX1132DAGISEL-NEXT: s_endpgm @@ -328,12 +273,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { ; GFX1132GISEL-LABEL: const_value: ; GFX1132GISEL: ; %bb.0: ; %entry ; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 ; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX1132GISEL-NEXT: s_endpgm @@ -346,152 +286,35 @@ entry: define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) { ; GFX8DAGISEL-LABEL: poison_value: ; GFX8DAGISEL: ; %bb.0: ; %entry -; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8DAGISEL-NEXT: s_endpgm ; ; GFX8GISEL-LABEL: poison_value: ; GFX8GISEL: ; %bb.0: ; %entry -; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 ; GFX8GISEL-NEXT: s_endpgm ; ; GFX9DAGISEL-LABEL: poison_value: ; GFX9DAGISEL: ; %bb.0: ; %entry -; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; ; GFX9GISEL-LABEL: poison_value: ; GFX9GISEL: ; %bb.0: ; %entry -; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9GISEL-NEXT: s_endpgm ; -; GFX1064DAGISEL-LABEL: poison_value: -; GFX1064DAGISEL: ; %bb.0: ; %entry -; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1064DAGISEL-NEXT: s_endpgm -; -; GFX1064GISEL-LABEL: poison_value: -; GFX1064GISEL: ; %bb.0: ; %entry -; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1064GISEL-NEXT: s_endpgm -; -; GFX1032DAGISEL-LABEL: poison_value: -; GFX1032DAGISEL: ; %bb.0: ; %entry -; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] -; GFX1032DAGISEL-NEXT: s_endpgm -; -; GFX1032GISEL-LABEL: poison_value: -; GFX1032GISEL: ; %bb.0: ; %entry -; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] -; GFX1032GISEL-NEXT: s_endpgm -; -; GFX1164DAGISEL-LABEL: poison_value: -; GFX1164DAGISEL: ; %bb.0: ; %entry -; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 -; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 -; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1164DAGISEL-NEXT: s_endpgm +; GFX10DAGISEL-LABEL: poison_value: +; GFX10DAGISEL: ; %bb.0: ; %entry +; GFX10DAGISEL-NEXT: s_endpgm ; -; GFX1164GISEL-LABEL: poison_value: -; GFX1164GISEL: ; %bb.0: ; %entry -; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3] -; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1164GISEL-NEXT: s_endpgm +; GFX10GISEL-LABEL: poison_value: +; GFX10GISEL: ; %bb.0: ; %entry +; GFX10GISEL-NEXT: s_endpgm ; -; GFX1132DAGISEL-LABEL: poison_value: -; GFX1132DAGISEL: ; %bb.0: ; %entry -; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 -; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] -; GFX1132DAGISEL-NEXT: s_endpgm +; GFX11DAGISEL-LABEL: poison_value: +; GFX11DAGISEL: ; %bb.0: ; %entry +; GFX11DAGISEL-NEXT: s_endpgm ; -; GFX1132GISEL-LABEL: poison_value: -; GFX1132GISEL: ; %bb.0: ; %entry -; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 -; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo -; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2 -; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2 -; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] -; GFX1132GISEL-NEXT: s_endpgm +; GFX11GISEL-LABEL: poison_value: +; GFX11GISEL: ; %bb.0: ; %entry +; GFX11GISEL-NEXT: s_endpgm entry: %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1) store i32 %result, ptr addrspace(1) %out @@ -1431,33 +1254,21 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { ; GFX8DAGISEL-LABEL: const_value_i64: ; GFX8DAGISEL: ; %bb.0: ; %entry ; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 -; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3 -; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3] +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 +; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8DAGISEL-NEXT: s_endpgm ; ; GFX8GISEL-LABEL: const_value_i64: ; GFX8GISEL: ; %bb.0: ; %entry ; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b +; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1 -; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3 ; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0 ; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8GISEL-NEXT: s_endpgm @@ -1465,129 +1276,56 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) { ; GFX9DAGISEL-LABEL: const_value_i64: ; GFX9DAGISEL: ; %bb.0: ; %entry ; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1] ; GFX9DAGISEL-NEXT: s_endpgm ; ; GFX9GISEL-LABEL: const_value_i64: ; GFX9GISEL: ; %bb.0: ; %entry ; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 -; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec -; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3] -; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b -; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3 -; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0 -; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3 -; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/150395 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits