llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> This PR updates the SGPR layout to a striped caller/callee-saved design, similar to the VGPR layout. The stripe width is set to 8. Fixes #<!-- -->113782. --- Patch is 2.57 MiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127353.diff 60 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td (+5-1) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll (+145-145) - (modified) llvm/test/CodeGen/AMDGPU/bf16.ll (+90-245) - (modified) llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll (+21-21) - (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+203-201) - (modified) llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll (+73-140) - (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg-no-sgpr-for-csrspill-xfail.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/call-args-inreg.ll (+6-6) - (modified) llvm/test/CodeGen/AMDGPU/call-argument-types.ll (+1256-1256) - (modified) llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll (+20-14) - (modified) llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll (+788-1549) - (modified) llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir (+4-6) - (modified) llvm/test/CodeGen/AMDGPU/ds_read2.ll (+18-18) - (modified) llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll (+36-36) - (modified) llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir (+26-27) - (modified) llvm/test/CodeGen/AMDGPU/function-args-inreg.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/function-resource-usage.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/gfx-call-non-gfx-func.ll (+66-2) - (modified) llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll (+80-208) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll (+1834-1834) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll (+1554-1554) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll (+1554-1554) - (modified) llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll (+1834-1834) - (modified) llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir (+64-62) - (modified) llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll (+55-91) - (modified) llvm/test/CodeGen/AMDGPU/indirect-call.ll (+492-748) - (modified) llvm/test/CodeGen/AMDGPU/issue48473.mir (+1-1) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll (+24-24) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f32.ll (+6-39) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll (+18-63) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f32.ll (+6-39) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll (+18-63) - (modified) llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll (+32-32) - (modified) llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll (+160-160) - (modified) llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.gfx10.ll (+68-774) - (modified) llvm/test/CodeGen/AMDGPU/materialize-frame-index-sgpr.ll (+416-1095) - (modified) llvm/test/CodeGen/AMDGPU/mcexpr-knownbits-assign-crash-gh-issue-110930.ll (+13-13) - (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir (+28-58) - (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-gfx9.mir (+17-39) - (modified) llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr.mir (+9-21) - (modified) llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll (+223-223) - (modified) llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir (+120-86) - (modified) llvm/test/CodeGen/AMDGPU/schedule-amdgpu-tracker-physreg.ll (+4-4) - (modified) llvm/test/CodeGen/AMDGPU/select.f16.ll (+2-13) - (modified) llvm/test/CodeGen/AMDGPU/sgpr-spill-update-only-slot-indexes.ll (+8-8) - (modified) llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll (+672-1568) - (modified) llvm/test/CodeGen/AMDGPU/sibling-call.ll (+120-120) - (modified) llvm/test/CodeGen/AMDGPU/snippet-copy-bundle-regression.mir (+38-17) - (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir (+11-27) - (modified) llvm/test/CodeGen/AMDGPU/spill-sgpr-used-for-exec-copy.mir (+3-8) - (modified) llvm/test/CodeGen/AMDGPU/spill_more_than_wavesize_csr_sgprs.ll (+132-264) - (modified) llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir (+107-93) - (modified) llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll (+11-11) - (modified) llvm/test/CodeGen/AMDGPU/stack-realign.ll (+7-13) - (modified) llvm/test/CodeGen/AMDGPU/tuple-allocation-failure.ll (+189-144) - (modified) llvm/test/CodeGen/AMDGPU/unallocatable-bundle-regression.mir (+11-11) - (modified) llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll (+106-106) - (modified) llvm/test/CodeGen/AMDGPU/use_restore_frame_reg.mir (+25-51) - (modified) llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll (+112-240) - (modified) llvm/test/CodeGen/MIR/AMDGPU/spill-phys-vgprs.mir (+1-2) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index 80969fce3d77f..e3861a7d06c3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -91,7 +91,11 @@ def CSR_AMDGPU_AGPRs : CalleeSavedRegs< >; def CSR_AMDGPU_SGPRs : CalleeSavedRegs< - (sequence "SGPR%u", 30, 105) + (add (sequence "SGPR%u", 30, 37), + (sequence "SGPR%u", 46, 53), + (sequence "SGPR%u", 62, 69), + (sequence "SGPR%u", 78, 85), + (sequence "SGPR%u", 94, 105)) >; def CSR_AMDGPU_SI_Gfx_SGPRs : CalleeSavedRegs< diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll index ab2363860af9d..905d0deacab35 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll @@ -125,35 +125,35 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -161,15 +161,15 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -179,14 +179,14 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) { ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -266,34 +266,34 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v42, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v3 ; CHECK-NEXT: v_mov_b32_e32 v40, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1] @@ -301,28 +301,28 @@ define double @test_powr_fast_f64(double %x, double %y) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v42 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -409,35 +409,35 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v42, v1 -; CHECK-NEXT: v_writelane_b32 v43, s44, 12 +; CHECK-NEXT: v_writelane_b32 v43, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v42 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] -; CHECK-NEXT: v_writelane_b32 v43, s45, 13 +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] +; CHECK-NEXT: v_writelane_b32 v43, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 ; CHECK-NEXT: v_mov_b32_e32 v41, v2 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41 @@ -445,15 +445,15 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -463,14 +463,14 @@ define double @test_pown_fast_f64(double %x, i32 %y) { ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; CHECK-NEXT: v_or_b32_e32 v1, v2, v1 -; CHECK-NEXT: v_readlane_b32 s45, v43, 13 -; CHECK-NEXT: v_readlane_b32 s44, v43, 12 -; CHECK-NEXT: v_readlane_b32 s43, v43, 11 -; CHECK-NEXT: v_readlane_b32 s42, v43, 10 -; CHECK-NEXT: v_readlane_b32 s41, v43, 9 -; CHECK-NEXT: v_readlane_b32 s40, v43, 8 -; CHECK-NEXT: v_readlane_b32 s39, v43, 7 -; CHECK-NEXT: v_readlane_b32 s38, v43, 6 +; CHECK-NEXT: v_readlane_b32 s53, v43, 13 +; CHECK-NEXT: v_readlane_b32 s52, v43, 12 +; CHECK-NEXT: v_readlane_b32 s51, v43, 11 +; CHECK-NEXT: v_readlane_b32 s50, v43, 10 +; CHECK-NEXT: v_readlane_b32 s49, v43, 9 +; CHECK-NEXT: v_readlane_b32 s48, v43, 8 +; CHECK-NEXT: v_readlane_b32 s47, v43, 7 +; CHECK-NEXT: v_readlane_b32 s46, v43, 6 ; CHECK-NEXT: v_readlane_b32 s37, v43, 5 ; CHECK-NEXT: v_readlane_b32 s36, v43, 4 ; CHECK-NEXT: v_readlane_b32 s35, v43, 3 @@ -552,32 +552,32 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v42, s35, 3 ; CHECK-NEXT: v_writelane_b32 v42, s36, 4 ; CHECK-NEXT: v_writelane_b32 v42, s37, 5 -; CHECK-NEXT: v_writelane_b32 v42, s38, 6 -; CHECK-NEXT: v_writelane_b32 v42, s39, 7 +; CHECK-NEXT: v_writelane_b32 v42, s46, 6 +; CHECK-NEXT: v_writelane_b32 v42, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v42, s40, 8 -; CHECK-NEXT: v_writelane_b32 v42, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v42, s48, 8 +; CHECK-NEXT: v_writelane_b32 v42, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v42, s42, 10 -; CHECK-NEXT: v_writelane_b32 v42, s43, 11 -; CHECK-NEXT: v_writelane_b32 v42, s44, 12 +; CHECK-NEXT: v_writelane_b32 v42, s50, 10 +; CHECK-NEXT: v_writelane_b32 v42, s51, 11 +; CHECK-NEXT: v_writelane_b32 v42, s52, 12 ; CHECK-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v42, s45, 13 +; CHECK-NEXT: v_writelane_b32 v42, s53, 13 ; CHECK-NEXT: v_mov_b32_e32 v40, v31 -; CHECK-NEXT: s_mov_b32 s42, s15 -; CHECK-NEXT: s_mov_b32 s43, s14 -; CHECK-NEXT: s_mov_b32 s44, s13 -; CHECK-NEXT: s_mov_b32 s45, s12 +; CHECK-NEXT: s_mov_b32 s50, s15 +; CHECK-NEXT: s_mov_b32 s51, s14 +; CHECK-NEXT: s_mov_b32 s52, s13 +; CHECK-NEXT: s_mov_b32 s53, s12 ; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11] ; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] -; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7] +; CHECK-NEXT: s_mov_b64 s[46:47], s[6:7] ; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] @@ -586,28 +586,28 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) { ; CHECK-NEXT: s_add_u32 s4, s4, _Z4exp2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41] +; CHECK-NEXT: s_mov_b64 s[4:5], s[48:49] ; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] -; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39] +; CHECK-NEXT: s_mov_b64 s[6:7], s[46:47] ; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37] ; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35] -; CHECK-NEXT: s_mov_b32 s12, s45 -; CHECK-NEXT: s_mov_b32 s13, s44 -; CHECK-NEXT: s_mov_b32 s14, s43 -; CHECK-NEXT: s_mov_b32 s15, s42 +; CHECK-NEXT: s_mov_b32 s12, s53 +; CHECK-NEXT: s_mov_b32 s13, s52 +; CHECK-NEXT: s_mov_b32 s14, s51 +; CHECK-NEXT: s_mov_b32 s15, s50 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: v_readlane_b32 s45, v42, 13 -; CHECK-NEXT: v_readlane_b32 s44, v42, 12 -; CHECK-NEXT: v_readlane_b32 s43, v42, 11 -; CHECK-NEXT: v_readlane_b32 s42, v42, 10 -; CHECK-NEXT: v_readlane_b32 s41, v42, 9 -; CHECK-NEXT: v_readlane_b32 s40, v42, 8 -; CHECK-NEXT: v_readlane_b32 s39, v42, 7 -; CHECK-NEXT: v_readlane_b32 s38, v42, 6 +; CHECK-NEXT: v_readlane_b32 s53, v42, 13 +; CHECK-NEXT: v_readlane_b32 s52, v42, 12 +; CHECK-NEXT: v_readlane_b32 s51, v42, 11 +; CHECK-NEXT: v_readlane_b32 s50, v42, 10 +; CHECK-NEXT: v_readlane_b32 s49, v42, 9 +; CHECK-NEXT: v_readlane_b32 s48, v42, 8 +; CHECK-NEXT: v_readlane_b32 s47, v42, 7 +; CHECK-NEXT: v_readlane_b32 s46, v42, 6 ; CHECK-NEXT: v_readlane_b32 s37, v42, 5 ; CHECK-NEXT: v_readlane_b32 s36, v42, 4 ; CHECK-NEXT: v_readlane_b32 s35, v42, 3 @@ -694,34 +694,34 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) { ; CHECK-NEXT: v_writelane_b32 v43, s35, 3 ; CHECK-NEXT: v_writelane_b32 v43, s36, 4 ; CHECK-NEXT: v_writelane_b32 v43, s37, 5 -; CHECK-NEXT: v_writelane_b32 v43, s38, 6 -; CHECK-NEXT: v_writelane_b32 v43, s39, 7 +; CHECK-NEXT: v_writelane_b32 v43, s46, 6 +; CHECK-NEXT: v_writelane_b32 v43, s47, 7 ; CHECK-NEXT: s_addk_i32 s32, 0x800 -; CHECK-NEXT: v_writelane_b32 v43, s40, 8 -; CHECK-NEXT: v_writelane_b32 v43, s41, 9 -; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5] +; CHECK-NEXT: v_writelane_b32 v43, s48, 8 +; CHECK-NEXT: v_writelane_b32 v43, s49, 9 +; CHECK-NEXT: s_mov_b64 s[48:49], s[4:5] ; CHECK-NEXT: s_getpc_b64 s[4:5] ; CHECK-NEXT: s_add_u32 s4, s4, _Z4log2d@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s5, s5, _Z4log2d@gotpcrel32@hi+12 ; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0 -; CHECK-NEXT: v_writelane_b32 v43, s42, 10 +; CHECK-NEXT: v_writelane_b32 v43, s50, 10 ; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; CHECK-NEXT: v_writelane_b32 v43, s43, 11 +; CHECK-NEXT: v_writelane_b32 v43, s51, 11 ; CHECK-NEXT: v_mov_b32_e32 v41, v1 -; CHECK-NEXT: ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/127353 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits