llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Petar Avramovic (petar-avramovic) <details> <summary>Changes</summary> --- Patch is 28.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/145885.diff 2 Files Affected: - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll (+166) - (added) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir (+393) ``````````diff diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll new file mode 100644 index 0000000000000..51b473f2d8994 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll @@ -0,0 +1,166 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -new-reg-bank-select < %s | FileCheck %s + +define amdgpu_ps void @readanylane_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: readanylane_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile float, ptr addrspace(1) %ptr0 + store float %load, ptr addrspace(1) %ptr1 + ret void +} + +define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr) { +; CHECK-LABEL: readanylane_to_physical_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ; return to shader part epilog + %load = load volatile float, ptr addrspace(1) %ptr + ret float %load +} + +define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: readanylane_to_bitcast_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v1 +; CHECK-NEXT: v_mov_b32_e32 v1, s0 +; CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile <2 x i16>, ptr addrspace(1) %ptr0 + %bitcast = bitcast <2 x i16> %load to i32 + store i32 %bitcast, ptr addrspace(1) %ptr1 + ret void +} + +define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: readanylane_to_bitcast_to_physical_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ; return to shader part epilog + %load = load volatile <2 x i16>, ptr addrspace(1) %ptr0 + %bitcast = bitcast <2 x i16> %load to float + ret float %bitcast +} + +define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_readfirstlane_b32 s1, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile i64, ptr addrspace(1) %ptr0 + store i64 %load, ptr addrspace(1) %ptr1 + ret void +} + +;define amdgpu_ps double @unmerge_readanylane_merge_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; %load = load volatile double, ptr addrspace(1) %ptr0 +; ret double %load +;} + +define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_bitcast_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_readfirstlane_b32 s1, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0 + %bitcast = bitcast <2 x i32> %load to double + store double %bitcast, ptr addrspace(1) %ptr1 + ret void +} + +;define amdgpu_ps double @unmerge_readanylane_merge_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0 +; %bitcast = bitcast <2 x i32> %load to double +; ret double %bitcast +;} + +define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_extract_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: global_store_dword v2, v0, s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0 + %extracted = extractelement <2 x i32> %load, i32 1 + store i32 %extracted, ptr addrspace(1) %ptr1 + ret void +} + +define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_extract_to_physical_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v1 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ; return to shader part epilog + %load = load volatile <2 x float>, ptr addrspace(1) %ptr0 + %extracted = extractelement <2 x float> %load, i32 1 + ret float %extracted +} + +define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: global_store_dword v2, v0, s[2:3] +; CHECK-NEXT: s_endpgm + %load = load volatile <4 x i16>, ptr addrspace(1) %ptr0 + %extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1> + %bitcast = bitcast <2 x i16> %extracted to float + store float %bitcast, ptr addrspace(1) %ptr1 + ret void +} + +define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr(ptr addrspace(1) inreg %ptr0, ptr addrspace(1) inreg %ptr1) { +; CHECK-LABEL: unmerge_readanylane_merge_extract_bitcast_to_physical_vgpr: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: ; return to shader part epilog + %load = load volatile <4 x i16>, ptr addrspace(1) %ptr0 + %extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1> + %bitcast = bitcast <2 x i16> %extracted to float + ret float %bitcast +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir new file mode 100644 index 0000000000000..673cf1696e5e0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir @@ -0,0 +1,393 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: readanylane_to_virtual_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: readanylane_to_virtual_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s32), addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[COPY4]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:sgpr(s32) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (s32), addrspace 1) + G_STORE %6(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: readanylane_to_physical_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: readanylane_to_physical_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s32), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_READANYLANE]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s32) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (s32), addrspace 1) + $vgpr0 = COPY %3(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 +... + +--- +name: readanylane_to_bitcast_to_virtual_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: readanylane_to_bitcast_to_virtual_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[LOAD]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[AMDGPU_READANYLANE]](<2 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY5]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:sgpr(<2 x s16>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) + %7:sgpr(s32) = G_BITCAST %6(<2 x s16>) + G_STORE %7(s32), %5(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: readanylane_to_bitcast_to_physical_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: readanylane_to_bitcast_to_physical_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[LOAD]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[AMDGPU_READANYLANE]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(<2 x s16>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) + %4:sgpr(s32) = G_BITCAST %3(<2 x s16>) + $vgpr0 = COPY %4(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0 +... + +--- +name: unmerge_readanylane_merge_to_virtual_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: unmerge_readanylane_merge_to_virtual_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s64) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV2]](s64) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[COPY5]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:sgpr(s64) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) + G_STORE %6(s64), %5(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: unmerge_readanylane_merge_to_physical_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: unmerge_readanylane_merge_to_physical_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s64) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s64) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) + $vgpr0_vgpr1 = COPY %3(s64) + SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 +... + +--- +name: unmerge_readanylane_merge_bitcast_to_virtual_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: unmerge_readanylane_merge_bitcast_to_virtual_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[BITCAST]](s64) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[COPY5]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:sgpr(s32) = COPY $sgpr2 + %4:sgpr(s32) = COPY $sgpr3 + %5:sgpr(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:sgpr(<2 x s32>) = G_LOAD %2(p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) + %7:sgpr(s64) = G_BITCAST %6(<2 x s32>) + G_STORE %7(s64), %5(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: unmerge_readanylane_merge_bitcast_to_physical_vgpr +legalized: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + + ; CHECK-LABEL: name: unmerge_readanylane_merge_bitcast_to_physical_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] + ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = COPY $sgpr1 + %2:sgpr(p1) = G_MERGE_VALUES %0(s32), %1... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/145885 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits