https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/145911
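For readers skimming the patch: the central combine extended here removes a G_AMDGPU_READANYLANE whose result is only copied back to a vgpr, now also looking through G_BITCAST and through unmerge/readanylane/merge chains. A minimal sketch of the base pattern, in the same MIR-style notation the patch's own comments use (register names are illustrative, not taken from the tests):

    %sgpr:sgpr(s32) = G_AMDGPU_READANYLANE %vgpr_val:vgpr(s32)
    %dst:vgpr(s32) = COPY %sgpr(s32)
    ->
    ; uses of %dst are rewritten to use %vgpr_val directly and the COPY and
    ; G_AMDGPU_READANYLANE are erased; when the destination is a physical
    ; register such as $vgpr0, a COPY from %vgpr_val is built instead
    ; (see tryEliminateReadAnyLane / replaceRegWithOrBuildCopy below)
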
>From 046418f7ccd46a2b0c2ea3c9ab15e659de709b27 Mon Sep 17 00:00:00 2001 From: Petar Avramovic <petar.avramo...@amd.com> Date: Thu, 5 Jun 2025 12:17:13 +0200 Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize --- .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 317 ++++++++++++------ .../AMDGPU/GlobalISel/readanylane-combines.ll | 25 +- .../GlobalISel/readanylane-combines.mir | 78 ++--- .../GlobalISel/regbankselect-and-s1.mir | 6 + .../GlobalISel/regbankselect-anyext.mir | 4 + .../AMDGPU/GlobalISel/regbankselect-trunc.mir | 2 + 6 files changed, 246 insertions(+), 186 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index ba661348ca5b5..e1879598f098a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -23,6 +23,8 @@ #include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineUniformityAnalysis.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -115,126 +117,233 @@ class AMDGPURegBankLegalizeCombiner { VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)), VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}; - bool isLaneMask(Register Reg) { - const RegisterBank *RB = MRI.getRegBankOrNull(Reg); - if (RB && RB->getID() == AMDGPU::VCCRegBankID) - return true; + bool isLaneMask(Register Reg); + std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode); + std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src); + Register getReadAnyLaneSrc(Register Src); + void replaceRegWithOrBuildCopy(Register Dst, Register Src); - const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); - return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1); - } + bool tryEliminateReadAnyLane(MachineInstr &Copy); + void tryCombineCopy(MachineInstr &MI); + void tryCombineS1AnyExt(MachineInstr &MI); +}; - void cleanUpAfterCombine(MachineInstr &MI, MachineInstr *Optional0) { - MI.eraseFromParent(); - if (Optional0 && isTriviallyDead(*Optional0, MRI)) - Optional0->eraseFromParent(); - } +bool AMDGPURegBankLegalizeCombiner::isLaneMask(Register Reg) { + const RegisterBank *RB = MRI.getRegBankOrNull(Reg); + if (RB && RB->getID() == AMDGPU::VCCRegBankID) + return true; - std::pair<MachineInstr *, Register> tryMatch(Register Src, unsigned Opcode) { - MachineInstr *MatchMI = MRI.getVRegDef(Src); - if (MatchMI->getOpcode() != Opcode) - return {nullptr, Register()}; - return {MatchMI, MatchMI->getOperand(1).getReg()}; - } + const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg); + return RC && TRI.isSGPRClass(RC) && MRI.getType(Reg) == LLT::scalar(1); +} - void tryCombineCopy(MachineInstr &MI) { - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - // Skip copies of physical registers. - if (!Dst.isVirtual() || !Src.isVirtual()) - return; - - // This is a cross bank copy, sgpr S1 to lane mask. 
- // - // %Src:sgpr(s1) = G_TRUNC %TruncS32Src:sgpr(s32) - // %Dst:lane-mask(s1) = COPY %Src:sgpr(s1) - // -> - // %Dst:lane-mask(s1) = G_AMDGPU_COPY_VCC_SCC %TruncS32Src:sgpr(s32) - if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) { - auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC); - assert(Trunc && MRI.getType(TruncS32Src) == S32 && - "sgpr S1 must be result of G_TRUNC of sgpr S32"); - - B.setInstr(MI); - // Ensure that truncated bits in BoolSrc are 0. - auto One = B.buildConstant({SgprRB, S32}, 1); - auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One); - B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc}); - cleanUpAfterCombine(MI, Trunc); - return; - } +std::pair<MachineInstr *, Register> +AMDGPURegBankLegalizeCombiner::tryMatch(Register Src, unsigned Opcode) { + MachineInstr *MatchMI = MRI.getVRegDef(Src); + if (MatchMI->getOpcode() != Opcode) + return {nullptr, Register()}; + return {MatchMI, MatchMI->getOperand(1).getReg()}; +} + +std::pair<GUnmerge *, int> +AMDGPURegBankLegalizeCombiner::tryMatchRALFromUnmerge(Register Src) { + MachineInstr *ReadAnyLane = MRI.getVRegDef(Src); + if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE) + return {nullptr, -1}; + + Register RALSrc = ReadAnyLane->getOperand(1).getReg(); + if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI)) + return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)}; - // Src = G_AMDGPU_READANYLANE RALSrc - // Dst = COPY Src - // -> - // Dst = RALSrc - if (MRI.getRegBankOrNull(Dst) == VgprRB && - MRI.getRegBankOrNull(Src) == SgprRB) { - auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE); - if (!RAL) - return; - - assert(MRI.getRegBank(RALSrc) == VgprRB); - MRI.replaceRegWith(Dst, RALSrc); - cleanUpAfterCombine(MI, RAL); - return; + return {nullptr, -1}; +} + +Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) { + // Src = G_AMDGPU_READANYLANE RALSrc + auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE); + if (RAL) + return RALSrc; + + // LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc + // LoSgpr = G_AMDGPU_READANYLANE LoVgpr + // HiSgpr = G_AMDGPU_READANYLANE HiVgpr + // Src G_MERGE_VALUES LoSgpr, HiSgpr + auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI); + if (Merge) { + unsigned NumElts = Merge->getNumSources(); + auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0)); + if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0) + return {}; + + // Check if all elements are from same unmerge and there is no shuffling. + for (unsigned i = 1; i < NumElts; ++i) { + auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i)); + if (UnmergeI != Unmerge || (unsigned)IdxI != i) + return {}; } + return Unmerge->getSourceReg(); } - void tryCombineS1AnyExt(MachineInstr &MI) { - // %Src:sgpr(S1) = G_TRUNC %TruncSrc - // %Dst = G_ANYEXT %Src:sgpr(S1) - // -> - // %Dst = G_... %TruncSrc - Register Dst = MI.getOperand(0).getReg(); - Register Src = MI.getOperand(1).getReg(); - if (MRI.getType(Src) != S1) - return; - - auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC); - if (!Trunc) - return; - - LLT DstTy = MRI.getType(Dst); - LLT TruncSrcTy = MRI.getType(TruncSrc); - - if (DstTy == TruncSrcTy) { - MRI.replaceRegWith(Dst, TruncSrc); - cleanUpAfterCombine(MI, Trunc); - return; - } + // SrcRegIdx = G_AMDGPU_READANYLANE RALElSrc + // SourceReg G_MERGE_VALUES ..., SrcRegIdx, ... + // ..., Src, ... 
= G_UNMERGE_VALUES SourceReg + auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI); + if (!UnMerge) + return {}; + + int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr); + Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI); + if (!Merge || UnMerge->getNumDefs() != Merge->getNumSources()) + return {}; + + Register SrcRegIdx = Merge->getSourceReg(Idx); + if (MRI.getType(Src) != MRI.getType(SrcRegIdx)) + return {}; + + auto [RALEl, RALElSrc] = tryMatch(SrcRegIdx, AMDGPU::G_AMDGPU_READANYLANE); + if (RALEl) + return RALElSrc; + + return {}; +} + +void AMDGPURegBankLegalizeCombiner::replaceRegWithOrBuildCopy(Register Dst, + Register Src) { + if (Dst.isVirtual()) + MRI.replaceRegWith(Dst, Src); + else + B.buildCopy(Dst, Src); +} + +bool AMDGPURegBankLegalizeCombiner::tryEliminateReadAnyLane( + MachineInstr &Copy) { + Register Dst = Copy.getOperand(0).getReg(); + Register Src = Copy.getOperand(1).getReg(); + + // Skip non-vgpr Dst + if (Dst.isVirtual() ? (MRI.getRegBankOrNull(Dst) != VgprRB) + : !TRI.isVGPR(MRI, Dst)) + return false; + + // Skip physical source registers and source registers with register class + if (!Src.isVirtual() || MRI.getRegClassOrNull(Src)) + return false; + + Register RALDst = Src; + MachineInstr &SrcMI = *MRI.getVRegDef(Src); + if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) + RALDst = SrcMI.getOperand(1).getReg(); + + Register RALSrc = getReadAnyLaneSrc(RALDst); + if (!RALSrc) + return false; + + B.setInstr(Copy); + if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) { + // Src = READANYLANE RALSrc Src = READANYLANE RALSrc + // Dst = Copy Src $Dst = Copy Src + // -> -> + // Dst = RALSrc $Dst = Copy RALSrc + replaceRegWithOrBuildCopy(Dst, RALSrc); + } else { + // RALDst = READANYLANE RALSrc RALDst = READANYLANE RALSrc + // Src = G_BITCAST RALDst Src = G_BITCAST RALDst + // Dst = Copy Src Dst = Copy Src + // -> -> + // NewVgpr = G_BITCAST RALDst NewVgpr = G_BITCAST RALDst + // Dst = NewVgpr $Dst = Copy NewVgpr + auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc); + replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0)); + } + + eraseInstr(Copy, MRI); + return true; +} + +void AMDGPURegBankLegalizeCombiner::tryCombineCopy(MachineInstr &MI) { + if (tryEliminateReadAnyLane(MI)) + return; + + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + // Skip copies of physical registers. + if (!Dst.isVirtual() || !Src.isVirtual()) + return; + + // This is a cross bank copy, sgpr S1 to lane mask. + // + // %Src:sgpr(s1) = G_TRUNC %TruncS32Src:sgpr(s32) + // %Dst:lane-mask(s1) = COPY %Src:sgpr(s1) + // -> + // %BoolSrc:sgpr(s32) = G_AND %TruncS32Src:sgpr(s32), 1 + // %Dst:lane-mask(s1) = G_AMDGPU_COPY_VCC_SCC %BoolSrc:sgpr(s32) + if (isLaneMask(Dst) && MRI.getRegBankOrNull(Src) == SgprRB) { + auto [Trunc, TruncS32Src] = tryMatch(Src, AMDGPU::G_TRUNC); + assert(Trunc && MRI.getType(TruncS32Src) == S32 && + "sgpr S1 must be result of G_TRUNC of sgpr S32"); B.setInstr(MI); + // Ensure that truncated bits in BoolSrc are 0. 
+ auto One = B.buildConstant({SgprRB, S32}, 1); + auto BoolSrc = B.buildAnd({SgprRB, S32}, TruncS32Src, One); + B.buildInstr(AMDGPU::G_AMDGPU_COPY_VCC_SCC, {Dst}, {BoolSrc}); + eraseInstr(MI, MRI); + } +} - if (DstTy == S32 && TruncSrcTy == S64) { - auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc); - MRI.replaceRegWith(Dst, Unmerge.getReg(0)); - cleanUpAfterCombine(MI, Trunc); - return; - } +void AMDGPURegBankLegalizeCombiner::tryCombineS1AnyExt(MachineInstr &MI) { + // %Src:sgpr(S1) = G_TRUNC %TruncSrc + // %Dst = G_ANYEXT %Src:sgpr(S1) + // -> + // %Dst = G_... %TruncSrc + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + if (MRI.getType(Src) != S1) + return; + + auto [Trunc, TruncSrc] = tryMatch(Src, AMDGPU::G_TRUNC); + if (!Trunc) + return; + + LLT DstTy = MRI.getType(Dst); + LLT TruncSrcTy = MRI.getType(TruncSrc); + + if (DstTy == TruncSrcTy) { + MRI.replaceRegWith(Dst, TruncSrc); + eraseInstr(MI, MRI); + return; + } - if (DstTy == S64 && TruncSrcTy == S32) { - B.buildMergeLikeInstr(MI.getOperand(0).getReg(), - {TruncSrc, B.buildUndef({SgprRB, S32})}); - cleanUpAfterCombine(MI, Trunc); - return; - } + B.setInstr(MI); - if (DstTy == S32 && TruncSrcTy == S16) { - B.buildAnyExt(Dst, TruncSrc); - cleanUpAfterCombine(MI, Trunc); - return; - } + if (DstTy == S32 && TruncSrcTy == S64) { + auto Unmerge = B.buildUnmerge({SgprRB, S32}, TruncSrc); + MRI.replaceRegWith(Dst, Unmerge.getReg(0)); + eraseInstr(MI, MRI); + return; + } - if (DstTy == S16 && TruncSrcTy == S32) { - B.buildTrunc(Dst, TruncSrc); - cleanUpAfterCombine(MI, Trunc); - return; - } + if (DstTy == S64 && TruncSrcTy == S32) { + B.buildMergeLikeInstr(MI.getOperand(0).getReg(), + {TruncSrc, B.buildUndef({SgprRB, S32})}); + eraseInstr(MI, MRI); + return; + } - llvm_unreachable("missing anyext + trunc combine"); + if (DstTy == S32 && TruncSrcTy == S16) { + B.buildAnyExt(Dst, TruncSrc); + eraseInstr(MI, MRI); + return; } -}; + + if (DstTy == S16 && TruncSrcTy == S32) { + B.buildTrunc(Dst, TruncSrc); + eraseInstr(MI, MRI); + return; + } + + llvm_unreachable("missing anyext + trunc combine"); +} // Search through MRI for virtual registers with sgpr register bank and S1 LLT. 
[[maybe_unused]] static Register getAnySgprS1(const MachineRegisterInfo &MRI) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll index 51b473f2d8994..5f72d3e2ab161 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll @@ -20,8 +20,6 @@ define amdgpu_ps float @readanylane_to_physical_vgpr(ptr addrspace(1) inreg %ptr ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 ; CHECK-NEXT: ; return to shader part epilog %load = load volatile float, ptr addrspace(1) %ptr ret float %load @@ -33,8 +31,6 @@ define amdgpu_ps void @readanylane_to_bitcast_to_virtual_vgpr(ptr addrspace(1) i ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: global_load_dword v1, v0, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v1 -; CHECK-NEXT: v_mov_b32_e32 v1, s0 ; CHECK-NEXT: global_store_dword v0, v1, s[2:3] ; CHECK-NEXT: s_endpgm %load = load volatile <2 x i16>, ptr addrspace(1) %ptr0 @@ -49,8 +45,6 @@ define amdgpu_ps float @readanylane_to_bitcast_to_physical_vgpr(ptr addrspace(1) ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: global_load_dword v0, v0, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 ; CHECK-NEXT: ; return to shader part epilog %load = load volatile <2 x i16>, ptr addrspace(1) %ptr0 %bitcast = bitcast <2 x i16> %load to float @@ -63,10 +57,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_to_virtual_vgpr(ptr addrspace(1 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_readfirstlane_b32 s1, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: v_mov_b32_e32 v1, s1 ; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; CHECK-NEXT: s_endpgm %load = load volatile i64, ptr addrspace(1) %ptr0 @@ -85,10 +75,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_bitcast_to_virtual_vgpr(ptr add ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_readfirstlane_b32 s1, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: v_mov_b32_e32 v1, s1 ; CHECK-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] ; CHECK-NEXT: s_endpgm %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0 @@ -109,9 +95,7 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_to_virtual_vgpr(ptr add ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 -; CHECK-NEXT: global_store_dword v2, v0, s[2:3] +; CHECK-NEXT: global_store_dword v2, v1, s[2:3] ; CHECK-NEXT: s_endpgm %load = load volatile <2 x i32>, ptr addrspace(1) %ptr0 %extracted = extractelement <2 x i32> %load, i32 1 @@ -125,8 +109,7 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_to_physical_vgpr(ptr a ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v1 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: 
v_mov_b32_e32 v0, v1 ; CHECK-NEXT: ; return to shader part epilog %load = load volatile <2 x float>, ptr addrspace(1) %ptr0 %extracted = extractelement <2 x float> %load, i32 1 @@ -139,8 +122,6 @@ define amdgpu_ps void @unmerge_readanylane_merge_extract_bitcast_to_virtual_vgpr ; CHECK-NEXT: v_mov_b32_e32 v2, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 ; CHECK-NEXT: global_store_dword v2, v0, s[2:3] ; CHECK-NEXT: s_endpgm %load = load volatile <4 x i16>, ptr addrspace(1) %ptr0 @@ -156,8 +137,6 @@ define amdgpu_ps float @unmerge_readanylane_merge_extract_bitcast_to_physical_vg ; CHECK-NEXT: v_mov_b32_e32 v0, 0 ; CHECK-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1] glc dlc ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: v_readfirstlane_b32 s0, v0 -; CHECK-NEXT: v_mov_b32_e32 v0, s0 ; CHECK-NEXT: ; return to shader part epilog %load = load volatile <4 x i16>, ptr addrspace(1) %ptr0 %extracted = shufflevector <4 x i16> %load, <4 x i16> %load, <2 x i32> <i32 0, i32 1> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir index 673cf1696e5e0..dd7a3ebeab471 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir @@ -46,8 +46,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s32), addrspace 1) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[LOAD]] - ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_READANYLANE]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -74,11 +73,9 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[LOAD]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[AMDGPU_READANYLANE]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) - ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY5]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[BITCAST]](s32), [[COPY4]](p1) :: (store (s32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -106,8 +103,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s16>), addrspace 1) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[LOAD]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[AMDGPU_READANYLANE]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST 
[[LOAD]](<2 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(s32) = COPY $sgpr0 @@ -136,13 +132,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s64) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[MV2]](s64) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) - ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[COPY5]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[LOAD]](s64), [[COPY4]](p1) :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -169,11 +160,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s64) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (s64), addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(s64) = G_MERGE_VALUES [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[LOAD]](s64) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -200,14 +187,9 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s64) = COPY [[BITCAST]](s64) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) - ; CHECK-NEXT: G_STORE [[COPY4]](s64), [[COPY5]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s64) = G_BITCAST [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[BITCAST]](s64), [[COPY4]](p1) :: (store (s64), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -235,11 +217,7 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s64) = G_BITCAST [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s64) = G_BITCAST [[LOAD]](<2 x s32>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(s32) = COPY $sgpr0 @@ -269,13 +247,8 @@ body: | ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) - ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY5]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[UV1]](s32), [[COPY4]](p1) :: (store (s32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -304,11 +277,7 @@ body: | ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<2 x s32>), addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(s32) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s32>) = G_BUILD_VECTOR [[AMDGPU_READANYLANE]](s32), [[AMDGPU_READANYLANE1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -337,14 +306,9 @@ body: | ; CHECK-NEXT: [[MV1:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<4 x s16>), addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE 
[[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x s16>) = G_CONCAT_VECTORS [[AMDGPU_READANYLANE]](<2 x s16>), [[AMDGPU_READANYLANE1]](<2 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[BITCAST]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) - ; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY5]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(p1) = COPY [[MV1]](p1) + ; CHECK-NEXT: G_STORE [[BITCAST]](s32), [[COPY4]](p1) :: (store (s32), addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 @@ -374,11 +338,7 @@ body: | ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s16>) = G_LOAD [[MV]](p1) :: (volatile "amdgpu-noclobber" load (<4 x s16>), addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s16>), [[UV1:%[0-9]+]]:vgpr(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: [[AMDGPU_READANYLANE:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV]] - ; CHECK-NEXT: [[AMDGPU_READANYLANE1:%[0-9]+]]:sgpr(<2 x s16>) = G_AMDGPU_READANYLANE [[UV1]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<4 x s16>) = G_CONCAT_VECTORS [[AMDGPU_READANYLANE]](<2 x s16>), [[AMDGPU_READANYLANE1]](<2 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:sgpr(<2 x s16>), [[UV3:%[0-9]+]]:sgpr(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(s32) = COPY $sgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir index d446f6b1c5071..71adf63da145f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-and-s1.mir @@ -14,12 +14,14 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_TRUNC %0 %3:_(s1) = G_TRUNC %1 %4:_(s1) = G_AND %2, %3 %5:_(s32) = G_ANYEXT %4 + S_ENDPGM 0, implicit %5 ... --- @@ -38,6 +40,7 @@ body: | ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_CONSTANT i32 0 @@ -45,6 +48,7 @@ body: | %4:_(s1) = G_ICMP intpred(eq), %1, %2 %5:_(s1) = G_AND %3, %4 %6:_(s32) = G_ANYEXT %5 + S_ENDPGM 0, implicit %6 ... 
 ---
@@ -309,6 +313,7 @@ body: |
 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
 ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY]], [[COPY1]]
 ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[AND]]
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[AND1]](s32)
 %0:_(s32) = COPY $sgpr0
 %1:_(s32) = COPY $sgpr1
 %2:_(s32) = COPY $sgpr0
@@ -318,4 +323,5 @@
 %6:_(s1) = G_AND %3, %4
 %7:_(s1) = G_AND %5, %6
 %8:_(s32) = G_ANYEXT %7
+ S_ENDPGM 0, implicit %8
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
index 9260b06e53bea..d954ba050bd8f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-anyext.mir
@@ -68,10 +68,12 @@ body: |
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY1]]
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[ICMP]](s32)
 %0:_(s32) = COPY $sgpr0
 %1:_(s32) = COPY $sgpr1
 %2:_(s1) = G_ICMP intpred(eq), %0, %1
 %3:_(s32) = G_ANYEXT %2
+ S_ENDPGM 0, implicit %3
 ...
 ---
@@ -191,9 +193,11 @@ body: |
 ; CHECK: liveins: $sgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32)
 %0:_(s32) = COPY $sgpr0
 %1:_(s1) = G_TRUNC %0
 %2:_(s32) = G_ANYEXT %1
+ S_ENDPGM 0, implicit %2
 ...
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir
index 0069692522650..3744bc9f0dc19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-trunc.mir
@@ -83,9 +83,11 @@ body: |
 ; CHECK: liveins: $sgpr0
 ; CHECK-NEXT: {{ $}}
 ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY]](s32)
 %0:_(s32) = COPY $sgpr0
 %1:_(s1) = G_TRUNC %0
 %2:_(s32) = G_ANYEXT %1
+ S_ENDPGM 0, implicit %2
 ...
 ---

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits