llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-backend-aarch64 Author: Sander de Smalen (sdesmalen-arm) <details> <summary>Changes</summary> I had to previously revert #<!-- -->123632 due to failures on X86 and it took me a while before I had the time to get back to this. This PR tries to reland the original patch, with additional fixes. The PR is structured as follows: * The `git revert`ed patch (with tests updated) * A fix to only add the implicit-def when tracking subreg-liveness of the destination register. * A fix to only add the implicit-def when the destination register is not dead. * Updated tests after latest rebase. The PR depends on #<!-- -->131361, which was split off as a separate PR. --- Patch is 141.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/134408.diff 31 Files Affected: - (modified) llvm/lib/CodeGen/RegisterCoalescer.cpp (+70-16) - (modified) llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll (+2-2) - (modified) llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll (+5-5) - (added) llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll (+51) - (added) llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir (+30) - (modified) llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir (+55-3) - (modified) llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll (+3-4) - (modified) llvm/test/CodeGen/AMDGPU/fptosi.f16.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/fptoui.f16.ll (+5-5) - (modified) llvm/test/CodeGen/AMDGPU/llvm.maximum.f16.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/llvm.minimum.f16.ll (+11-12) - (modified) llvm/test/CodeGen/AMDGPU/load-constant-i16.ll (+6-10) - (modified) llvm/test/CodeGen/AMDGPU/select.f16.ll (+53-55) - (modified) llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll (+6-6) - (modified) llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll (+4) - (modified) 
llvm/test/CodeGen/PowerPC/build-vector-tests.ll (+48) - (modified) llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll (+6) - (modified) llvm/test/CodeGen/PowerPC/combine-fneg.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/fp-strict-round.ll (+6) - (modified) llvm/test/CodeGen/PowerPC/frem.ll (+3) - (modified) llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/ldexp.ll (+2) - (modified) llvm/test/CodeGen/PowerPC/llvm.modf.ll (+1) - (modified) llvm/test/CodeGen/PowerPC/vec_insert_elt.ll (+4) - (modified) llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll (+176) - (added) llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll (+185) - (added) llvm/test/CodeGen/X86/coalescer-subreg-to-reg-implicit-def-regression.mir (+62) - (added) llvm/test/CodeGen/X86/coalescing-subreg-to-reg-requires-subrange-update.mir (+47) - (added) llvm/test/CodeGen/X86/pr76416.ll (+79) - (modified) llvm/test/CodeGen/X86/subreg-fail.mir (+2-2) - (added) llvm/test/CodeGen/X86/subreg-to-reg-coalescing.mir (+372) ``````````diff diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index dbd354f2ca2c4..963f5620d8dba 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -306,7 +306,11 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate { /// number if it is not zero. If DstReg is a physical register and the /// existing subregister number of the def / use being updated is not zero, /// make sure to set it to the correct physical subregister. - void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx); + /// + /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG + /// SrcReg. This introduces an implicit-def of DstReg on coalesced users. 
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx, + bool IsSubregToReg); /// If the given machine operand reads only undefined lanes add an undef /// flag. @@ -1444,6 +1448,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, // CopyMI may have implicit operands, save them so that we can transfer them // over to the newly materialized instruction after CopyMI is removed. + LaneBitmask NewMIImplicitOpsMask; SmallVector<MachineOperand, 4> ImplicitOps; ImplicitOps.reserve(CopyMI->getNumOperands() - CopyMI->getDesc().getNumOperands()); @@ -1458,6 +1463,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) && "unexpected implicit virtual register def"); ImplicitOps.push_back(MO); + if (MO.isDef() && MO.getReg().isVirtual() && + MRI->shouldTrackSubRegLiveness(DstReg)) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } @@ -1500,14 +1508,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, } else { assert(MO.getReg() == NewMI.getOperand(0).getReg()); - // We're only expecting another def of the main output, so the range - // should get updated with the regular output range. - // - // FIXME: The range updating below probably needs updating to look at - // the super register if subranges are tracked. - assert(!MRI->shouldTrackSubRegLiveness(DstReg) && - "subrange update for implicit-def of super register may not be " - "properly handled"); + // If lanemasks need to be tracked, compile the lanemask of the NewMI + // implicit def operands to avoid subranges for the super-regs from + // being removed by code later on in this function. 
+ if (MRI->shouldTrackSubRegLiveness(MO.getReg())) + NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); } } } @@ -1531,7 +1536,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, MRI->setRegClass(DstReg, NewRC); // Update machine operands and add flags. - updateRegDefsUses(DstReg, DstReg, DstIdx); + updateRegDefsUses(DstReg, DstReg, DstIdx, false); NewMI.getOperand(0).setSubReg(NewIdx); // updateRegDefUses can add an "undef" flag to the definition, since // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make @@ -1607,7 +1612,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &SR : DstInt.subranges()) { - if ((SR.LaneMask & DstMask).none()) { + if ((SR.LaneMask & DstMask).none() && + (SR.LaneMask & NewMIImplicitOpsMask).none()) { LLVM_DEBUG(dbgs() << "Removing undefined SubRange " << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); @@ -1872,7 +1878,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, } void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, - unsigned SubIdx) { + unsigned SubIdx, bool IsSubregToReg) { bool DstIsPhys = DstReg.isPhysical(); LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); @@ -1892,6 +1898,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, } } + // If DstInt already has a subrange for the unused lanes, then we shouldn't + // create duplicate subranges when we update the interval for unused lanes. 
+ LaneBitmask DefinedLanes; + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + for (LiveInterval::SubRange &SR : DstInt->subranges()) + DefinedLanes |= SR.LaneMask; + } + SmallPtrSet<MachineInstr *, 8> Visited; for (MachineRegisterInfo::reg_instr_iterator I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); @@ -1915,6 +1929,9 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr()) Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); + bool FullDef = true; + bool DeadDef = false; + // Replace SrcReg with DstReg in all UseMI operands. for (unsigned Op : Ops) { MachineOperand &MO = UseMI->getOperand(Op); @@ -1922,8 +1939,11 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, // Adjust <undef> flags in case of sub-register joins. We don't want to // turn a full def into a read-modify-write sub-register def and vice // versa. - if (SubIdx && MO.isDef()) + if (SubIdx && MO.isDef()) { MO.setIsUndef(!Reads); + FullDef = false; + DeadDef = MO.isDead(); + } // A subreg use of a partially undef (super) register may be a complete // undef use now and then has to be marked that way. @@ -1956,6 +1976,35 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg, MO.substVirtReg(DstReg, SubIdx, *TRI); } + if (IsSubregToReg && !FullDef && !DeadDef) { + // If the coalesced instruction doesn't fully define the register, we need + // to preserve the original super register liveness for SUBREG_TO_REG. + // + // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes, + // but it introduces liveness for other subregisters. Downstream users may + // have been relying on those bits, so we need to ensure their liveness is + // captured with a def of other lanes. + // + // The implicit-def only needs adding if we track subregister liveness + // for this register, otherwise there is no point.
+ + if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) { + assert(DstInt->hasSubRanges() && + "SUBREG_TO_REG should have resulted in subrange"); + LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg()); + LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes; + if ((UnusedLanes).any()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt); + DefinedLanes |= UnusedLanes; + } + + MachineInstrBuilder MIB(*MF, UseMI); + MIB.addReg(DstReg, RegState::ImplicitDefine); + } + } + LLVM_DEBUG({ dbgs() << "\t\tupdated: "; if (!UseMI->isDebugInstr()) @@ -2157,6 +2206,8 @@ bool RegisterCoalescer::joinCopy( }); } + const bool IsSubregToReg = CopyMI->isSubregToReg(); + ShrinkMask = LaneBitmask::getNone(); ShrinkMainRange = false; @@ -2226,9 +2277,12 @@ bool RegisterCoalescer::joinCopy( // Rewrite all SrcReg operands to DstReg. // Also update DstReg operands to include DstIdx if it is set. - if (CP.getDstIdx()) - updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); - updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + if (CP.getDstIdx()) { + assert(!IsSubregToReg && "can this happen?"); + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false); + } + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(), + IsSubregToReg); // Shrink subregister ranges if necessary. 
if (ShrinkMask.any()) { diff --git a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll index 0f208f8ed9052..374def5d3cdb6 100644 --- a/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll +++ b/llvm/test/CodeGen/AArch64/implicit-def-subreg-to-reg-regression.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=false < %s | sed -e "/; kill: /d" | FileCheck %s +; RUN: llc -aarch64-min-jump-table-entries=4 -mtriple=arm64-apple-ios -enable-subreg-liveness=true < %s | FileCheck %s ; Check there's no assert in spilling from implicit-def operands on an ; IMPLICIT_DEF. @@ -92,7 +93,6 @@ define void @widget(i32 %arg, i32 %arg1, ptr %arg2, ptr %arg3, ptr %arg4, i32 %a ; CHECK-NEXT: ldr x8, [sp, #40] ; 8-byte Folded Reload ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: ; kill: def $w8 killed $w8 killed $x8 def $x8 ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl _fprintf ; CHECK-NEXT: brk #0x1 diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll index 2a77d4dd33fe5..4206c0bc26991 100644 --- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll +++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll @@ -27,11 +27,12 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: sub sp, sp, #208 ; CHECK-NEXT: mov w8, #10 ; =0xa ; CHECK-NEXT: mov w9, #9 ; =0x9 -; CHECK-NEXT: mov w10, #8 ; =0x8 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: stp x9, x8, [sp, #24] -; CHECK-NEXT: mov w8, #7 ; =0x7 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: mov w9, #6 ; =0x6 -; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: str x8, [sp, #16] +; 
CHECK-NEXT: mov w8, #7 ; =0x7 ; CHECK-NEXT: mov w1, #2 ; =0x2 ; CHECK-NEXT: mov w2, #3 ; =0x3 ; CHECK-NEXT: mov w3, #4 ; =0x4 @@ -46,8 +47,7 @@ define i32 @caller() nounwind ssp { ; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill -; CHECK-NEXT: stp x8, x10, [sp, #8] -; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: stp x9, x8, [sp] ; CHECK-NEXT: bl _callee ; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll new file mode 100644 index 0000000000000..942b408b5f39c --- /dev/null +++ b/llvm/test/CodeGen/AArch64/reduced-coalescer-issue.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -enable-subreg-liveness=false < %s | FileCheck %s +; RUN: llc -enable-subreg-liveness=true < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "aarch64-unknown-linux-gnu" + +define void @_ZN4llvm5APInt6divideEPKmjS2_jPmS3_(i32 %lhsWords, i32 %rhsWords) { +; CHECK-LABEL: _ZN4llvm5APInt6divideEPKmjS2_jPmS3_: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl w9, w0, #1 +; CHECK-NEXT: mov w10, #1 // =0x1 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: sub w9, w9, w1, lsl #1 +; CHECK-NEXT: bfi w0, w8, #1, #31 +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: bfi w10, w9, #2, #30 +; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: b.hs .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.then15 +; CHECK-NEXT: lsl x8, x0, #2 +; CHECK-NEXT: ldr xzr, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: b _Znam + %mul = shl i32 %rhsWords, 1 + %mul1 = shl i32 %lhsWords, 1 + %sub = sub i32 %mul1, %mul + 
%add7 = or i32 %mul1, 1 + %idxprom = zext i32 %add7 to i64 + %mul3 = shl i32 %sub, 1 + %add4 = or i32 %mul3, 1 + %1 = icmp ult i32 %add4, 0 + br i1 %1, label %if.then15, label %3 + +common.ret: ; preds = %3, %if.then15 + ret void + +if.then15: ; preds = %0 + %idxprom12 = zext i32 %add7 to i64 + %arrayidx13 = getelementptr [128 x i32], ptr null, i64 0, i64 %idxprom12 + %2 = load volatile ptr, ptr %arrayidx13, align 8 + br label %common.ret + +3: ; preds = %0 + %call = tail call ptr @_Znam(i64 %idxprom) + br label %common.ret +} + +declare ptr @_Znam(i64) diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir new file mode 100644 index 0000000000000..678d76527fa81 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/register-coalesce-implicit-def-subreg-to-reg.mir @@ -0,0 +1,30 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=false -o - %s | FileCheck %s --check-prefix=SRLT +# RUN: llc -mtriple=aarch64 -start-before=register-coalescer -stop-after=virtregrewriter -enable-subreg-liveness=true -o - %s | FileCheck %s --check-prefix=NOSRLT +--- +name: test +tracksRegLiveness: true +body: | + bb.0: + liveins: $x1 + ; SRLT-LABEL: name: test + ; SRLT: liveins: $x1 + ; SRLT-NEXT: {{ $}} + ; SRLT-NEXT: renamable $x0 = COPY $x1 + ; SRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def $x1 + ; SRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0 + ; + ; NOSRLT-LABEL: name: test + ; NOSRLT: liveins: $x1 + ; NOSRLT-NEXT: {{ $}} + ; NOSRLT-NEXT: renamable $x0 = COPY $x1 + ; NOSRLT-NEXT: renamable $w1 = ORRWrr $wzr, renamable $w0, implicit-def renamable $x1 + ; NOSRLT-NEXT: RET_ReallyLR implicit $x1, implicit $x0 + %190:gpr64 = COPY killed $x1 + %191:gpr32 = COPY %190.sub_32:gpr64 + %192:gpr32 = ORRWrr 
$wzr, killed %191:gpr32 + %193:gpr64all = SUBREG_TO_REG 0, killed %192:gpr32, %subreg.sub_32 + $x0 = COPY killed %190:gpr64 + $x1 = COPY killed %193:gpr64all + RET_ReallyLR implicit $x1, implicit $x0 +... diff --git a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir index 08fc47d9480ce..abf739fb9095e 100644 --- a/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir +++ b/llvm/test/CodeGen/AArch64/register-coalesce-update-subranges-remat.mir @@ -7,8 +7,8 @@ # CHECK-DBG: ********** JOINING INTERVALS *********** # CHECK-DBG: ********** INTERVALS ********** # CHECK-DBG: %0 [16r,32r:0) 0@16r weight:0.000000e+00 -# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 -# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [112e,112d:0) 0@112e L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 +# CHECK-DBG: %3 [48r,112r:0) 0@48r L0000000000000080 [48r,112r:0) 0@48r L0000000000000040 [48r,112r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000080 [80r,112e:1)[112e,112d:0) 0@112e 1@80r L0000000000000040 [80r,112e:1)[112e,112d:0) 0@112e 1@80r weight:0.000000e+00 # CHECK-DBG: %5 [32r,112r:1)[112r,112d:0) 0@112r 1@32r weight:0.000000e+00 --- name: test @@ -43,7 +43,7 @@ body: | # CHECK-DBG: %1 [32r,48B:2)[48B,320r:0)[320r,368B:1) 0@48B-phi 1@320r 2@32r # CHECK-DBG-SAME: weight:0.000000e+00 # CHECK-DBG: %3 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi -# CHECK-DBG-SAME: L0000000000000080 [288r,304B:0)[304B,320r:3) 0@288r 1@x 2@x 3@304B-phi +# CHECK-DBG-SAME: L0000000000000080 [240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@x 3@304B-phi # CHECK-DBG-SAME: L0000000000000040 [80r,160B:2)[240r,272B:1)[288r,304B:0)[304B,320r:3) 0@288r 1@240r 2@80r 3@304B-phi # CHECK-DBG-SAME: weight:0.000000e+00 --- 
@@ -127,3 +127,55 @@ body: | B %bb.1 ... +# Test that the interval `L0000000000000080 [112r,112d:1)` is not removed, +# when removing undefined subranges. +# +# CHECK-DBG: ********** REGISTER COALESCER ********** +# CHECK-DBG: ********** Function: reproducer3 +# CHECK-DBG: ********** JOINING INTERVALS *********** +# CHECK-DBG: ********** INTERVALS ********** +# CHECK-DBG: W0 [0B,32r:0)[320r,336r:1) 0@0B-phi 1@320r +# CHECK-DBG: W1 [0B,16r:0) 0@0B-phi +# CHECK-DBG: %0 [16r,64r:0) 0@16r weight:0.000000e+00 +# CHECK-DBG: %1 [32r,128r:0) 0@32r weight:0.000000e+00 +# CHECK-DBG: %2 [48r,64r:0) 0@48r weight:0.000000e+00 +# CHECK-DBG: %3 [64r,80r:0) 0@64r weight:0.000000e+00 +# CHECK-DBG: %4 [80r,176r:0) 0@80r weight:0.000000e+00 +# CHECK-DBG: %7 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: L0000000000000080 [112r,112d:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: L0000000000000040 [112r,128r:1)[128r,256r:0)[304B,320r:0) 0@128r 1@112r +# CHECK-DBG-SAME: weight:0.000000e+00 +# CHECK-DBG: %8 [96r,176r:1)[176r,192r:0) 0@176r 1@96r weight:0.000000e+00 +# CHECK-DBG: %9 [256r,272r:0) 0@256r weight:0.000000e+00 +--- +name: reproducer3 +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0, $w1 + + %0:gpr32 = COPY killed $w1 + %1:gpr32 = COPY killed $w0 + %3:gpr32 = UBFMWri %1, 31, 30 + %4:gpr32 = SUBWrs killed %3, killed %0, 1 + %5:gpr32 = UBFMWri killed %4, 1, 31 + %6:gpr32 = MOVi32imm 1 + %7:gpr32 = COPY %6 + %7:gpr32 = BFMWri %7, killed %1, 31, 30 + %8:gpr64 = SUBREG_TO_REG 0, killed %7, %subreg.sub_32 + %9:gpr32common = COPY killed %6 + %9:gpr32common = BFMWri %9, killed %5, 30, 29 + dead $wzr = SUBSWri killed %9, 0, 0, implicit-def $nzcv + Bcc 2, %bb.2, implicit killed $nzcv + B %bb.1 + + bb.1: + %10:gpr64common = UBFMXri killed %8, 62, 61 + dead $xzr = LDRXui killed %10, 0 + RET_ReallyLR + + bb.2: + $x0 = COPY killed %8 + RET_ReallyLR implicit killed $x0 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index c739ba2183ef9..86ef27a1522f5 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -329,11 +329,10 @@ define <2 x half> @chain_hi_to_lo_global() { ; GFX11-TRUE16: ; %bb.0: ; %bb ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0 ; GFX11-TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 -; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[1:2], off +; GFX11-TRUE16-NEXT: global_load_d16_hi_b16 v0, v[2:3], off ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll index f84e14ea62273..d5f983c2f5648 100644 --- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll @@ -328,13 +328,13 @@ define amdgpu_kernel void @fptosi_v2f16_to_v2i16( ; GFX11-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0 ; GFX11-TRUE16-NEXT: s_mov_b32 s5, s1 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: v_l... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/134408 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits