https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/123936
This was allocating tiny helper classes for every instruction visited. We can just dispatch over the cases in the visitor function instead. >From 86973bb3b4aa7bfccccb0d4c3a3bf03e18dbf8fe Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 22 Jan 2025 19:15:29 +0700 Subject: [PATCH] PeepholeOpt: Stop allocating tiny helper classes This was allocating tiny helper classes for every instruction visited. We can just dispatch over the cases in the visitor function instead. --- llvm/lib/CodeGen/PeepholeOptimizer.cpp | 1359 ++++++++++++------------ 1 file changed, 675 insertions(+), 684 deletions(-) diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp index 5fc8f419e80a5d..aec4aaa81761c7 100644 --- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -149,288 +149,584 @@ namespace { class ValueTrackerResult; class RecurrenceInstr; -class PeepholeOptimizer : private MachineFunction::Delegate { - const TargetInstrInfo *TII = nullptr; - const TargetRegisterInfo *TRI = nullptr; - MachineRegisterInfo *MRI = nullptr; - MachineDominatorTree *DT = nullptr; // Machine dominator tree - MachineLoopInfo *MLI = nullptr; - +/// Interface to query instructions amenable to copy rewriting. +class Rewriter { +protected: + MachineInstr &CopyLike; + unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. public: - PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI) - : DT(DT), MLI(MLI) {} - - bool run(MachineFunction &MF); - /// Track Def -> Use info used for rewriting copies. - using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>; - - /// Sequence of instructions that formulate recurrence cycle. - using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>; - -private: - bool optimizeCmpInstr(MachineInstr &MI); - bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, - SmallPtrSetImpl<MachineInstr *> &LocalMIs); - bool optimizeSelect(MachineInstr &MI, - SmallPtrSetImpl<MachineInstr *> &LocalMIs); - bool optimizeCondBranch(MachineInstr &MI); - bool optimizeCoalescableCopy(MachineInstr &MI); - bool optimizeUncoalescableCopy(MachineInstr &MI, - SmallPtrSetImpl<MachineInstr *> &LocalMIs); - bool optimizeRecurrence(MachineInstr &PHI); - bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); - bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, - DenseMap<Register, MachineInstr *> &ImmDefMIs); - bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, - DenseMap<Register, MachineInstr *> &ImmDefMIs, - bool &Deleted); - - /// Finds recurrence cycles, but only ones that formulated around - /// a def operand and a use operand that are tied. If there is a use - /// operand commutable with the tied use operand, find recurrence cycle - /// along that operand as well. - bool findTargetRecurrence(Register Reg, - const SmallSet<Register, 2> &TargetReg, - RecurrenceCycle &RC); - - /// If copy instruction \p MI is a virtual register copy or a copy of a - /// constant physical register to a virtual register, track it in the - /// set CopySrcMIs. If this virtual register was previously seen as a - /// copy, replace the uses of this copy with the previously seen copy's - /// destination register. - bool foldRedundantCopy(MachineInstr &MI); - - /// Is the register \p Reg a non-allocatable physical register? - bool isNAPhysCopy(Register Reg); + Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} + virtual ~Rewriter() = default; - /// If copy instruction \p MI is a non-allocatable virtual<->physical - /// register copy, track it in the \p NAPhysToVirtMIs map. If this - /// non-allocatable physical register was previously copied to a virtual - /// registered and hasn't been clobbered, the virt->phys copy can be - /// deleted. - bool - foldRedundantNAPhysCopy(MachineInstr &MI, - DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs); + /// Get the next rewritable source (SrcReg, SrcSubReg) and + /// the related value that it affects (DstReg, DstSubReg). + /// A source is considered rewritable if its register class and the + /// register class of the related DstReg may not be register + /// coalescer friendly. In other words, given a copy-like instruction + /// not all the arguments may be returned at rewritable source, since + /// some arguments are none to be register coalescer friendly. + /// + /// Each call of this method moves the current source to the next + /// rewritable source. + /// For instance, let CopyLike be the instruction to rewrite. + /// CopyLike has one definition and one source: + /// dst.dstSubIdx = CopyLike src.srcSubIdx. + /// + /// The first call will give the first rewritable source, i.e., + /// the only source this instruction has: + /// (SrcReg, SrcSubReg) = (src, srcSubIdx). + /// This source defines the whole definition, i.e., + /// (DstReg, DstSubReg) = (dst, dstSubIdx). + /// + /// The second and subsequent calls will return false, as there is only one + /// rewritable source. + /// + /// \return True if a rewritable source has been found, false otherwise. + /// The output arguments are valid if and only if true is returned. + virtual bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) = 0; - bool isLoadFoldable(MachineInstr &MI, - SmallSet<Register, 16> &FoldAsLoadDefCandidates); + /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. + /// \return True if the rewriting was possible, false otherwise. + virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0; +}; - /// Check whether \p MI is understood by the register coalescer - /// but may require some rewriting. - bool isCoalescableCopy(const MachineInstr &MI) { - // SubregToRegs are not interesting, because they are already register - // coalescer friendly. - return MI.isCopy() || - (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() || - MI.isExtractSubreg())); +/// Rewriter for COPY instructions. +class CopyRewriter : public Rewriter { +public: + CopyRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isCopy() && "Expected copy instruction"); } + virtual ~CopyRewriter() = default; - /// Check whether \p MI is a copy like instruction that is - /// not recognized by the register coalescer. - bool isUncoalescableCopy(const MachineInstr &MI) { - return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() || - MI.isInsertSubregLike() || - MI.isExtractSubregLike())); + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // CurrentSrcIdx > 0 means this function has already been called. + if (CurrentSrcIdx > 0) + return false; + // This is the first call to getNextRewritableSource. + // Move the CurrentSrcIdx to remember that we made that call. + CurrentSrcIdx = 1; + // The rewritable source is the argument. + const MachineOperand &MOSrc = CopyLike.getOperand(1); + Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + return true; } - MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, - RewriteMapTy &RewriteMap); - - // Set of copies to virtual registers keyed by source register. Never - // holds any physreg which requires def tracking. - DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs; + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 1) + return false; + MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); + MOSrc.setReg(NewReg); + MOSrc.setSubReg(NewSubReg); + return true; + } +}; - // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs. - void MF_HandleInsertion(MachineInstr &MI) override { return; } +/// Helper class to rewrite uncoalescable copy like instructions +/// into new COPY (coalescable friendly) instructions. +class UncoalescableRewriter : public Rewriter { + unsigned NumDefs; ///< Number of defs in the bitcast. - bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) { - if (!MI.isCopy()) - return false; +public: + UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { + NumDefs = MI.getDesc().getNumDefs(); + } - Register SrcReg = MI.getOperand(1).getReg(); - unsigned SrcSubReg = MI.getOperand(1).getSubReg(); - if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg)) + /// \see See Rewriter::getNextRewritableSource() + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) return false; - SrcPair = RegSubRegPair(SrcReg, SrcSubReg); - return true; - } + while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { + ++CurrentSrcIdx; + if (CurrentSrcIdx == NumDefs) + return false; + } - // If a COPY instruction is to be deleted or changed, we should also remove - // it from CopySrcMIs. - void deleteChangedCopy(MachineInstr &MI) { - RegSubRegPair SrcPair; - if (!getCopySrc(MI, SrcPair)) - return; + // What we track are the alternative sources of the definition. + Src = RegSubRegPair(0, 0); + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - auto It = CopySrcMIs.find(SrcPair); - if (It != CopySrcMIs.end() && It->second == &MI) - CopySrcMIs.erase(It); + CurrentSrcIdx++; + return true; } - void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); } - - void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override { - deleteChangedCopy(MI); + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { + return false; } }; -class PeepholeOptimizerLegacy : public MachineFunctionPass { +/// Specialized rewriter for INSERT_SUBREG instruction. +class InsertSubregRewriter : public Rewriter { public: - static char ID; // Pass identification - - PeepholeOptimizerLegacy() : MachineFunctionPass(ID) { - initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry()); + InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isInsertSubreg() && "Invalid instruction"); } - bool runOnMachineFunction(MachineFunction &MF) override; + /// \see See Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. + /// Src1 has the same register class has dst, hence, there is + /// nothing to rewrite. + /// Src2.src2SubIdx, may not be register coalescer friendly. + /// Therefore, the first call to this method returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (DstReg, DstSubReg) = (dst, subIdx). + /// + /// Subsequence calls will return false. + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 2) + return false; + // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. + CurrentSrcIdx = 2; + const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); + Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg()); + const MachineOperand &MODef = CopyLike.getOperand(0); - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired<MachineLoopInfoWrapperPass>(); - AU.addPreserved<MachineLoopInfoWrapperPass>(); - if (Aggressive) { - AU.addRequired<MachineDominatorTreeWrapperPass>(); - AU.addPreserved<MachineDominatorTreeWrapperPass>(); - } + // We want to track something that is compatible with the + // partial definition. + if (MODef.getSubReg()) + // Bail if we have to compose sub-register indices. + return false; + Dst = RegSubRegPair(MODef.getReg(), + (unsigned)CopyLike.getOperand(3).getImm()); + return true; } - MachineFunctionProperties getRequiredProperties() const override { - return MachineFunctionProperties().set( - MachineFunctionProperties::Property::IsSSA); - } -}; - -/// Helper class to hold instructions that are inside recurrence cycles. -/// The recurrence cycle is formulated around 1) a def operand and its -/// tied use operand, or 2) a def operand and a use operand that is commutable -/// with another use operand which is tied to the def operand. In the latter -/// case, index of the tied use operand and the commutable use operand are -/// maintained with CommutePair. -class RecurrenceInstr { -public: - using IndexPair = std::pair<unsigned, unsigned>; - - RecurrenceInstr(MachineInstr *MI) : MI(MI) {} - RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) - : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} - - MachineInstr *getMI() const { return MI; } - std::optional<IndexPair> getCommutePair() const { return CommutePair; } - -private: - MachineInstr *MI; - std::optional<IndexPair> CommutePair; + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 2) + return false; + // We are rewriting the inserted reg. + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } }; -/// Helper class to hold a reply for ValueTracker queries. -/// Contains the returned sources for a given search and the instructions -/// where the sources were tracked from. -class ValueTrackerResult { -private: - /// Track all sources found by one ValueTracker query. - SmallVector<RegSubRegPair, 2> RegSrcs; - - /// Instruction using the sources in 'RegSrcs'. - const MachineInstr *Inst = nullptr; +/// Specialized rewriter for EXTRACT_SUBREG instruction. +class ExtractSubregRewriter : public Rewriter { + const TargetInstrInfo &TII; public: - ValueTrackerResult() = default; - - ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); } + ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) + : Rewriter(MI), TII(TII) { + assert(MI.isExtractSubreg() && "Invalid instruction"); + } - bool isValid() const { return getNumSources() > 0; } + /// \see Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. + /// There is only one rewritable source: Src.subIdx, + /// which defines dst.dstSubIdx. + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 1) + return false; + // We are looking at v1 = EXTRACT_SUBREG v0, sub0. + CurrentSrcIdx = 1; + const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); + // If we have to compose sub-register indices, bail out. + if (MOExtractedReg.getSubReg()) + return false; - void setInst(const MachineInstr *I) { Inst = I; } - const MachineInstr *getInst() const { return Inst; } + Src = + RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm()); - void clear() { - RegSrcs.clear(); - Inst = nullptr; + // We want to track something that is compatible with the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + return true; } - void addSource(Register SrcReg, unsigned SrcSubReg) { - RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { + // The only source we can rewrite is the input register. + if (CurrentSrcIdx != 1) + return false; + + CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); + + // If we find a source that does not require to extract something, + // rewrite the operation with a copy. + if (!NewSubReg) { + // Move the current index to an invalid position. + // We do not want another call to this method to be able + // to do any change. + CurrentSrcIdx = -1; + // Rewrite the operation as a COPY. + // Get rid of the sub-register index. + CopyLike.removeOperand(2); + // Morph the operation into a COPY. + CopyLike.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } + CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); + return true; } +}; - void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) { - assert(Idx < getNumSources() && "Reg pair source out of index"); - RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); +/// Specialized rewriter for REG_SEQUENCE instruction. +class RegSequenceRewriter : public Rewriter { +public: + RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { + assert(MI.isRegSequence() && "Invalid instruction"); } - int getNumSources() const { return RegSrcs.size(); } + /// \see Rewriter::getNextRewritableSource() + /// Here CopyLike has the following form: + /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. + /// Each call will return a different source, walking all the available + /// source. + /// + /// The first call returns: + /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). + /// (DstReg, DstSubReg) = (dst, subIdx1). + /// + /// The second call returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (DstReg, DstSubReg) = (dst, subIdx2). + /// + /// And so on, until all the sources have been traversed, then + /// it returns false. + bool getNextRewritableSource(RegSubRegPair &Src, + RegSubRegPair &Dst) override { + // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. - RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; } + // If this is the first call, move to the first argument. + if (CurrentSrcIdx == 0) { + CurrentSrcIdx = 1; + } else { + // Otherwise, move to the next argument and check that it is valid. + CurrentSrcIdx += 2; + if (CurrentSrcIdx >= CopyLike.getNumOperands()) + return false; + } + const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); + Src.Reg = MOInsertedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if ((Src.SubReg = MOInsertedReg.getSubReg())) + return false; - Register getSrcReg(int Idx) const { - assert(Idx < getNumSources() && "Reg source out of index"); - return RegSrcs[Idx].Reg; - } + // We want to track something that is compatible with the related + // partial definition. + Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); - unsigned getSrcSubReg(int Idx) const { - assert(Idx < getNumSources() && "SubReg source out of index"); - return RegSrcs[Idx].SubReg; + const MachineOperand &MODef = CopyLike.getOperand(0); + Dst.Reg = MODef.getReg(); + // If we have to compose sub-registers, bail. + return MODef.getSubReg() == 0; } - bool operator==(const ValueTrackerResult &Other) const { - if (Other.getInst() != getInst()) - return false; - - if (Other.getNumSources() != getNumSources()) + bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { + // We cannot rewrite out of bound operands. + // Moreover, rewritable sources are at odd positions. + if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) return false; - for (int i = 0, e = Other.getNumSources(); i != e; ++i) - if (Other.getSrcReg(i) != getSrcReg(i) || - Other.getSrcSubReg(i) != getSrcSubReg(i)) - return false; + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); return true; } }; -/// Helper class to track the possible sources of a value defined by -/// a (chain of) copy related instructions. -/// Given a definition (instruction and definition index), this class -/// follows the use-def chain to find successive suitable sources. -/// The given source can be used to rewrite the definition into -/// def = COPY src. -/// -/// For instance, let us consider the following snippet: -/// v0 = -/// v2 = INSERT_SUBREG v1, v0, sub0 -/// def = COPY v2.sub0 -/// -/// Using a ValueTracker for def = COPY v2.sub0 will give the following -/// suitable sources: -/// v2.sub0 and v0. -/// Then, def can be rewritten into def = COPY v0. -class ValueTracker { -private: - /// The current point into the use-def chain. - const MachineInstr *Def = nullptr; +class PeepholeOptimizer : private MachineFunction::Delegate { + const TargetInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineRegisterInfo *MRI = nullptr; + MachineDominatorTree *DT = nullptr; // Machine dominator tree + MachineLoopInfo *MLI = nullptr; - /// The index of the definition in Def. - unsigned DefIdx = 0; +public: + PeepholeOptimizer(MachineDominatorTree *DT, MachineLoopInfo *MLI) + : DT(DT), MLI(MLI) {} - /// The sub register index of the definition. - unsigned DefSubReg; + bool run(MachineFunction &MF); + /// Track Def -> Use info used for rewriting copies. + using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>; - /// The register where the value can be found. - Register Reg; + /// Sequence of instructions that formulate recurrence cycle. + using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>; - /// MachineRegisterInfo used to perform tracking. - const MachineRegisterInfo &MRI; +private: + bool optimizeCmpInstr(MachineInstr &MI); + bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeSelect(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeCondBranch(MachineInstr &MI); - /// Optional TargetInstrInfo used to perform some complex tracking. - const TargetInstrInfo *TII; + bool optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter); + bool optimizeCoalescableCopy(MachineInstr &MI); + bool optimizeUncoalescableCopy(MachineInstr &MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeRecurrence(MachineInstr &PHI); + bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap); + bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs); + bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs, + DenseMap<Register, MachineInstr *> &ImmDefMIs, + bool &Deleted); - /// Dispatcher to the right underlying implementation of getNextSource. - ValueTrackerResult getNextSourceImpl(); + /// Finds recurrence cycles, but only ones that formulated around + /// a def operand and a use operand that are tied. If there is a use + /// operand commutable with the tied use operand, find recurrence cycle + /// along that operand as well. + bool findTargetRecurrence(Register Reg, + const SmallSet<Register, 2> &TargetReg, + RecurrenceCycle &RC); - /// Specialized version of getNextSource for Copy instructions. - ValueTrackerResult getNextSourceFromCopy(); + /// If copy instruction \p MI is a virtual register copy or a copy of a + /// constant physical register to a virtual register, track it in the + /// set CopySrcMIs. If this virtual register was previously seen as a + /// copy, replace the uses of this copy with the previously seen copy's + /// destination register. + bool foldRedundantCopy(MachineInstr &MI); - /// Specialized version of getNextSource for Bitcast instructions. + /// Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(Register Reg); + + /// If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool + foldRedundantNAPhysCopy(MachineInstr &MI, + DenseMap<Register, MachineInstr *> &NAPhysToVirtMIs); + + bool isLoadFoldable(MachineInstr &MI, + SmallSet<Register, 16> &FoldAsLoadDefCandidates); + + /// Check whether \p MI is understood by the register coalescer + /// but may require some rewriting. + bool isCoalescableCopy(const MachineInstr &MI) { + // SubregToRegs are not interesting, because they are already register + // coalescer friendly. + return MI.isCopy() || + (!DisableAdvCopyOpt && (MI.isRegSequence() || MI.isInsertSubreg() || + MI.isExtractSubreg())); + } + + /// Check whether \p MI is a copy like instruction that is + /// not recognized by the register coalescer. + bool isUncoalescableCopy(const MachineInstr &MI) { + return MI.isBitcast() || (!DisableAdvCopyOpt && (MI.isRegSequenceLike() || + MI.isInsertSubregLike() || + MI.isExtractSubregLike())); + } + + MachineInstr &rewriteSource(MachineInstr &CopyLike, RegSubRegPair Def, + RewriteMapTy &RewriteMap); + + // Set of copies to virtual registers keyed by source register. Never + // holds any physreg which requires def tracking. + DenseMap<RegSubRegPair, MachineInstr *> CopySrcMIs; + + // MachineFunction::Delegate implementation. Used to maintain CopySrcMIs. + void MF_HandleInsertion(MachineInstr &MI) override { return; } + + bool getCopySrc(MachineInstr &MI, RegSubRegPair &SrcPair) { + if (!MI.isCopy()) + return false; + + Register SrcReg = MI.getOperand(1).getReg(); + unsigned SrcSubReg = MI.getOperand(1).getSubReg(); + if (!SrcReg.isVirtual() && !MRI->isConstantPhysReg(SrcReg)) + return false; + + SrcPair = RegSubRegPair(SrcReg, SrcSubReg); + return true; + } + + // If a COPY instruction is to be deleted or changed, we should also remove + // it from CopySrcMIs. + void deleteChangedCopy(MachineInstr &MI) { + RegSubRegPair SrcPair; + if (!getCopySrc(MI, SrcPair)) + return; + + auto It = CopySrcMIs.find(SrcPair); + if (It != CopySrcMIs.end() && It->second == &MI) + CopySrcMIs.erase(It); + } + + void MF_HandleRemoval(MachineInstr &MI) override { deleteChangedCopy(MI); } + + void MF_HandleChangeDesc(MachineInstr &MI, const MCInstrDesc &TID) override { + deleteChangedCopy(MI); + } +}; + +class PeepholeOptimizerLegacy : public MachineFunctionPass { +public: + static char ID; // Pass identification + + PeepholeOptimizerLegacy() : MachineFunctionPass(ID) { + initializePeepholeOptimizerLegacyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineLoopInfoWrapperPass>(); + AU.addPreserved<MachineLoopInfoWrapperPass>(); + if (Aggressive) { + AU.addRequired<MachineDominatorTreeWrapperPass>(); + AU.addPreserved<MachineDominatorTreeWrapperPass>(); + } + } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } +}; + +/// Helper class to hold instructions that are inside recurrence cycles. +/// The recurrence cycle is formulated around 1) a def operand and its +/// tied use operand, or 2) a def operand and a use operand that is commutable +/// with another use operand which is tied to the def operand. In the latter +/// case, index of the tied use operand and the commutable use operand are +/// maintained with CommutePair. +class RecurrenceInstr { +public: + using IndexPair = std::pair<unsigned, unsigned>; + + RecurrenceInstr(MachineInstr *MI) : MI(MI) {} + RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2) + : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {} + + MachineInstr *getMI() const { return MI; } + std::optional<IndexPair> getCommutePair() const { return CommutePair; } + +private: + MachineInstr *MI; + std::optional<IndexPair> CommutePair; +}; + +/// Helper class to hold a reply for ValueTracker queries. +/// Contains the returned sources for a given search and the instructions +/// where the sources were tracked from. +class ValueTrackerResult { +private: + /// Track all sources found by one ValueTracker query. + SmallVector<RegSubRegPair, 2> RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. + const MachineInstr *Inst = nullptr; + +public: + ValueTrackerResult() = default; + + ValueTrackerResult(Register Reg, unsigned SubReg) { addSource(Reg, SubReg); } + + bool isValid() const { return getNumSources() > 0; } + + void setInst(const MachineInstr *I) { Inst = I; } + const MachineInstr *getInst() const { return Inst; } + + void clear() { + RegSrcs.clear(); + Inst = nullptr; + } + + void addSource(Register SrcReg, unsigned SrcSubReg) { + RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, Register SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); + RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + + RegSubRegPair getSrc(int Idx) const { return RegSrcs[Idx]; } + + Register getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; + } + + unsigned getSrcSubReg(int Idx) const { + assert(Idx < getNumSources() && "SubReg source out of index"); + return RegSrcs[Idx].SubReg; + } + + bool operator==(const ValueTrackerResult &Other) const { + if (Other.getInst() != getInst()) + return false; + + if (Other.getNumSources() != getNumSources()) + return false; + + for (int i = 0, e = Other.getNumSources(); i != e; ++i) + if (Other.getSrcReg(i) != getSrcReg(i) || + Other.getSrcSubReg(i) != getSrcSubReg(i)) + return false; + return true; + } +}; + +/// Helper class to track the possible sources of a value defined by +/// a (chain of) copy related instructions. +/// Given a definition (instruction and definition index), this class +/// follows the use-def chain to find successive suitable sources. +/// The given source can be used to rewrite the definition into +/// def = COPY src. +/// +/// For instance, let us consider the following snippet: +/// v0 = +/// v2 = INSERT_SUBREG v1, v0, sub0 +/// def = COPY v2.sub0 +/// +/// Using a ValueTracker for def = COPY v2.sub0 will give the following +/// suitable sources: +/// v2.sub0 and v0. +/// Then, def can be rewritten into def = COPY v0. +class ValueTracker { +private: + /// The current point into the use-def chain. + const MachineInstr *Def = nullptr; + + /// The index of the definition in Def. + unsigned DefIdx = 0; + + /// The sub register index of the definition. + unsigned DefSubReg; + + /// The register where the value can be found. + Register Reg; + + /// MachineRegisterInfo used to perform tracking. + const MachineRegisterInfo &MRI; + + /// Optional TargetInstrInfo used to perform some complex tracking. + const TargetInstrInfo *TII; + + /// Dispatcher to the right underlying implementation of getNextSource. + ValueTrackerResult getNextSourceImpl(); + + /// Specialized version of getNextSource for Copy instructions. + ValueTrackerResult getNextSourceFromCopy(); + + /// Specialized version of getNextSource for Bitcast instructions. ValueTrackerResult getNextSourceFromBitcast(); /// Specialized version of getNextSource for RegSequence instructions. @@ -707,457 +1003,136 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) { /// Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. /// When true is returned, the \p RewriteMap can be used by the client to -/// retrieve all Def -> Use along the way up to the next source. Any found -/// Use that is not itself a key for another entry, is the next source to -/// use. During the search for the next source, multiple sources can be found -/// given multiple incoming sources of a PHI instruction. In this case, we -/// look in each PHI source for the next source; all found next sources must -/// share the same register file as \p Reg and \p SubReg. The client should -/// then be capable to rewrite all intermediate PHIs to get the next source. -/// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, - RewriteMapTy &RewriteMap) { - // Do not try to find a new source for a physical register. - // So far we do not have any motivating example for doing that. - // Thus, instead of maintaining untested code, we will revisit that if - // that changes at some point. - Register Reg = RegSubReg.Reg; - if (Reg.isPhysical()) - return false; - const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - - SmallVector<RegSubRegPair, 4> SrcToLook; - RegSubRegPair CurSrcPair = RegSubReg; - SrcToLook.push_back(CurSrcPair); - - unsigned PHICount = 0; - do { - CurSrcPair = SrcToLook.pop_back_val(); - // As explained above, do not handle physical registers - if (CurSrcPair.Reg.isPhysical()) - return false; - - ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); - - // Follow the chain of copies until we find a more suitable source, a phi - // or have to abort. - while (true) { - ValueTrackerResult Res = ValTracker.getNextSource(); - // Abort at the end of a chain (without finding a suitable source). - if (!Res.isValid()) - return false; - - // Insert the Def -> Use entry for the recently found source. - ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); - if (CurSrcRes.isValid()) { - assert(CurSrcRes == Res && "ValueTrackerResult found must match"); - // An existent entry with multiple sources is a PHI cycle we must avoid. - // Otherwise it's an entry with a valid next source we already found. - if (CurSrcRes.getNumSources() > 1) { - LLVM_DEBUG(dbgs() - << "findNextSource: found PHI cycle, aborting...\n"); - return false; - } - break; - } - RewriteMap.insert(std::make_pair(CurSrcPair, Res)); - - // ValueTrackerResult usually have one source unless it's the result from - // a PHI instruction. Add the found PHI edges to be looked up further. - unsigned NumSrcs = Res.getNumSources(); - if (NumSrcs > 1) { - PHICount++; - if (PHICount >= RewritePHILimit) { - LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); - return false; - } - - for (unsigned i = 0; i < NumSrcs; ++i) - SrcToLook.push_back(Res.getSrc(i)); - break; - } - - CurSrcPair = Res.getSrc(0); - // Do not extend the live-ranges of physical registers as they add - // constraints to the register allocator. Moreover, if we want to extend - // the live-range of a physical register, unlike SSA virtual register, - // we will have to check that they aren't redefine before the related use. - if (CurSrcPair.Reg.isPhysical()) - return false; - - // Keep following the chain if the value isn't any better yet. - const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); - if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC, - CurSrcPair.SubReg)) - continue; - - // We currently cannot deal with subreg operands on PHI instructions - // (see insertPHI()). - if (PHICount > 0 && CurSrcPair.SubReg != 0) - continue; - - // We found a suitable source, and are done with this chain. - break; - } - } while (!SrcToLook.empty()); - - // If we did not find a more suitable source, there is nothing to optimize. - return CurSrcPair.Reg != Reg; -} - -/// Insert a PHI instruction with incoming edges \p SrcRegs that are -/// guaranteed to have the same register class. This is necessary whenever we -/// successfully traverse a PHI instruction and find suitable sources coming -/// from its edges. By inserting a new PHI, we provide a rewritten PHI def -/// suitable to be used in a new COPY instruction. -static MachineInstr &insertPHI(MachineRegisterInfo &MRI, - const TargetInstrInfo &TII, - const SmallVectorImpl<RegSubRegPair> &SrcRegs, - MachineInstr &OrigPHI) { - assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); - - const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg); - // NewRC is only correct if no subregisters are involved. findNextSource() - // should have rejected those cases already. - assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); - Register NewVR = MRI.createVirtualRegister(NewRC); - MachineBasicBlock *MBB = OrigPHI.getParent(); - MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), - TII.get(TargetOpcode::PHI), NewVR); - - unsigned MBBOpIdx = 2; - for (const RegSubRegPair &RegPair : SrcRegs) { - MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); - MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB()); - // Since we're extended the lifetime of RegPair.Reg, clear the - // kill flags to account for that and make RegPair.Reg reaches - // the new PHI. - MRI.clearKillFlags(RegPair.Reg); - MBBOpIdx += 2; - } - - return *MIB; -} - -namespace { - -/// Interface to query instructions amenable to copy rewriting. -class Rewriter { -protected: - MachineInstr &CopyLike; - unsigned CurrentSrcIdx = 0; ///< The index of the source being rewritten. -public: - Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {} - virtual ~Rewriter() = default; - - /// Get the next rewritable source (SrcReg, SrcSubReg) and - /// the related value that it affects (DstReg, DstSubReg). - /// A source is considered rewritable if its register class and the - /// register class of the related DstReg may not be register - /// coalescer friendly. In other words, given a copy-like instruction - /// not all the arguments may be returned at rewritable source, since - /// some arguments are none to be register coalescer friendly. - /// - /// Each call of this method moves the current source to the next - /// rewritable source. - /// For instance, let CopyLike be the instruction to rewrite. - /// CopyLike has one definition and one source: - /// dst.dstSubIdx = CopyLike src.srcSubIdx. - /// - /// The first call will give the first rewritable source, i.e., - /// the only source this instruction has: - /// (SrcReg, SrcSubReg) = (src, srcSubIdx). - /// This source defines the whole definition, i.e., - /// (DstReg, DstSubReg) = (dst, dstSubIdx). - /// - /// The second and subsequent calls will return false, as there is only one - /// rewritable source. - /// - /// \return True if a rewritable source has been found, false otherwise. - /// The output arguments are valid if and only if true is returned. - virtual bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) = 0; - - /// Rewrite the current source with \p NewReg and \p NewSubReg if possible. - /// \return True if the rewriting was possible, false otherwise. - virtual bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) = 0; -}; - -/// Rewriter for COPY instructions. -class CopyRewriter : public Rewriter { -public: - CopyRewriter(MachineInstr &MI) : Rewriter(MI) { - assert(MI.isCopy() && "Expected copy instruction"); - } - virtual ~CopyRewriter() = default; - - bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) override { - // CurrentSrcIdx > 0 means this function has already been called. - if (CurrentSrcIdx > 0) - return false; - // This is the first call to getNextRewritableSource. - // Move the CurrentSrcIdx to remember that we made that call. - CurrentSrcIdx = 1; - // The rewritable source is the argument. - const MachineOperand &MOSrc = CopyLike.getOperand(1); - Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg()); - // What we track are the alternative sources of the definition. - const MachineOperand &MODef = CopyLike.getOperand(0); - Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - return true; - } - - bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { - if (CurrentSrcIdx != 1) - return false; - MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); - MOSrc.setReg(NewReg); - MOSrc.setSubReg(NewSubReg); - return true; - } -}; - -/// Helper class to rewrite uncoalescable copy like instructions -/// into new COPY (coalescable friendly) instructions. -class UncoalescableRewriter : public Rewriter { - unsigned NumDefs; ///< Number of defs in the bitcast. - -public: - UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) { - NumDefs = MI.getDesc().getNumDefs(); - } - - /// \see See Rewriter::getNextRewritableSource() - /// All such sources need to be considered rewritable in order to - /// rewrite a uncoalescable copy-like instruction. This method return - /// each definition that must be checked if rewritable. - bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) override { - // Find the next non-dead definition and continue from there. - if (CurrentSrcIdx == NumDefs) - return false; - - while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { - ++CurrentSrcIdx; - if (CurrentSrcIdx == NumDefs) - return false; - } - - // What we track are the alternative sources of the definition. - Src = RegSubRegPair(0, 0); - const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); - Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - - CurrentSrcIdx++; - return true; - } - - bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { - return false; - } -}; - -/// Specialized rewriter for INSERT_SUBREG instruction. -class InsertSubregRewriter : public Rewriter { -public: - InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) { - assert(MI.isInsertSubreg() && "Invalid instruction"); - } - - /// \see See Rewriter::getNextRewritableSource() - /// Here CopyLike has the following form: - /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. - /// Src1 has the same register class has dst, hence, there is - /// nothing to rewrite. - /// Src2.src2SubIdx, may not be register coalescer friendly. - /// Therefore, the first call to this method returns: - /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). - /// (DstReg, DstSubReg) = (dst, subIdx). - /// - /// Subsequence calls will return false. - bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) override { - // If we already get the only source we can rewrite, return false. - if (CurrentSrcIdx == 2) - return false; - // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. - CurrentSrcIdx = 2; - const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); - Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg()); - const MachineOperand &MODef = CopyLike.getOperand(0); - - // We want to track something that is compatible with the - // partial definition. - if (MODef.getSubReg()) - // Bail if we have to compose sub-register indices. - return false; - Dst = RegSubRegPair(MODef.getReg(), - (unsigned)CopyLike.getOperand(3).getImm()); - return true; - } - - bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { - if (CurrentSrcIdx != 2) - return false; - // We are rewriting the inserted reg. - MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); - MO.setReg(NewReg); - MO.setSubReg(NewSubReg); - return true; - } -}; - -/// Specialized rewriter for EXTRACT_SUBREG instruction. -class ExtractSubregRewriter : public Rewriter { - const TargetInstrInfo &TII; - -public: - ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) - : Rewriter(MI), TII(TII) { - assert(MI.isExtractSubreg() && "Invalid instruction"); - } - - /// \see Rewriter::getNextRewritableSource() - /// Here CopyLike has the following form: - /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. - /// There is only one rewritable source: Src.subIdx, - /// which defines dst.dstSubIdx. - bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) override { - // If we already get the only source we can rewrite, return false. - if (CurrentSrcIdx == 1) - return false; - // We are looking at v1 = EXTRACT_SUBREG v0, sub0. - CurrentSrcIdx = 1; - const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); - // If we have to compose sub-register indices, bail out. - if (MOExtractedReg.getSubReg()) - return false; - - Src = - RegSubRegPair(MOExtractedReg.getReg(), CopyLike.getOperand(2).getImm()); +/// retrieve all Def -> Use along the way up to the next source. Any found +/// Use that is not itself a key for another entry, is the next source to +/// use. During the search for the next source, multiple sources can be found +/// given multiple incoming sources of a PHI instruction. In this case, we +/// look in each PHI source for the next source; all found next sources must +/// share the same register file as \p Reg and \p SubReg. The client should +/// then be capable to rewrite all intermediate PHIs to get the next source. +/// \return False if no alternative sources are available. True otherwise. +bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg, + RewriteMapTy &RewriteMap) { + // Do not try to find a new source for a physical register. + // So far we do not have any motivating example for doing that. + // Thus, instead of maintaining untested code, we will revisit that if + // that changes at some point. + Register Reg = RegSubReg.Reg; + if (Reg.isPhysical()) + return false; + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - // We want to track something that is compatible with the definition. - const MachineOperand &MODef = CopyLike.getOperand(0); - Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg()); - return true; - } + SmallVector<RegSubRegPair, 4> SrcToLook; + RegSubRegPair CurSrcPair = RegSubReg; + SrcToLook.push_back(CurSrcPair); - bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { - // The only source we can rewrite is the input register. - if (CurrentSrcIdx != 1) + unsigned PHICount = 0; + do { + CurSrcPair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers + if (CurSrcPair.Reg.isPhysical()) return false; - CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII); - // If we find a source that does not require to extract something, - // rewrite the operation with a copy. - if (!NewSubReg) { - // Move the current index to an invalid position. - // We do not want another call to this method to be able - // to do any change. - CurrentSrcIdx = -1; - // Rewrite the operation as a COPY. - // Get rid of the sub-register index. - CopyLike.removeOperand(2); - // Morph the operation into a COPY. - CopyLike.setDesc(TII.get(TargetOpcode::COPY)); - return true; - } - CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); - return true; - } -}; + // Follow the chain of copies until we find a more suitable source, a phi + // or have to abort. + while (true) { + ValueTrackerResult Res = ValTracker.getNextSource(); + // Abort at the end of a chain (without finding a suitable source). + if (!Res.isValid()) + return false; -/// Specialized rewriter for REG_SEQUENCE instruction. -class RegSequenceRewriter : public Rewriter { -public: - RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) { - assert(MI.isRegSequence() && "Invalid instruction"); - } + // Insert the Def -> Use entry for the recently found source. + ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); + if (CurSrcRes.isValid()) { + assert(CurSrcRes == Res && "ValueTrackerResult found must match"); + // An existent entry with multiple sources is a PHI cycle we must avoid. + // Otherwise it's an entry with a valid next source we already found. + if (CurSrcRes.getNumSources() > 1) { + LLVM_DEBUG(dbgs() + << "findNextSource: found PHI cycle, aborting...\n"); + return false; + } + break; + } + RewriteMap.insert(std::make_pair(CurSrcPair, Res)); - /// \see Rewriter::getNextRewritableSource() - /// Here CopyLike has the following form: - /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. - /// Each call will return a different source, walking all the available - /// source. - /// - /// The first call returns: - /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). - /// (DstReg, DstSubReg) = (dst, subIdx1). - /// - /// The second call returns: - /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). - /// (DstReg, DstSubReg) = (dst, subIdx2). - /// - /// And so on, until all the sources have been traversed, then - /// it returns false. - bool getNextRewritableSource(RegSubRegPair &Src, - RegSubRegPair &Dst) override { - // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. + // ValueTrackerResult usually have one source unless it's the result from + // a PHI instruction. Add the found PHI edges to be looked up further. + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; + if (PHICount >= RewritePHILimit) { + LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + return false; + } - // If this is the first call, move to the first argument. - if (CurrentSrcIdx == 0) { - CurrentSrcIdx = 1; - } else { - // Otherwise, move to the next argument and check that it is valid. - CurrentSrcIdx += 2; - if (CurrentSrcIdx >= CopyLike.getNumOperands()) + for (unsigned i = 0; i < NumSrcs; ++i) + SrcToLook.push_back(Res.getSrc(i)); + break; + } + + CurSrcPair = Res.getSrc(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, + // we will have to check that they aren't redefine before the related use. + if (CurSrcPair.Reg.isPhysical()) return false; - } - const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); - Src.Reg = MOInsertedReg.getReg(); - // If we have to compose sub-register indices, bail out. - if ((Src.SubReg = MOInsertedReg.getSubReg())) - return false; - // We want to track something that is compatible with the related - // partial definition. - Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + // Keep following the chain if the value isn't any better yet. + const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); + if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC, + CurSrcPair.SubReg)) + continue; - const MachineOperand &MODef = CopyLike.getOperand(0); - Dst.Reg = MODef.getReg(); - // If we have to compose sub-registers, bail. - return MODef.getSubReg() == 0; - } + // We currently cannot deal with subreg operands on PHI instructions + // (see insertPHI()). + if (PHICount > 0 && CurSrcPair.SubReg != 0) + continue; - bool RewriteCurrentSource(Register NewReg, unsigned NewSubReg) override { - // We cannot rewrite out of bound operands. - // Moreover, rewritable sources are at odd positions. - if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) - return false; + // We found a suitable source, and are done with this chain. + break; + } + } while (!SrcToLook.empty()); - MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); - MO.setReg(NewReg); - MO.setSubReg(NewSubReg); - return true; - } -}; + // If we did not find a more suitable source, there is nothing to optimize. + return CurSrcPair.Reg != Reg; +} -} // end anonymous namespace +/// Insert a PHI instruction with incoming edges \p SrcRegs that are +/// guaranteed to have the same register class. This is necessary whenever we +/// successfully traverse a PHI instruction and find suitable sources coming +/// from its edges. By inserting a new PHI, we provide a rewritten PHI def +/// suitable to be used in a new COPY instruction. +static MachineInstr &insertPHI(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const SmallVectorImpl<RegSubRegPair> &SrcRegs, + MachineInstr &OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); -/// Get the appropriated Rewriter for \p MI. -/// \return A pointer to a dynamically allocated Rewriter or nullptr if no -/// rewriter works for \p MI. -static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) { - // Handle uncoalescable copy-like instructions. - if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() || - MI.isExtractSubregLike()) - return new UncoalescableRewriter(MI); + const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg); + // NewRC is only correct if no subregisters are involved. findNextSource() + // should have rejected those cases already. + assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand"); + Register NewVR = MRI.createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI.getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(), + TII.get(TargetOpcode::PHI), NewVR); - switch (MI.getOpcode()) { - default: - return nullptr; - case TargetOpcode::COPY: - return new CopyRewriter(MI); - case TargetOpcode::INSERT_SUBREG: - return new InsertSubregRewriter(MI); - case TargetOpcode::EXTRACT_SUBREG: - return new ExtractSubregRewriter(MI, TII); - case TargetOpcode::REG_SEQUENCE: - return new RegSequenceRewriter(MI); + unsigned MBBOpIdx = 2; + for (const RegSubRegPair &RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); + MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. + MRI.clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; } + + return *MIB; } /// Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find @@ -1212,36 +1187,13 @@ getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, return RegSubRegPair(0, 0); } -/// Optimize generic copy instructions to avoid cross register bank copy. -/// The optimization looks through a chain of copies and tries to find a source -/// that has a compatible register class. -/// Two register classes are considered to be compatible if they share the same -/// register bank. -/// New copies issued by this optimization are register allocator -/// friendly. This optimization does not remove any copy as it may -/// overconstrain the register allocator, but replaces some operands -/// when possible. -/// \pre isCoalescableCopy(*MI) is true. -/// \return True, when \p MI has been rewritten. False otherwise. -bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { - assert(isCoalescableCopy(MI) && "Invalid argument"); - assert(MI.getDesc().getNumDefs() == 1 && - "Coalescer can understand multiple defs?!"); - const MachineOperand &MODef = MI.getOperand(0); - // Do not rewrite physical definitions. - if (MODef.getReg().isPhysical()) - return false; - +bool PeepholeOptimizer::optimizeCoalescableCopyImpl(Rewriter &&CpyRewriter) { bool Changed = false; // Get the right rewriter for the current copy. - std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII)); - // If none exists, bail out. - if (!CpyRewriter) - return false; // Rewrite each rewritable source. RegSubRegPair Src; RegSubRegPair TrackPair; - while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) { + while (CpyRewriter.getNextRewritableSource(Src, TrackPair)) { // Keep track of PHI nodes and its incoming edges when looking for sources. RewriteMapTy RewriteMap; // Try to find a more suitable source. If we failed to do so, or get the @@ -1257,12 +1209,13 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { continue; // Rewrite source. - if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { + if (CpyRewriter.RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { // We may have extended the live-range of NewSrc, account for that. MRI->clearKillFlags(NewSrc.Reg); Changed = true; } } + // TODO: We could have a clean-up method to tidy the instruction. // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 // => v0 = COPY v1 @@ -1272,6 +1225,44 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { return Changed; } +/// Optimize generic copy instructions to avoid cross register bank copy. +/// The optimization looks through a chain of copies and tries to find a source +/// that has a compatible register class. +/// Two register classes are considered to be compatible if they share the same +/// register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstrain the register allocator, but replaces some operands +/// when possible. +/// \pre isCoalescableCopy(*MI) is true. +/// \return True, when \p MI has been rewritten. False otherwise. +bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) { + assert(isCoalescableCopy(MI) && "Invalid argument"); + assert(MI.getDesc().getNumDefs() == 1 && + "Coalescer can understand multiple defs?!"); + const MachineOperand &MODef = MI.getOperand(0); + // Do not rewrite physical definitions. + if (MODef.getReg().isPhysical()) + return false; + + switch (MI.getOpcode()) { + case TargetOpcode::COPY: + return optimizeCoalescableCopyImpl(CopyRewriter(MI)); + case TargetOpcode::INSERT_SUBREG: + return optimizeCoalescableCopyImpl(InsertSubregRewriter(MI)); + case TargetOpcode::EXTRACT_SUBREG: + return optimizeCoalescableCopyImpl(ExtractSubregRewriter(MI, *TII)); + case TargetOpcode::REG_SEQUENCE: + return optimizeCoalescableCopyImpl(RegSequenceRewriter(MI)); + default: + // Handle uncoalescable copy-like instructions. + if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike()) + return optimizeCoalescableCopyImpl(UncoalescableRewriter(MI)); + return false; + } +} + /// Rewrite the source found through \p Def, by using the \p RewriteMap /// and create a new COPY instruction. More info about RewriteMap in /// PeepholeOptimizer::findNextSource. Right now this is only used to handle _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits