yroux created this revision. yroux added reviewers: t.p.northover, efriedma, paquette, samparker, SjoerdMeijer. Herald added subscribers: llvm-commits, cfe-commits, danielkiss, hiraditya, kristof.beyls. Herald added projects: clang, LLVM.
Enables Machine Outlining support on ARM for ARM and Thumb2 modes. Only the simplest outlining modes (tailcalls and thunks) are handled here. The patch also disables the LowOverheadLoops pass when the machine outliner is used, since outlined functions are not supported by this pass. This is a follow-up to the ARM Machine Outliner support RFC D57054 <https://reviews.llvm.org/D57054> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D76066 Files: clang/lib/Driver/ToolChains/Clang.cpp llvm/include/llvm/CodeGen/TargetPassConfig.h llvm/lib/CodeGen/MachineOutliner.cpp llvm/lib/CodeGen/TargetPassConfig.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.h llvm/lib/Target/ARM/ARMTargetMachine.cpp llvm/test/CodeGen/ARM/machine-outliner-tail.ll llvm/test/CodeGen/ARM/machine-outliner-thunk.ll
Index: llvm/test/CodeGen/ARM/machine-outliner-thunk.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-thunk.ll @@ -0,0 +1,111 @@ +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=armv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -enable-machine-outliner -verify-machineinstrs -mtriple=thumbv7-- \ +; RUN: -stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -enable-machine-outliner -verify-machineinstrs \ +; RUN: -mtriple=thumbv7-apple-darwin -stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +declare i32 @thunk_called_fn(i32, i32, i32, i32) + +define i32 @a() { +; ARM-LABEL: name: a +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 8, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: a +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: a +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: 
frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 8, 14, $noreg +; MACHO-NEXT: $lr, $sp = t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 8 + ret i32 %cx +} + +define i32 @b() { +; ARM-LABEL: name: b +; ARM: bb.0.entry: +; ARM-NEXT: liveins: $r11, $lr +; ARM: $sp = frame-setup STMDB_UPD $sp, 14, $noreg, killed $r11, killed $lr +; ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -8 +; ARM-NEXT: BL @OUTLINED_FUNCTION_0{{.*}} +; ARM-NEXT: renamable $r0 = ADDri killed renamable $r0, 88, 14, $noreg, $noreg +; ARM-NEXT: $sp = LDMIA_RET $sp, 14, $noreg, def $r11, def $pc, implicit killed $r0 + +; THUMB-LABEL: name: b +; THUMB: bb.0.entry: +; THUMB-NEXT: liveins: $r7, $lr +; THUMB: frame-setup tPUSH 14, $noreg, killed $r7, killed $lr +; THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +; THUMB-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; THUMB-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; THUMB-NEXT: tPOP_RET 14, $noreg, def $r7, def $pc + +; MACHO-LABEL: name: b +; MACHO: bb.0.entry: +; MACHO-NEXT: liveins: $lr +; MACHO: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -4, 14, $noreg +; MACHO-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 4 +; MACHO-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +; MACHO-NEXT: tBL 14, $noreg, @OUTLINED_FUNCTION_0{{.*}} +; MACHO-NEXT: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 88, 14, $noreg +; MACHO-NEXT: $lr, $sp = 
t2LDR_POST $sp, 4, 14, $noreg +; MACHO-NEXT: tBX_RET 14, $noreg, implicit killed $r0 +entry: + %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4) + %cx = add i32 %call, 88 + ret i32 %cx +} + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: bb.0: +; ARM-NEXT: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, $noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @thunk_called_fn, implicit $sp + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: bb.0: +; THUMB-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @thunk_called_fn, 14, $noreg, implicit $sp + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: bb.0: +; MACHO-NEXT: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @thunk_called_fn, 14, $noreg, implicit $sp Index: llvm/test/CodeGen/ARM/machine-outliner-tail.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/machine-outliner-tail.ll @@ -0,0 +1,42 @@ +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=arm-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=ARM +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=thumbv7-- \ +; RUN: --stop-after=machine-outliner < %s | FileCheck %s --check-prefix=THUMB +; RUN: llc -verify-machineinstrs -enable-machine-outliner \ +; RUN: -mtriple=thumbv7-apple-darwin --stop-after=machine-outliner < %s \ +; RUN: | FileCheck %s --check-prefix=MACHO + +; ARM-LABEL: name: OUTLINED_FUNCTION_0 +; ARM: $r0 = MOVi 1, 14, $noreg, $noreg +; ARM-NEXT: $r1 = MOVi 2, 14, 
$noreg, $noreg +; ARM-NEXT: $r2 = MOVi 3, 14, $noreg, $noreg +; ARM-NEXT: $r3 = MOVi 4, 14, $noreg, $noreg +; ARM-NEXT: TAILJMPd @z + +; THUMB-LABEL: name: OUTLINED_FUNCTION_0 +; THUMB: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; THUMB-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; THUMB-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; THUMB-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; THUMB-NEXT: tTAILJMPdND @z, 14, $noreg + +; MACHO-LABEL: name: OUTLINED_FUNCTION_0 +; MACHO: $r0, dead $cpsr = tMOVi8 1, 14, $noreg +; MACHO-NEXT: $r1, dead $cpsr = tMOVi8 2, 14, $noreg +; MACHO-NEXT: $r2, dead $cpsr = tMOVi8 3, 14, $noreg +; MACHO-NEXT: $r3, dead $cpsr = tMOVi8 4, 14, $noreg +; MACHO-NEXT: tTAILJMPd @z, 14, $noreg + +define void @a() { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} + +declare void @z(i32, i32, i32, i32) + +define dso_local void @b(i32* nocapture readnone %p) { +entry: + tail call void @z(i32 1, i32 2, i32 3, i32 4) + ret void +} Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -244,6 +244,10 @@ } initAsmInfo(); + + // ARM supports the MachineOutliner. + setMachineOutliner(true); + setSupportsDefaultOutlining(false); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -543,7 +547,8 @@ void ARMPassConfig::addPreEmitPass2() { addPass(createARMConstantIslandPass()); - addPass(createARMLowOverheadLoopsPass()); + if (!MachineOutlinerEnabled) + addPass(createARMLowOverheadLoopsPass()); // Identify valid longjmp targets for Windows Control Flow Guard. 
if (TM->getTargetTriple().isOSWindows()) Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.h =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -343,6 +343,22 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + /// ARM supports the MachineOutliner. + bool isFunctionSafeToOutlineFrom(MachineFunction &MF, + bool OutlineFromLinkOnceODRs) const override; + outliner::OutlinedFunction getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override; + outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const override; + bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const override; + void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const override; + MachineBasicBlock::iterator + insertOutlinedCall(Module &M, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &It, MachineFunction &MF, + const outliner::Candidate &C) const override; + private: unsigned getInstBundleLength(const MachineInstr &MI) const; Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp =================================================================== --- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -5460,3 +5461,374 @@ return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) < ConstantMaterializationCost(Val2, Subtarget, !ForCodesize); } + +/// Constants defining how certain sequences should be outlined. 
+/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// BX LR I2 +/// BX LR +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ +/// +/// \p MachineOutlinerThunk implies that the function is being created from +/// a sequence of instructions ending in a call. The outlined function is +/// called with a BL instruction, and the outlined function tail-calls the +/// original call destination. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// BL f I2 +/// B f +/// +/// +-------------------------+--------+-----+ +/// | | Thumb2 | ARM | +/// +-------------------------+--------+-----+ +/// | Call overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 0 | 0 | +/// | Stack fixup required | No | No | +/// +-------------------------+--------+-----+ + +enum MachineOutlinerClass { + MachineOutlinerTailCall, + MachineOutlinerThunk +}; + +enum MachineOutlinerMBBFlags { + LRUnavailableSomewhere = 0x2, + HasCalls = 0x4, + UnsafeRegsDead = 0x8 +}; + +struct OutlinerCosts { + const int CallTailCall; + const int FrameTailCall; + const int CallThunk; + const int FrameThunk; + + OutlinerCosts(const ARMSubtarget &target) + : CallTailCall(target.isThumb() ? 4 : 4), + FrameTailCall(target.isThumb() ? 0 : 0), + CallThunk(target.isThumb() ? 4 : 4), + FrameThunk(target.isThumb() ? 
0 : 0) {} +}; + +outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( + std::vector<outliner::Candidate> &RepeatedSequenceLocs) const { + outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; + unsigned SequenceSize = + std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0, + [this](unsigned Sum, const MachineInstr &MI) { + return Sum + getInstSizeInBytes(MI); + }); + + // Properties about candidate MBBs that hold for all of them. + unsigned FlagsSetInAll = 0xF; + + // Compute liveness information for each candidate, and set FlagsSetInAll. + const TargetRegisterInfo &TRI = getRegisterInfo(); + std::for_each( + RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + [&FlagsSetInAll](outliner::Candidate &C) { FlagsSetInAll &= C.Flags; }); + + // According to the ARM Procedure Call Standard, the following are + // undefined on entry/exit from a function call: + // + // * Register R12(IP), + // * Condition codes (and thus the CPSR register) + // + // Because of this, we can't outline any sequence of instructions where one + // of these registers is live into/across it. Thus, we need to delete those + // candidates. + auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) { + // If the unsafe registers in this block are all dead, then we don't need + // to compute liveness here. + if (C.Flags & UnsafeRegsDead) + return false; + C.initLRU(TRI); + LiveRegUnits LRU = C.LRU; + return (!LRU.available(ARM::R12) || !LRU.available(ARM::CPSR)); + }; + + // Are there any candidates where those registers are live? + if (!(FlagsSetInAll & UnsafeRegsDead)) { + // Erase every candidate that violates the restrictions above. (It could be + // true that we have viable candidates, so it's not worth bailing out in + // the case that, say, 1 out of 20 candidates violate the restructions.) 
+ RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(), + RepeatedSequenceLocs.end(), + CantGuaranteeValueAcrossCall), + RepeatedSequenceLocs.end()); + + // If the sequence doesn't have enough candidates left, then we're done. + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + } + + // At this point, we have only "safe" candidates to outline. Figure out + // frame + call instruction information. + + unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode(); + + // Helper lambda which sets call information for every candidate. + auto SetCandidateCallInfo = + [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) { + for (outliner::Candidate &C : RepeatedSequenceLocs) + C.setCallInfo(CallID, NumBytesForCall); + }; + + OutlinerCosts *Costs = new OutlinerCosts(Subtarget); + unsigned FrameID = 0; + unsigned NumBytesToCreateFrame = 0; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (RepeatedSequenceLocs[0].back()->isTerminator()) { + FrameID = MachineOutlinerTailCall; + NumBytesToCreateFrame = Costs->FrameTailCall; + SetCandidateCallInfo(MachineOutlinerTailCall, Costs->CallTailCall); + } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX || + LastInstrOpcode == ARM::tBL || LastInstrOpcode == ARM::tBLXr || + LastInstrOpcode == ARM::tBLXi) { + FrameID = MachineOutlinerThunk; + NumBytesToCreateFrame = Costs->FrameThunk; + SetCandidateCallInfo(MachineOutlinerThunk, Costs->CallThunk); + } else + return outliner::OutlinedFunction(); + + return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, + NumBytesToCreateFrame, FrameID); +} + +bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom( + MachineFunction &MF, bool OutlineFromLinkOnceODRs) const { + const Function &F = MF.getFunction(); + + // Can F be deduplicated by the linker? If it can, don't outline from it. 
+ if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage()) + return false; + + // Don't outline from functions with section markings; the program could + // expect that all the code is in the named section. + // FIXME: Allow outlining from multiple functions with the same section + // marking. + if (F.hasSection()) + return false; + + // FIXME: Thumb1 outlining is not handled + if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction()) + return false; + + // It's safe to outline from MF. + return true; +} + +bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, + unsigned &Flags) const { + // Check if LR is available through all of the MBB. If it's not, then set + // a flag. + assert(MBB.getParent()->getRegInfo().tracksLiveness() && + "Suitable Machine Function for outlining must track liveness"); + + LiveRegUnits LRU(getRegisterInfo()); + + std::for_each(MBB.rbegin(), MBB.rend(), + [&LRU](MachineInstr &MI) { LRU.accumulate(MI); }); + + // Check if each of the unsafe registers are available... + bool R12AvailableInBlock = LRU.available(ARM::R12); + bool CPSRAvailableInBlock = LRU.available(ARM::CPSR); + + // If all of these are dead (and not live out), we know we don't have to check + // them later. + if (R12AvailableInBlock && CPSRAvailableInBlock) + Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead; + + // Now, add the live outs to the set. + LRU.addLiveOuts(MBB); + + // If any of these registers is available in the MBB, but also a live out of + // the block, then we know outlining is unsafe. + if (R12AvailableInBlock && !LRU.available(ARM::R12)) + return false; + if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR)) + return false; + + // Check if there's a call inside this MachineBasicBlock. If there is, then + // set a flag. 
+ if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); })) + Flags |= MachineOutlinerMBBFlags::HasCalls; + + if (!LRU.available(ARM::LR)) + Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere; + + return true; +} + +outliner::InstrType +ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, + unsigned Flags) const { + MachineInstr &MI = *MIT; + const TargetRegisterInfo *TRI = &getRegisterInfo(); + + // Be conservative with inline ASM + if (MI.isInlineAsm()) + return outliner::InstrType::Invisible; + + // Don't allow debug values to impact outlining type. + if (MI.isDebugInstr() || MI.isIndirectDebugValue()) + return outliner::InstrType::Invisible; + + // At this point, KILL instructions don't really tell us much so we can go + // ahead and skip over them. + if (MI.isKill()) + return outliner::InstrType::Invisible; + + // PIC instructions contain labels, outlining them would break offset + // computing. + unsigned Opc = MI.getOpcode(); + if (Opc == ARM::t2IT || Opc == ARM::tPICADD || Opc == ARM::PICADD || + Opc == ARM::PICSTR || Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || + Opc == ARM::PICLDR || Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || + Opc == ARM::PICLDRSB || Opc == ARM::PICLDRSH || + Opc == ARM::t2LDRpci_pic || Opc == ARM::t2MOVi16_ga_pcrel || + Opc == ARM::t2MOVTi16_ga_pcrel || Opc == ARM::t2MOV_ga_pcrel) + return outliner::InstrType::Illegal; + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + // Don't outline if the branch is not unconditional. + if (Opc == ARM::BX_RET || Opc == ARM::tBX_RET || Opc == ARM::MOVPCLR) { + if (MI.getOperand(0).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + if (Opc == ARM::LDMIA_RET) { + if (MI.getOperand(2).getImm() != ARMCC::AL) + return outliner::InstrType::Illegal; + } + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return outliner::InstrType::Legal; + + // It's not, so don't outline it. 
+ return outliner::InstrType::Illegal; + } + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) { + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return outliner::InstrType::Illegal; + + // If it uses LR then don't touch it. + if (MOP.isReg() && !MOP.isImplicit() && (MOP.getReg() == ARM::LR)) + return outliner::InstrType::Illegal; + } + + if (MI.isCall()) { + // If we don't know anything about the callee, assume it depends on the + // stack layout of the caller. In that case, it's only legal to outline + // as a tail-call. Whitelist the call instructions we know about so we + // don't get unexpected results with call pseudo-instructions. + auto UnknownCallOutlineType = outliner::InstrType::Illegal; + if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX || + Opc == ARM::tBLXr || Opc == ARM::tBLXi) + UnknownCallOutlineType = outliner::InstrType::LegalTerminator; + + return UnknownCallOutlineType; + } + + // Don't outline positions. + if (MI.isPosition()) + return outliner::InstrType::Illegal; + + // Don't touch the link register + if (MI.readsRegister(ARM::LR, &getRegisterInfo()) || + MI.modifiesRegister(ARM::LR, &getRegisterInfo())) + return outliner::InstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) + return outliner::InstrType::Illegal; + + return outliner::InstrType::Legal; +} + +void ARMBaseInstrInfo::buildOutlinedFrame( + MachineBasicBlock &MBB, MachineFunction &MF, + const outliner::OutlinedFunction &OF) const { + // For thunk outlining, rewrite the last instruction from a call to a + // tail-call. 
+ if (OF.FrameConstructionID == MachineOutlinerThunk) { + MachineInstr *Call = &*--MBB.instr_end(); + if (Subtarget.isThumb()) + if (Call->getOperand(2).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPr)) + .add(Call->getOperand(2)); + else if (Subtarget.isTargetMachO()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPd)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::tTAILJMPdND)) + .add(Call->getOperand(2)) + .add(predOps(ARMCC::AL)); + else if (Call->getOperand(0).isReg()) + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPr)) + .add(Call->getOperand(0)); + else + BuildMI(MBB, MBB.end(), DebugLoc(), get(ARM::TAILJMPd)) + .add(Call->getOperand(0)); + Call->eraseFromParent(); + } +} + +MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, const outliner::Candidate &C) const { + MachineInstrBuilder MIB; + MachineBasicBlock::iterator CallPt; + unsigned Opc; + bool isThumb = Subtarget.isThumb(); + + // Are we tail calling? + if (C.CallConstructionID == MachineOutlinerTailCall) { + // If yes, then we can just branch to the label. + Opc = isThumb + ? (Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) + : ARM::TAILJMPd; + MIB = BuildMI(MF, DebugLoc(), get(Opc)) + .addGlobalAddress(M.getNamedValue(MF.getName())); + if (isThumb) + MIB.add(predOps(ARMCC::AL)); + It = MBB.insert(It, MIB); + return It; + } + + // Create the call instruction. + Opc = isThumb ? ARM::tBL : ARM::BL; + MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc)); + if (isThumb) + CallMIB.add(predOps(ARMCC::AL)); + CallMIB.addGlobalAddress(M.getNamedValue(MF.getName())); + + // Insert the call. 
+ It = MBB.insert(It, CallMIB); + return It; +} Index: llvm/lib/CodeGen/TargetPassConfig.cpp =================================================================== --- llvm/lib/CodeGen/TargetPassConfig.cpp +++ llvm/lib/CodeGen/TargetPassConfig.cpp @@ -979,8 +979,10 @@ bool RunOnAllFunctions = (EnableMachineOutliner == AlwaysOutline); bool AddOutliner = RunOnAllFunctions || TM->Options.SupportsDefaultOutlining; - if (AddOutliner) + if (AddOutliner) { + MachineOutlinerEnabled = true; addPass(createMachineOutlinerPass(RunOnAllFunctions)); + } } // Add passes that directly emit MI after all other MI passes. Index: llvm/lib/CodeGen/MachineOutliner.cpp =================================================================== --- llvm/lib/CodeGen/MachineOutliner.cpp +++ llvm/lib/CodeGen/MachineOutliner.cpp @@ -1155,6 +1155,9 @@ // Outlined functions shouldn't preserve liveness. MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); MF.getRegInfo().freezeReservedRegs(MF); + MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); + MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs); + MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); // If there's a DISubprogram associated with this outlined function, then // emit debug info for the outlined function. Index: llvm/include/llvm/CodeGen/TargetPassConfig.h =================================================================== --- llvm/include/llvm/CodeGen/TargetPassConfig.h +++ llvm/include/llvm/CodeGen/TargetPassConfig.h @@ -132,6 +132,9 @@ /// callers. bool RequireCodeGenSCCOrder = false; + /// Default setting for -enable-machine-outliner + bool MachineOutlinerEnabled = false; + /// Add the actual instruction selection passes. This does not include /// preparation passes on IR. 
bool addCoreISelPasses(); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6059,8 +6059,9 @@ // We only support -moutline in AArch64 right now. If we're not compiling // for AArch64, emit a warning and ignore the flag. Otherwise, add the // proper mllvm flags. - if (Triple.getArch() != llvm::Triple::aarch64 && - Triple.getArch() != llvm::Triple::aarch64_32) { + if (!(Triple.isARM() || Triple.isThumb() || + Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32)) { D.Diag(diag::warn_drv_moutline_unsupported_opt) << Triple.getArchName(); } else { CmdArgs.push_back("-mllvm");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits