Author: Tres Popp Date: 2021-01-18T12:01:57+01:00 New Revision: 3bd24574c7d0187e69780540fe0681914d71794f
URL: https://github.com/llvm/llvm-project/commit/3bd24574c7d0187e69780540fe0681914d71794f DIFF: https://github.com/llvm/llvm-project/commit/3bd24574c7d0187e69780540fe0681914d71794f.diff LOG: Revert "[PowerPC] support register pressure reduction in machine combiner." This reverts commit 26a396c4ef481cb159bba631982841736a125a9c. See https://reviews.llvm.org/D92071 for a description of the issue. Added: Modified: llvm/include/llvm/CodeGen/MachineCombinerPattern.h llvm/lib/CodeGen/MachineCombiner.cpp llvm/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/lib/Target/PowerPC/PPCInstrInfo.h Removed: llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll ################################################################################ diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h index ac0cc70744d1..e9f52fb064e1 100644 --- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h +++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h @@ -29,11 +29,6 @@ enum class MachineCombinerPattern { REASSOC_XY_AMM_BMM, REASSOC_XMM_AMM_BMM, - // These are patterns matched by the PowerPC to reassociate FMA and FSUB to - // reduce register pressure. - REASSOC_XY_BCA, - REASSOC_XY_BAC, - // These are multiply-add patterns matched by the AArch64 machine combiner. 
MULADDW_OP1, MULADDW_OP2, diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index e2b6cfe55c16..878912a6032b 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -279,9 +279,6 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: return CombinerObjective::MustReduceDepth; - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: - return CombinerObjective::MustReduceRegisterPressure; default: return CombinerObjective::Default; } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 71d9fb2c179e..75a498b807cd 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -21,15 +21,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervals.h" -#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackMaps.h" @@ -76,14 +73,6 @@ static cl::opt<bool> UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, cl::desc("Use the old (incorrect) instruction latency calculation")); -static cl::opt<float> - FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5), - cl::desc("register pressure factor for the transformations.")); - -static cl::opt<bool> EnableFMARegPressureReduction( - "ppc-fma-rp-reduction", cl::Hidden, cl::init(true), - cl::desc("enable 
register pressure reduce in machine combiner pass.")); - // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -289,23 +278,21 @@ bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { #define InfoArrayIdxFMULInst 2 #define InfoArrayIdxAddOpIdx 3 #define InfoArrayIdxMULOpIdx 4 -#define InfoArrayIdxFSubInst 5 // Array keeps info for FMA instructions: // Index 0(InfoArrayIdxFMAInst): FMA instruction; -// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA; -// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA; +// Index 1(InfoArrayIdxFAddInst): ADD instruction assoaicted with FMA; +// Index 2(InfoArrayIdxFMULInst): MUL instruction assoaicted with FMA; // Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands; // Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands; -// second MUL operand index is plus 1; -// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA. -static const uint16_t FMAOpIdxInfo[][6] = { +// second MUL operand index is plus 1. +static const uint16_t FMAOpIdxInfo[][5] = { // FIXME: Add more FMA instructions like XSNMADDADP and so on. - {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP}, - {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP}, - {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP}, - {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP}, - {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB}, - {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}}; + {PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2}, + {PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2}, + {PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2}, + {PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2}, + {PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1}, + {PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1}}; // Check if an opcode is a FMA instruction. If it is, return the index in array // FMAOpIdxInfo. Otherwise, return -1. 
@@ -316,8 +303,6 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { return -1; } -// On PowerPC target, we have two kinds of patterns related to FMA: -// 1: Improve ILP. // Try to reassociate FMA chains like below: // // Pattern 1: @@ -341,35 +326,11 @@ int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const { // // breaking the dependency between A and B, allowing FMA to be executed in // parallel (or back-to-back in a pipeline) instead of depending on each other. -// -// 2: Reduce register pressure. -// Try to reassociate FMA with FSUB and a constant like below: -// C is a floatint point const. -// -// Pattern 1: -// A = FSUB X, Y (Leaf) -// D = FMA B, C, A (Root) -// --> -// A = FMA B, Y, -C -// D = FMA A, X, C -// -// Pattern 2: -// A = FSUB X, Y (Leaf) -// D = FMA B, A, C (Root) -// --> -// A = FMA B, Y, -C -// D = FMA A, X, C -// -// Before the transformation, A must be assigned with different hardware -// register with D. After the transformation, A and D must be assigned with -// same hardware register due to TIE attricute of FMA instructions. 
-// bool PPCInstrInfo::getFMAPatterns( - MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, - bool DoRegPressureReduce) const { + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { MachineBasicBlock *MBB = Root.getParent(); - const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) { for (const auto &MO : Instr.explicit_operands()) @@ -378,10 +339,9 @@ bool PPCInstrInfo::getFMAPatterns( return true; }; - auto IsReassociableAddOrSub = [&](const MachineInstr &Instr, - unsigned OpType) { + auto IsReassociableAdd = [&](const MachineInstr &Instr) { if (Instr.getOpcode() != - FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType]) + FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][InfoArrayIdxFAddInst]) return false; // Instruction can be reassociated. @@ -394,17 +354,11 @@ bool PPCInstrInfo::getFMAPatterns( if (!IsAllOpsVirtualReg(Instr)) return false; - // For register pressure reassociation, the FSub must have only one use as - // we want to delete the sub to save its def. 
- if (OpType == InfoArrayIdxFSubInst && - !MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg())) - return false; - return true; }; auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx, - int16_t &MulOpIdx, bool IsLeaf) { + bool IsLeaf) { int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode()); if (Idx < 0) return false; @@ -419,327 +373,53 @@ bool PPCInstrInfo::getFMAPatterns( if (!IsAllOpsVirtualReg(Instr)) return false; - MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; if (IsLeaf) return true; AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; const MachineOperand &OpAdd = Instr.getOperand(AddOpIdx); - MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg()); + MachineInstr *MIAdd = MRI.getUniqueVRegDef(OpAdd.getReg()); // If 'add' operand's def is not in current block, don't do ILP related opt. if (!MIAdd || MIAdd->getParent() != MBB) return false; // If this is not Leaf FMA Instr, its 'add' operand should only have one use // as this fma will be changed later. - return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg()); + return IsLeaf ? true : MRI.hasOneNonDBGUse(OpAdd.getReg()); }; int16_t AddOpIdx = -1; - int16_t MulOpIdx = -1; - - bool IsUsedOnceL = false; - bool IsUsedOnceR = false; - MachineInstr *MULInstrL = nullptr; - MachineInstr *MULInstrR = nullptr; - - auto IsRPReductionCandidate = [&]() { - // Currently, we only support float and double. - // FIXME: add support for other types. - unsigned Opcode = Root.getOpcode(); - if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP) - return false; - - // Root must be a valid FMA like instruction. - // Treat it as leaf as we don't care its add operand. 
- if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) { - assert((MulOpIdx >= 0) && "mul operand index not right!"); - Register MULRegL = TRI->lookThruCopyLike( - Root.getOperand(MulOpIdx).getReg(), MRI, &IsUsedOnceL); - Register MULRegR = TRI->lookThruCopyLike( - Root.getOperand(MulOpIdx + 1).getReg(), MRI, &IsUsedOnceR); - if (!Register::isVirtualRegister(MULRegL) || - !Register::isVirtualRegister(MULRegR)) - return false; - - MULInstrL = MRI->getVRegDef(MULRegL); - MULInstrR = MRI->getVRegDef(MULRegR); - return true; - } - return false; - }; - - // Register pressure fma reassociation patterns. - if (DoRegPressureReduce && IsRPReductionCandidate()) { - assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!"); - // Register pressure pattern 1 - if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR && - IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) { - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BCA); - return true; - } - - // Register pressure pattern 2 - if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL && - IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) { - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n"); - Patterns.push_back(MachineCombinerPattern::REASSOC_XY_BAC); - return true; - } - } - - // ILP fma reassociation patterns. // Root must be a valid FMA like instruction. - AddOpIdx = -1; - if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false)) + if (!IsReassociableFMA(Root, AddOpIdx, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegB = Root.getOperand(AddOpIdx).getReg(); - MachineInstr *Prev = MRI->getUniqueVRegDef(RegB); + MachineInstr *Prev = MRI.getUniqueVRegDef(RegB); // Prev must be a valid FMA like instruction. 
AddOpIdx = -1; - if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false)) + if (!IsReassociableFMA(*Prev, AddOpIdx, false)) return false; assert((AddOpIdx >= 0) && "add operand index not right!"); Register RegA = Prev->getOperand(AddOpIdx).getReg(); - MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA); + MachineInstr *Leaf = MRI.getUniqueVRegDef(RegA); AddOpIdx = -1; - if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) { + if (IsReassociableFMA(*Leaf, AddOpIdx, true)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XMM_AMM_BMM); - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n"); return true; } - if (IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) { + if (IsReassociableAdd(*Leaf)) { Patterns.push_back(MachineCombinerPattern::REASSOC_XY_AMM_BMM); - LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n"); return true; } return false; } -void PPCInstrInfo::finalizeInsInstrs( - MachineInstr &Root, MachineCombinerPattern &P, - SmallVectorImpl<MachineInstr *> &InsInstrs) const { - assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!"); - - MachineFunction *MF = Root.getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineConstantPool *MCP = MF->getConstantPool(); - - int16_t Idx = getFMAOpIdxInfo(Root.getOpcode()); - if (Idx < 0) - return; - - uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; - - // For now we only need to fix up placeholder for register pressure reduce - // patterns. - Register ConstReg = 0; - switch (P) { - case MachineCombinerPattern::REASSOC_XY_BCA: - ConstReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI); - break; - case MachineCombinerPattern::REASSOC_XY_BAC: - ConstReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI); - break; - default: - // Not register pressure reduce patterns. 
- return; - } - - MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg); - // Get const value from const pool. - const Constant *C = getConstantFromConstantPool(ConstDefInstr); - assert(isa<llvm::ConstantFP>(C) && "not a valid constant!"); - - // Get negative fp const. - APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF()); - F1.changeSign(); - Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1); - Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType()); - - // Put negative fp const into constant pool. - unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment); - - MachineOperand *Placeholder = nullptr; - // Record the placeholder PPC::ZERO8 we add in reassociateFMA. - for (auto *Inst : InsInstrs) { - for (MachineOperand &Operand : Inst->explicit_operands()) { - assert(Operand.isReg() && "Invalid instruction in InsInstrs!"); - if (Operand.getReg() == PPC::ZERO8) { - Placeholder = &Operand; - break; - } - } - } - - assert(Placeholder && "Placeholder does not exist!"); - - // Generate instructions to load the const fp from constant pool. - // We only support PPC64 and medium code model. - Register LoadNewConst = - generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs); - - // Fill the placeholder with the new load from constant pool. - Placeholder->setReg(LoadNewConst); -} - -bool PPCInstrInfo::shouldReduceRegisterPressure( - MachineBasicBlock *MBB, RegisterClassInfo *RegClassInfo) const { - - if (!EnableFMARegPressureReduction) - return false; - - // Currently, we only enable register pressure reducing in machine combiner - // for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector - // support. 
- // - // So we need following instructions to access a TOC entry: - // - // %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0 - // %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0, - // killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool) - // - // FIXME: add more supported targets, like Small and Large code model, PPC32, - // AIX. - if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() && - Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium)) - return false; - - const TargetRegisterInfo *TRI = &getRegisterInfo(); - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - auto GetMBBPressure = [&](MachineBasicBlock *MBB) -> std::vector<unsigned> & { - RegionPressure Pressure; - RegPressureTracker RPTracker(Pressure); - - // Initialize the register pressure tracker. - RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), - /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); - - for (MachineBasicBlock::iterator MII = MBB->instr_end(), - MIE = MBB->instr_begin(); - MII != MIE; --MII) { - MachineInstr &MI = *std::prev(MII); - if (MI.isDebugValue() || MI.isDebugLabel()) - continue; - RegisterOperands RegOpers; - RegOpers.collect(MI, *TRI, *MRI, false, false); - RPTracker.recedeSkipDebugValues(); - assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); - RPTracker.recede(RegOpers); - } - - // Close the RPTracker to finalize live ins. - RPTracker.closeRegion(); - - return RPTracker.getPressure().MaxSetPressure; - }; - - // For now we only care about float and double type fma. - unsigned VSSRCLimit = TRI->getRegPressureSetLimit( - *MBB->getParent(), PPC::RegisterPressureSets::VSSRC); - - // Only reduce register pressure when pressure is high. 
- return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] > - (float)VSSRCLimit * FMARPFactor; -} - -bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const { - // I has only one memory operand which is load from constant pool. - if (!I->hasOneMemOperand()) - return false; - - MachineMemOperand *Op = I->memoperands()[0]; - return Op->isLoad() && Op->getPseudoValue() && - Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool; -} - -Register PPCInstrInfo::generateLoadForNewConst( - unsigned Idx, MachineInstr *MI, Type *Ty, - SmallVectorImpl<MachineInstr *> &InsInstrs) const { - // Now we only support PPC64, Medium code model and P9 with vector. - // We have immutable pattern to access const pool. See function - // shouldReduceRegisterPressure. - assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() && - Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) && - "Target not supported!\n"); - - MachineFunction *MF = MI->getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - - // Generate ADDIStocHA8 - Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass); - MachineInstrBuilder TOCOffset = - BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1) - .addReg(PPC::X2) - .addConstantPoolIndex(Idx); - - assert((Ty->isFloatTy() || Ty->isDoubleTy()) && - "Only float and double are supported!"); - - unsigned LoadOpcode; - // Should be float type or double type. - if (Ty->isFloatTy()) - LoadOpcode = PPC::DFLOADf32; - else - LoadOpcode = PPC::DFLOADf64; - - const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg()); - Register VReg2 = MRI->createVirtualRegister(RC); - MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad, - Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty)); - - // Generate Load from constant pool. 
- MachineInstrBuilder Load = - BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2) - .addConstantPoolIndex(Idx) - .addReg(VReg1, getKillRegState(true)) - .addMemOperand(MMO); - - Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO); - - // Insert the toc load instructions into InsInstrs. - InsInstrs.insert(InsInstrs.begin(), Load); - InsInstrs.insert(InsInstrs.begin(), TOCOffset); - return VReg2; -} - -// This function returns the const value in constant pool if the \p I is a load -// from constant pool. -const Constant * -PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const { - MachineFunction *MF = I->getMF(); - MachineRegisterInfo *MRI = &MF->getRegInfo(); - MachineConstantPool *MCP = MF->getConstantPool(); - assert(I->mayLoad() && "Should be a load instruction.\n"); - for (auto MO : I->uses()) { - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (Reg == 0 || !Register::isVirtualRegister(Reg)) - continue; - // Find the toc address. - MachineInstr *DefMI = MRI->getVRegDef(Reg); - for (auto MO2 : DefMI->uses()) - if (MO2.isCPI()) - return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal; - } - return nullptr; -} - bool PPCInstrInfo::getMachineCombinerPatterns( MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns, bool DoRegPressureReduce) const { @@ -748,7 +428,7 @@ bool PPCInstrInfo::getMachineCombinerPatterns( if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) return false; - if (getFMAPatterns(Root, Patterns, DoRegPressureReduce)) + if (getFMAPatterns(Root, Patterns)) return true; return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, @@ -763,8 +443,6 @@ void PPCInstrInfo::genAlternativeCodeSequence( switch (Pattern) { case MachineCombinerPattern::REASSOC_XY_AMM_BMM: case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - case MachineCombinerPattern::REASSOC_XY_BCA: - case MachineCombinerPattern::REASSOC_XY_BAC: reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, 
InstrIdxForVirtReg); break; default: @@ -775,6 +453,8 @@ void PPCInstrInfo::genAlternativeCodeSequence( } } +// Currently, only handle two patterns REASSOC_XY_AMM_BMM and +// REASSOC_XMM_AMM_BMM. See comments for getFMAPatterns. void PPCInstrInfo::reassociateFMA( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -782,7 +462,6 @@ void PPCInstrInfo::reassociateFMA( DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { MachineFunction *MF = Root.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); - const TargetRegisterInfo *TRI = &getRegisterInfo(); MachineOperand &OpC = Root.getOperand(0); Register RegC = OpC.getReg(); const TargetRegisterClass *RC = MRI.getRegClass(RegC); @@ -792,42 +471,13 @@ void PPCInstrInfo::reassociateFMA( int16_t Idx = getFMAOpIdxInfo(FmaOp); assert(Idx >= 0 && "Root must be a FMA instruction"); - bool IsILPReassociate = - (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) || - (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM); - uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx]; uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx]; - - MachineInstr *Prev = nullptr; - MachineInstr *Leaf = nullptr; - switch (Pattern) { - default: - llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: - Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); - Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); - break; - case MachineCombinerPattern::REASSOC_XY_BAC: { - Register MULReg = - TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI); - Leaf = MRI.getVRegDef(MULReg); - break; - } - case MachineCombinerPattern::REASSOC_XY_BCA: { - Register MULReg = TRI->lookThruCopyLike( - Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI); - Leaf = MRI.getVRegDef(MULReg); - break; - } - } - - uint16_t IntersectedFlags = 0; - if (IsILPReassociate) - 
IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); - else - IntersectedFlags = Root.getFlags() & Leaf->getFlags(); + MachineInstr *Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg()); + MachineInstr *Leaf = + MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg()); + uint16_t IntersectedFlags = + Root.getFlags() & Prev->getFlags() & Leaf->getFlags(); auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg, bool &KillFlag) { @@ -837,51 +487,36 @@ void PPCInstrInfo::reassociateFMA( }; auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1, - Register &MulOp2, Register &AddOp, - bool &MulOp1KillFlag, bool &MulOp2KillFlag, - bool &AddOpKillFlag) { + Register &MulOp2, bool &MulOp1KillFlag, + bool &MulOp2KillFlag) { GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag); GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag); - GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag); }; - Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11, - RegA21, RegB; + Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32; bool KillX = false, KillY = false, KillM11 = false, KillM12 = false, - KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false, - KillA11 = false, KillA21 = false, KillB = false; + KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false; - GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB); - - if (IsILPReassociate) - GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21); + GetFMAInstrInfo(Root, RegM31, RegM32, KillM31, KillM32); + GetFMAInstrInfo(*Prev, RegM21, RegM22, KillM21, KillM22); if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { - GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11); + GetFMAInstrInfo(*Leaf, RegM11, RegM12, KillM11, KillM12); GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX); } else if 
(Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { GetOperandInfo(Leaf->getOperand(1), RegX, KillX); GetOperandInfo(Leaf->getOperand(2), RegY, KillY); - } else { - // Get FSUB instruction info. - GetOperandInfo(Leaf->getOperand(1), RegX, KillX); - GetOperandInfo(Leaf->getOperand(2), RegY, KillY); } // Create new virtual registers for the new results instead of // recycling legacy ones because the MachineCombiner's computation of the // critical path requires a new register definition rather than an existing // one. - // For register pressure reassociation, we only need create one virtual - // register for the new fma. Register NewVRA = MRI.createVirtualRegister(RC); InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0)); - Register NewVRB = 0; - if (IsILPReassociate) { - NewVRB = MRI.createVirtualRegister(RC); - InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); - } + Register NewVRB = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1)); Register NewVRD = 0; if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { @@ -900,11 +535,7 @@ void PPCInstrInfo::reassociateFMA( MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2); }; - MachineInstrBuilder NewARegPressure, NewCRegPressure; - switch (Pattern) { - default: - llvm_unreachable("not recognized pattern!"); - case MachineCombinerPattern::REASSOC_XY_AMM_BMM: { + if (Pattern == MachineCombinerPattern::REASSOC_XY_AMM_BMM) { // Create new instructions for insertion. MachineInstrBuilder MINewB = BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB) @@ -937,9 +568,7 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewA); InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewC); - break; - } - case MachineCombinerPattern::REASSOC_XMM_AMM_BMM: { + } else if (Pattern == MachineCombinerPattern::REASSOC_XMM_AMM_BMM) { assert(NewVRD && "new FMA register not created!"); // Create new instructions for insertion. 
MachineInstrBuilder MINewA = @@ -981,47 +610,6 @@ void PPCInstrInfo::reassociateFMA( InsInstrs.push_back(MINewB); InsInstrs.push_back(MINewD); InsInstrs.push_back(MINewC); - break; - } - case MachineCombinerPattern::REASSOC_XY_BAC: - case MachineCombinerPattern::REASSOC_XY_BCA: { - Register VarReg; - bool KillVarReg = false; - if (Pattern == MachineCombinerPattern::REASSOC_XY_BCA) { - VarReg = RegM31; - KillVarReg = KillM31; - } else { - VarReg = RegM32; - KillVarReg = KillM32; - } - // We don't want to get negative const from memory pool too early, as the - // created entry will not be deleted even if it has no users. Since all - // operand of Leaf and Root are virtual register, we use zero register - // here as a placeholder. When the InsInstrs is selected in - // MachineCombiner, we call finalizeInsInstrs to replace the zero register - // with a virtual register which is a load from constant pool. - NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA) - .addReg(RegB, getKillRegState(RegB)) - .addReg(RegY, getKillRegState(KillY)) - .addReg(PPC::ZERO8); - NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC) - .addReg(NewVRA, getKillRegState(true)) - .addReg(RegX, getKillRegState(KillX)) - .addReg(VarReg, getKillRegState(KillVarReg)); - // For now, we only support xsmaddadp/xsmaddasp, their add operand are - // both at index 1, no need to adjust. - // FIXME: when add more fma instructions support, like fma/fmas, adjust - // the operand index here. - break; - } - } - - if (!IsILPReassociate) { - setSpecialOperandAttr(*NewARegPressure, IntersectedFlags); - setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags); - - InsInstrs.push_back(NewARegPressure); - InsInstrs.push_back(NewCRegPressure); } assert(!InsInstrs.empty() && @@ -1029,8 +617,7 @@ void PPCInstrInfo::reassociateFMA( // Record old instructions for deletion. 
DelInstrs.push_back(Leaf); - if (IsILPReassociate) - DelInstrs.push_back(Prev); + DelInstrs.push_back(Prev); DelInstrs.push_back(&Root); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index c6ef1742b722..962456e784fa 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -252,11 +252,6 @@ class PPCInstrInfo : public PPCGenInstrInfo { SmallVectorImpl<MachineInstr *> &InsInstrs, SmallVectorImpl<MachineInstr *> &DelInstrs, DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const; - bool isLoadFromConstantPool(MachineInstr *I) const; - Register - generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty, - SmallVectorImpl<MachineInstr *> &InsInstrs) const; - const Constant *getConstantFromConstantPool(MachineInstr *I) const; virtual void anchor(); protected: @@ -348,8 +343,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { /// chain ending in \p Root. All potential patterns are output in the \p /// P array. bool getFMAPatterns(MachineInstr &Root, - SmallVectorImpl<MachineCombinerPattern> &P, - bool DoRegPressureReduce) const; + SmallVectorImpl<MachineCombinerPattern> &P) const; /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in <Root>. All potential patterns are @@ -358,20 +352,6 @@ class PPCInstrInfo : public PPCGenInstrInfo { SmallVectorImpl<MachineCombinerPattern> &P, bool DoRegPressureReduce) const override; - /// On PowerPC, we leverage machine combiner pass to reduce register pressure - /// when the register pressure is high for one BB. - /// Return true if register pressure for \p MBB is high and ABI is supported - /// to reduce register pressure. Otherwise return false. - bool - shouldReduceRegisterPressure(MachineBasicBlock *MBB, - RegisterClassInfo *RegClassInfo) const override; - - /// Fixup the placeholders we put in genAlternativeCodeSequence() for - /// MachineCombiner. 
- void - finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, - SmallVectorImpl<MachineInstr *> &InsInstrs) const override; - bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; /// On PowerPC, we try to reassociate FMA chain which will increase diff --git a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll b/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll deleted file mode 100644 index 281734e456e8..000000000000 --- a/llvm/test/CodeGen/PowerPC/register-pressure-reduction.ll +++ /dev/null @@ -1,135 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -O3 < %s \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s -; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -O3 < %s \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=CHECK-P8 -; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-fma-rp-factor=0.0 -O3 < %s \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s --check-prefix=CHECK-FMA - -@global_val = external global float, align 4 - -define float @foo_float(float %0, float %1, float %2, float %3) { -; CHECK-LABEL: foo_float: -; CHECK: # %bb.0: -; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NEXT: xsmulsp f1, f2, f1 -; CHECK-NEXT: xssubsp f0, f3, f4 -; CHECK-NEXT: lfs f2, .LCPI0_0@toc@l(r3) -; CHECK-NEXT: xsmaddasp f1, f0, f2 -; CHECK-NEXT: blr -; -; CHECK-P8-LABEL: foo_float: -; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: xsmulsp f1, f2, f1 -; CHECK-P8-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: xssubsp f0, f3, f4 -; CHECK-P8-NEXT: lfs f2, .LCPI0_0@toc@l(r3) -; CHECK-P8-NEXT: xsmaddasp f1, f0, f2 -; CHECK-P8-NEXT: blr -; -; CHECK-FMA-LABEL: foo_float: -; CHECK-FMA: # %bb.0: -; CHECK-FMA-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-FMA-NEXT: xsmulsp f1, f2, f1 -; CHECK-FMA-NEXT: lfs f0, .LCPI0_0@toc@l(r3) -; 
CHECK-FMA-NEXT: addis r3, r2, .LCPI0_1@toc@ha -; CHECK-FMA-NEXT: lfs f2, .LCPI0_1@toc@l(r3) -; CHECK-FMA-NEXT: xsmaddasp f1, f4, f2 -; CHECK-FMA-NEXT: xsmaddasp f1, f3, f0 -; CHECK-FMA-NEXT: blr - %5 = fmul reassoc nsz float %1, %0 - %6 = fsub reassoc nsz float %2, %3 - %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000 - %8 = fadd reassoc nsz float %7, %5 - ret float %8 -} - -define double @foo_double(double %0, double %1, double %2, double %3) { -; CHECK-LABEL: foo_double: -; CHECK: # %bb.0: -; CHECK-NEXT: xsmuldp f1, f2, f1 -; CHECK-NEXT: xssubdp f0, f3, f4 -; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-NEXT: lfd f2, .LCPI1_0@toc@l(r3) -; CHECK-NEXT: xsmaddadp f1, f0, f2 -; CHECK-NEXT: blr -; -; CHECK-P8-LABEL: foo_double: -; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: xsmuldp f1, f2, f1 -; CHECK-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: xssubdp f0, f3, f4 -; CHECK-P8-NEXT: lfd f2, .LCPI1_0@toc@l(r3) -; CHECK-P8-NEXT: xsmaddadp f1, f0, f2 -; CHECK-P8-NEXT: blr -; -; CHECK-FMA-LABEL: foo_double: -; CHECK-FMA: # %bb.0: -; CHECK-FMA-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-FMA-NEXT: xsmuldp f1, f2, f1 -; CHECK-FMA-NEXT: lfd f0, .LCPI1_0@toc@l(r3) -; CHECK-FMA-NEXT: addis r3, r2, .LCPI1_1@toc@ha -; CHECK-FMA-NEXT: lfd f2, .LCPI1_1@toc@l(r3) -; CHECK-FMA-NEXT: xsmaddadp f1, f4, f2 -; CHECK-FMA-NEXT: xsmaddadp f1, f3, f0 -; CHECK-FMA-NEXT: blr - %5 = fmul reassoc nsz double %1, %0 - %6 = fsub reassoc nsz double %2, %3 - %7 = fmul reassoc nsz double %6, 0x3DB2533FE68CADDE - %8 = fadd reassoc nsz double %7, %5 - ret double %8 -} - -define float @foo_float_reuse_const(float %0, float %1, float %2, float %3) { -; CHECK-LABEL: foo_float_reuse_const: -; CHECK: # %bb.0: -; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-NEXT: xsmulsp f1, f2, f1 -; CHECK-NEXT: xssubsp f0, f3, f4 -; CHECK-NEXT: lfs f3, .LCPI2_0@toc@l(r3) -; CHECK-NEXT: addis r3, r2, .LCPI2_1@toc@ha -; CHECK-NEXT: xsmaddasp f1, f0, f3 -; CHECK-NEXT: lfs f0, .LCPI2_1@toc@l(r3) -; 
CHECK-NEXT: addis r3, r2, .LC0@toc@ha -; CHECK-NEXT: ld r3, .LC0@toc@l(r3) -; CHECK-NEXT: xsmulsp f0, f2, f0 -; CHECK-NEXT: stfs f0, 0(r3) -; CHECK-NEXT: blr -; -; CHECK-P8-LABEL: foo_float_reuse_const: -; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: xsmulsp f1, f2, f1 -; CHECK-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha -; CHECK-P8-NEXT: xssubsp f0, f3, f4 -; CHECK-P8-NEXT: lfs f3, .LCPI2_0@toc@l(r3) -; CHECK-P8-NEXT: lfs f4, .LCPI2_1@toc@l(r4) -; CHECK-P8-NEXT: addis r3, r2, .LC0@toc@ha -; CHECK-P8-NEXT: ld r3, .LC0@toc@l(r3) -; CHECK-P8-NEXT: xsmaddasp f1, f0, f3 -; CHECK-P8-NEXT: xsmulsp f0, f2, f4 -; CHECK-P8-NEXT: stfsx f0, 0, r3 -; CHECK-P8-NEXT: blr -; -; CHECK-FMA-LABEL: foo_float_reuse_const: -; CHECK-FMA: # %bb.0: -; CHECK-FMA-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-FMA-NEXT: xsmulsp f1, f2, f1 -; CHECK-FMA-NEXT: lfs f0, .LCPI2_0@toc@l(r3) -; CHECK-FMA-NEXT: addis r3, r2, .LCPI2_1@toc@ha -; CHECK-FMA-NEXT: lfs f5, .LCPI2_1@toc@l(r3) -; CHECK-FMA-NEXT: addis r3, r2, .LC0@toc@ha -; CHECK-FMA-NEXT: ld r3, .LC0@toc@l(r3) -; CHECK-FMA-NEXT: xsmaddasp f1, f4, f5 -; CHECK-FMA-NEXT: xsmaddasp f1, f3, f0 -; CHECK-FMA-NEXT: xsmulsp f0, f2, f5 -; CHECK-FMA-NEXT: stfs f0, 0(r3) -; CHECK-FMA-NEXT: blr - %5 = fmul reassoc nsz float %1, %0 - %6 = fsub reassoc nsz float %2, %3 - %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000 - %8 = fadd reassoc nsz float %7, %5 - %9 = fmul reassoc nsz float %1, 0xBDB2533FE0000000 - store float %9, float* @global_val, align 4 - ret float %8 -} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits