I think this optimization is too special. Maybe we could analyze the hole in IR, optimize register allocate to approach the same optimize. It is more common.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Ruiling Song > Sent: Friday, July 22, 2016 15:28 > To: [email protected] > Cc: Song, Ruiling <[email protected]> > Subject: [Beignet] [PATCH 1/2] GBE: Further optimize phiNode elimination. > > Take below example: > bb2: > mov x, x-copy > add x2, x, 1 > mul x3, x2, 2 > mov x-copy, x3 > > this is a self-loop block. In previous implementation, we have try our best to > coaleasce phi/phiCopy/phiCopySrc. But in this example, we can do more, we > can even coaleasce x2 to x, x-copy and x3. in real world, there may be many > x2's lies in the use/def chain. So we do some check to see whether we can > totally coaleasce the use/def chain lies between phi/phiCopySrc. We place > various careful check in this path. Some may be too strict, so we need further > refine to this logic. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/ir/instruction.cpp | 18 ++++++ > backend/src/ir/instruction.hpp | 11 ++++ > backend/src/llvm/llvm_gen_backend.cpp | 101 > ++++++++++++++++++++++++++++++++-- > 3 files changed, 124 insertions(+), 6 deletions(-) > > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index ed64580..a5bf89f 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register) > if (new_ins) > *new_ins = insn; > } > + bool Instruction::isNativeInstruction() const { > + if (BinaryInstruction::isClassOf(*this)) { > + //const BinaryInstruction * const insn = cast<BinaryInstruction > *>(this); > + const BinaryInstruction &insn = cast<BinaryInstruction>(*this); > + if (insn.getType() == ir::TYPE_U32 || > + insn.getType() == ir::TYPE_S32 || > + insn.getType() == ir::TYPE_FLOAT) { > + if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV) > + return true; > + } > + } > + > + if (TernaryInstruction::isClassOf(*this)) { > + if (opcode == OP_MAD) > + return true; > + } > + return false; > + } > > bool Instruction::hasSideEffect(void) const { > return opcode == OP_STORE || > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index b2b0b49..f710b5e 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -184,10 +184,21 @@ namespace ir { > RegisterData getDstData(uint32_t ID = 0u) const; > /*! Get the register of the given destination */ > RegisterData getSrcData(uint32_t ID = 0u) const; > + /*! whether the instruction directly maps to native instr. > + * this is used to determine whether we can do aggressive > + * register coaleascing for source and destination */ > + bool isNativeInstruction() const; > /*! Set a register in src srcID */ > void setSrc(uint32_t srcID, Register reg); > /*! Set a register in dst dstID */ > void setDst(uint32_t dstID, Register reg); > + bool use(Register reg) const { > + for (uint32_t i = 0; i < getSrcNum(); i++) { > + if (getSrc(i) == reg) > + return true; > + } > + return false; > + } > /*! Is there any side effect in the memory sub-system? */ > bool hasSideEffect(void) const; > /*! Get / set the parent basic block */ diff --git > a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 41cb783..7d78163 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2272,6 +2272,64 @@ namespace gbe > }); > } > > + bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register> > ®s) { > + if (regs.empty()) return false; > + for (auto & r : regs) { > + if (inst->use(r)) > + return true; > + } > + return false; > + } > + void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn, > + std::vector<ir::Register> &replaceChain, > + ir::Register phi, ir::Register phiCopy, > + const ir::BasicBlock *bb, > ir::BasicBlock::const_iterator > searchEnd) { > + ir::BasicBlock::const_iterator iter = bb->begin(); > + ir::Register targetReg = phi; > + const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb); > + > + for ( ; iter != searchEnd; ++iter) { > + const ir::Instruction *inst = iter.node(); > + if (useRegisters(inst, replaceChain)) { > + replaceChain.clear(); > + // should stop > + break; > + } > + if (inst->use(targetReg)) { > + if (!inst->isNativeInstruction()) { > + replaceChain.clear(); > + // we need to stop further coaleascing > + // in fact we need careful consideration here. > + // I just break and clear the replaceChain now. > + break; > + } > + > + const ir::RegisterData oldData = fn.getRegisterData(targetReg); > + const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0)); > + if(oldData.family != newData.family) { > + replaceChain.clear(); > + break; > + } > + // the value cannot be an liveout > + if (out.contains(targetReg)) { > + replaceChain.clear(); > + break; > + } > + > + replaceChain.push_back(targetReg); > + targetReg = inst->getDst(0); > + } > + } > + // check no use of the registers in replaceChain till the bb->end() > + if (!replaceChain.empty()) { > + for (; iter != bb->end(); ++iter) { > + if (useRegisters(iter.node(), replaceChain)) { > + replaceChain.clear(); > + } > + } > + } > + } > + > void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn, > map<ir::Register, ir::Register> &replaceMap, > map<ir::Register, ir::Register> &redundantPhiCopyMap) @@ -2292,7 > +2350,7 @@ namespace gbe > const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy); > const ir::UseSet *phiUse = dag->getRegUse(phi); > const DefSet *phiDef = dag->getRegDef(phi); > - bool isOpt = true; > + bool coalescePhiPhiCopy = true; > > // FIXME, I find under some situation, the phiDef maybe null, seems a > bug when building FunctionDAg. > // need fix it there. > @@ -2307,7 +2365,7 @@ namespace gbe > // phi & phiCopy are both alive at the endpoint of bb, > // thus can not be optimized. > if (out.contains(phi)) { > - isOpt = false; > + coalescePhiPhiCopy = false; > break; > } > > @@ -2333,7 +2391,17 @@ namespace gbe > // add x2, x, 1 > // mov x-copy, x2 > // obviously x2, x-copy and x2 can be mapped to same virtual > register > - > + // the situation may be more complex that this example, like > + // bb2: > + // mov x, x-copy > + // add x2, x, 1 > + // mul x3, x2, 2 > + // mov x-copy, x3 > + // that's why we add findPossibleReplaceChain() to solve such > case. > + // that is to replace all registers in the chain (x2, x3) by > x-copy. > + // note I did not merge the new 'ReplaceChain' with previous > phiCopy/phiCopySrc > + // coaleascing, the reason is 'ReplaceChain' is very hard to > match. But > + // phiCopy/phiCopySrc is eady to meet. The code here need further > refine. > ir::BasicBlock::const_iterator iter = > ir::BasicBlock::const_iterator(phiCopySrcDefInsn); > ir::BasicBlock::const_iterator iterE = bb->end(); > > @@ -2351,6 +2419,12 @@ namespace gbe > } > ++iter; > } > + > + // Go through the basic block, to find chain of [phi, ... > phiCopy] > + std::vector<ir::Register> replaceChain; > + findPossibleReplaceChain(liveness, fn, replaceChain, phi, > phiCopy, bb, > + > + ir::BasicBlock::const_iterator(phiCopyDefInsn)); > + > if (!phiPhiCopySrcInterfere) { > replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), > phiCopySrc, > phiCopy); > > @@ -2364,6 +2438,21 @@ namespace gbe > replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), > phiCopySrc, > phiCopy); > } > replaceMap.insert(std::make_pair(phiCopySrc, phiCopy)); > + > + if (!replaceChain.empty()) { > + for (auto &r : replaceChain) { > + const ir::DefSet *d = dag->getRegDef(r); > + const ir::UseSet *u = dag->getRegUse(r); > + replaceMap.insert(std::make_pair(r, phiCopy)); > + > + for (auto &dd : *d) { > + replaceDst(const_cast<Instruction > *>(dd->getInstruction()), r, > phiCopy); > + } > + for (auto &uu : *u) { > + replaceSrc(const_cast<Instruction > *>(uu->getInstruction()), r, > phiCopy); > + } > + } > + } > } > } > } else { > @@ -2396,7 +2485,7 @@ namespace gbe > if (&p == phiCopyDefInsn) break; > // we only care MOV here > if (p.getSrcNum() == 1 && p.getSrc(0) == phi) { > - isOpt = false; > + coalescePhiPhiCopy = false; > break; > } > } > @@ -2404,14 +2493,14 @@ namespace gbe > } > > // coalease phi and phiCopy > - if (isOpt) { > + if (coalescePhiPhiCopy) { > + replaceMap.insert(std::make_pair(phi, phiCopy)); > for (auto &x : *phiDef) { > replaceDst(const_cast<Instruction *>(x->getInstruction()), phi, > phiCopy); > } > for (auto &x : *phiUse) { > const Instruction *phiUseInsn = x->getInstruction(); > replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy); > - replaceMap.insert(std::make_pair(phi, phiCopy)); > } > } > } > -- > 2.4.1 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
