Previously, a phi node's sources were implicitly ordered by the inbound edge order. This patch changes that so that a phi node instead has a basic block stored for each source in a deque.
There are no regressions in Unigine Heaven, Valley and Superposition (with a SERIALIZE hack to fix unrelated errors in all three). I also found that the code generated is the same as without this patch for the shaders in shader-db even with the sources of phi nodes reversed after convertToSSA(). --- src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 41 +++++++++++++ src/gallium/drivers/nouveau/codegen/nv50_ir.h | 19 ++++++ .../drivers/nouveau/codegen/nv50_ir_inlines.h | 14 +++++ .../drivers/nouveau/codegen/nv50_ir_print.cpp | 2 + src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 71 ++++++---------------- .../drivers/nouveau/codegen/nv50_ir_ssa.cpp | 11 ++-- src/gallium/drivers/nouveau/codegen/nv50_ir_util.h | 2 + 7 files changed, 104 insertions(+), 56 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp index 49425b98b9..4a2835c526 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp @@ -1104,6 +1104,43 @@ FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const return flow; } +PhiInstruction::PhiInstruction(Function *fn, DataType ty) + : Instruction(fn, OP_PHI, ty) {} + +PhiInstruction * +PhiInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const +{ + assert(op == OP_PHI); + + PhiInstruction *phi = (i ? static_cast<PhiInstruction *>(i) : + new_PhiInstruction(pol.context(), dType)); + + Instruction::clone(pol, phi); + phi->basicBlocks.resize(basicBlocks.size()); + for (size_t i = 0; i < basicBlocks.size(); i++) + phi->basicBlocks[i] = basicBlocks[i]; + + return phi; +} + +void +PhiInstruction::setSrcBB(int s, Value *val, BasicBlock *bb) +{ + this->setSrc(s, val); + if (s >= (int)basicBlocks.size()) + basicBlocks.resize(s + 1); + basicBlocks[s] = bb; +} + +void +PhiInstruction::setSrcBB(int s, const ValueRef& ref, BasicBlock *bb) +{ + this->setSrc(s, ref); + if (s >= (int)basicBlocks.size()) + basicBlocks.resize(s + 1); + basicBlocks[s] = bb; +} + Program::Program(Type type, Target *arch) : progType(type), target(arch), @@ -1111,6 +1148,7 @@ Program::Program(Type type, Target *arch) mem_CmpInstruction(sizeof(CmpInstruction), 4), mem_TexInstruction(sizeof(TexInstruction), 4), mem_FlowInstruction(sizeof(FlowInstruction), 4), + mem_PhiInstruction(sizeof(PhiInstruction), 4), mem_LValue(sizeof(LValue), 8), mem_Symbol(sizeof(Symbol), 7), mem_ImmediateValue(sizeof(ImmediateValue), 7) @@ -1152,6 +1190,9 @@ void Program::releaseInstruction(Instruction *insn) else if (insn->asFlow()) mem_FlowInstruction.release(insn); + else + if (insn->asPhi()) + mem_PhiInstruction.release(insn); else mem_Instruction.release(insn); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h index f4f3c70888..e9fe7422b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h @@ -489,6 +489,7 @@ class Instruction; class CmpInstruction; class TexInstruction; class FlowInstruction; +class PhiInstruction; class Value; class LValue; @@ -853,9 +854,11 @@ public: inline CmpInstruction *asCmp(); inline TexInstruction *asTex(); inline FlowInstruction *asFlow(); + inline PhiInstruction *asPhi(); inline const TexInstruction *asTex() const; inline const CmpInstruction *asCmp() const; inline const FlowInstruction *asFlow() const; + inline const PhiInstruction *asPhi() const; public: Instruction *next; @@ -1075,6 +1078,21 @@ public: } target; }; +class PhiInstruction : public Instruction +{ +public: + PhiInstruction(Function *, DataType); + + virtual PhiInstruction *clone(ClonePolicy<Function>&, + Instruction * = NULL) const; + + void setSrcBB(int s, Value *, BasicBlock *); + void setSrcBB(int s, const ValueRef&, BasicBlock *); + +public: + std::deque<BasicBlock *> basicBlocks; +}; + class BasicBlock { public: @@ -1287,6 +1305,7 @@ public: MemoryPool mem_CmpInstruction; MemoryPool mem_TexInstruction; MemoryPool mem_FlowInstruction; + MemoryPool mem_PhiInstruction; MemoryPool mem_LValue; MemoryPool mem_Symbol; MemoryPool mem_ImmediateValue; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h index 4cb53ab42e..829578a2ac 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h @@ -323,6 +323,20 @@ const TexInstruction *Instruction::asTex() const return NULL; } +PhiInstruction *Instruction::asPhi() +{ + if (op == OP_PHI) + return static_cast<PhiInstruction *>(this); + return NULL; +} + +const PhiInstruction *Instruction::asPhi() const +{ + if (op == OP_PHI) + return static_cast<const PhiInstruction *>(this); + return NULL; +} + static inline Instruction *cloneForward(Function *ctx, Instruction *obj) { DeepClonePolicy<Function> pol(ctx); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp index cbb21f5f72..efb8798dce 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp @@ -675,6 +675,8 @@ void Instruction::print() const getIndirect(s, 1)); else pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType); + if (op == OP_PHI) + PRINT("%s(BB:%i)", colour[TXT_INSN], asPhi()->basicBlocks[s]->getId()); } if (exit) PRINT("%s exit", colour[TXT_INSN]); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp index b660fec75c..cecc23b22c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp @@ -372,20 +372,13 @@ typedef unordered_map< std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap; // Critical edges need to be split up so that work can be inserted along -// specific edge transitions. Unfortunately manipulating incident edges into a -// BB invalidates all the PHI nodes since their sources are implicitly ordered -// by incident edge order. -// -// TODO: Make it so that that is not the case, and PHI nodes store pointers to -// the original BBs. +// specific edge transitions. void RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb) { BasicBlock *pb, *pn; - Instruction *phi; Graph::EdgeIterator ei; std::stack<BasicBlock *> stack; - int j = 0; for (ei = bb->cfg.incident(); !ei.end(); ei.next()) { pb = BasicBlock::get(ei.getNode()); @@ -394,22 +387,6 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb) stack.push(pb); } - // No critical edges were found, no need to perform any work. - if (stack.empty()) - return; - - // We're about to, potentially, reorder the inbound edges. This means that - // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi - // nodes after the graph has been modified. - PhiMap phis; - - j = 0; - for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) { - pb = BasicBlock::get(ei.getNode()); - for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) - phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j))); - } - while (!stack.empty()) { pb = stack.top(); pn = new BasicBlock(func); @@ -423,23 +400,12 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb) if (pb->getExit()->asFlow()->target.bb == bb) pb->getExit()->asFlow()->target.bb = pn; - for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { - PhiMap::iterator it = phis.find(std::make_pair(phi, pb)); - assert(it != phis.end()); - phis.insert(std::make_pair(std::make_pair(phi, pn), it->second)); - phis.erase(it); - } - } - - // Now go through and fix up all of the phi node sources. - j = 0; - for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) { - pb = BasicBlock::get(ei.getNode()); - for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { - PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb)); - assert(it != phis.end()); - - phi->setSrc(j, it->second); + PhiInstruction *phi = bb->getPhi() ? bb->getPhi()->asPhi() : NULL; + for (; phi; phi = phi->next->asPhi()) { + for (int s = 0; phi->srcExists(s); s++) { + if (phi->basicBlocks[s] == pb) + phi->basicBlocks[s] = pn; + } } } } @@ -455,28 +421,29 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb) bool RegAlloc::PhiMovesPass::visit(BasicBlock *bb) { - Instruction *phi, *mov; - splitEdges(bb); - // insert MOVs (phi->src(j) should stem from j-th in-BB) - int j = 0; + // ensure incoming basic blocks are terminated for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { BasicBlock *pb = BasicBlock::get(ei.getNode()); if (!pb->isTerminated()) pb->insertTail(new_FlowInstruction(func, OP_BRA, bb)); + } - for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { - LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue()); - mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + // insert MOVs + PhiInstruction *phi = bb->getPhi() ? bb->getPhi()->asPhi() : NULL; + for (; phi; phi = phi->next->asPhi()) { + for (int i = 0; phi->srcExists(i); i++) { + BasicBlock *pb = phi->basicBlocks[i]; - mov->setSrc(0, phi->getSrc(j)); + LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue()); + Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size)); + mov->setSrc(0, phi->getSrc(i)); mov->setDef(0, tmp); - phi->setSrc(j, tmp); - pb->insertBefore(pb->getExit(), mov); + + phi->setSrc(i, tmp); } - ++j; } return true; diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp index 3d25ad928e..66586a283d 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp @@ -371,7 +371,7 @@ Function::convertToSSA() DLList::Iterator dfIter = bb->getDF().iterator(); for (; !dfIter.end(); dfIter.next()) { - Instruction *phi; + PhiInstruction *phi; BasicBlock *dfBB = BasicBlock::get(dfIter); if (hasAlready[dfBB->getId()] >= iterCount) @@ -382,12 +382,12 @@ Function::convertToSSA() if (!dfBB->liveSet.test(lval->id)) continue; - phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size)); + phi = new_PhiInstruction(this, typeOfSize(lval->reg.size)); dfBB->insertTail(phi); phi->setDef(0, lval); - for (int s = 0; s < dfBB->cfg.incidentCount(); ++s) - phi->setSrc(s, lval); + for (Graph::EdgeIterator ei = dfBB->cfg.incident(); !ei.end(); ei.next()) + phi->setSrcBB(phi->srcCount(), lval, BasicBlock::get(ei.getNode())); if (work[dfBB->getId()] < iterCount) { work[dfBB->getId()] = iterCount; @@ -514,6 +514,9 @@ void RenamePass::search(BasicBlock *bb) assert(p < sb->cfg.incidentCount()); for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) { + // phi instructions are supposed to handle when this isn't true, but + // it's always true during conversion to SSA. + assert(phi->asPhi()->basicBlocks[p] == bb); lval = getStackTop(phi->getSrc(p)); if (!lval) lval = mkUndefined(phi->getSrc(p)); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h index c619499046..732ad436ad 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h @@ -64,6 +64,8 @@ NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args) #define new_FlowInstruction(f, args...) \ NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args) +#define new_PhiInstruction(f, args...) \ + NV50_IR_FUNC_ALLOC_OBJ_DEF(PhiInstruction, f, args) #define new_LValue(f, args...) \ NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args) -- 2.14.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev