From: Luo Xionghu <[email protected]> lower LABEL, backward/forward BRANCH after instruction selection.
Signed-off-by: Luo Xionghu <[email protected]> --- backend/src/CMakeLists.txt | 1 + backend/src/backend/gen_context.cpp | 2 + backend/src/backend/gen_insn_selection.cpp | 170 +++----- backend/src/backend/gen_insn_selection.hpp | 17 +- backend/src/backend/gen_insn_selection.hxx | 1 + .../backend/gen_insn_selection_branch_lowering.cpp | 468 +++++++++++++++++++++ backend/src/backend/gen_insn_selection_passes.hpp | 30 ++ backend/src/sys/intrusive_list.hpp | 4 + 8 files changed, 581 insertions(+), 112 deletions(-) create mode 100644 backend/src/backend/gen_insn_selection_branch_lowering.cpp create mode 100644 backend/src/backend/gen_insn_selection_passes.hpp diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 6ff25e7..d1f3f32 100644 --- a/backend/src/CMakeLists.txt +++ b/backend/src/CMakeLists.txt @@ -104,6 +104,7 @@ set (GBE_SRC backend/gen_insn_selection.cpp backend/gen_insn_selection.hpp backend/gen_insn_selection_optimize.cpp + backend/gen_insn_selection_branch_lowering.cpp backend/gen_insn_scheduling.cpp backend/gen_insn_scheduling.hpp backend/gen_insn_selection_output.cpp diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index c8019e3..19550a3 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -27,6 +27,7 @@ #include "backend/gen_defs.hpp" #include "backend/gen_encoder.hpp" #include "backend/gen_insn_selection.hpp" +#include "backend/gen_insn_selection_passes.hpp" #include "backend/gen_insn_scheduling.hpp" #include "backend/gen_insn_selection_output.hpp" #include "backend/gen_reg_allocation.hpp" @@ -4047,6 +4048,7 @@ namespace gbe if (OCL_OUTPUT_SEL_IR) outputSelectionIR(*this, this->sel, genKernel->getName()); schedulePreRegAllocation(*this, *this->sel); + lowerBranch(this->sel); sel->addID(); if (UNLIKELY(ra->allocate(*this->sel) == false)) return false; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 128c2bc..e7ff970 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -291,6 +291,11 @@ namespace gbe insn->parent = this; } + void SelectionBlock::insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn) { + this->insnList.insert_after(prevInsn, insn); + insn->parent = this; + } + void SelectionBlock::append(SelectionVector *vec) { this->vectorList.push_back(vec); } @@ -646,7 +651,7 @@ namespace gbe /*! Encode a barrier instruction */ void FENCE(GenRegister dst); /*! Encode a label instruction */ - void LABEL(ir::LabelIndex label); + void LABEL(ir::LabelIndex label, ir::LabelIndex jip); /*! Jump indexed instruction, return the encoded instruction count according to jump distance. */ int JMPI(Reg src, ir::LabelIndex target, ir::LabelIndex origin); /*! IF indexed instruction */ @@ -661,6 +666,8 @@ namespace gbe void BRD(Reg src, ir::LabelIndex jip); /*! BRC indexed instruction */ void BRC(Reg src, ir::LabelIndex jip, ir::LabelIndex uip); + /*! BRANCH instruction */ + void BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip); /*! Compare instructions */ void CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst = GenRegister::null()); /*! Select instruction with embedded comparison */ @@ -843,6 +850,12 @@ namespace gbe return temps; } + INLINE ir::LabelIndex newAuxLabel() + { + currAuxLabel++; + return (ir::LabelIndex)currAuxLabel; + } + /*! Use custom allocators */ GBE_CLASS(Opaque); friend class SelectionBlock; @@ -858,12 +871,6 @@ namespace gbe bool bHasSends; uint32_t ldMsgOrder; bool slowByteGather; - INLINE ir::LabelIndex newAuxLabel() - { - currAuxLabel++; - return (ir::LabelIndex)currAuxLabel; - } - }; /////////////////////////////////////////////////////////////////////////// @@ -1244,9 +1251,10 @@ namespace gbe /*! Syntactic sugar for method declaration */ typedef const GenRegister &Reg; - void Selection::Opaque::LABEL(ir::LabelIndex index) { + void Selection::Opaque::LABEL(ir::LabelIndex index, ir::LabelIndex jip) { SelectionInstruction *insn = this->appendInsn(SEL_OP_LABEL, 0, 0); insn->index = index.value(); + insn->index1 = jip.value(); } void Selection::Opaque::BARRIER(GenRegister src, GenRegister fence, uint32_t barrierType) { @@ -1294,6 +1302,15 @@ namespace gbe insn->index1 = uip.value(); } + void Selection::Opaque::BRANCH(Reg reg, ir::LabelIndex src, ir::LabelIndex dst, uint32_t pred_index, uint32_t jip) { + SelectionInstruction *insn = this->appendInsn(SEL_OP_BRANCH, 0, 1); + insn->src(0) = reg; + insn->index = src.value(); + insn->index1 = dst.value(); + insn->jip = jip; + insn->extra.pred_index = pred_index; + } + void Selection::Opaque::IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip) { SelectionInstruction *insn = this->appendInsn(SEL_OP_IF, 0, 1); insn->src(0) = src; @@ -1306,7 +1323,7 @@ namespace gbe SelectionInstruction *insn = this->appendInsn(SEL_OP_ELSE, 0, 1); insn->src(0) = src; insn->index = jip.value(); - this->LABEL(elseLabel); + this->LABEL(elseLabel, ir::LabelIndex(0)); } void Selection::Opaque::ENDIF(Reg src, ir::LabelIndex jip, ir::LabelIndex endifLabel) { @@ -1314,7 +1331,7 @@ namespace gbe this->block->endifLabel = this->newAuxLabel(); else this->block->endifLabel = endifLabel; - this->LABEL(this->block->endifLabel); + this->LABEL(this->block->endifLabel, ir::LabelIndex(0)); SelectionInstruction *insn = this->appendInsn(SEL_OP_ENDIF, 0, 1); insn->src(0) = src; insn->index = this->block->endifLabel.value(); @@ -2530,7 +2547,7 @@ namespace gbe this->block->hasBranch = bb.getLastInstruction()->getOpcode() == OP_BRA || bb.getLastInstruction()->getOpcode() == OP_RET; if (!this->block->hasBranch) - this->block->endifOffset = -1; + this->block->needJump = false; // Build the DAG on the fly uint32_t insnNum = 0; @@ -2604,7 +2621,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp // Bottom up code generation bool needEndif = this->block->hasBranch == false && !this->block->hasBarrier; needEndif = needEndif && bb.needEndif; - this->block->removeSimpleIfEndif = insnNum < 10 && isSimpleBlock(bb, insnNum); + this->block->removeSimpleIfEndif = false;//insnNum < 10 && isSimpleBlock(bb, insnNum); if (needEndif && !this->block->removeSimpleIfEndif) { if(!bb.needIf) // this basic block is the exit of a structure this->ENDIF(GenRegister::immd(0), bb.endifLabel, bb.endifLabel); @@ -2750,7 +2767,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp /////////////////////////////////////////////////////////////////////////// // Code selection public implementation /////////////////////////////////////////////////////////////////////////// - const GenContext& Selection::getCtx() + GenContext& Selection::getCtx() { return this->opaque->ctx; } @@ -2904,6 +2921,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp return this->opaque->isPartialWrite(reg); } + GenRegister Selection::selReg(ir::Register reg, ir::Type type) const { + return this->opaque->selReg(reg, type); + } + + ir::LabelIndex Selection::newAuxLabel() { return this->opaque->newAuxLabel(); } + SelectionInstruction *Selection::create(SelectionOpcode opcode, uint32_t dstNum, uint32_t srcNum) { return this->opaque->create(opcode, dstNum, srcNum); } @@ -6602,18 +6625,18 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp { using namespace ir; const LabelIndex label = insn.getLabelIndex(); - const GenRegister src0 = sel.getBlockIP(); - const GenRegister src1 = sel.getLabelImmReg(label); - const uint32_t simdWidth = sel.ctx.getSimdWidth(); GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling"); - sel.LABEL(label); - if(!insn.getParent()->needIf) + if(!insn.getParent()->needIf) { + sel.LABEL(label, LabelIndex(0)); return true; + } // Do not emit any code for the "returning" block. There is no need for it - if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) + if (insn.getParent() == &sel.ctx.getFunction().getBottomBlock()) { + sel.LABEL(label, LabelIndex(0)); return true; + } LabelIndex jip; const LabelIndex nextLabel = insn.getParent()->getNextBlock()->getLabelIndex(); @@ -6621,85 +6644,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp jip = sel.ctx.getLabelIndex(&insn); else jip = nextLabel; - - // Emit the mask computation at the head of each basic block - sel.push(); - sel.curr.noMask = 1; - sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.flag = 0; - sel.curr.subFlag = 1; - sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1); - sel.pop(); - - if (sel.block->hasBarrier) { - // If this block has barrier, we don't execute the block until all lanes - // are 1s. Set each reached lane to 1, then check all lanes. If there is any - // lane not reached, we jump to jip. And no need to issue if/endif for - // this block, as it will always excute with all lanes activated. - sel.push(); - sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.curr.flag = 0; - sel.curr.subFlag = 1; - sel.setBlockIP(src0, sel.ctx.getMaxLabel()); - sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.noMask = 1; - sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel()); - if (simdWidth == 8) - sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; - else if (simdWidth == 16) - sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H; - else - NOT_IMPLEMENTED; - sel.curr.noMask = 1; - sel.curr.execWidth = 1; - sel.curr.inversePredicate = 1; - sel.JMPI(GenRegister::immd(0), jip, label); - sel.pop(); - // FIXME, if the last BRA is unconditional jump, we don't need to update the label here. - sel.push(); - sel.curr.predicate = GEN_PREDICATE_NORMAL; - sel.curr.flag = 0; - sel.curr.subFlag = 1; - sel.setBlockIP(src0, label.value()); - sel.pop(); - } - else { - if (sel.ctx.hasJIP(&insn) && - // If jump to next label and the endif offset is -1, then - // We don't need to add a jmpi here, as the following IF will do the same - // thing if all channels are disabled. - (jip != nextLabel || sel.block->endifOffset != -1)) { - // If it is required, insert a JUMP to bypass the block - sel.push(); - sel.curr.flag = 0; - sel.curr.subFlag = 1; - if (simdWidth == 8) - sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; - else if (simdWidth == 16) - sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; - else - NOT_IMPLEMENTED; - sel.curr.noMask = 1; - sel.curr.execWidth = 1; - sel.curr.inversePredicate = 1; - sel.JMPI(GenRegister::immd(0), jip, label); - sel.pop(); - } - if(!sel.block->removeSimpleIfEndif){ - sel.push(); - sel.curr.flag = 0; - sel.curr.subFlag = 1; - sel.curr.predicate = GEN_PREDICATE_NORMAL; - if(!insn.getParent()->needEndif && insn.getParent()->needIf) { - ir::LabelIndex label = insn.getParent()->endifLabel; - sel.IF(GenRegister::immd(0), label, label); - } - else - sel.IF(GenRegister::immd(0), sel.block->endifLabel, sel.block->endifLabel); - sel.pop(); - } - } - + sel.LABEL(label, jip); return true; } DECL_CTOR(LabelInstruction, 1, 1); @@ -7259,7 +7204,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) sel.ENDIF(GenRegister::immd(0), nextLabel); - sel.block->endifOffset = -1; + sel.block->needJump = false; sel.pop(); } else { // Update the PcIPs @@ -7275,7 +7220,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp else if(insn.getParent()->needEndif) sel.ENDIF(GenRegister::immd(0), nextLabel); } - sel.block->endifOffset = -1; + sel.block->needJump = false; if (nextLabel == jip) return; // Branch to the jump target sel.push(); @@ -7283,7 +7228,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; // Actually, the origin of this JMPI should be the beginning of next BB. - sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1)); + sel.JMPI(GenRegister::immd(0), jip, ir::LabelIndex(curr->getLabelIndex().value() + 1)); + sel.block->needJump = true; sel.pop(); } } @@ -7317,7 +7263,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.flagIndex = pred.value(); sel.curr.predicate = GEN_PREDICATE_NORMAL; sel.setBlockIP(ip, dst.value()); - sel.block->endifOffset = -1; + sel.block->needJump = false; sel.curr.predicate = GEN_PREDICATE_NONE; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) sel.ENDIF(GenRegister::immd(0), next); @@ -7327,7 +7273,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp else sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; sel.curr.noMask = 1; - sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label); + sel.JMPI(GenRegister::immd(0), jip, label); + sel.block->needJump = true; sel.pop(); } else { const LabelIndex next = bb.getNextBlock()->getLabelIndex(); @@ -7336,7 +7283,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.subFlag = 1; if(insn.getParent()->needEndif) sel.setBlockIP(ip, dst.value()); - sel.block->endifOffset = -1; + sel.block->needJump = false; if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) { if(insn.getParent()->needEndif && !insn.getParent()->needIf) sel.ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel); @@ -7348,7 +7295,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.execWidth = 1; sel.curr.noMask = 1; sel.curr.predicate = GEN_PREDICATE_NONE; - sel.block->endifOffset -= sel.JMPI(GenRegister::immd(0), jip, label); + sel.JMPI(GenRegister::immd(0), jip, label); + sel.block->needJump = true; sel.pop(); } } @@ -7362,6 +7310,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp else if (opcode == OP_BRA) { const LabelIndex dst = insn.getLabelIndex(); const LabelIndex src = insn.getParent()->getLabelIndex(); + const LabelIndex jip = sel.ctx.getLabelIndex(&insn); sel.push(); if (insn.isPredicated() == true) { @@ -7369,11 +7318,12 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.externFlag = 1; } - // We handle foward and backward branches differently - if (uint32_t(dst) <= uint32_t(src)) - this->emitBackwardBranch(sel, insn, dst, src); - else - this->emitForwardBranch(sel, insn, dst, src); + if (insn.isPredicated() == true) { + const Register pred = insn.getPredicateIndex(); + sel.BRANCH(GenRegister::immd(0), dst, src, pred.value(), jip.value()); + } else { + sel.BRANCH(GenRegister::immd(0), dst, src, 0, jip.value()); + } sel.pop(); } else if(opcode == OP_IF) { diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 01999a2..fbbb826 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -68,6 +68,8 @@ namespace gbe public: /*! Owns the instruction */ SelectionBlock *parent; + /*! Get the parent Selection block */ + SelectionBlock *getParent(void) { return parent; } /*! Append an instruction before this one */ void prepend(SelectionInstruction &insn); /*! Append an instruction after this one */ @@ -160,6 +162,7 @@ namespace gbe uint16_t printfSize; }; uint32_t workgroupOp; + uint32_t pred_index; } extra; /*! Gen opcode */ uint8_t opcode; @@ -171,6 +174,8 @@ namespace gbe uint32_t index; /*! For BRC/IF to store the UIP */ uint32_t index1; + /*! for BRANCH to store jip */ + uint32_t jip; /*! instruction ID used for vector allocation. */ uint32_t ID; DebugInfo DBGInfo; @@ -258,8 +263,10 @@ namespace gbe void append(SelectionInstruction *insn); /*! Append a new selection instruction at the beginning of the block */ void prepend(SelectionInstruction *insn); + /*! insert a new selection instruction after prevInsn */ + void insertAfter(SelectionInstruction *prevInsn, SelectionInstruction *insn); ir::LabelIndex endifLabel; - int endifOffset; + bool needJump; bool hasBarrier; bool hasBranch; bool removeSimpleIfEndif; @@ -305,6 +312,10 @@ namespace gbe bool isScalarReg(const ir::Register ®) const; /*! is this register a partially written register.*/ bool isPartialWrite(const ir::Register ®) const; + /*! create GenRegister for ir Register.*/ + GenRegister selReg(ir::Register reg, ir::Type type) const; + + ir::LabelIndex newAuxLabel(); /*! Create a new selection instruction */ SelectionInstruction *create(SelectionOpcode, uint32_t dstNum, uint32_t srcNum); /*! List of emitted blocks */ @@ -316,11 +327,13 @@ namespace gbe /* optimize at selection IR level */ void optimize(void); + /* branch lower at selection IR level */ + void branchLowering(void); uint32_t opt_features; /* Add insn ID for sel IR */ void addID(void); - const GenContext &getCtx(); + GenContext &getCtx(); /*! Use custom allocators */ GBE_CLASS(Selection); diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx index 5d96e9e..9e4806b 100644 --- a/backend/src/backend/gen_insn_selection.hxx +++ b/backend/src/backend/gen_insn_selection.hxx @@ -90,6 +90,7 @@ DECL_SELECTION_IR(CONVI64_TO_I, UnaryInstruction) DECL_SELECTION_IR(CONVI64_TO_F, I64ToFloatInstruction) DECL_SELECTION_IR(CONVF_TO_I64, FloatToI64Instruction) DECL_SELECTION_IR(I64MADSAT, I64MADSATInstruction) +DECL_SELECTION_IR(BRANCH, UnaryInstruction) DECL_SELECTION_IR(BRC, UnaryInstruction) DECL_SELECTION_IR(BRD, UnaryInstruction) DECL_SELECTION_IR(IF, UnaryInstruction) diff --git a/backend/src/backend/gen_insn_selection_branch_lowering.cpp b/backend/src/backend/gen_insn_selection_branch_lowering.cpp new file mode 100644 index 0000000..92efbdb --- /dev/null +++ b/backend/src/backend/gen_insn_selection_branch_lowering.cpp @@ -0,0 +1,468 @@ + +#include "backend/gen_insn_selection.hpp" +#include "backend/gen_insn_selection_passes.hpp" +#include "backend/gen_context.hpp" +#include "ir/function.hpp" +#include "ir/liveness.hpp" +#include "ir/profile.hpp" +#include "sys/cvar.hpp" +#include "sys/vector.hpp" +#include <algorithm> +#include <climits> +#include <map> + +namespace gbe +{ + class BranchLowering + { + public: + /*! To make function prototypes more readable */ + typedef const GenRegister &Reg; + typedef const GenInstructionState &State; + BranchLowering(GenContext &ctx, SelectionBlock &selblock) + : ctx(ctx), selBlock(selblock) {} + void run(); + void insnLower(); + void lowerBRANCH(SelectionInstruction &insn); + void lowerForwardBRANCH(SelectionInstruction &insn); + void lowerBackwardBRANCH(SelectionInstruction &insn); + void lowerLABEL(SelectionInstruction &insn); + + /* Get current block IP register according to label width. */ + GenRegister getBlockIP() { + return ctx.isDWLabel() ? ctx.sel->selReg(ir::ocl::dwblockip, ir::TYPE_U32) : ctx.sel->selReg(ir::ocl::blockip, ir::TYPE_U32); + } + + SelectionInstruction *setBlockIP(GenRegister blockip, uint32_t labelValue, + State state, + SelectionInstruction *prevInsn); + SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip, + GenRegister labelReg, State state, + SelectionInstruction *prevInsn); + SelectionInstruction *cmpBlockIP(uint32_t cond, GenRegister blockip, + uint32_t labelValue, State state, + SelectionInstruction *prevInsn); + SelectionInstruction *MOV(Reg dst, Reg src, State state, + SelectionInstruction *prevInsn); + SelectionInstruction *CMP(uint32_t conditional, Reg src0, Reg src1, Reg dst, + State state, SelectionInstruction *prevInsn); + SelectionInstruction *JMPI(Reg src, ir::LabelIndex index, + ir::LabelIndex origin, State state, + SelectionInstruction *prevInsn); + SelectionInstruction *IF(Reg src, ir::LabelIndex jip, ir::LabelIndex uip, + State state, SelectionInstruction *prevInsn); + SelectionInstruction *ENDIF(Reg src, ir::LabelIndex jip, + ir::LabelIndex endifLabel, State state, + SelectionInstruction *prevInsn); + SelectionInstruction *LABEL(ir::LabelIndex index, ir::LabelIndex jip, + State state, SelectionInstruction *prevInsn); + SelectionInstruction *generateInsn(SelectionOpcode opcode, uint32_t dstNum, + uint32_t srcNum, State state, + SelectionInstruction *prevInsn); + ~BranchLowering() {} + + protected: + GenContext &ctx; //in case that we need it + SelectionBlock &selBlock; + bool lowered; + }; + + void BranchLowering::insnLower() + { + //for (auto &insn : selBlock.insnList) { + for (auto iter = selBlock.insnList.end() ; iter != selBlock.insnList.begin(); ) { + iter--; + SelectionInstruction &insn = *iter; + if (insn.opcode == SEL_OP_BRANCH) { + lowerBRANCH(insn); + } + else if (insn.opcode == SEL_OP_LABEL) { + lowerLABEL(insn); + } + } + for (auto iter = selBlock.insnList.begin() ; iter != selBlock.insnList.end(); iter++) { + SelectionInstruction &insn = *iter; + if (insn.opcode == SEL_OP_BRANCH) { + iter = selBlock.insnList.erase(&insn); + } + } + + } + + void BranchLowering::lowerBackwardBRANCH(SelectionInstruction &insn) + { + using namespace ir; + SelectionInstruction *prev_insn = &insn; + const GenRegister ip = getBlockIP(); + const BasicBlock *currBB = insn.getParent()->bb; + const BasicBlock *nextBB = currBB->getNextBlock(); + const LabelIndex nextLabel = nextBB->getLabelIndex(); + const uint32_t jip = insn.jip; + const LabelIndex label = LabelIndex(insn.index1); + uint32_t dst = insn.index; + uint32_t predIndex = insn.extra.pred_index; + const uint32_t simdWidth = ctx.getSimdWidth(); + GBE_ASSERT(nextBB != NULL); + + if (predIndex != 0) { + + // Update the PcIPs for all the branches. Just put the IPs of the next + // block. Next instruction will properly update the IPs of the lanes + // that actually take the branch + { + GenInstructionState curr = insn.state; + prev_insn = setBlockIP(ip, nextLabel.value(), curr, prev_insn); + } + GBE_ASSERT(jip == dst); + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.physicalFlag = 0; + curr.flagIndex = predIndex; + curr.predicate = GEN_PREDICATE_NORMAL; + prev_insn = setBlockIP(ip, dst, curr, prev_insn); + curr.predicate = GEN_PREDICATE_NONE; + if (!selBlock.hasBarrier) + prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn); + curr.execWidth = 1; + if (simdWidth == 16) + curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; + else + curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; + curr.noMask = 1; + prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn); + selBlock.needJump = true; + } else { + // Update the PcIPs + GenInstructionState curr = insn.state; + curr.flag = 0; + curr.subFlag = 1; + if(insn.getParent()->bb->needEndif) + prev_insn = setBlockIP(ip, dst, curr, prev_insn); + if (!selBlock.hasBarrier) { + if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf) + prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->endifLabel, insn.getParent()->endifLabel, curr, prev_insn); + else if(insn.getParent()->bb->needEndif) + prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn); + } + // Branch to the jump target + { + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.execWidth = 1; + curr.noMask = 1; + curr.predicate = GEN_PREDICATE_NONE; + prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jip), LabelIndex(label), curr, prev_insn); + selBlock.needJump = true; + } + } + } + + void BranchLowering::lowerForwardBRANCH(SelectionInstruction &insn) + { + using namespace ir; + SelectionInstruction *prev_insn = &insn; + const uint32_t simdWidth = ctx.getSimdWidth(); + uint32_t dst = insn.index; + uint32_t predIndex = insn.extra.pred_index; + const uint32_t jipValue = insn.jip; + const GenRegister ip = getBlockIP(); + // We will not emit any jump if we must go the next block anyway + const BasicBlock *currBB = insn.getParent()->bb; + const BasicBlock *nextBB = currBB->getNextBlock(); + const LabelIndex nextLabel = nextBB->getLabelIndex(); + if (predIndex != 0) { + GenInstructionState curr = insn.state; + // we don't need to set next label to the pcip + // as if there is no backward jump latter, then obviously everything will work fine. + // If there is backward jump latter, then all the pcip will be updated correctly there. + curr.execWidth = simdWidth; + curr.physicalFlag = 0; + curr.flagIndex = predIndex; + curr.predicate = GEN_PREDICATE_NORMAL; + prev_insn = setBlockIP(ip, dst, curr, prev_insn); + curr.predicate = GEN_PREDICATE_NONE; + if (!selBlock.hasBarrier) + prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn); + selBlock.needJump = false; + } else { + // Update the PcIPs + GenInstructionState curr = insn.state; + curr.flag = 0; + curr.subFlag = 1; + if(insn.getParent()->bb->needEndif) + prev_insn = setBlockIP(ip, dst, curr, prev_insn); + + if (!selBlock.hasBarrier) { + if(insn.getParent()->bb->needEndif && !insn.getParent()->bb->needIf) + prev_insn = ENDIF(GenRegister::immd(0), insn.getParent()->bb->endifLabel, insn.getParent()->bb->endifLabel, curr, prev_insn); + else if(insn.getParent()->bb->needEndif) + prev_insn = ENDIF(GenRegister::immd(0), nextLabel, LabelIndex(0), curr, prev_insn); + } + selBlock.needJump = false; + if (nextLabel == jipValue) return; + // Branch to the jump target + { + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.execWidth = 1; + curr.noMask = 1; + curr.predicate = GEN_PREDICATE_NONE; + // Actually, the origin of this JMPI should be the beginning of next + // BB. + prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue), + ir::LabelIndex(currBB->getLabelIndex().value() + 1), curr, + prev_insn); + selBlock.needJump = true; + } + } + } + + void BranchLowering::lowerBRANCH(SelectionInstruction& insn) + { + uint32_t dst = insn.index; + uint32_t src = insn.index1; + if (dst <= src) { + lowerBackwardBRANCH(insn); + } else { + lowerForwardBRANCH(insn); + } + } + + void BranchLowering::lowerLABEL(SelectionInstruction& insn) + { + //src0, src1, jip, nextLabel, hasBarrier, sel + using namespace ir; + uint32_t labelValue = insn.index; + const GenRegister src0 = getBlockIP(); + const GenRegister src1 = ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue); + const uint32_t simdWidth = ctx.getSimdWidth(); + + if (insn.getParent()->bb == &ctx.getFunction().getBottomBlock()) { + return; + } + + const LabelIndex nextLabel = insn.getParent()->bb->getNextBlock()->getLabelIndex(); + uint32_t jipValue = insn.index1; + if(jipValue == 0) + return; + + SelectionInstruction* prev_insn = &insn; + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.noMask = 1; + curr.predicate = GEN_PREDICATE_NONE; + curr.flag = 0; + curr.subFlag = 1; + SelectionInstruction *insn_cmp = cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1, curr, prev_insn); + prev_insn = insn_cmp; + + if (selBlock.hasBarrier) { + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.predicate = GEN_PREDICATE_NORMAL; + curr.flag = 0; + curr.subFlag = 1; + prev_insn = setBlockIP(src0, ctx.getMaxLabel(), curr, prev_insn); + curr.predicate = GEN_PREDICATE_NONE; + curr.noMask = 1; + prev_insn = cmpBlockIP(GEN_CONDITIONAL_EQ, src0, ctx.getMaxLabel(), curr, prev_insn); + + if (simdWidth == 8) + curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H; + else if (simdWidth == 16) + curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H; + else + NOT_IMPLEMENTED; + curr.noMask = 1; + curr.execWidth = 1; + curr.inversePredicate = 1; + prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue), + LabelIndex(labelValue), curr, prev_insn); + { + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.predicate = GEN_PREDICATE_NORMAL; + curr.flag = 0; + curr.subFlag = 1; + prev_insn = setBlockIP(src0, labelValue, curr, prev_insn); + } + } else { + if ( + // If jump to next label and the endif offset is -1, then + // We don't need to add a jmpi here, as the following IF will do the + // same + // thing if all channels are disabled. + (jipValue != nextLabel.value() || selBlock.needJump != false)) { + // If it is required, insert a JUMP to bypass the block + GenInstructionState curr; + curr.noMask = 1; + curr.execWidth = 1; + curr.inversePredicate = 1; + curr.flag = 0; + curr.subFlag = 1; + if (simdWidth == 8) + curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; + else if (simdWidth == 16) + curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H; + else + NOT_IMPLEMENTED; + prev_insn = JMPI(GenRegister::immd(0), LabelIndex(jipValue), + LabelIndex(labelValue), curr, prev_insn); + } + GenInstructionState curr; + curr.execWidth = simdWidth; + curr.predicate = GEN_PREDICATE_NORMAL; + curr.flag = 0; + curr.subFlag = 1; + if(!insn.getParent()->bb->needEndif && insn.getParent()->bb->needIf) { + ir::LabelIndex label = insn.getParent()->bb->endifLabel; + prev_insn = IF(GenRegister::immd(0), label, label, curr, prev_insn); + } + else { + prev_insn = IF(GenRegister::immd(0), selBlock.endifLabel, selBlock.endifLabel, curr, prev_insn); + } + } + } + /* Set current label register to a label value. */ + SelectionInstruction * + BranchLowering::setBlockIP(GenRegister blockip, uint32_t labelValue, + State state, SelectionInstruction *prevInsn) { + if (!ctx.isDWLabel()) + return MOV(GenRegister::retype(blockip, GEN_TYPE_UW), + GenRegister::immuw(labelValue), state, prevInsn); + else + return MOV(GenRegister::retype(blockip, GEN_TYPE_UD), + GenRegister::immud(labelValue), state, prevInsn); + } + + SelectionInstruction * + BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip, + GenRegister labelReg, State state, + SelectionInstruction *prevInsn) { + if (!ctx.isDWLabel()) + return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW), labelReg, + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state, prevInsn); + else + return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD), labelReg, + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state, prevInsn); + } + + SelectionInstruction * + BranchLowering::cmpBlockIP(uint32_t cond, GenRegister blockip, + uint32_t labelValue, State state, + SelectionInstruction *prevInsn) { + if (!ctx.isDWLabel()) + return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UW), + GenRegister::immuw(labelValue), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UW), state, + prevInsn); + else + return CMP(cond, GenRegister::retype(blockip, GEN_TYPE_UD), + GenRegister::immuw(labelValue), + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD), state, + prevInsn); + } + + SelectionInstruction *BranchLowering::MOV(Reg dst, Reg src, State state, + SelectionInstruction *prevInsn) { + SelectionInstruction *insn = this->generateInsn(SEL_OP_MOV, 1, 1, state, prevInsn); + insn->dst(0) = dst; + insn->src(0) = src; + return insn; + } + + SelectionInstruction *BranchLowering::CMP(uint32_t conditional, Reg src0, + Reg src1, Reg dst, State state, + SelectionInstruction *prevInsn) { + SelectionInstruction *insn = this->generateInsn(SEL_OP_CMP, 1, 2, state, prevInsn); + insn->src(0) = src0; + insn->src(1) = src1; + insn->dst(0) = dst; + insn->extra.function = conditional; + return insn; + } + + SelectionInstruction *BranchLowering::JMPI(Reg src, ir::LabelIndex index, + ir::LabelIndex origin, State state, + SelectionInstruction *prevInsn) { + SelectionInstruction *insn = this->generateInsn(SEL_OP_JMPI, 0, 1, state, prevInsn); + insn->src(0) = src; + insn->index = index.value(); + ir::LabelIndex start, end; + if (origin.value() < index.value()) { + // Forward Jump, need to exclude the target BB. Because we + // need to jump to the beginning of it. + start = origin; + end = ir::LabelIndex(index.value() - 1); + } else { + start = index; + end = origin; + } + // FIXME, this longjmp check is too hacky. We need to support instruction + // insertion at code emission stage in the future. + insn->extra.longjmp = ctx.getFunction().getDistance(start, end) > 3000; + return insn; + } + + SelectionInstruction *BranchLowering::IF(Reg src, ir::LabelIndex jip, + ir::LabelIndex uip, State state, + SelectionInstruction *prevInsn) { + SelectionInstruction *insn = this->generateInsn(SEL_OP_IF, 0, 1, state, prevInsn); + insn->src(0) = src; + insn->index = jip.value(); + insn->index1 = uip.value(); + return insn; + } + + SelectionInstruction *BranchLowering::ENDIF(Reg src, ir::LabelIndex jip, + ir::LabelIndex endifLabel, State state, + SelectionInstruction *prevInsn) { + if(endifLabel == 0) + selBlock.endifLabel = ctx.sel->newAuxLabel(); + else + selBlock.endifLabel = endifLabel; + SelectionInstruction * insn_label = LABEL(selBlock.endifLabel, ir::LabelIndex(0), state, prevInsn); + SelectionInstruction *insn = this->generateInsn(SEL_OP_ENDIF, 0, 1, state, insn_label); + insn->src(0) = src; + insn->index = selBlock.endifLabel.value(); + return insn; + } + + SelectionInstruction *BranchLowering::LABEL(ir::LabelIndex index, + ir::LabelIndex jip, State state, + SelectionInstruction *prevInsn) { + SelectionInstruction *insn = this->generateInsn(SEL_OP_LABEL, 0, 0, state, prevInsn); + insn->index = index.value(); + insn->index1 = jip.value(); + return insn; + } + + SelectionInstruction * + BranchLowering::generateInsn(SelectionOpcode opcode, uint32_t dstNum, + uint32_t srcNum, State state, + SelectionInstruction *prevInsn) { + GBE_ASSERT(dstNum <= SelectionInstruction::MAX_DST_NUM && + srcNum <= SelectionInstruction::MAX_SRC_NUM); + SelectionInstruction *insn = ctx.sel->create(opcode, dstNum, srcNum); + selBlock.insertAfter(prevInsn, insn); + insn->state = state; + return insn; + } + + void BranchLowering::run() { + lowered = false; + insnLower(); + } + + void lowerBranch(Selection *sel) + { + //do basic block level optimization + for (SelectionBlock &block : *sel->blockList) { + BranchLowering branch_lower(sel->getCtx(), block); + branch_lower.run(); + } + + } +} /* namespace gbe */ + + diff --git a/backend/src/backend/gen_insn_selection_passes.hpp b/backend/src/backend/gen_insn_selection_passes.hpp new file mode 100644 index 0000000..19d3f4c --- /dev/null +++ b/backend/src/backend/gen_insn_selection_passes.hpp @@ -0,0 +1,30 @@ +/* + * Copyright © 2012 Intel Corporation + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __GEN_INSN_SELECTION_PASSES_HPP__ +#define __GEN_INSN_SELECTION_PASSES_HPP__ + +#include "backend/gen_insn_selection.hpp" + +namespace gbe +{ + /* !optimize functions after instruction selection */ + + /* !lower functions after instruction selection */ + void lowerBranch(Selection* sel); +} + +#endif diff --git a/backend/src/sys/intrusive_list.hpp b/backend/src/sys/intrusive_list.hpp index 2e2f2a9..6677c77 100644 --- a/backend/src/sys/intrusive_list.hpp +++ b/backend/src/sys/intrusive_list.hpp @@ -147,6 +147,10 @@ namespace gbe link(v, pos.node()); return iterator(v); } + iterator insert_after(iterator pos, value_type* v) { + append(v, pos.node()); + return iterator(v); + } iterator erase(iterator it) { iterator itErase(it); ++it; -- 2.5.0 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
