Please review the new version, this version has some indent issues.
On Wed, May 14, 2014 at 04:11:27PM +0800, Zhigang Gong wrote: > Actually, all of the above 3 instructions could avoid > one LOADI instruction by switching operands position. > > This patch impemented this optimization. And consolidate > all the same type of optimization into one place. > > No obvious performance impact on luxmark. > > Signed-off-by: Zhigang Gong <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 183 > +++++++++++++++-------------- > 1 file changed, 97 insertions(+), 86 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index a554ee9..3d6897d 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -129,15 +129,15 @@ namespace gbe > } > } > > - uint32_t getGenCompare(ir::Opcode opcode) { > + uint32_t getGenCompare(ir::Opcode opcode, bool inverse = false) { > using namespace ir; > switch (opcode) { > - case OP_LE: return GEN_CONDITIONAL_LE; > - case OP_LT: return GEN_CONDITIONAL_L; > - case OP_GE: return GEN_CONDITIONAL_GE; > - case OP_GT: return GEN_CONDITIONAL_G; > - case OP_EQ: return GEN_CONDITIONAL_EQ; > - case OP_NE: return GEN_CONDITIONAL_NEQ; > + case OP_LE: return (!inverse) ? GEN_CONDITIONAL_LE : GEN_CONDITIONAL_G; > + case OP_LT: return (!inverse) ? GEN_CONDITIONAL_L : GEN_CONDITIONAL_GE; > + case OP_GE: return (!inverse) ? GEN_CONDITIONAL_GE : GEN_CONDITIONAL_L; > + case OP_GT: return (!inverse) ? GEN_CONDITIONAL_G : GEN_CONDITIONAL_LE; > + case OP_EQ: return (!inverse) ? GEN_CONDITIONAL_EQ : > GEN_CONDITIONAL_NEQ; > + case OP_NE: return (!inverse) ? GEN_CONDITIONAL_NEQ : > GEN_CONDITIONAL_EQ; > default: NOT_SUPPORTED; return 0u; > }; > } > @@ -576,6 +576,15 @@ namespace gbe > void I64DIV(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); > /*! 64-bit integer remainder of division */ > void I64REM(Reg dst, Reg src0, Reg src1, GenRegister tmp[13]); > + /* common functions for both binary instruction and sel_cmp and compare > instruction. > + It will handle the IMM or normal register assignment, and will try to > avoid LOADI > + as much as possible. */ > + void getSrcGenRegImm(SelectionDAG &dag, GenRegister &src0, > + GenRegister &src1, ir::Type type, bool &inverse); > + void getSrcGenRegImm(SelectionDAG &dag, > + SelectionDAG *dag0, SelectionDAG *dag1, > + GenRegister &src0, GenRegister &src1, > + ir::Type type, bool &inverse); > /*! Use custom allocators */ > GBE_CLASS(Opaque); > friend class SelectionBlock; > @@ -588,6 +597,7 @@ namespace gbe > currAuxLabel++; > return (ir::LabelIndex)currAuxLabel; > } > + > }; > > /////////////////////////////////////////////////////////////////////////// > @@ -1665,11 +1675,11 @@ namespace gbe > return false; > } > > - GenRegister getRegisterFromImmediate(ir::Immediate imm, bool negate = > false) > + GenRegister getRegisterFromImmediate(ir::Immediate imm, ir::Type type, > bool negate = false) > { > using namespace ir; > int sign = negate ? -1 : 1; > - switch (imm.type) { > + switch (type) { > case TYPE_U32: return GenRegister::immud(imm.data.u32 * sign); > case TYPE_S32: return GenRegister::immd(imm.data.s32 * sign); > case TYPE_FLOAT: return GenRegister::immf(imm.data.f32 * sign); > @@ -1683,6 +1693,68 @@ namespace gbe > } > } > > + BVAR(OCL_OPTIMIZE_IMMEDIATE, true); > + void Selection::Opaque::getSrcGenRegImm(SelectionDAG &dag, > + SelectionDAG *dag0, SelectionDAG > *dag1, > + GenRegister &src0, GenRegister &src1, > + ir::Type type, bool &inverse) { > + using namespace ir; > + // Right source can always be an immediate > + //logica ops of bool shouldn't use 0xffff, may use flag reg, so can't > optimize > + inverse = false; > + const int src0Index = dag.insn.isMemberOf<SelectInstruction>() ? > SelectInstruction::src0Index : 0; > + const int src1Index = dag.insn.isMemberOf<SelectInstruction>() ? > SelectInstruction::src1Index : 1; > + if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() > == OP_LOADI && > + canGetRegisterFromImmediate(dag1->insn)) { > + const auto &childInsn = cast<LoadImmInstruction>(dag1->insn); > + src0 = this->selReg(dag.insn.getSrc(src0Index), type); > + src1 = getRegisterFromImmediate(childInsn.getImmediate(), type); > + if (dag0) dag0->isRoot = 1; > + } > + // Left source cannot be immediate but it is OK if we can commute > + else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && > dag.insn.isMemberOf<BinaryInstruction>() && > + ((cast<BinaryInstruction>(dag.insn)).commutes() || > dag.insn.getOpcode() == OP_SUB) && > + dag0->insn.getOpcode() == OP_LOADI && > canGetRegisterFromImmediate(dag0->insn)) { > + const auto &childInsn = cast<LoadImmInstruction>(dag0->insn); > + src0 = dag.insn.getOpcode() != OP_SUB ? > + this->selReg(dag.insn.getSrc(src1Index), type) : > + GenRegister::negate(this->selReg(dag.insn.getSrc(src1Index), > type)); > + Immediate imm = childInsn.getImmediate(); > + src1 = getRegisterFromImmediate(imm, type, dag.insn.getOpcode() == > OP_SUB); > + if (dag1) dag1->isRoot = 1; > + } > + // If it's a compare instruction, theoritically, we can easily revert > the condition code to > + // switch the two operands. But we can't do that for float due to the > NaN's exist. > + // For a normal select instruction, we can always inverse the > predication to switch the two > + // operands' position. > + else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && > + dag0->insn.getOpcode() == OP_LOADI && > canGetRegisterFromImmediate(dag0->insn) && > + ((dag.insn.isMemberOf<CompareInstruction>() && type != > TYPE_FLOAT && type != TYPE_DOUBLE) || > + (dag.insn.isMemberOf<SelectInstruction>()) || > + (dag.insn.getOpcode() == OP_ORD))) { > + const auto &childInsn = cast<LoadImmInstruction>(dag0->insn); > + src0 = this->selReg(dag.insn.getSrc(src1Index), type); > + src1 = getRegisterFromImmediate(childInsn.getImmediate(), type); > + inverse = true; > + if (dag1) dag1->isRoot = 1; > + } > + // Just grab the two sources > + else { > + src0 = this->selReg(dag.insn.getSrc(src0Index), type); > + src1 = this->selReg(dag.insn.getSrc(src1Index), type); > + markAllChildren(dag); > + } > + } > + > + void Selection::Opaque::getSrcGenRegImm(SelectionDAG &dag, GenRegister > &src0, > + GenRegister &src1, ir::Type type, > + bool &inverse) { > + SelectionDAG *dag0 = dag.child[0]; > + SelectionDAG *dag1 = dag.child[1]; > + getSrcGenRegImm(dag, dag0, dag1, src0, src1, type, inverse); > + } > + > + > /*! Template for the one-to-many instruction patterns */ > template <typename T, typename U> > class OneToManyPattern : public SelectionPattern > @@ -1846,7 +1918,6 @@ namespace gbe > DECL_CTOR(UnaryInstruction, 1, 1) > }; > > - BVAR(OCL_OPTIMIZE_IMMEDIATE, true); > > /*! Binary regular instruction pattern */ > class BinaryInstructionPattern : public SelectionPattern > @@ -1957,36 +2028,8 @@ namespace gbe > > // Look for immediate values > GenRegister src0, src1; > - SelectionDAG *dag0 = dag.child[0]; > - SelectionDAG *dag1 = dag.child[1]; > - > - // Right source can always be an immediate > - //logica ops of bool shouldn't use 0xffff, may use flag reg, so can't > optimize > - if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() > == OP_LOADI && > - canGetRegisterFromImmediate(dag1->insn) && type != TYPE_BOOL) { > - const auto &childInsn = cast<LoadImmInstruction>(dag1->insn); > - src0 = sel.selReg(insn.getSrc(0), type); > - src1 = getRegisterFromImmediate(childInsn.getImmediate()); > - if (dag0) dag0->isRoot = 1; > - } > - // Left source cannot be immediate but it is OK if we can commute > - else if (OCL_OPTIMIZE_IMMEDIATE && dag0 != NULL && > - (insn.commutes() || insn.getOpcode() == OP_SUB) && > - dag0->insn.getOpcode() == OP_LOADI && > canGetRegisterFromImmediate(dag0->insn) && > - type != TYPE_BOOL) { > - const auto &childInsn = cast<LoadImmInstruction>(dag0->insn); > - src0 = insn.getOpcode() != OP_SUB ? > - sel.selReg(insn.getSrc(1), type) : > GenRegister::negate(sel.selReg(insn.getSrc(1), type)); > - src1 = getRegisterFromImmediate(childInsn.getImmediate(), > insn.getOpcode() == OP_SUB); > - if (dag1) dag1->isRoot = 1; > - } > - // Just grab the two sources > - else { > - src0 = sel.selReg(insn.getSrc(0), type); > - src1 = sel.selReg(insn.getSrc(1), type); > - markAllChildren(dag); > - } > - > + bool inverse = false; > + sel.getSrcGenRegImm(dag, src0, src1, type, inverse); > // Output the binary instruction > if (sel.getRegisterFamily(insn.getDst(0)) == ir::FAMILY_BOOL) { > GBE_ASSERT(insn.getOpcode() == OP_AND || > @@ -2272,12 +2315,16 @@ namespace gbe > // So both sources must match > if (sourceMatch(cmp, 0, &dag, 1) == false) return false; > if (sourceMatch(cmp, 1, &dag, 2) == false) return false; > - > // OK, we merge the instructions > const ir::CompareInstruction &cmpInsn = > cast<CompareInstruction>(cmp->insn); > const ir::Opcode opcode = cmpInsn.getOpcode(); > if(opcode == OP_ORD) return false; > - const uint32_t genCmp = getGenCompare(opcode); > + GenRegister src0, src1; > + const ir::Type type = cmpInsn.getType(); > + bool inverse = false; > + sel.getSrcGenRegImm(*cmp, src0, src1, type, inverse); > + > + const uint32_t genCmp = getGenCompare(opcode, inverse); > sel.push(); > if (sel.isScalarReg(insn.getDst(0)) == true) { > sel.curr.execWidth = 1; > @@ -2287,21 +2334,12 @@ namespace gbe > > // Like for regular selects, we need a temporary since we cannot > predicate > // properly > - const ir::Type type = cmpInsn.getType(); > const uint32_t simdWidth = sel.curr.execWidth; > const GenRegister dst = sel.selReg(insn.getDst(0), type); > - const GenRegister src0 = sel.selReg(cmpInsn.getSrc(0), type); > - const GenRegister src1 = sel.selReg(cmpInsn.getSrc(1), type); > - > sel.curr.predicate = GEN_PREDICATE_NONE; > sel.curr.execWidth = simdWidth; > sel.SEL_CMP(genCmp, dst, src0, src1); > sel.pop(); > - > - > - // We need the sources of the compare instruction > - markAllChildren(*cmp); > - > return true; > } > }; > @@ -2913,25 +2951,8 @@ namespace gbe > > // Look for immediate values for the right source > GenRegister src0, src1; > - SelectionDAG *dag0 = dag.child[0]; > - SelectionDAG *dag1 = dag.child[1]; > - > - // Right source can always be an immediate > - if (OCL_OPTIMIZE_IMMEDIATE && dag1 != NULL && dag1->insn.getOpcode() > == OP_LOADI && > - canGetRegisterFromImmediate(dag1->insn) && opcode != OP_ORD) { > - const auto &childInsn = cast<LoadImmInstruction>(dag1->insn); > - src0 = sel.selReg(insn.getSrc(0), type); > - Immediate imm = childInsn.getImmediate(); > - if(imm.type != type) > - imm.type = type; > - src1 = getRegisterFromImmediate(imm); > - if (dag0) dag0->isRoot = 1; > - } else { > - src0 = sel.selReg(insn.getSrc(0), type); > - src1 = sel.selReg(insn.getSrc(1), type); > - markAllChildren(dag); > - } > - > + bool inverseCmp = false; > + sel.getSrcGenRegImm(dag, src0, src1, type, inverseCmp); > sel.push(); > if (sel.isScalarReg(dst)) > sel.curr.noMask = 1; > @@ -2944,7 +2965,7 @@ namespace gbe > for(int i=0; i<3; i++) > tmp[i] = sel.selReg(sel.reg(FAMILY_DWORD)); > sel.curr.flagGen = 1; > - sel.I64CMP(getGenCompare(opcode), src0, src1, tmp); > + sel.I64CMP(getGenCompare(opcode, inverseCmp), src0, src1, tmp); > } else if(opcode == OP_ORD) { > sel.push(); > sel.CMP(GEN_CONDITIONAL_EQ, src0, src0, tmpDst); > @@ -2966,7 +2987,7 @@ namespace gbe > sel.curr.flagGen = needStoreBool; > tmpDst = GenRegister::nullud(); > } > - sel.CMP(getGenCompare(opcode), src0, src1, tmpDst); > + sel.CMP(getGenCompare(opcode, inverseCmp), src0, src1, tmpDst); > } > sel.pop(); > return true; > @@ -3189,19 +3210,9 @@ namespace gbe > SelectionDAG *dag1 = dag.child[1]; > SelectionDAG *dag2 = dag.child[2]; > > - // Right source can always be an immediate > - if (OCL_OPTIMIZE_IMMEDIATE && dag2 != NULL && dag2->insn.getOpcode() > == OP_LOADI && canGetRegisterFromImmediate(dag2->insn)) { > - const auto &childInsn = cast<LoadImmInstruction>(dag2->insn); > - src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); > - src1 = getRegisterFromImmediate(childInsn.getImmediate()); > - if (dag0) dag0->isRoot = 1; > - if (dag1) dag1->isRoot = 1; > - } else { > - src0 = sel.selReg(insn.getSrc(SelectInstruction::src0Index), type); > - src1 = sel.selReg(insn.getSrc(SelectInstruction::src1Index), type); > - markAllChildren(dag); > - } > - > + if (dag0) dag0->isRoot = 1; > + bool inverse = false; > + sel.getSrcGenRegImm(dag, dag1, dag2, src0, src1, type, inverse); > const Register pred = insn.getPredicate(); > sel.push(); > if (sel.isScalarReg(insn.getDst(0)) == true) { > @@ -3209,7 +3220,7 @@ namespace gbe > sel.curr.predicate = GEN_PREDICATE_NONE; > sel.curr.noMask = 1; > } > - > + sel.curr.inversePredicate ^= inverse; > sel.curr.physicalFlag = 0; > sel.curr.flagIndex = (uint16_t) pred; > sel.curr.predicate = GEN_PREDICATE_NORMAL; > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
