This version of the patchset LGTM. Thanks! Ruiling
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Chuanbo Weng > Sent: Friday, November 6, 2015 11:28 AM > To: [email protected] > Cc: Weng, Chuanbo > Subject: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme. > > __gen_ocl_vme is used for hardware accelerated video motion estimation. > It gets payload values as parameters and uses MOV to pass these payload > values to VME SEND Message's payload grfs. The int8 return value is used > to store SEND Message writeback. > > v2: > Remove unnecessary 5 parameters(src_grf*) of built-in function(we just > need to allocate related registers in gen_insn_selection step). > > v3: > Remove redundant code and change MAX_SRC_NUM to 40. > > v4: > Choose message response length by message type instead of hard code. > > v5: > Choose message response length by message type in the whole backend > pipeline. > > v6: > Treat simd8 and simd16 differently when mov payload value to consecutive > payload grfs. > > Signed-off-by: Chuanbo Weng <[email protected]> > --- > backend/src/backend/gen/gen_mesa_disasm.c | 14 ++++ > backend/src/backend/gen7_instruction.hpp | 15 ++++ > backend/src/backend/gen_context.cpp | 98 > ++++++++++++++++++++++ > backend/src/backend/gen_context.hpp | 1 + > backend/src/backend/gen_defs.hpp | 15 ++++ > backend/src/backend/gen_encoder.cpp | 44 ++++++++++ > backend/src/backend/gen_encoder.hpp | 13 +++ > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > backend/src/backend/gen_insn_selection.cpp | 73 ++++++++++++++++ > backend/src/backend/gen_insn_selection.hpp | 14 +++- > backend/src/backend/gen_insn_selection.hxx | 1 + > backend/src/ir/instruction.cpp | 66 +++++++++++++++ > backend/src/ir/instruction.hpp | 17 +++- > backend/src/ir/instruction.hxx | 1 + > backend/src/libocl/include/ocl_misc.h | 15 ++++ > backend/src/llvm/llvm_gen_backend.cpp | 47 +++++++++++ > backend/src/llvm/llvm_gen_ocl_function.hxx | 2 + > backend/src/llvm/llvm_scalarize.cpp | 4 + > 18 files 
changed, 436 insertions(+), 5 deletions(-) > > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c > b/backend/src/backend/gen/gen_mesa_disasm.c > index 5b71cfa..3198da7 100644 > --- a/backend/src/backend/gen/gen_mesa_disasm.c > +++ b/backend/src/backend/gen/gen_mesa_disasm.c > @@ -476,6 +476,13 @@ static int column; > > static int gen_version; > > +#define GEN7_BITS_FIELD(inst, gen7) \ > + ({ \ > + int bits; \ > + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \ > + bits; \ > + }) > + > #define GEN_BITS_FIELD(inst, gen) \ > ({ \ > int bits; \ > @@ -530,6 +537,8 @@ static int gen_version; > #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, > header.execution_size) > #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, > bits3.gen7_branch.jip, > bits3.gen8_branch.jip/8) > #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, > bits3.gen7_branch.uip, > bits2.gen8_branch.uip/8) > +#define VME_BTI(inst) GEN7_BITS_FIELD(inst, bits3.vme_gen7.bti) > +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, > bits3.vme_gen7.msg_type) > #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, > bits3.sampler_gen7.bti) > #define SAMPLER(inst) GEN_BITS_FIELD(inst, > bits3.sampler_gen7.sampler) > #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst, > bits3.sampler_gen7.msg_type) > @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, uint32_t > deviceID, uint32_t compac > > if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, > bits2.da1.src1_reg_file) == > GEN_IMMEDIATE_VALUE) { > switch (target) { > + case GEN_SFID_VIDEO_MOTION_EST: > + format(file, " (bti: %d, msg_type: %d)", > + VME_BTI(inst), > + VME_MSG_TYPE(inst)); > + break; > case GEN_SFID_SAMPLER: > format(file, " (%d, %d, %d, %d)", > SAMPLE_BTI(inst), > diff --git a/backend/src/backend/gen7_instruction.hpp > b/backend/src/backend/gen7_instruction.hpp > index 51f342b..258dd24 100644 > --- a/backend/src/backend/gen7_instruction.hpp > +++ b/backend/src/backend/gen7_instruction.hpp > @@ -350,6 +350,21 @@ union 
Gen7NativeInstruction > uint32_t end_of_thread:1; > } sampler_gen7; > > + struct { > + uint32_t bti:8; > + uint32_t vme_search_path_lut:3; > + uint32_t lut_sub:2; > + uint32_t msg_type:2; > + uint32_t stream_in:1; > + uint32_t stream_out:1; > + uint32_t reserved_mbz:2; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad1:2; > + uint32_t end_of_thread:1; > + } vme_gen7; > + > /** > * Message for the Sandybridge Sampler Cache or Constant Cache Data > Port. > * > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 4e2ebfb..ccc9f17 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -2266,6 +2266,104 @@ namespace gbe > p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, > insn.extra.isLD, insn.extra.isUniform); > } > > + void GenContext::emitVmeInstruction(const SelectionInstruction &insn) { > + const GenRegister dst = ra->genReg(insn.dst(0)); > + const unsigned int msg_type = insn.extra.msg_type; > + > + GBE_ASSERT(msg_type == 1); > + int rsp_len; > + if(msg_type == 1) > + rsp_len = 6; > + uint32_t execWidth_org = p->curr.execWidth; > + p->push(); > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->curr.execWidth = 1; > + /* Use MOV to Setup bits of payload: mov payload value stored in > insn.src(x) > to > + * 5 consecutive payload grf. > + * In simd8 mode, one virtual grf register map to one physical grf > register. > But > + * in simd16 mode, one virtual grf register map to two physical grf > registers. > + * So we should treat them differently. 
> + * */ > + if(execWidth_org == 8){ > + for(int i=0; i < 5; i++){ > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > + payload_grf.width = GEN_WIDTH_1; > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > + payload_grf.subphysical = 1; > + for(int j=0; j < 8; j++){ > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > + GenRegister payload_val = ra->genReg(insn.src(i*8+j)); > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > + payload_val.width = GEN_WIDTH_1; > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > + > + p->MOV(payload_grf, payload_val); > + } > + } > + } > + else if(execWidth_org == 16){ > + for(int i=0; i < 2; i++){ > + for(int k = 0; k < 2; k++){ > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > + payload_grf.nr += k; > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > + payload_grf.width = GEN_WIDTH_1; > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > + payload_grf.subphysical = 1; > + for(int j=0; j < 8; j++){ > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > + GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j)); > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > + payload_val.width = GEN_WIDTH_1; > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > + > + p->MOV(payload_grf, payload_val); > + } > + } > + } > + { > + int i = 2; > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > + payload_grf.width = GEN_WIDTH_1; > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > + payload_grf.subphysical = 1; > + for(int j=0; j < 8; j++){ > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > + GenRegister payload_val = ra->genReg(insn.src(i*16+j)); > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > + payload_val.width = GEN_WIDTH_1; > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > + > + p->MOV(payload_grf, payload_val); > + } > + } > + } > + p->pop(); > + > + p->push(); > + p->curr.predicate = 
GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->curr.execWidth = 1; > + GenRegister payload_did = GenRegister::retype(ra- > >genReg(insn.dst(rsp_len)), GEN_TYPE_UB); > + payload_did.vstride = GEN_VERTICAL_STRIDE_0; > + payload_did.width = GEN_WIDTH_1; > + payload_did.hstride = GEN_HORIZONTAL_STRIDE_0; > + payload_did.subphysical = 1; > + payload_did.subnr = 20 * typeSize(GEN_TYPE_UB); > + GenRegister grf0 = GenRegister::ub1grf(0, 20); > + p->MOV(payload_did, grf0); > + p->pop(); > + > + const GenRegister msgPayload = ra->genReg(insn.dst(rsp_len)); > + const unsigned char bti = insn.getbti(); > + const unsigned int vme_search_path_lut = insn.extra.vme_search_path_lut; > + const unsigned int lut_sub = insn.extra.lut_sub; > + p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut, lut_sub); > + } > + > void GenContext::scratchWrite(const GenRegister header, uint32_t offset, > uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) { > p->push(); > uint32_t simdWidth = p->curr.execWidth; > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 4044694..870266c 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -167,6 +167,7 @@ namespace gbe > virtual void emitUnpackLongInstruction(const SelectionInstruction &insn); > void emitDWordGatherInstruction(const SelectionInstruction &insn); > void emitSampleInstruction(const SelectionInstruction &insn); > + void emitVmeInstruction(const SelectionInstruction &insn); > void emitTypedWriteInstruction(const SelectionInstruction &insn); > void emitSpillRegInstruction(const SelectionInstruction &insn); > void emitUnSpillRegInstruction(const SelectionInstruction &insn); > diff --git a/backend/src/backend/gen_defs.hpp > b/backend/src/backend/gen_defs.hpp > index 1b550ac..09cb2ba 100644 > --- a/backend/src/backend/gen_defs.hpp > +++ b/backend/src/backend/gen_defs.hpp > @@ -615,6 +615,21 @@ union GenNativeInstruction > uint32_t 
end_of_thread:1; > } sampler_gen7; > > + struct { > + uint32_t bti:8; > + uint32_t vme_search_path_lut:3; > + uint32_t lut_sub:2; > + uint32_t msg_type:2; > + uint32_t stream_in:1; > + uint32_t stream_out:1; > + uint32_t reserved_mbz:2; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad1:2; > + uint32_t end_of_thread:1; > + } vme_gen7; > + > /** > * Message for the Sandybridge Sampler Cache or Constant Cache Data > Port. > * > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index 2cc51cc..be38cef 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -1191,6 +1191,50 @@ namespace gbe > simd_mode, return_format); > } > > + void GenEncoder::setVmeMessage(GenNativeInstruction *insn, > + unsigned char bti, > + uint32_t response_length, > + uint32_t msg_length, > + uint32_t msg_type, > + unsigned char vme_search_path_lut, > + unsigned char lut_sub) > + { > + const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST; > + setMessageDescriptor(insn, sfid, msg_length, response_length, true); > + insn->bits3.vme_gen7.bti = bti; > + insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut; > + insn->bits3.vme_gen7.lut_sub = lut_sub; > + insn->bits3.vme_gen7.msg_type = msg_type; > + insn->bits3.vme_gen7.stream_in = 0; > + insn->bits3.vme_gen7.stream_out = 0; > + insn->bits3.vme_gen7.reserved_mbz = 0; > + > + } > + > + void GenEncoder::VME(unsigned char bti, > + GenRegister dest, > + GenRegister msg, > + uint32_t msg_type, > + uint32_t vme_search_path_lut, > + uint32_t lut_sub) > + { > + /* Currectly we just support inter search only, we will support other > + * modes in future. 
> + */ > + GBE_ASSERT(msg_type == 1); > + uint32_t msg_length, response_length; > + if(msg_type == 1){ > + msg_length = 5; > + response_length = 6; > + } > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + this->setHeader(insn); > + this->setDst(insn, dest); > + this->setSrc0(insn, msg); > + setVmeMessage(insn, bti, response_length, msg_length, > + msg_type, vme_search_path_lut, lut_sub); > + } > + > void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, > unsigned char bti) > { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > diff --git a/backend/src/backend/gen_encoder.hpp > b/backend/src/backend/gen_encoder.hpp > index f2bb5ab..6df7087 100644 > --- a/backend/src/backend/gen_encoder.hpp > +++ b/backend/src/backend/gen_encoder.hpp > @@ -203,6 +203,19 @@ namespace gbe > bool header_present, > uint32_t simd_mode, > uint32_t return_format); > + virtual void VME(unsigned char bti, > + GenRegister dest, > + GenRegister msg, > + uint32_t msg_type, > + uint32_t vme_search_path_lut, > + uint32_t lut_sub); > + void setVmeMessage(GenNativeInstruction *insn, > + unsigned char bti, > + uint32_t response_length, > + uint32_t msg_length, > + uint32_t msg_type, > + unsigned char vme_search_path_lut, > + unsigned char lut_sub); > > /*! 
TypedWrite instruction for texture */ > virtual void TYPED_WRITE(GenRegister header, > diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index 9b60c17..878e0e7 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte, 40, 1, 1) > DECL_GEN7_SCHEDULE(PackLong, 40, 1, 1) > DECL_GEN7_SCHEDULE(UnpackLong, 40, 1, 1) > DECL_GEN7_SCHEDULE(Sample, 160, 1, 1) > +DECL_GEN7_SCHEDULE(Vme, 320, 1, 1) > DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1) > DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1) > DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 2452aea..cfaa792 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -187,6 +187,7 @@ namespace gbe > this->opcode == SEL_OP_ATOMIC || > this->opcode == SEL_OP_BYTE_GATHER || > this->opcode == SEL_OP_SAMPLE || > + this->opcode == SEL_OP_VME || > this->opcode == SEL_OP_DWORD_GATHER; > } > > @@ -661,6 +662,8 @@ namespace gbe > void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2); > /*! Encode sample instructions */ > void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, > uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform); > + /*! Encode vme instructions */ > + void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal, > uint32_t > dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t vme_search_path_lut, > uint32_t lut_sub); > /*! Encode typed write instructions */ > void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool > is3D); > /*! 
Get image information */ > @@ -2120,6 +2123,34 @@ namespace gbe > insn->extra.isUniform = isUniform; > } > > + void Selection::Opaque::VME(uint32_t bti, GenRegister *dst, GenRegister > *payloadVal, > + uint32_t dstNum, uint32_t srcNum, uint32_t > msg_type, > + uint32_t vme_search_path_lut, uint32_t > lut_sub) { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum, > srcNum); > + SelectionVector *dstVector = this->appendVector(); > + SelectionVector *msgVector = this->appendVector(); > + > + for (uint32_t elemID = 0; elemID < dstNum; ++elemID) > + insn->dst(elemID) = dst[elemID]; > + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) > + insn->src(elemID) = payloadVal[elemID]; > + > + dstVector->regNum = dstNum; > + dstVector->isSrc = 0; > + dstVector->offsetID = 0; > + dstVector->reg = &insn->dst(0); > + > + msgVector->regNum = srcNum; > + msgVector->isSrc = 1; > + msgVector->offsetID = 0; > + msgVector->reg = &insn->src(0); > + > + insn->setbti(bti); > + insn->extra.msg_type = msg_type; > + insn->extra.vme_search_path_lut = vme_search_path_lut; > + insn->extra.lut_sub = lut_sub; > + } > + > /////////////////////////////////////////////////////////////////////////// > // Code selection public implementation > /////////////////////////////////////////////////////////////////////////// > @@ -5126,6 +5157,47 @@ namespace gbe > DECL_CTOR(SampleInstruction, 1, 1); > }; > > + DECL_PATTERN(VmeInstruction) > + { > + INLINE bool emitOne(Selection::Opaque &sel, const ir::VmeInstruction > &insn, > bool &markChildren) const > + { > + using namespace ir; > + uint32_t msg_type, vme_search_path_lut, lut_sub; > + msg_type = insn.getMsgType(); > + vme_search_path_lut = 0; > + lut_sub = 0; > + GBE_ASSERT(msg_type == 1); > + uint32_t payloadLen = 0; > + //We allocate 5 virtual payload grfs to selection dst register. 
> + if(msg_type == 1){ > + payloadLen = 5; > + } > + uint32_t selDstNum = insn.getDstNum() + payloadLen; > + uint32_t srcNum = insn.getSrcNum(); > + vector<GenRegister> dst(selDstNum); > + vector<GenRegister> payloadVal(srcNum); > + uint32_t valueID = 0; > + for (valueID = 0; valueID < insn.getDstNum(); ++valueID) > + dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); > + for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID) > + dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > + > + for (valueID = 0; valueID < srcNum; ++valueID) > + payloadVal[valueID] = sel.selReg(insn.getSrc(valueID), > insn.getSrcType()); > + > + uint32_t bti = insn.getImageIndex(); > + if (bti > BTI_MAX_ID) { > + std::cerr << "Too large bti " << bti; > + return false; > + } > + > + sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum, > msg_type, > vme_search_path_lut, lut_sub); > + > + return true; > + } > + DECL_CTOR(VmeInstruction, 1, 1); > + }; > + > /*! Typed write instruction pattern. */ > DECL_PATTERN(TypedWriteInstruction) > { > @@ -5591,6 +5663,7 @@ namespace gbe > this->insert<MulAddInstructionPattern>(); > this->insert<SelectModifierInstructionPattern>(); > this->insert<SampleInstructionPattern>(); > + this->insert<VmeInstructionPattern>(); > this->insert<GetImageInfoInstructionPattern>(); > this->insert<ReadARFInstructionPattern>(); > this->insert<RegionInstructionPattern>(); > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index f51c905..578db41 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -90,8 +90,8 @@ namespace gbe > const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; } > /*! Damn C++ */ > const GenRegister &src(uint32_t srcID) const { return > regs[dstNum+srcID]; } > - /*! No more than 9 sources (used by typed writes on simd8 mode.) */ > - enum { MAX_SRC_NUM = 9 }; > + /*! 
No more than 40 sources (40 sources are used by vme for payload > passing and setting) */ > + enum { MAX_SRC_NUM = 40 }; > /*! No more than 16 destinations (15 used by I64DIV/I64REM) */ > enum { MAX_DST_NUM = 16 }; > /*! State of the instruction (extra fields neeed for the encoding) */ > @@ -129,6 +129,12 @@ namespace gbe > bool isLD; // is this a ld message? > bool isUniform; > }; > + struct { > + uint16_t vme_bti:8; > + uint16_t msg_type:2; > + uint16_t vme_search_path_lut:3; > + uint16_t lut_sub:2; > + }; > uint32_t barrierType; > bool longjmp; > uint32_t indirect_offset; > @@ -138,7 +144,7 @@ namespace gbe > /*! Number of destinations */ > uint8_t dstNum:5; > /*! Number of sources */ > - uint8_t srcNum:4; > + uint8_t srcNum:6; > /*! To store various indices */ > uint32_t index; > /*! For BRC/IF to store the UIP */ > @@ -152,6 +158,7 @@ namespace gbe > switch (opcode) { > case SEL_OP_DWORD_GATHER: return extra.function; > case SEL_OP_SAMPLE: return extra.rdbti; > + case SEL_OP_VME: return extra.vme_bti; > case SEL_OP_TYPED_WRITE: return extra.bti; > default: > GBE_ASSERT(0); > @@ -164,6 +171,7 @@ namespace gbe > switch (opcode) { > case SEL_OP_DWORD_GATHER: extra.function = bti; return; > case SEL_OP_SAMPLE: extra.rdbti = bti; return; > + case SEL_OP_VME: extra.vme_bti = bti; return; > case SEL_OP_TYPED_WRITE: extra.bti = bti; return; > default: > GBE_ASSERT(0); > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index 479398b..4d3e921 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE, > UnpackByteInstruction) > DECL_SELECTION_IR(PACK_LONG, PackLongInstruction) > DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction) > DECL_SELECTION_IR(SAMPLE, SampleInstruction) > +DECL_SELECTION_IR(VME, VmeInstruction) > DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction) > DECL_SELECTION_IR(SPILL_REG, 
SpillRegInstruction) > DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index f93c528..7bf787e 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -595,6 +595,58 @@ namespace ir { > static const uint32_t dstNum = 4; > }; > > + class ALIGNED_INSTRUCTION VmeInstruction : > + public BasePolicy, > + public TupleSrcPolicy<VmeInstruction>, > + public TupleDstPolicy<VmeInstruction> > + { > + public: > + VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, > + uint32_t dstNum, uint32_t srcNum, int msg_type, > + int vme_search_path_lut, int lut_sub) { > + this->opcode = OP_VME; > + this->dst = dstTuple; > + this->src = srcTuple; > + this->dstNum = dstNum; > + this->srcNum = srcNum; > + this->imageIdx = imageIdx; > + this->msg_type = msg_type; > + this->vme_search_path_lut = vme_search_path_lut; > + this->lut_sub = lut_sub; > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const { > + this->outOpcode(out); > + out << " src_surface id " << (int)this->getImageIndex() > + << " ref_surface id " << (int)this->getImageIndex() + 1; > + for(uint32_t i = 0; i < dstNum; i++){ > + out<< " %" << this->getDst(fn, i); > + } > + for(uint32_t i = 0; i < srcNum; i++){ > + out<< " %" << this->getSrc(fn, i); > + } > + out > + << " msg_type " << (int)this->getMsgType() > + << " vme_search_path_lut " << (int)this->vme_search_path_lut > + << " lut_sub " << (int)this->lut_sub; > + } > + Tuple src; > + Tuple dst; > + > + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; } > + INLINE uint8_t getMsgType(void) const { return this->msg_type; } > + > + INLINE Type getSrcType(void) const { return TYPE_U32; } > + INLINE Type getDstType(void) const { return TYPE_U32; } > + uint8_t imageIdx; > + uint8_t msg_type; > + uint8_t vme_search_path_lut; > + uint8_t lut_sub; > + 
uint32_t srcNum; > + uint32_t dstNum; > + }; > + > + > class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO > public BasePolicy, > public TupleSrcPolicy<TypedWriteInstruction>, > @@ -1111,6 +1163,8 @@ namespace ir { > // TODO > INLINE bool SampleInstruction::wellFormed(const Function &fn, std::string > &why) const > { return true; } > + INLINE bool VmeInstruction::wellFormed(const Function &fn, std::string > &why) const > + { return true; } > INLINE bool TypedWriteInstruction::wellFormed(const Function &fn, > std::string &why) const > { return true; } > INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, > std::string &why) const > @@ -1502,6 +1556,10 @@ START_INTROSPECTION(LabelInstruction) > #include "ir/instruction.hxx" > END_INTROSPECTION(LabelInstruction) > > +START_INTROSPECTION(VmeInstruction) > +#include "ir/instruction.hxx" > +END_INTROSPECTION(VmeInstruction) > + > #undef END_INTROSPECTION > #undef START_INTROSPECTION > #undef DECL_INSN > @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type, > getDstType(void), getDstType()) > DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerIndex(void), > getSamplerIndex()) > DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerOffset(void), > getSamplerOffset()) > DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void), > getImageIndex()) > +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType()) > +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType()) > +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void), > getImageIndex()) > +DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void), getMsgType()) > DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType()) > DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), > getCoordType()) > DECL_MEM_FN(TypedWriteInstruction, uint8_t, getImageIndex(void), > getImageIndex()) > @@ -1932,6 +1994,10 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, > getImageIndex(void), getImageIndex > return 
internal::SampleInstruction(imageIndex, dst, src, srcNum, > dstIsFloat, > srcIsFloat, sampler, samplerOffset).convert(); > } > > + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum, > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) { > + return internal::VmeInstruction(imageIndex, dst, src, dstNum, srcNum, > msg_type, vme_search_path_lut, lut_sub).convert(); > + } > + > Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type > srcType, Type coordType) { > return internal::TypedWriteInstruction(imageIndex, src, srcNum, srcType, > coordType).convert(); > } > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 3f3c655..c8da416 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -191,8 +191,8 @@ namespace ir { > template <typename T> INLINE bool isMemberOf(void) const { > return T::isClassOf(*this); > } > - /*! max_src for store instruction (vec16 + addr) */ > - static const uint32_t MAX_SRC_NUM = 32; > + /*! max_src used by vme for payload passing and setting */ > + static const uint32_t MAX_SRC_NUM = 40; > static const uint32_t MAX_DST_NUM = 32; > protected: > BasicBlock *parent; //!< The basic block containing the instruction > @@ -399,6 +399,17 @@ namespace ir { > static bool isClassOf(const Instruction &insn); > }; > > + /*! Video motion estimation */ > + class VmeInstruction : public Instruction { > + public: > + uint8_t getImageIndex() const; > + uint8_t getMsgType() const; > + Type getSrcType(void) const; > + Type getDstType(void) const; > + /*! 
Return true if the given instruction is an instance of this class */ > + static bool isClassOf(const Instruction &insn); > + }; > + > typedef union _ImageInfoKey{ > _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {}; > _ImageInfoKey(int key) : data(key) {}; > @@ -756,6 +767,8 @@ namespace ir { > Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type > srcType, Type coordType); > /*! sample textures */ > Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t > srcNum, > bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset); > + /*! video motion estimation */ > + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t dstNum, > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub); > /*! get image information , such as width/height/depth/... */ > Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, > Register infoReg); > /*! label labelIndex */ > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index 81548c9..27d59a9 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction) > DECL_INSN(LABEL, LabelInstruction) > DECL_INSN(READ_ARF, ReadARFInstruction) > DECL_INSN(REGION, RegionInstruction) > +DECL_INSN(VME, VmeInstruction) > DECL_INSN(INDIRECT_MOV, IndirectMovInstruction) > DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) > DECL_INSN(MUL_HI, BinaryInstruction) > diff --git a/backend/src/libocl/include/ocl_misc.h > b/backend/src/libocl/include/ocl_misc.h > index 359025b..7d4abab 100644 > --- a/backend/src/libocl/include/ocl_misc.h > +++ b/backend/src/libocl/include/ocl_misc.h > @@ -136,5 +136,20 @@ struct time_stamp { > uint event; > }; > > +uint __gen_ocl_region(ushort offset, uint data); > + > struct time_stamp __gen_ocl_get_timestamp(void); > + > +uint8 __gen_ocl_vme(image2d_t, image2d_t, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > 
+ uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + uint, uint, uint, uint, > + int, int, int); > #endif > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 7299d53..19927ba 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -3541,6 +3541,7 @@ namespace gbe > case GEN_OCL_REGION: > case GEN_OCL_SIMD_ID: > case GEN_OCL_SIMD_SHUFFLE: > + case GEN_OCL_VME: > this->newRegister(&I); > break; > case GEN_OCL_PRINTF: > @@ -3839,6 +3840,52 @@ namespace gbe > ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM); > break; > } > + case GEN_OCL_VME: > + { > + > + const uint8_t imageID = getImageID(I); > + > + AI++; > + AI++; > + > + uint32_t src_length = 40; > + > + vector<ir::Register> dstTupleData, srcTupleData; > + for (uint32_t i = 0; i < src_length; i++, AI++){ > + srcTupleData.push_back(this->getRegister(*AI)); > + } > + > + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], > src_length); > + > + Constant *msg_type_cpv = dyn_cast<Constant>(*AI); > + assert(msg_type_cpv); > + const ir::Immediate &msg_type_x = > processConstantImm(msg_type_cpv); > + int msg_type = msg_type_x.getIntegerValue(); > + uint32_t dst_length; > + //msy_type =1 indicate inter search only of gen vme shared > function > + GBE_ASSERT(msg_type == 1); > + if(msg_type == 1) > + dst_length = 6; > + for (uint32_t elemID = 0; elemID < dst_length; ++elemID) { > + const ir::Register reg = this->getRegister(&I, elemID); > + dstTupleData.push_back(reg); > + } > + const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], > dst_length); > + ++AI; > + Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI); > + assert(vme_search_path_lut_cpv); > + const ir::Immediate &vme_search_path_lut_x = > processConstantImm(vme_search_path_lut_cpv); > + ++AI; > + Constant 
*lut_sub_cpv = dyn_cast<Constant>(*AI); > + assert(lut_sub_cpv); > + const ir::Immediate &lut_sub_x = processConstantImm(lut_sub_cpv); > + > + ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length, > + msg_type, vme_search_path_lut_x.getIntegerValue(), > + lut_sub_x.getIntegerValue()); > + > + break; > + } > case GEN_OCL_REGION: > { > const ir::Register dst = this->getRegister(&I); > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index cabb225..3fbf847 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, > intel_sub_group_shuffle) > DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) > DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) > > +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme) > + > // printf function > DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) > diff --git a/backend/src/llvm/llvm_scalarize.cpp > b/backend/src/llvm/llvm_scalarize.cpp > index 7ee5259..dc1d8ab 100644 > --- a/backend/src/llvm/llvm_scalarize.cpp > +++ b/backend/src/llvm/llvm_scalarize.cpp > @@ -671,6 +671,10 @@ namespace gbe { > *CI = InsertToVector(call, *CI); > break; > } > + case GEN_OCL_VME: > + setAppendPoint(call); > + extractFromVector(call); > + break; > } > } > } > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
