Pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Song, Ruiling > Sent: Tuesday, November 10, 2015 11:44 > To: Weng, Chuanbo; [email protected] > Cc: Weng, Chuanbo > Subject: Re: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme. > > This version of the patchset LGTM. > > Thanks! > Ruiling > > > -----Original Message----- > > From: Beignet [mailto:[email protected]] On Behalf > > Of Chuanbo Weng > > Sent: Friday, November 6, 2015 11:28 AM > > To: [email protected] > > Cc: Weng, Chuanbo > > Subject: [Beignet] [PATCH v6 1/4] Add built-in function __gen_ocl_vme. > > > > __gen_ocl_vme is used for hardware-accelerated video motion estimation. > > It gets payload values as parameters and uses MOV to pass these > > payload values to the VME SEND message's payload grfs. The int8 return > > value is used to store the SEND message writeback. > > > > v2: > > Remove the 5 unnecessary parameters (src_grf*) of the built-in function (we just > > need to allocate the related registers in the gen_insn_selection step). > > > > v3: > > Remove redundant code and change MAX_SRC_NUM to 40. > > > > v4: > > Choose the message response length by message type instead of hard-coding it. > > > > v5: > > Choose the message response length by message type in the whole backend > > pipeline. > > > > v6: > > Treat simd8 and simd16 differently when moving payload values to > > consecutive payload grfs.
> > > > Signed-off-by: Chuanbo Weng <[email protected]> > > --- > > backend/src/backend/gen/gen_mesa_disasm.c | 14 ++++ > > backend/src/backend/gen7_instruction.hpp | 15 ++++ > > backend/src/backend/gen_context.cpp | 98 > ++++++++++++++++++++++ > > backend/src/backend/gen_context.hpp | 1 + > > backend/src/backend/gen_defs.hpp | 15 ++++ > > backend/src/backend/gen_encoder.cpp | 44 ++++++++++ > > backend/src/backend/gen_encoder.hpp | 13 +++ > > .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 + > > backend/src/backend/gen_insn_selection.cpp | 73 > ++++++++++++++++ > > backend/src/backend/gen_insn_selection.hpp | 14 +++- > > backend/src/backend/gen_insn_selection.hxx | 1 + > > backend/src/ir/instruction.cpp | 66 +++++++++++++++ > > backend/src/ir/instruction.hpp | 17 +++- > > backend/src/ir/instruction.hxx | 1 + > > backend/src/libocl/include/ocl_misc.h | 15 ++++ > > backend/src/llvm/llvm_gen_backend.cpp | 47 +++++++++++ > > backend/src/llvm/llvm_gen_ocl_function.hxx | 2 + > > backend/src/llvm/llvm_scalarize.cpp | 4 + > > 18 files changed, 436 insertions(+), 5 deletions(-) > > > > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c > > b/backend/src/backend/gen/gen_mesa_disasm.c > > index 5b71cfa..3198da7 100644 > > --- a/backend/src/backend/gen/gen_mesa_disasm.c > > +++ b/backend/src/backend/gen/gen_mesa_disasm.c > > @@ -476,6 +476,13 @@ static int column; > > > > static int gen_version; > > > > +#define GEN7_BITS_FIELD(inst, gen7) \ > > + ({ \ > > + int bits; \ > > + bits = ((const union Gen7NativeInstruction *)inst)->gen7; \ > > + bits; \ > > + }) > > + > > #define GEN_BITS_FIELD(inst, gen) \ > > ({ \ > > int bits; \ > > @@ -530,6 +537,8 @@ static int gen_version; > > #define EXECUTION_SIZE(inst) GEN_BITS_FIELD(inst, > header.execution_size) > > #define BRANCH_JIP(inst) GEN_BITS_FIELD2(inst, > bits3.gen7_branch.jip, > > bits3.gen8_branch.jip/8) > > #define BRANCH_UIP(inst) GEN_BITS_FIELD2(inst, > bits3.gen7_branch.uip, > > bits2.gen8_branch.uip/8) > > 
+#define VME_BTI(inst) GEN7_BITS_FIELD(inst, > > bits3.vme_gen7.bti) > > +#define VME_MSG_TYPE(inst) GEN7_BITS_FIELD(inst, > > bits3.vme_gen7.msg_type) > > #define SAMPLE_BTI(inst) GEN_BITS_FIELD(inst, > bits3.sampler_gen7.bti) > > #define SAMPLER(inst) GEN_BITS_FIELD(inst, > > bits3.sampler_gen7.sampler) > > #define SAMPLER_MSG_TYPE(inst) GEN_BITS_FIELD(inst, > > bits3.sampler_gen7.msg_type) > > @@ -1431,6 +1440,11 @@ int gen_disasm (FILE *file, const void *inst, > > uint32_t deviceID, uint32_t compac > > > > if (GEN_BITS_FIELD2(inst, bits1.da1.src1_reg_file, > > bits2.da1.src1_reg_file) == > > GEN_IMMEDIATE_VALUE) { > > switch (target) { > > + case GEN_SFID_VIDEO_MOTION_EST: > > + format(file, " (bti: %d, msg_type: %d)", > > + VME_BTI(inst), > > + VME_MSG_TYPE(inst)); > > + break; > > case GEN_SFID_SAMPLER: > > format(file, " (%d, %d, %d, %d)", > > SAMPLE_BTI(inst), > > diff --git a/backend/src/backend/gen7_instruction.hpp > > b/backend/src/backend/gen7_instruction.hpp > > index 51f342b..258dd24 100644 > > --- a/backend/src/backend/gen7_instruction.hpp > > +++ b/backend/src/backend/gen7_instruction.hpp > > @@ -350,6 +350,21 @@ union Gen7NativeInstruction > > uint32_t end_of_thread:1; > > } sampler_gen7; > > > > + struct { > > + uint32_t bti:8; > > + uint32_t vme_search_path_lut:3; > > + uint32_t lut_sub:2; > > + uint32_t msg_type:2; > > + uint32_t stream_in:1; > > + uint32_t stream_out:1; > > + uint32_t reserved_mbz:2; > > + uint32_t header_present:1; > > + uint32_t response_length:5; > > + uint32_t msg_length:4; > > + uint32_t pad1:2; > > + uint32_t end_of_thread:1; > > + } vme_gen7; > > + > > /** > > * Message for the Sandybridge Sampler Cache or Constant Cache Data > Port. 
> > * > > diff --git a/backend/src/backend/gen_context.cpp > > b/backend/src/backend/gen_context.cpp > > index 4e2ebfb..ccc9f17 100644 > > --- a/backend/src/backend/gen_context.cpp > > +++ b/backend/src/backend/gen_context.cpp > > @@ -2266,6 +2266,104 @@ namespace gbe > > p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, > > simdWidth, -1, 0, insn.extra.isLD, insn.extra.isUniform); > > } > > > > + void GenContext::emitVmeInstruction(const SelectionInstruction &insn) > { > > + const GenRegister dst = ra->genReg(insn.dst(0)); > > + const unsigned int msg_type = insn.extra.msg_type; > > + > > + GBE_ASSERT(msg_type == 1); > > + int rsp_len; > > + if(msg_type == 1) > > + rsp_len = 6; > > + uint32_t execWidth_org = p->curr.execWidth; > > + p->push(); > > + p->curr.predicate = GEN_PREDICATE_NONE; > > + p->curr.noMask = 1; > > + p->curr.execWidth = 1; > > + /* Use MOV to Setup bits of payload: mov payload value stored in > > + insn.src(x) > > to > > + * 5 consecutive payload grf. > > + * In simd8 mode, one virtual grf register map to one physical grf > register. > > But > > + * in simd16 mode, one virtual grf register map to two physical grf > registers. > > + * So we should treat them differently. 
> > + * */ > > + if(execWidth_org == 8){ > > + for(int i=0; i < 5; i++){ > > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_grf.width = GEN_WIDTH_1; > > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > > + payload_grf.subphysical = 1; > > + for(int j=0; j < 8; j++){ > > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > > + GenRegister payload_val = ra->genReg(insn.src(i*8+j)); > > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_val.width = GEN_WIDTH_1; > > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > > + > > + p->MOV(payload_grf, payload_val); > > + } > > + } > > + } > > + else if(execWidth_org == 16){ > > + for(int i=0; i < 2; i++){ > > + for(int k = 0; k < 2; k++){ > > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > > + payload_grf.nr += k; > > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_grf.width = GEN_WIDTH_1; > > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > > + payload_grf.subphysical = 1; > > + for(int j=0; j < 8; j++){ > > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > > + GenRegister payload_val = ra->genReg(insn.src(i*16+k*8+j)); > > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_val.width = GEN_WIDTH_1; > > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > > + > > + p->MOV(payload_grf, payload_val); > > + } > > + } > > + } > > + { > > + int i = 2; > > + GenRegister payload_grf = ra->genReg(insn.dst(rsp_len+i)); > > + payload_grf.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_grf.width = GEN_WIDTH_1; > > + payload_grf.hstride = GEN_HORIZONTAL_STRIDE_0; > > + payload_grf.subphysical = 1; > > + for(int j=0; j < 8; j++){ > > + payload_grf.subnr = (7 - j) * typeSize(GEN_TYPE_UD); > > + GenRegister payload_val = ra->genReg(insn.src(i*16+j)); > > + payload_val.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_val.width = GEN_WIDTH_1; > > + payload_val.hstride = GEN_HORIZONTAL_STRIDE_0; > > + > > + 
p->MOV(payload_grf, payload_val); > > + } > > + } > > + } > > + p->pop(); > > + > > + p->push(); > > + p->curr.predicate = GEN_PREDICATE_NONE; > > + p->curr.noMask = 1; > > + p->curr.execWidth = 1; > > + GenRegister payload_did = GenRegister::retype(ra- > > >genReg(insn.dst(rsp_len)), GEN_TYPE_UB); > > + payload_did.vstride = GEN_VERTICAL_STRIDE_0; > > + payload_did.width = GEN_WIDTH_1; > > + payload_did.hstride = GEN_HORIZONTAL_STRIDE_0; > > + payload_did.subphysical = 1; > > + payload_did.subnr = 20 * typeSize(GEN_TYPE_UB); > > + GenRegister grf0 = GenRegister::ub1grf(0, 20); > > + p->MOV(payload_did, grf0); > > + p->pop(); > > + > > + const GenRegister msgPayload = ra->genReg(insn.dst(rsp_len)); > > + const unsigned char bti = insn.getbti(); > > + const unsigned int vme_search_path_lut = > insn.extra.vme_search_path_lut; > > + const unsigned int lut_sub = insn.extra.lut_sub; > > + p->VME(bti, dst, msgPayload, msg_type, vme_search_path_lut, > > + lut_sub); } > > + > > void GenContext::scratchWrite(const GenRegister header, uint32_t > > offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) { > > p->push(); > > uint32_t simdWidth = p->curr.execWidth; diff --git > > a/backend/src/backend/gen_context.hpp > > b/backend/src/backend/gen_context.hpp > > index 4044694..870266c 100644 > > --- a/backend/src/backend/gen_context.hpp > > +++ b/backend/src/backend/gen_context.hpp > > @@ -167,6 +167,7 @@ namespace gbe > > virtual void emitUnpackLongInstruction(const SelectionInstruction > &insn); > > void emitDWordGatherInstruction(const SelectionInstruction &insn); > > void emitSampleInstruction(const SelectionInstruction &insn); > > + void emitVmeInstruction(const SelectionInstruction &insn); > > void emitTypedWriteInstruction(const SelectionInstruction &insn); > > void emitSpillRegInstruction(const SelectionInstruction &insn); > > void emitUnSpillRegInstruction(const SelectionInstruction &insn); > > diff --git a/backend/src/backend/gen_defs.hpp > > 
b/backend/src/backend/gen_defs.hpp > > index 1b550ac..09cb2ba 100644 > > --- a/backend/src/backend/gen_defs.hpp > > +++ b/backend/src/backend/gen_defs.hpp > > @@ -615,6 +615,21 @@ union GenNativeInstruction > > uint32_t end_of_thread:1; > > } sampler_gen7; > > > > + struct { > > + uint32_t bti:8; > > + uint32_t vme_search_path_lut:3; > > + uint32_t lut_sub:2; > > + uint32_t msg_type:2; > > + uint32_t stream_in:1; > > + uint32_t stream_out:1; > > + uint32_t reserved_mbz:2; > > + uint32_t header_present:1; > > + uint32_t response_length:5; > > + uint32_t msg_length:4; > > + uint32_t pad1:2; > > + uint32_t end_of_thread:1; > > + } vme_gen7; > > + > > /** > > * Message for the Sandybridge Sampler Cache or Constant Cache Data > Port. > > * > > diff --git a/backend/src/backend/gen_encoder.cpp > > b/backend/src/backend/gen_encoder.cpp > > index 2cc51cc..be38cef 100644 > > --- a/backend/src/backend/gen_encoder.cpp > > +++ b/backend/src/backend/gen_encoder.cpp > > @@ -1191,6 +1191,50 @@ namespace gbe > > simd_mode, return_format); > > } > > > > + void GenEncoder::setVmeMessage(GenNativeInstruction *insn, > > + unsigned char bti, > > + uint32_t response_length, > > + uint32_t msg_length, > > + uint32_t msg_type, > > + unsigned char vme_search_path_lut, > > + unsigned char lut_sub) { > > + const GenMessageTarget sfid = GEN_SFID_VIDEO_MOTION_EST; > > + setMessageDescriptor(insn, sfid, msg_length, response_length, true); > > + insn->bits3.vme_gen7.bti = bti; > > + insn->bits3.vme_gen7.vme_search_path_lut = vme_search_path_lut; > > + insn->bits3.vme_gen7.lut_sub = lut_sub; > > + insn->bits3.vme_gen7.msg_type = msg_type; > > + insn->bits3.vme_gen7.stream_in = 0; > > + insn->bits3.vme_gen7.stream_out = 0; > > + insn->bits3.vme_gen7.reserved_mbz = 0; > > + > > + } > > + > > + void GenEncoder::VME(unsigned char bti, > > + GenRegister dest, > > + GenRegister msg, > > + uint32_t msg_type, > > + uint32_t vme_search_path_lut, > > + uint32_t lut_sub) { > > + /* Currectly we just support 
inter search only, we will support other > > + * modes in future. > > + */ > > + GBE_ASSERT(msg_type == 1); > > + uint32_t msg_length, response_length; > > + if(msg_type == 1){ > > + msg_length = 5; > > + response_length = 6; > > + } > > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > > + this->setHeader(insn); > > + this->setDst(insn, dest); > > + this->setSrc0(insn, msg); > > + setVmeMessage(insn, bti, response_length, msg_length, > > + msg_type, vme_search_path_lut, lut_sub); } > > + > > void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, > > unsigned char bti) > > { > > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); diff > > --git a/backend/src/backend/gen_encoder.hpp > > b/backend/src/backend/gen_encoder.hpp > > index f2bb5ab..6df7087 100644 > > --- a/backend/src/backend/gen_encoder.hpp > > +++ b/backend/src/backend/gen_encoder.hpp > > @@ -203,6 +203,19 @@ namespace gbe > > bool header_present, > > uint32_t simd_mode, > > uint32_t return_format); > > + virtual void VME(unsigned char bti, > > + GenRegister dest, > > + GenRegister msg, > > + uint32_t msg_type, > > + uint32_t vme_search_path_lut, > > + uint32_t lut_sub); > > + void setVmeMessage(GenNativeInstruction *insn, > > + unsigned char bti, > > + uint32_t response_length, > > + uint32_t msg_length, > > + uint32_t msg_type, > > + unsigned char vme_search_path_lut, > > + unsigned char lut_sub); > > > > /*! 
TypedWrite instruction for texture */ > > virtual void TYPED_WRITE(GenRegister header, diff --git > > a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > > index 9b60c17..878e0e7 100644 > > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > > @@ -36,6 +36,7 @@ DECL_GEN7_SCHEDULE(UnpackByte, 40, 1, > > 1) > > DECL_GEN7_SCHEDULE(PackLong, 40, 1, 1) > > DECL_GEN7_SCHEDULE(UnpackLong, 40, 1, 1) > > DECL_GEN7_SCHEDULE(Sample, 160, 1, 1) > > +DECL_GEN7_SCHEDULE(Vme, 320, 1, 1) > > DECL_GEN7_SCHEDULE(TypedWrite, 80, 1, 1) > > DECL_GEN7_SCHEDULE(SpillReg, 20, 1, 1) > > DECL_GEN7_SCHEDULE(UnSpillReg, 160, 1, 1) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > > b/backend/src/backend/gen_insn_selection.cpp > > index 2452aea..cfaa792 100644 > > --- a/backend/src/backend/gen_insn_selection.cpp > > +++ b/backend/src/backend/gen_insn_selection.cpp > > @@ -187,6 +187,7 @@ namespace gbe > > this->opcode == SEL_OP_ATOMIC || > > this->opcode == SEL_OP_BYTE_GATHER || > > this->opcode == SEL_OP_SAMPLE || > > + this->opcode == SEL_OP_VME || > > this->opcode == SEL_OP_DWORD_GATHER; > > } > > > > @@ -661,6 +662,8 @@ namespace gbe > > void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg > src2); > > /*! Encode sample instructions */ > > void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister > > *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool > > isLD, bool isUniform); > > + /*! Encode vme instructions */ > > + void VME(uint32_t bti, GenRegister *dst, GenRegister *payloadVal, > > + uint32_t > > dstNum, uint32_t srcNum, uint32_t msg_type, uint32_t > > vme_search_path_lut, uint32_t lut_sub); > > /*! Encode typed write instructions */ > > void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t > > bti, bool is3D); > > /*! 
Get image information */ > > @@ -2120,6 +2123,34 @@ namespace gbe > > insn->extra.isUniform = isUniform; > > } > > > > + void Selection::Opaque::VME(uint32_t bti, GenRegister *dst, > > + GenRegister > > *payloadVal, > > + uint32_t dstNum, uint32_t srcNum, uint32_t > > msg_type, > > + uint32_t vme_search_path_lut, uint32_t > > lut_sub) { > > + SelectionInstruction *insn = this->appendInsn(SEL_OP_VME, dstNum, > > srcNum); > > + SelectionVector *dstVector = this->appendVector(); > > + SelectionVector *msgVector = this->appendVector(); > > + > > + for (uint32_t elemID = 0; elemID < dstNum; ++elemID) > > + insn->dst(elemID) = dst[elemID]; > > + for (uint32_t elemID = 0; elemID < srcNum; ++elemID) > > + insn->src(elemID) = payloadVal[elemID]; > > + > > + dstVector->regNum = dstNum; > > + dstVector->isSrc = 0; > > + dstVector->offsetID = 0; > > + dstVector->reg = &insn->dst(0); > > + > > + msgVector->regNum = srcNum; > > + msgVector->isSrc = 1; > > + msgVector->offsetID = 0; > > + msgVector->reg = &insn->src(0); > > + > > + insn->setbti(bti); > > + insn->extra.msg_type = msg_type; > > + insn->extra.vme_search_path_lut = vme_search_path_lut; > > + insn->extra.lut_sub = lut_sub; > > + } > > + > > > > /////////////////////////////////////////////////////////////////////////// > > // Code selection public implementation > > > > ////////////////////////////////////////////////////////////////////// > > ///// > > @@ -5126,6 +5157,47 @@ namespace gbe > > DECL_CTOR(SampleInstruction, 1, 1); > > }; > > > > + DECL_PATTERN(VmeInstruction) > > + { > > + INLINE bool emitOne(Selection::Opaque &sel, const > > + ir::VmeInstruction &insn, > > bool &markChildren) const > > + { > > + using namespace ir; > > + uint32_t msg_type, vme_search_path_lut, lut_sub; > > + msg_type = insn.getMsgType(); > > + vme_search_path_lut = 0; > > + lut_sub = 0; > > + GBE_ASSERT(msg_type == 1); > > + uint32_t payloadLen = 0; > > + //We allocate 5 virtual payload grfs to selection dst register. 
> > + if(msg_type == 1){ > > + payloadLen = 5; > > + } > > + uint32_t selDstNum = insn.getDstNum() + payloadLen; > > + uint32_t srcNum = insn.getSrcNum(); > > + vector<GenRegister> dst(selDstNum); > > + vector<GenRegister> payloadVal(srcNum); > > + uint32_t valueID = 0; > > + for (valueID = 0; valueID < insn.getDstNum(); ++valueID) > > + dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); > > + for (valueID = insn.getDstNum(); valueID < selDstNum; ++valueID) > > + dst[valueID] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > > + > > + for (valueID = 0; valueID < srcNum; ++valueID) > > + payloadVal[valueID] = sel.selReg(insn.getSrc(valueID), > > + insn.getSrcType()); > > + > > + uint32_t bti = insn.getImageIndex(); > > + if (bti > BTI_MAX_ID) { > > + std::cerr << "Too large bti " << bti; > > + return false; > > + } > > + > > + sel.VME(bti, dst.data(), payloadVal.data(), selDstNum, srcNum, > > + msg_type, > > vme_search_path_lut, lut_sub); > > + > > + return true; > > + } > > + DECL_CTOR(VmeInstruction, 1, 1); > > + }; > > + > > /*! Typed write instruction pattern. */ > > DECL_PATTERN(TypedWriteInstruction) > > { > > @@ -5591,6 +5663,7 @@ namespace gbe > > this->insert<MulAddInstructionPattern>(); > > this->insert<SelectModifierInstructionPattern>(); > > this->insert<SampleInstructionPattern>(); > > + this->insert<VmeInstructionPattern>(); > > this->insert<GetImageInfoInstructionPattern>(); > > this->insert<ReadARFInstructionPattern>(); > > this->insert<RegionInstructionPattern>(); > > diff --git a/backend/src/backend/gen_insn_selection.hpp > > b/backend/src/backend/gen_insn_selection.hpp > > index f51c905..578db41 100644 > > --- a/backend/src/backend/gen_insn_selection.hpp > > +++ b/backend/src/backend/gen_insn_selection.hpp > > @@ -90,8 +90,8 @@ namespace gbe > > const GenRegister &dst(uint32_t dstID) const { return regs[dstID]; } > > /*! Damn C++ */ > > const GenRegister &src(uint32_t srcID) const { return > regs[dstNum+srcID]; } > > - /*! 
No more than 9 sources (used by typed writes on simd8 mode.) */ > > - enum { MAX_SRC_NUM = 9 }; > > + /*! No more than 40 sources (40 sources are used by vme for > > + payload > > passing and setting) */ > > + enum { MAX_SRC_NUM = 40 }; > > /*! No more than 16 destinations (15 used by I64DIV/I64REM) */ > > enum { MAX_DST_NUM = 16 }; > > /*! State of the instruction (extra fields neeed for the > > encoding) */ @@ -129,6 +129,12 @@ namespace gbe > > bool isLD; // is this a ld message? > > bool isUniform; > > }; > > + struct { > > + uint16_t vme_bti:8; > > + uint16_t msg_type:2; > > + uint16_t vme_search_path_lut:3; > > + uint16_t lut_sub:2; > > + }; > > uint32_t barrierType; > > bool longjmp; > > uint32_t indirect_offset; > > @@ -138,7 +144,7 @@ namespace gbe > > /*! Number of destinations */ > > uint8_t dstNum:5; > > /*! Number of sources */ > > - uint8_t srcNum:4; > > + uint8_t srcNum:6; > > /*! To store various indices */ > > uint32_t index; > > /*! For BRC/IF to store the UIP */ @@ -152,6 +158,7 @@ namespace > > gbe > > switch (opcode) { > > case SEL_OP_DWORD_GATHER: return extra.function; > > case SEL_OP_SAMPLE: return extra.rdbti; > > + case SEL_OP_VME: return extra.vme_bti; > > case SEL_OP_TYPED_WRITE: return extra.bti; > > default: > > GBE_ASSERT(0); > > @@ -164,6 +171,7 @@ namespace gbe > > switch (opcode) { > > case SEL_OP_DWORD_GATHER: extra.function = bti; return; > > case SEL_OP_SAMPLE: extra.rdbti = bti; return; > > + case SEL_OP_VME: extra.vme_bti = bti; return; > > case SEL_OP_TYPED_WRITE: extra.bti = bti; return; > > default: > > GBE_ASSERT(0); > > diff --git a/backend/src/backend/gen_insn_selection.hxx > > b/backend/src/backend/gen_insn_selection.hxx > > index 479398b..4d3e921 100644 > > --- a/backend/src/backend/gen_insn_selection.hxx > > +++ b/backend/src/backend/gen_insn_selection.hxx > > @@ -65,6 +65,7 @@ DECL_SELECTION_IR(UNPACK_BYTE, > > UnpackByteInstruction) > > DECL_SELECTION_IR(PACK_LONG, PackLongInstruction) > > 
DECL_SELECTION_IR(UNPACK_LONG, UnpackLongInstruction) > > DECL_SELECTION_IR(SAMPLE, SampleInstruction) > > +DECL_SELECTION_IR(VME, VmeInstruction) > > DECL_SELECTION_IR(TYPED_WRITE, TypedWriteInstruction) > > DECL_SELECTION_IR(SPILL_REG, SpillRegInstruction) > > DECL_SELECTION_IR(UNSPILL_REG, UnSpillRegInstruction) diff --git > > a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > > index f93c528..7bf787e 100644 > > --- a/backend/src/ir/instruction.cpp > > +++ b/backend/src/ir/instruction.cpp > > @@ -595,6 +595,58 @@ namespace ir { > > static const uint32_t dstNum = 4; > > }; > > > > + class ALIGNED_INSTRUCTION VmeInstruction : > > + public BasePolicy, > > + public TupleSrcPolicy<VmeInstruction>, > > + public TupleDstPolicy<VmeInstruction> > > + { > > + public: > > + VmeInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, > > + uint32_t dstNum, uint32_t srcNum, int msg_type, > > + int vme_search_path_lut, int lut_sub) { > > + this->opcode = OP_VME; > > + this->dst = dstTuple; > > + this->src = srcTuple; > > + this->dstNum = dstNum; > > + this->srcNum = srcNum; > > + this->imageIdx = imageIdx; > > + this->msg_type = msg_type; > > + this->vme_search_path_lut = vme_search_path_lut; > > + this->lut_sub = lut_sub; > > + } > > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > > + INLINE void out(std::ostream &out, const Function &fn) const { > > + this->outOpcode(out); > > + out << " src_surface id " << (int)this->getImageIndex() > > + << " ref_surface id " << (int)this->getImageIndex() + 1; > > + for(uint32_t i = 0; i < dstNum; i++){ > > + out<< " %" << this->getDst(fn, i); > > + } > > + for(uint32_t i = 0; i < srcNum; i++){ > > + out<< " %" << this->getSrc(fn, i); > > + } > > + out > > + << " msg_type " << (int)this->getMsgType() > > + << " vme_search_path_lut " << (int)this->vme_search_path_lut > > + << " lut_sub " << (int)this->lut_sub; > > + } > > + Tuple src; > > + Tuple dst; > > + > > + INLINE uint8_t 
getImageIndex(void) const { return this->imageIdx; } > > + INLINE uint8_t getMsgType(void) const { return this->msg_type; > > + } > > + > > + INLINE Type getSrcType(void) const { return TYPE_U32; } > > + INLINE Type getDstType(void) const { return TYPE_U32; } > > + uint8_t imageIdx; > > + uint8_t msg_type; > > + uint8_t vme_search_path_lut; > > + uint8_t lut_sub; > > + uint32_t srcNum; > > + uint32_t dstNum; > > + }; > > + > > + > > class ALIGNED_INSTRUCTION TypedWriteInstruction : // TODO > > public BasePolicy, > > public TupleSrcPolicy<TypedWriteInstruction>, > > @@ -1111,6 +1163,8 @@ namespace ir { > > // TODO > > INLINE bool SampleInstruction::wellFormed(const Function &fn, > > std::string > > &why) const > > { return true; } > > + INLINE bool VmeInstruction::wellFormed(const Function &fn, > > + std::string > > &why) const > > + { return true; } > > INLINE bool TypedWriteInstruction::wellFormed(const Function &fn, > > std::string &why) const > > { return true; } > > INLINE bool GetImageInfoInstruction::wellFormed(const Function > > &fn, std::string &why) const @@ -1502,6 +1556,10 @@ > > START_INTROSPECTION(LabelInstruction) > > #include "ir/instruction.hxx" > > END_INTROSPECTION(LabelInstruction) > > > > +START_INTROSPECTION(VmeInstruction) > > +#include "ir/instruction.hxx" > > +END_INTROSPECTION(VmeInstruction) > > + > > #undef END_INTROSPECTION > > #undef START_INTROSPECTION > > #undef DECL_INSN > > @@ -1694,6 +1752,10 @@ DECL_MEM_FN(SampleInstruction, Type, > > getDstType(void), getDstType()) DECL_MEM_FN(SampleInstruction, > > uint8_t, getSamplerIndex(void), > > getSamplerIndex()) > > DECL_MEM_FN(SampleInstruction, uint8_t, getSamplerOffset(void), > > getSamplerOffset()) > > DECL_MEM_FN(SampleInstruction, uint8_t, getImageIndex(void), > > getImageIndex()) > > +DECL_MEM_FN(VmeInstruction, Type, getSrcType(void), getSrcType()) > > +DECL_MEM_FN(VmeInstruction, Type, getDstType(void), getDstType()) > > +DECL_MEM_FN(VmeInstruction, uint8_t, getImageIndex(void), 
> > getImageIndex()) > > +DECL_MEM_FN(VmeInstruction, uint8_t, getMsgType(void), > getMsgType()) > > DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), > > getSrcType()) DECL_MEM_FN(TypedWriteInstruction, Type, > > getCoordType(void), > > getCoordType()) > > DECL_MEM_FN(TypedWriteInstruction, uint8_t, getImageIndex(void), > > getImageIndex()) > > @@ -1932,6 +1994,10 @@ DECL_MEM_FN(GetImageInfoInstruction, > uint8_t, > > getImageIndex(void), getImageIndex > > return internal::SampleInstruction(imageIndex, dst, src, srcNum, > > dstIsFloat, srcIsFloat, sampler, samplerOffset).convert(); > > } > > > > + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t > > + dstNum, > > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub) { > > + return internal::VmeInstruction(imageIndex, dst, src, dstNum, > > + srcNum, > > msg_type, vme_search_path_lut, lut_sub).convert(); > > + } > > + > > Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t > > srcNum, Type srcType, Type coordType) { > > return internal::TypedWriteInstruction(imageIndex, src, srcNum, > > srcType, coordType).convert(); > > } > > diff --git a/backend/src/ir/instruction.hpp > > b/backend/src/ir/instruction.hpp index 3f3c655..c8da416 100644 > > --- a/backend/src/ir/instruction.hpp > > +++ b/backend/src/ir/instruction.hpp > > @@ -191,8 +191,8 @@ namespace ir { > > template <typename T> INLINE bool isMemberOf(void) const { > > return T::isClassOf(*this); > > } > > - /*! max_src for store instruction (vec16 + addr) */ > > - static const uint32_t MAX_SRC_NUM = 32; > > + /*! max_src used by vme for payload passing and setting */ > > + static const uint32_t MAX_SRC_NUM = 40; > > static const uint32_t MAX_DST_NUM = 32; > > protected: > > BasicBlock *parent; //!< The basic block containing the > > instruction > > @@ -399,6 +399,17 @@ namespace ir { > > static bool isClassOf(const Instruction &insn); > > }; > > > > + /*! 
Video motion estimation */ > > + class VmeInstruction : public Instruction { > > + public: > > + uint8_t getImageIndex() const; > > + uint8_t getMsgType() const; > > + Type getSrcType(void) const; > > + Type getDstType(void) const; > > + /*! Return true if the given instruction is an instance of this class > > */ > > + static bool isClassOf(const Instruction &insn); }; > > + > > typedef union _ImageInfoKey{ > > _ImageInfoKey(uint8_t i, uint8_t t) : index(i), type(t) {}; > > _ImageInfoKey(int key) : data(key) {}; @@ -756,6 +767,8 @@ > > namespace ir { > > Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t > > srcNum, Type srcType, Type coordType); > > /*! sample textures */ > > Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, > > uint8_t srcNum, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, > > uint8_t samplerOffset); > > + /*! video motion estimation */ > > + Instruction VME(uint8_t imageIndex, Tuple dst, Tuple src, uint32_t > > + dstNum, > > uint32_t srcNum, int msg_type, int vme_search_path_lut, int lut_sub); > > /*! get image information , such as width/height/depth/... */ > > Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t > > imageIndex, Register infoReg); > > /*! 
label labelIndex */ > > diff --git a/backend/src/ir/instruction.hxx > > b/backend/src/ir/instruction.hxx index 81548c9..27d59a9 100644 > > --- a/backend/src/ir/instruction.hxx > > +++ b/backend/src/ir/instruction.hxx > > @@ -85,6 +85,7 @@ DECL_INSN(SYNC, SyncInstruction) DECL_INSN(LABEL, > > LabelInstruction) DECL_INSN(READ_ARF, ReadARFInstruction) > > DECL_INSN(REGION, RegionInstruction) > > +DECL_INSN(VME, VmeInstruction) > > DECL_INSN(INDIRECT_MOV, IndirectMovInstruction) > > DECL_INSN(GET_IMAGE_INFO, GetImageInfoInstruction) > DECL_INSN(MUL_HI, > > BinaryInstruction) diff --git a/backend/src/libocl/include/ocl_misc.h > > b/backend/src/libocl/include/ocl_misc.h > > index 359025b..7d4abab 100644 > > --- a/backend/src/libocl/include/ocl_misc.h > > +++ b/backend/src/libocl/include/ocl_misc.h > > @@ -136,5 +136,20 @@ struct time_stamp { > > uint event; > > }; > > > > +uint __gen_ocl_region(ushort offset, uint data); > > + > > struct time_stamp __gen_ocl_get_timestamp(void); > > + > > +uint8 __gen_ocl_vme(image2d_t, image2d_t, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + uint, uint, uint, uint, > > + int, int, int); > > #endif > > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > > b/backend/src/llvm/llvm_gen_backend.cpp > > index 7299d53..19927ba 100644 > > --- a/backend/src/llvm/llvm_gen_backend.cpp > > +++ b/backend/src/llvm/llvm_gen_backend.cpp > > @@ -3541,6 +3541,7 @@ namespace gbe > > case GEN_OCL_REGION: > > case GEN_OCL_SIMD_ID: > > case GEN_OCL_SIMD_SHUFFLE: > > + case GEN_OCL_VME: > > this->newRegister(&I); > > break; > > case GEN_OCL_PRINTF: > > @@ -3839,6 +3840,52 @@ namespace gbe > > ctx.READ_ARF(ir::TYPE_U32, dst, ir::ARF_TM); > > break; > > } > > + case GEN_OCL_VME: > > + { > > + > > + const uint8_t imageID = 
getImageID(I); > > + > > + AI++; > > + AI++; > > + > > + uint32_t src_length = 40; > > + > > + vector<ir::Register> dstTupleData, srcTupleData; > > + for (uint32_t i = 0; i < src_length; i++, AI++){ > > + srcTupleData.push_back(this->getRegister(*AI)); > > + } > > + > > + const ir::Tuple srcTuple = > > + ctx.arrayTuple(&srcTupleData[0], src_length); > > + > > + Constant *msg_type_cpv = dyn_cast<Constant>(*AI); > > + assert(msg_type_cpv); > > + const ir::Immediate &msg_type_x = > > processConstantImm(msg_type_cpv); > > + int msg_type = msg_type_x.getIntegerValue(); > > + uint32_t dst_length; > > + //msy_type =1 indicate inter search only of gen vme shared > function > > + GBE_ASSERT(msg_type == 1); > > + if(msg_type == 1) > > + dst_length = 6; > > + for (uint32_t elemID = 0; elemID < dst_length; ++elemID) { > > + const ir::Register reg = this->getRegister(&I, elemID); > > + dstTupleData.push_back(reg); > > + } > > + const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], > dst_length); > > + ++AI; > > + Constant *vme_search_path_lut_cpv = dyn_cast<Constant>(*AI); > > + assert(vme_search_path_lut_cpv); > > + const ir::Immediate &vme_search_path_lut_x = > > processConstantImm(vme_search_path_lut_cpv); > > + ++AI; > > + Constant *lut_sub_cpv = dyn_cast<Constant>(*AI); > > + assert(lut_sub_cpv); > > + const ir::Immediate &lut_sub_x = > > + processConstantImm(lut_sub_cpv); > > + > > + ctx.VME(imageID, dstTuple, srcTuple, dst_length, src_length, > > + msg_type, vme_search_path_lut_x.getIntegerValue(), > > + lut_sub_x.getIntegerValue()); > > + > > + break; > > + } > > case GEN_OCL_REGION: > > { > > const ir::Register dst = this->getRegister(&I); diff > > --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > > b/backend/src/llvm/llvm_gen_ocl_function.hxx > > index cabb225..3fbf847 100644 > > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > > @@ -168,5 +168,7 @@ DECL_LLVM_GEN_FUNCTION(SIMD_SHUFFLE, > > 
intel_sub_group_shuffle) > > DECL_LLVM_GEN_FUNCTION(READ_TM, __gen_ocl_read_tm) > > DECL_LLVM_GEN_FUNCTION(REGION, __gen_ocl_region) > > > > +DECL_LLVM_GEN_FUNCTION(VME, __gen_ocl_vme) > > + > > // printf function > > DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf) diff --git > > a/backend/src/llvm/llvm_scalarize.cpp > > b/backend/src/llvm/llvm_scalarize.cpp > > index 7ee5259..dc1d8ab 100644 > > --- a/backend/src/llvm/llvm_scalarize.cpp > > +++ b/backend/src/llvm/llvm_scalarize.cpp > > @@ -671,6 +671,10 @@ namespace gbe { > > *CI = InsertToVector(call, *CI); > > break; > > } > > + case GEN_OCL_VME: > > + setAppendPoint(call); > > + extractFromVector(call); > > + break; > > } > > } > > } > > -- > > 1.9.1 > > > > _______________________________________________ > > Beignet mailing list > > [email protected] > > http://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
