The patchset LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Xiuli Pan > Sent: Sunday, June 12, 2016 5:33 > To: [email protected] > Cc: Pan, Xiuli <[email protected]> > Subject: [Beignet] [PATCH V3 11/12] Backend: Add > intel_sub_group_block_read/write form image > > From: Pan Xiuli <[email protected]> > > Using media block read/write to read data in block. In simd16 mode we > need some reg relocation for later use. > GEN7 has some different data port. > V2: Refine block read simd16 with tmp reg to avoid MOVs > V3: Fix build bug with clang. > > Signed-off-by: Pan Xiuli <[email protected]> > --- > backend/src/backend/gen/gen_mesa_disasm.c | 27 +++- > backend/src/backend/gen7_encoder.cpp | 48 +++++++ > backend/src/backend/gen7_encoder.hpp | 4 + > backend/src/backend/gen7_instruction.hpp | 16 +++ > backend/src/backend/gen8_instruction.hpp | 16 +++ > backend/src/backend/gen_context.cpp | 155 > +++++++++++++++++++++ > backend/src/backend/gen_context.hpp | 2 + > backend/src/backend/gen_defs.hpp | 16 +++ > backend/src/backend/gen_encoder.cpp | 47 +++++++ > backend/src/backend/gen_encoder.hpp | 4 + > .../src/backend/gen_insn_gen7_schedule_info.hxx | 2 + > backend/src/backend/gen_insn_selection.cpp | 115 ++++++++++++++- > backend/src/backend/gen_insn_selection.hpp | 4 + > backend/src/backend/gen_insn_selection.hxx | 2 + > backend/src/ir/instruction.cpp | 112 ++++++++++++++- > backend/src/ir/instruction.hpp | 22 +++ > backend/src/ir/instruction.hxx | 2 + > backend/src/ir/liveness.cpp | 3 +- > backend/src/libocl/src/ocl_substore.ll | 33 +++++ > backend/src/libocl/tmpl/ocl_simd.tmpl.cl | 21 +++ > backend/src/libocl/tmpl/ocl_simd.tmpl.h | 10 ++ > backend/src/llvm/llvm_gen_backend.cpp | 62 ++++++++- > backend/src/llvm/llvm_gen_ocl_function.hxx | 8 ++ > backend/src/llvm/llvm_scalarize.cpp | 14 ++ > 24 files changed, 732 insertions(+), 13 deletions(-) > > diff --git a/backend/src/backend/gen/gen_mesa_disasm.c > 
b/backend/src/backend/gen/gen_mesa_disasm.c > index 9200c26..9955dfc 100644 > --- a/backend/src/backend/gen/gen_mesa_disasm.c > +++ b/backend/src/backend/gen/gen_mesa_disasm.c > @@ -1476,6 +1476,15 @@ int gen_disasm (FILE *file, const void *inst, > uint32_t deviceID, uint32_t compac > SAMPLER_MSG_TYPE(inst), > SAMPLER_SIMD_MODE(inst)); > break; > + case GEN_SFID_DATAPORT_RENDER: > + if(UNTYPED_RW_MSG_TYPE(inst) == 4 || > UNTYPED_RW_MSG_TYPE(inst) == 10) > + format(file, " (bti: %d, %s, %s)", > + UNTYPED_RW_BTI(inst), > + > data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)], > + > data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]); > + else > + format(file, " not implemented"); > + break; > case GEN_SFID_DATAPORT_DATA: > if(UNTYPED_RW_CATEGORY(inst) == 0) { > if(UNTYPED_RW_MSG_TYPE(inst) == 5 || > UNTYPED_RW_MSG_TYPE(inst) == 13) > @@ -1510,12 +1519,18 @@ int gen_disasm (FILE *file, const void *inst, > uint32_t deviceID, uint32_t compac > } > break; > case GEN_SFID_DATAPORT1_DATA: > - format(file, " (bti: %d, rgba: %d, %s, %s, %s)", > - UNTYPED_RW_BTI(inst), > - UNTYPED_RW_RGBA(inst), > - > data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)], > - data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)], > - > data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]); > + if(UNTYPED_RW_MSG_TYPE(inst) == 4 || > UNTYPED_RW_MSG_TYPE(inst) == 10) > + format(file, " (bti: %d, %s, %s)", > + UNTYPED_RW_BTI(inst), > + > data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)], > + > data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]); > + else > + format(file, " (bti: %d, rgba: %d, %s, %s, %s)", > + UNTYPED_RW_BTI(inst), > + UNTYPED_RW_RGBA(inst), > + > data_port_data_cache_simd_mode[UNTYPED_RW_SIMD_MODE(inst)], > + > data_port_data_cache_category[UNTYPED_RW_CATEGORY(inst)], > + > data_port1_data_cache_msg_type[UNTYPED_RW_MSG_TYPE(inst)]); > break; > case GEN_SFID_DATAPORT_CONSTANT: > format(file, " (bti: %d, %s)", > diff --git 
a/backend/src/backend/gen7_encoder.cpp > b/backend/src/backend/gen7_encoder.cpp > index fc358be..abb8b77 100644 > --- a/backend/src/backend/gen7_encoder.cpp > +++ b/backend/src/backend/gen7_encoder.cpp > @@ -239,5 +239,53 @@ namespace gbe > } > } > > + static void setMBlockRWGEN7(GenEncoder *p, > + GenNativeInstruction *insn, > + uint32_t bti, > + uint32_t msg_type, > + uint32_t msg_length, > + uint32_t response_length) > + { > + const GenMessageTarget sfid = GEN_SFID_DATAPORT_RENDER; > + p->setMessageDescriptor(insn, sfid, msg_length, response_length); > + insn->bits3.gen7_mblock_rw.msg_type = msg_type; > + insn->bits3.gen7_mblock_rw.bti = bti; > + insn->bits3.gen7_mblock_rw.header_present = 1; > + } > + > + > + void Gen7Encoder::MBREAD(GenRegister dst, GenRegister header, > uint32_t bti, uint32_t size) { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + const uint32_t msg_length = 1; > + const uint32_t response_length = size; // Size of registers > + this->setHeader(insn); > + this->setDst(insn, GenRegister::ud8grf(dst.nr, 0)); > + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); > + this->setSrc1(insn, GenRegister::immud(0)); > + setMBlockRWGEN7(this, > + insn, > + bti, > + GEN75_P1_MEDIA_BREAD, > + msg_length, > + response_length); > + } > + > + void Gen7Encoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t > size) { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + const uint32_t msg_length = 1 + size; > + const uint32_t response_length = 0; // Size of registers > + this->setHeader(insn); > + this->setDst(insn, GenRegister::retype(GenRegister::null(), > GEN_TYPE_UW)); > + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); > + this->setSrc1(insn, GenRegister::immud(0)); > + setMBlockRWGEN7(this, > + insn, > + bti, > + GEN75_P1_MEDIA_TYPED_BWRITE, > + msg_length, > + response_length); > + } > + > + > #undef NO_SWIZZLE > } > diff --git a/backend/src/backend/gen7_encoder.hpp > b/backend/src/backend/gen7_encoder.hpp 
> index 1276c67..edb711d 100644 > --- a/backend/src/backend/gen7_encoder.hpp > +++ b/backend/src/backend/gen7_encoder.hpp > @@ -42,6 +42,10 @@ namespace gbe > virtual void setSrc1(GenNativeInstruction *insn, GenRegister reg); > virtual void alu3(uint32_t opcode, GenRegister dst, > GenRegister src0, GenRegister src1, GenRegister src2); > + /*! MBlock read */ > + virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, > uint32_t elemSize); > + /*! MBlock write */ > + virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t > elemSize); > }; > } > #endif /* __GBE_GEN7_ENCODER_HPP__ */ > diff --git a/backend/src/backend/gen7_instruction.hpp > b/backend/src/backend/gen7_instruction.hpp > index 258dd24..7d7eada 100644 > --- a/backend/src/backend/gen7_instruction.hpp > +++ b/backend/src/backend/gen7_instruction.hpp > @@ -531,6 +531,22 @@ union Gen7NativeInstruction > uint32_t uip:16; > } gen7_branch; > > + /*! Data port Media block read / write */ > + struct { > + uint32_t bti:8; > + uint32_t ver_line_stride_offset:1; > + uint32_t ver_line_stride:1; > + uint32_t ver_line_stride_override:1; > + uint32_t ignored:3; > + uint32_t msg_type:4; > + uint32_t category:1; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad2:2; > + uint32_t end_of_thread:1; > + } gen7_mblock_rw; > + > int d; > uint32_t ud; > float f; > diff --git a/backend/src/backend/gen8_instruction.hpp > b/backend/src/backend/gen8_instruction.hpp > index ada9ffc..549948a 100644 > --- a/backend/src/backend/gen8_instruction.hpp > +++ b/backend/src/backend/gen8_instruction.hpp > @@ -608,6 +608,22 @@ union Gen8NativeInstruction > uint32_t jip:32; > } gen8_branch; > > + /*! 
Data port Media block read / write */ > + struct { > + uint32_t bti:8; > + uint32_t ver_line_stride_offset:1; > + uint32_t ver_line_stride:1; > + uint32_t ver_line_stride_override:1; > + uint32_t ignored:3; > + uint32_t msg_type:4; > + uint32_t category:1; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad2:2; > + uint32_t end_of_thread:1; > + } gen7_mblock_rw; > + > int d; > uint32_t ud; > float f; > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 90b8b45..98a94ba 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -3538,6 +3538,161 @@ namespace gbe > p->OBWRITE(header, insn.getbti(), insn.extra.elem); > } > > + void GenContext::emitMBReadInstruction(const SelectionInstruction > &insn) { > + const GenRegister dst = ra->genReg(insn.dst(0)); > + const GenRegister coordx = GenRegister::toUniform(ra- > >genReg(insn.src(0)),GEN_TYPE_D); > + const GenRegister coordy = GenRegister::toUniform(ra- > >genReg(insn.src(1)),GEN_TYPE_D); > + GenRegister header, offsetx, offsety, blocksizereg; > + if (simdWidth == 8) > + header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); > + else > + header = GenRegister::retype(GenRegister::Qn(ra- > >genReg(insn.src(2)),1), GEN_TYPE_UD); > + > + offsetx = GenRegister::offset(header, 0, 0*4); > + offsety = GenRegister::offset(header, 0, 1*4); > + blocksizereg = GenRegister::offset(header, 0, 2*4); > + size_t vec_size = insn.extra.elem; > + uint32_t blocksize = 0x1F | (vec_size-1) << 16; > + > + if (simdWidth == 8) > + { > + p->push(); > + // Copy r0 into the header first > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->MOV(header, GenRegister::ud8grf(0,0)); > + > + // Update the header with the coord > + p->curr.execWidth = 1; > + p->MOV(offsetx, coordx); > + p->MOV(offsety, coordy); > + // Update block width and height > + 
p->MOV(blocksizereg, GenRegister::immud(blocksize)); > + // Now read the data > + p->curr.execWidth = 8; > + p->MBREAD(dst, header, insn.getbti(), vec_size); > + p->pop(); > + > + } > + else > + { > + const GenRegister tmp = ra->genReg(insn.dst(vec_size)); > + p->push(); > + // Copy r0 into the header first > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->MOV(header, GenRegister::ud8grf(0,0)); > + > + // First half > + // Update the header with the coord > + p->curr.execWidth = 1; > + p->MOV(offsetx, coordx); > + p->MOV(offsety, coordy); > + // Update block width and height > + p->MOV(blocksizereg, GenRegister::immud(blocksize)); > + // Now read the data > + p->curr.execWidth = 8; > + p->MBREAD(tmp, header, insn.getbti(), vec_size); > + > + // Second half > + // Update the header with the coord > + p->curr.execWidth = 1; > + p->ADD(offsetx, offsetx, GenRegister::immud(32)); > + > + const GenRegister tmp2 = GenRegister::offset(tmp, vec_size); > + // Now read the data > + p->curr.execWidth = 8; > + p->MBREAD(tmp2, header, insn.getbti(), vec_size); > + > + // Move the reg to fit vector rule. 
> + for (int i = 0; i < vec_size; i++) { > + p->MOV(GenRegister::offset(dst, i * 2), GenRegister::offset(tmp, > i)); > + p->MOV(GenRegister::offset(dst, i * 2 + 1), > + GenRegister::offset(tmp2, i)); > + } > + p->pop(); > + } > + } > + > + void GenContext::emitMBWriteInstruction(const SelectionInstruction > &insn) { > + const GenRegister coordx = GenRegister::toUniform(ra- > >genReg(insn.src(0)), GEN_TYPE_D); > + const GenRegister coordy = GenRegister::toUniform(ra- > >genReg(insn.src(1)), GEN_TYPE_D); > + GenRegister header, offsetx, offsety, blocksizereg; > + size_t vec_size = insn.extra.elem; > + uint32_t blocksize = 0x1F | (vec_size-1) << 16; > + if (simdWidth == 8) > + header = GenRegister::retype(ra->genReg(insn.dst(0)), GEN_TYPE_UD); > + else > + header = GenRegister::retype(GenRegister::Qn(ra- > >genReg(insn.dst(0)),1), GEN_TYPE_UD); > + > + offsetx = GenRegister::offset(header, 0, 0*4); > + offsety = GenRegister::offset(header, 0, 1*4); > + blocksizereg = GenRegister::offset(header, 0, 2*4); > + > + if (simdWidth == 8) > + { > + p->push(); > + // Copy r0 into the header first > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->MOV(header, GenRegister::ud8grf(0,0)); > + > + // Update the header with the coord > + p->curr.execWidth = 1; > + p->MOV(offsetx, coordx); > + p->MOV(offsety, coordy); > + // Update block width and height > + p->MOV(blocksizereg, GenRegister::immud(blocksize)); > + p->curr.execWidth = 8; > + // Mov what we need into msgs > + for(uint32_t i = 0; i < vec_size; i++) > + p->MOV(ra->genReg(insn.dst(1 + i)), ra->genReg(insn.src(2 + i))); > + // Now read the data > + p->MBWRITE(header, insn.getbti(), vec_size); > + p->pop(); > + > + } > + else > + { > + p->push(); > + // Copy r0 into the header first > + p->curr.execWidth = 8; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + p->MOV(header, GenRegister::ud8grf(0,0)); > + > + // First half > + // Update the header with 
the coord > + p->curr.execWidth = 1; > + p->MOV(offsetx, coordx); > + p->MOV(offsety, coordy); > + // Update block width and height > + p->MOV(blocksizereg, GenRegister::immud(blocksize)); > + // Now read the data > + p->curr.execWidth = 8; > + // Mov what we need into msgs > + for(uint32_t i = 0; i < vec_size; i++) > + p->MOV(GenRegister::offset(header, 1 + i), ra->genReg(insn.src(2 + > i))); > + p->MBWRITE(header, insn.getbti(), vec_size); > + > + // Second half > + // Update the header with the coord > + p->curr.execWidth = 1; > + p->ADD(offsetx, offsetx, GenRegister::immud(32)); > + > + p->curr.execWidth = 8; > + // Mov what we need into msgs > + for(uint32_t i = 0; i < vec_size; i++) > + p->MOV(GenRegister::offset(header, 1 + i), GenRegister::Qn(ra- > >genReg(insn.src(2 + i)), 1)); > + // Now write the data > + p->MBWRITE(header, insn.getbti(), vec_size); > + > + p->pop(); > + } > + } > > BVAR(OCL_OUTPUT_REG_ALLOC, false); > BVAR(OCL_OUTPUT_ASM, false); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index a634338..fb3d4fe 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -189,6 +189,8 @@ namespace gbe > void afterMessage(const SelectionInstruction &insn, GenRegister bti, > GenRegister flagTemp, GenRegister btiTmp, unsigned jip0); > void emitOBReadInstruction(const SelectionInstruction &insn); > void emitOBWriteInstruction(const SelectionInstruction &insn); > + void emitMBReadInstruction(const SelectionInstruction &insn); > + void emitMBWriteInstruction(const SelectionInstruction &insn); > > /*! Implements base class */ > virtual Kernel *allocateKernel(void); > diff --git a/backend/src/backend/gen_defs.hpp > b/backend/src/backend/gen_defs.hpp > index 09cb2ba..66ae5b5 100644 > --- a/backend/src/backend/gen_defs.hpp > +++ b/backend/src/backend/gen_defs.hpp > @@ -784,6 +784,22 @@ union GenNativeInstruction > uint32_t jip:32; > } gen8_branch; > > + /*! 
Data port Media block read / write */ > + struct { > + uint32_t bti:8; > + uint32_t ver_line_stride_offset:1; > + uint32_t ver_line_stride:1; > + uint32_t ver_line_stride_override:1; > + uint32_t ignored:3; > + uint32_t msg_type:4; > + uint32_t category:1; > + uint32_t header_present:1; > + uint32_t response_length:5; > + uint32_t msg_length:4; > + uint32_t pad2:2; > + uint32_t end_of_thread:1; > + } gen7_mblock_rw; > + > int d; > uint32_t ud; > float f; > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index e745b9c..eb9fbeb 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -276,6 +276,21 @@ namespace gbe > insn->bits3.gen7_oblock_rw.header_present = 1; > } > > + static void setMBlockRW(GenEncoder *p, > + GenNativeInstruction *insn, > + uint32_t bti, > + uint32_t msg_type, > + uint32_t msg_length, > + uint32_t response_length) > + { > + const GenMessageTarget sfid = GEN_SFID_DATAPORT1_DATA; > + p->setMessageDescriptor(insn, sfid, msg_length, response_length); > + insn->bits3.gen7_mblock_rw.msg_type = msg_type; > + insn->bits3.gen7_mblock_rw.bti = bti; > + insn->bits3.gen7_mblock_rw.header_present = 1; > + } > + > + > static void setDWordScatterMessgae(GenEncoder *p, > GenNativeInstruction *insn, > uint32_t bti, > @@ -1277,6 +1292,38 @@ namespace gbe > response_length); > } > > + void GenEncoder::MBREAD(GenRegister dst, GenRegister header, > uint32_t bti, uint32_t size) { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + const uint32_t msg_length = 1; > + const uint32_t response_length = size; // Size of registers > + this->setHeader(insn); > + this->setDst(insn, GenRegister::ud8grf(dst.nr, 0)); > + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); > + this->setSrc1(insn, GenRegister::immud(0)); > + setMBlockRW(this, > + insn, > + bti, > + GEN75_P1_MEDIA_BREAD, > + msg_length, > + response_length); > + } > + > + void GenEncoder::MBWRITE(GenRegister 
header, uint32_t bti, uint32_t > size) { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > + const uint32_t msg_length = 1 + size; > + const uint32_t response_length = 0; // Size of registers > + this->setHeader(insn); > + this->setDst(insn, GenRegister::retype(GenRegister::null(), > GEN_TYPE_UW)); > + this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); > + this->setSrc1(insn, GenRegister::immud(0)); > + setMBlockRW(this, > + insn, > + bti, > + GEN75_P1_MEDIA_TYPED_BWRITE, > + msg_length, > + response_length); > + } > + > void GenEncoder::EOT(uint32_t msg) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > this->setDst(insn, GenRegister::retype(GenRegister::null(), > GEN_TYPE_UD)); > diff --git a/backend/src/backend/gen_encoder.hpp > b/backend/src/backend/gen_encoder.hpp > index a53c879..4979305 100644 > --- a/backend/src/backend/gen_encoder.hpp > +++ b/backend/src/backend/gen_encoder.hpp > @@ -271,6 +271,10 @@ namespace gbe > void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t > elemSize); > /*! OBlock write */ > void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize); > + /*! MBlock read */ > + virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, > uint32_t elemSize); > + /*! 
MBlock write */ > + virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t > elemSize); > > GBE_CLASS(GenEncoder); //!< Use custom allocators > virtual void alu3(uint32_t opcode, GenRegister dst, > diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > index d297726..c396626 100644 > --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx > +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx > @@ -52,3 +52,5 @@ DECL_GEN7_SCHEDULE(SubGroupOp, 80, 1, 1) > DECL_GEN7_SCHEDULE(Printf, 80, 1, 1) > DECL_GEN7_SCHEDULE(OBRead, 80, 1, 1) > DECL_GEN7_SCHEDULE(OBWrite, 80, 1, 1) > +DECL_GEN7_SCHEDULE(MBRead, 80, 1, 1) > +DECL_GEN7_SCHEDULE(MBWrite, 80, 1, 1) > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index e974e97..d3c5a40c 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -189,7 +189,8 @@ namespace gbe > this->opcode == SEL_OP_SAMPLE || > this->opcode == SEL_OP_VME || > this->opcode == SEL_OP_DWORD_GATHER || > - this->opcode == SEL_OP_OBREAD; > + this->opcode == SEL_OP_OBREAD || > + this->opcode == SEL_OP_MBREAD; > } > > bool SelectionInstruction::modAcc(void) const { > @@ -212,7 +213,8 @@ namespace gbe > this->opcode == SEL_OP_ATOMIC || > this->opcode == SEL_OP_BYTE_SCATTER || > this->opcode == SEL_OP_TYPED_WRITE || > - this->opcode == SEL_OP_OBWRITE; > + this->opcode == SEL_OP_OBWRITE || > + this->opcode == SEL_OP_MBWRITE; > } > > bool SelectionInstruction::isBranch(void) const { > @@ -703,6 +705,10 @@ namespace gbe > void OBREAD(GenRegister dst, GenRegister addr, GenRegister header, > uint32_t bti, uint32_t size); > /*! Oblock write */ > void OBWRITE(GenRegister addr, GenRegister value, GenRegister header, > uint32_t bti, uint32_t size); > + /*! 
Media block read */ > + void MBREAD(GenRegister* dsts, GenRegister coordx, GenRegister > coordy, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t > vec_size); > + /*! Media block write */ > + void MBWRITE(GenRegister coordx, GenRegister coordy, GenRegister* > values, GenRegister header, GenRegister* tmp, uint32_t bti, uint32_t > vec_size); > > /* common functions for both binary instruction and sel_cmp and > compare instruction. > It will handle the IMM or normal register assignment, and will try to > avoid > LOADI > @@ -2055,6 +2061,63 @@ namespace gbe > vector->isSrc = 1; > } > > + void Selection::Opaque::MBREAD(GenRegister* dsts, > + GenRegister coordx, > + GenRegister coordy, > + GenRegister header, > + GenRegister* tmp, > + uint32_t bti, > + uint32_t vec_size) { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, > vec_size * 2, 3); > + SelectionVector *vector = this->appendVector(); > + SelectionVector *vectortmp = this->appendVector(); > + for (uint32_t i = 0; i < vec_size; ++i) { > + insn->dst(i) = dsts[i]; > + insn->dst(i + vec_size) = tmp[i]; > + } > + insn->src(0) = coordx; > + insn->src(1) = coordy; > + insn->src(2) = header; > + insn->setbti(bti); > + insn->extra.elem = vec_size; // vector size > + > + vector->regNum = vec_size; > + vector->reg = &insn->dst(0); > + vector->offsetID = 0; > + vector->isSrc = 0; > + vectortmp->regNum = vec_size; > + vectortmp->reg = &insn->dst(vec_size); > + vectortmp->offsetID = 0; > + vectortmp->isSrc = 0; > + > + } > + > + void Selection::Opaque::MBWRITE(GenRegister coordx, > + GenRegister coordy, > + GenRegister* values, > + GenRegister header, > + GenRegister* tmp, > + uint32_t bti, > + uint32_t vec_size) { > + SelectionInstruction *insn = this->appendInsn(SEL_OP_MBWRITE, 1 + > vec_size, 2 + vec_size); > + SelectionVector *vector = this->appendVector(); > + insn->src(0) = coordx; > + insn->src(1) = coordy; > + for (uint32_t i = 0; i < vec_size; ++i) > + insn->src(2 + i) = values[i]; > + 
insn->dst(0) = header; > + for (uint32_t i = 0; i < vec_size; ++i) > + insn->dst(1 + i) = tmp[i]; > + insn->state = this->curr; > + insn->setbti(bti); > + insn->extra.elem = vec_size; // vector size > + > + // We need to put the header and the data together > + vector->regNum = 1 + vec_size; > + vector->reg = &insn->dst(0); > + vector->offsetID = 0; > + vector->isSrc = 0; > + } > > // Boiler plate to initialize the selection library at c++ pre-main > static SelectionLibrary *selLib = NULL; > @@ -6583,6 +6646,52 @@ extern bool OCL_DEBUGINFO; // first defined by > calling BVAR in program.cpp > } > }; > > + /*! Media Block Read pattern */ > + DECL_PATTERN(MediaBlockReadInstruction) > + { > + bool emitOne(Selection::Opaque &sel, const > ir::MediaBlockReadInstruction &insn, bool &markChildren) const > + { > + using namespace ir; > + uint32_t vec_size = insn.getVectorSize(); > + vector<GenRegister> valuesVec; > + vector<GenRegister> tmpVec; > + for (uint32_t i = 0; i < vec_size; ++i) { > + valuesVec.push_back(sel.selReg(insn.getSrc(i), TYPE_U32)); > + tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32)); > + } > + const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32); > + const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32); > + const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), > TYPE_U32); > + sel.MBREAD(values, coordx, coordy, header, tmp, insn.getImageIndex(), > insn.getVectorSize()); > + return true; > + } > + DECL_CTOR(MediaBlockReadInstruction, 1, 1); > + }; > + > + /*! 
Media Block Write pattern */ > + DECL_PATTERN(MediaBlockWriteInstruction) > + { > + bool emitOne(Selection::Opaque &sel, const > ir::MediaBlockWriteInstruction &insn, bool &markChildren) const > + { > + using namespace ir; > + uint32_t vec_size = insn.getVectorSize(); > + const GenRegister coordx = sel.selReg(insn.getSrc(0), TYPE_U32); > + const GenRegister coordy = sel.selReg(insn.getSrc(1), TYPE_U32); > + vector<GenRegister> valuesVec; > + vector<GenRegister> tmpVec; > + for(uint32_t i = 0; i < vec_size; i++) > + { > + valuesVec.push_back(sel.selReg(insn.getSrc(2 + i), TYPE_U32)); > + tmpVec.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32)); > + } > + const GenRegister header = sel.selReg(sel.reg(FAMILY_DWORD), > TYPE_U32); > + sel.MBWRITE(coordx, coordy, &valuesVec[0], header, &tmpVec[0], > insn.getImageIndex(), vec_size); > + return true; > + } > + DECL_CTOR(MediaBlockWriteInstruction, 1, 1); > + }; > + > + > /*! Sort patterns */ > INLINE bool cmp(const SelectionPattern *p0, const SelectionPattern *p1) { > if (p0->insnNum != p1->insnNum) > @@ -6624,6 +6733,8 @@ extern bool OCL_DEBUGINFO; // first defined by > calling BVAR in program.cpp > this->insert<NullaryInstructionPattern>(); > this->insert<WaitInstructionPattern>(); > this->insert<PrintfInstructionPattern>(); > + this->insert<MediaBlockReadInstructionPattern>(); > + this->insert<MediaBlockWriteInstructionPattern>(); > > // Sort all the patterns with the number of instructions they output > for (uint32_t op = 0; op < ir::OP_INVALID; ++op) > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index 51af686..b481de8 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -177,6 +177,8 @@ namespace gbe > switch (opcode) { > case SEL_OP_OBREAD: > case SEL_OP_OBWRITE: > + case SEL_OP_MBREAD: > + case SEL_OP_MBWRITE: > case SEL_OP_DWORD_GATHER: return extra.function; > case SEL_OP_SAMPLE: return 
extra.rdbti; > case SEL_OP_VME: return extra.vme_bti; > @@ -192,6 +194,8 @@ namespace gbe > switch (opcode) { > case SEL_OP_OBREAD: > case SEL_OP_OBWRITE: > + case SEL_OP_MBREAD: > + case SEL_OP_MBWRITE: > case SEL_OP_DWORD_GATHER: extra.function = bti; return; > case SEL_OP_SAMPLE: extra.rdbti = bti; return; > case SEL_OP_VME: extra.vme_bti = bti; return; > diff --git a/backend/src/backend/gen_insn_selection.hxx > b/backend/src/backend/gen_insn_selection.hxx > index 4a7caff..ccaf526 100644 > --- a/backend/src/backend/gen_insn_selection.hxx > +++ b/backend/src/backend/gen_insn_selection.hxx > @@ -98,3 +98,5 @@ DECL_SELECTION_IR(SUBGROUP_OP, > SubGroupOpInstruction) > DECL_SELECTION_IR(PRINTF, PrintfInstruction) > DECL_SELECTION_IR(OBREAD, OBReadInstruction) > DECL_SELECTION_IR(OBWRITE, OBWriteInstruction) > +DECL_SELECTION_IR(MBREAD, MBReadInstruction) > +DECL_SELECTION_IR(MBWRITE, MBWriteInstruction) > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index 88491a7..ed64580 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -1064,6 +1064,78 @@ namespace ir { > Register dst[1]; > }; > > + class ALIGNED_INSTRUCTION MediaBlockReadInstruction : > + public BasePolicy, > + public TupleSrcPolicy<MediaBlockReadInstruction>, > + public TupleDstPolicy<MediaBlockReadInstruction> > + { > + public: > + INLINE MediaBlockReadInstruction(uint8_t imageIdx, Tuple dst, uint8_t > vec_size, Tuple srcTuple, uint8_t srcNum) { > + this->opcode = OP_MBREAD; > + this->dst = dst; > + this->dstNum = vec_size; > + this->src = srcTuple; > + this->srcNum = srcNum; > + this->imageIdx = imageIdx; > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const { > + this->outOpcode(out); > + out << (int)this->getVectorSize(); > + out << " {"; > + for (uint32_t i = 0; i < dstNum; ++i) > + out << "%" << this->getDst(fn, i) << (i != (dstNum-1u) ? 
" " : ""); > + out << "}"; > + out << " 2D surface id " << (int)this->getImageIndex() > + << " byte coord x %" << this->getSrc(fn, 0) > + << " row coord y %" << this->getSrc(fn, 1); > + } > + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; } > + INLINE uint8_t getVectorSize(void) const { return this->dstNum; } > + > + Tuple src; > + Tuple dst; > + uint8_t imageIdx; > + uint8_t srcNum; > + uint8_t dstNum; > + }; > + > + class ALIGNED_INSTRUCTION MediaBlockWriteInstruction : > + public BasePolicy, > + public TupleSrcPolicy<MediaBlockWriteInstruction>, > + public NDstPolicy<MediaBlockWriteInstruction, 0> > + { > + public: > + > + INLINE MediaBlockWriteInstruction(uint8_t imageIdx, Tuple srcTuple, > uint8_t srcNum, uint8_t vec_size) { > + this->opcode = OP_MBWRITE; > + this->src = srcTuple; > + this->srcNum = srcNum; > + this->imageIdx = imageIdx; > + this->vec_size = vec_size; > + } > + INLINE bool wellFormed(const Function &fn, std::string &why) const; > + INLINE void out(std::ostream &out, const Function &fn) const { > + this->outOpcode(out); > + out << (int)this->getVectorSize() > + << " 2D surface id " << (int)this->getImageIndex() > + << " byte coord x %" << this->getSrc(fn, 0) > + << " row coord y %" << this->getSrc(fn, 1); > + out << " {"; > + for (uint32_t i = 0; i < vec_size; ++i) > + out << "%" << this->getSrc(fn, i + 2) << (i != (vec_size-1u) ? 
" " > : ""); > + out << "}"; > + } > + INLINE uint8_t getImageIndex(void) const { return this->imageIdx; } > + INLINE uint8_t getVectorSize(void) const { return this->vec_size; } > + > + Tuple src; > + Register dst[0]; > + uint8_t imageIdx; > + uint8_t srcNum; > + uint8_t vec_size; > + }; > + > #undef ALIGNED_INSTRUCTION > > ///////////////////////////////////////////////////////////////////////// > @@ -1591,6 +1663,22 @@ namespace ir { > return true; > } > > + INLINE bool MediaBlockReadInstruction::wellFormed(const Function &fn, > std::string &whyNot) const { > + if (this->srcNum != 2) { > + whyNot = "Wrong number of source."; > + return false; > + } > + return true; > + } > + > + INLINE bool MediaBlockWriteInstruction::wellFormed(const Function &fn, > std::string &whyNot) const { > + if (this->srcNum != 2 + this->vec_size) { > + whyNot = "Wrong number of source."; > + return false; > + } > + return true; > + } > + > #undef CHECK_TYPE > > ///////////////////////////////////////////////////////////////////////// > @@ -2058,6 +2146,14 @@ START_INTROSPECTION(PrintfInstruction) > #include "ir/instruction.hxx" > END_INTROSPECTION(PrintfInstruction) > > +START_INTROSPECTION(MediaBlockReadInstruction) > +#include "ir/instruction.hxx" > +END_INTROSPECTION(MediaBlockReadInstruction) > + > +START_INTROSPECTION(MediaBlockWriteInstruction) > +#include "ir/instruction.hxx" > +END_INTROSPECTION(MediaBlockWriteInstruction) > + > #undef END_INTROSPECTION > #undef START_INTROSPECTION > #undef DECL_INSN > @@ -2205,7 +2301,8 @@ END_FUNCTION(Instruction, Register) > opcode == OP_CALC_TIMESTAMP || > opcode == OP_STORE_PROFILING || > opcode == OP_WAIT || > - opcode == OP_PRINTF; > + opcode == OP_PRINTF || > + opcode == OP_MBWRITE; > } > > #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \ > @@ -2275,6 +2372,10 @@ DECL_MEM_FN(SubGroupInstruction, > WorkGroupOps, getWorkGroupOpcode(void), getWork > DECL_MEM_FN(PrintfInstruction, uint32_t, getNum(void), getNum()) > 
DECL_MEM_FN(PrintfInstruction, uint32_t, getBti(void), getBti()) > DECL_MEM_FN(PrintfInstruction, Type, getType(const Function& fn, > uint32_t ID), getType(fn, ID)) > +DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getImageIndex(void), > getImageIndex()) > +DECL_MEM_FN(MediaBlockReadInstruction, uint8_t, getVectorSize(void), > getVectorSize()) > +DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getImageIndex(void), > getImageIndex()) > +DECL_MEM_FN(MediaBlockWriteInstruction, uint8_t, getVectorSize(void), > getVectorSize()) > > #undef DECL_MEM_FN > > @@ -2582,6 +2683,15 @@ DECL_MEM_FN(MemInstruction, void, > setBtiReg(Register reg), setBtiReg(reg)) > return internal::PrintfInstruction(dst, srcTuple, typeTuple, srcNum, bti, > num).convert(); > } > > + Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, > Tuple coord, uint8_t srcNum) { > + return internal::MediaBlockReadInstruction(imageIndex, dst, vec_size, > coord, srcNum).convert(); > + } > + > + Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, > uint8_t vec_size) { > + return internal::MediaBlockWriteInstruction(imageIndex, srcTuple, > srcNum, vec_size).convert(); > + } > + > + > std::ostream &operator<< (std::ostream &out, const Instruction &insn) { > const Function &fn = insn.getFunction(); > const BasicBlock *bb = insn.getParent(); > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 4e7d5b7..b2b0b49 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -635,6 +635,24 @@ namespace ir { > static bool isClassOf(const Instruction &insn); > }; > > + /*! Media Block Read. */ > + class MediaBlockReadInstruction : public Instruction { > + public: > + /*! Return true if the given instruction is an instance of this class */ > + static bool isClassOf(const Instruction &insn); > + uint8_t getImageIndex() const; > + uint8_t getVectorSize() const; > + }; > + > + /*! Media Block Write. 
*/ > + class MediaBlockWriteInstruction : public Instruction { > + public: > + /*! Return true if the given instruction is an instance of this class */ > + static bool isClassOf(const Instruction &insn); > + uint8_t getImageIndex() const; > + uint8_t getVectorSize() const; > + }; > + > /*! Specialize the instruction. Also performs typechecking first based on > the > * opcode. Crashes if it fails > */ > @@ -867,6 +885,10 @@ namespace ir { > Instruction SUBGROUP(WorkGroupOps opcode, Register dst, Tuple > srcTuple, uint8_t srcNum, Type type); > /*! printf */ > Instruction PRINTF(Register dst, Tuple srcTuple, Tuple typeTuple, uint8_t > srcNum, uint8_t bti, uint16_t num); > + /*! media block read */ > + Instruction MBREAD(uint8_t imageIndex, Tuple dst, uint8_t vec_size, > Tuple coord, uint8_t srcNum); > + /*! media block write */ > + Instruction MBWRITE(uint8_t imageIndex, Tuple srcTuple, uint8_t srcNum, > uint8_t vec_size); > } /* namespace ir */ > } /* namespace gbe */ > > diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx > index 57e13eb..7d755ae 100644 > --- a/backend/src/ir/instruction.hxx > +++ b/backend/src/ir/instruction.hxx > @@ -114,3 +114,5 @@ DECL_INSN(WAIT, WaitInstruction) > DECL_INSN(WORKGROUP, WorkGroupInstruction) > DECL_INSN(SUBGROUP, SubGroupInstruction) > DECL_INSN(PRINTF, PrintfInstruction) > +DECL_INSN(MBREAD, MediaBlockReadInstruction) > +DECL_INSN(MBWRITE, MediaBlockWriteInstruction) > diff --git a/backend/src/ir/liveness.cpp b/backend/src/ir/liveness.cpp > index 3162d13..43d4c87 100644 > --- a/backend/src/ir/liveness.cpp > +++ b/backend/src/ir/liveness.cpp > @@ -118,7 +118,8 @@ namespace ir { > uniform = false; > > // do not change dst uniform for block read > - if (insn.getOpcode() == ir::OP_LOAD && > ir::cast<ir::LoadInstruction>(insn).isBlock()) > + if ((insn.getOpcode() == ir::OP_LOAD && > ir::cast<ir::LoadInstruction>(insn).isBlock()) || > + insn.getOpcode() == ir::OP_MBREAD) > uniform = false; > > for (uint32_t 
srcID = 0; srcID < srcNum; ++srcID) { > diff --git a/backend/src/libocl/src/ocl_substore.ll > b/backend/src/libocl/src/ocl_substore.ll > index 665cdfa..f6c2c70 100644 > --- a/backend/src/libocl/src/ocl_substore.ll > +++ b/backend/src/libocl/src/ocl_substore.ll > @@ -1,9 +1,42 @@ > target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128- > v192:256-v256:256-v512:512-v1024:1024" > target triple = "spir" > +%opencl.image2d_t = type opaque > > declare void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* > nocapture, i32) nounwind alwaysinline noduplicate > +declare void > @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t > addrspace(1)*, i32, i32, i32) nounwind alwaysinline noduplicate > +declare void > @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t > addrspace(1)*, i32, i32, <2 x i32>) nounwind alwaysinline noduplicate > +declare void > @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t > addrspace(1)*, i32, i32, <4 x i32>) nounwind alwaysinline noduplicate > +declare void > @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t > addrspace(1)*, i32, i32, <8 x i32>) nounwind alwaysinline noduplicate > > define void @_Z27intel_sub_group_block_writePKU3AS1jj(i32 > addrspace(1)* %p, i32 %data) nounwind alwaysinline noduplicate { > call void @__gen_ocl_sub_group_block_write_mem(i32 addrspace(1)* %p, > i32 %data) > ret void > } > + > +define void > @_Z27intel_sub_group_block_write11ocl_image2dDv2_ij(%opencl.image2d > _t addrspace(1)* %image, <2 x i32> %byte_coord, i32 %data) nounwind > alwaysinline noduplicate { > + %1 = extractelement <2 x i32> %byte_coord, i32 0 > + %2 = extractelement <2 x i32> %byte_coord, i32 1 > + call void @__gen_ocl_sub_group_block_write_image(%opencl.image2d_t > addrspace(1)* %image, i32 %1, i32 %2, i32 %data) > + ret void > +} > + > +define void > @_Z28intel_sub_group_block_write211ocl_image2dDv2_iDv2_j(%opencl.im > age2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <2 x i32> %data) > 
nounwind alwaysinline noduplicate { > + %1 = extractelement <2 x i32> %byte_coord, i32 0 > + %2 = extractelement <2 x i32> %byte_coord, i32 1 > + call void > @__gen_ocl_sub_group_block_write_image2(%opencl.image2d_t > addrspace(1)* %image, i32 %1, i32 %2, <2 x i32> %data) > + ret void > +} > + > +define void > @_Z28intel_sub_group_block_write411ocl_image2dDv2_iDv4_j(%opencl.im > age2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <4 x i32> %data) > nounwind alwaysinline noduplicate { > + %1 = extractelement <2 x i32> %byte_coord, i32 0 > + %2 = extractelement <2 x i32> %byte_coord, i32 1 > + call void > @__gen_ocl_sub_group_block_write_image4(%opencl.image2d_t > addrspace(1)* %image, i32 %1, i32 %2, <4 x i32> %data) > + ret void > +} > + > +define void > @_Z28intel_sub_group_block_write811ocl_image2dDv2_iDv8_j(%opencl.im > age2d_t addrspace(1)* %image, <2 x i32> %byte_coord, <8 x i32> %data) > nounwind alwaysinline noduplicate { > + %1 = extractelement <2 x i32> %byte_coord, i32 0 > + %2 = extractelement <2 x i32> %byte_coord, i32 1 > + call void > @__gen_ocl_sub_group_block_write_image8(%opencl.image2d_t > addrspace(1)* %image, i32 %1, i32 %2, <8 x i32> %data) > + ret void > +} > diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > index 66490cc..753a045 100644 > --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.cl > @@ -187,3 +187,24 @@ OVERLOADABLE void > intel_sub_group_block_write8(const global uint* p,uint8 data) > intel_sub_group_block_write(p + get_simd_size() * 6, data.s6); > intel_sub_group_block_write(p + get_simd_size() * 7, data.s7); > } > + > +PURE CONST uint __gen_ocl_sub_group_block_read_image(image2d_t p, > int x, int y); > +PURE CONST uint2 __gen_ocl_sub_group_block_read_image2(image2d_t p, > int x, int y); > +PURE CONST uint4 __gen_ocl_sub_group_block_read_image4(image2d_t p, > int x, int y); > +PURE CONST uint8 
__gen_ocl_sub_group_block_read_image8(image2d_t p, > int x, int y); > +OVERLOADABLE uint intel_sub_group_block_read(image2d_t p, int2 cord) > +{ > + return __gen_ocl_sub_group_block_read_image(p, cord.x, cord.y); > +} > +OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t p, int2 cord) > +{ > + return __gen_ocl_sub_group_block_read_image2(p, cord.x, cord.y); > +} > +OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t p, int2 cord) > +{ > + return __gen_ocl_sub_group_block_read_image4(p, cord.x, cord.y); > +} > +OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t p, int2 cord) > +{ > + return __gen_ocl_sub_group_block_read_image8(p, cord.x, cord.y); > +} > diff --git a/backend/src/libocl/tmpl/ocl_simd.tmpl.h > b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > index d0676be..799f772 100644 > --- a/backend/src/libocl/tmpl/ocl_simd.tmpl.h > +++ b/backend/src/libocl/tmpl/ocl_simd.tmpl.h > @@ -143,3 +143,13 @@ OVERLOADABLE void > intel_sub_group_block_write(const __global uint* p, uint data) > OVERLOADABLE void intel_sub_group_block_write2(const __global uint* p, > uint2 data); > OVERLOADABLE void intel_sub_group_block_write4(const __global uint* p, > uint4 data); > OVERLOADABLE void intel_sub_group_block_write8(const __global uint* p, > uint8 data); > + > +OVERLOADABLE uint intel_sub_group_block_read(image2d_t image, int2 > byte_coord); > +OVERLOADABLE uint2 intel_sub_group_block_read2(image2d_t image, int2 > byte_coord); > +OVERLOADABLE uint4 intel_sub_group_block_read4(image2d_t image, int2 > byte_coord); > +OVERLOADABLE uint8 intel_sub_group_block_read8(image2d_t image, int2 > byte_coord); > + > +OVERLOADABLE void intel_sub_group_block_write(image2d_t image, int2 > byte_coord, uint data); > +OVERLOADABLE void intel_sub_group_block_write2(image2d_t image, int2 > byte_coord, uint2 data); > +OVERLOADABLE void intel_sub_group_block_write4(image2d_t image, int2 > byte_coord, uint4 data); > +OVERLOADABLE void intel_sub_group_block_write8(image2d_t image, int2 > 
byte_coord, uint8 data); > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index ffa838c..2dcf308 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -699,6 +699,7 @@ namespace gbe > void emitSubGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps > opcode); > // Emit subgroup instructions > void emitBlockReadWriteMemInst(CallInst &I, CallSite &CS, bool isWrite); > + void emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, bool isWrite, > uint8_t vec_size); > > uint8_t appendSampler(CallSite::arg_iterator AI); > uint8_t getImageID(CallInst &I); > @@ -3744,10 +3745,12 @@ namespace gbe > case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MAX: > case GEN_OCL_SUB_GROUP_SCAN_INCLUSIVE_MIN: > case GEN_OCL_LRP: > - this->newRegister(&I); > - break; > case GEN_OCL_SUB_GROUP_BLOCK_READ_MEM: > - this->newRegister(&I, NULL, false); > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8: > + this->newRegister(&I); > break; > case GEN_OCL_PRINTF: > this->newRegister(&I); // fall through > @@ -3764,6 +3767,10 @@ namespace gbe > case GEN_OCL_STORE_PROFILING: > case GEN_OCL_DEBUGWAIT: > case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8: > break; > case GEN_OCL_NOT_FOUND: > default: > @@ -4013,6 +4020,39 @@ namespace gbe > GBE_ASSERT(AI == AE); > } > > + void GenWriter::emitBlockReadWriteImageInst(CallInst &I, CallSite &CS, > bool isWrite, uint8_t vec_size) { > + CallSite::arg_iterator AI = CS.arg_begin(); > + CallSite::arg_iterator AE = CS.arg_end(); > + GBE_ASSERT(AI != AE); > + > + const uint8_t imageID = getImageID(I); > + AI++; > + > + if(isWrite){ > + vector<ir::Register> srcTupleData; > + 
srcTupleData.push_back(getRegister(*(AI++))); > + srcTupleData.push_back(getRegister(*(AI++))); > + for(int i = 0;i < vec_size; i++) > + srcTupleData.push_back(getRegister(*(AI), i)); > + AI++; > + const ir::Tuple srctuple = ctx.arrayTuple(&srcTupleData[0], 2 + > vec_size); > + ctx.MBWRITE(imageID, srctuple, 2 + vec_size, vec_size); > + } else { > + ir::Register src[2]; > + src[0] = getRegister(*(AI++)); > + src[1] = getRegister(*(AI++)); > + vector<ir::Register> dstTupleData; > + for(int i = 0;i < vec_size; i++) > + dstTupleData.push_back(getRegister(&I, i)); > + const ir::Tuple srctuple = ctx.arrayTuple(src, 2); > + const ir::Tuple dsttuple = ctx.arrayTuple(&dstTupleData[0], vec_size); > + ctx.MBREAD(imageID, dsttuple, vec_size, srctuple, 2); > + } > + > + GBE_ASSERT(AI == AE); > + } > + > + > /* append a new sampler. should be called before any reference to > * a sampler_t value. */ > uint8_t GenWriter::appendSampler(CallSite::arg_iterator AI) { > @@ -4841,6 +4881,22 @@ namespace gbe > this->emitBlockReadWriteMemInst(I, CS, false); break; > case GEN_OCL_SUB_GROUP_BLOCK_WRITE_MEM: > this->emitBlockReadWriteMemInst(I, CS, true); break; > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE: > + this->emitBlockReadWriteImageInst(I, CS, false, 1); break; > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2: > + this->emitBlockReadWriteImageInst(I, CS, false, 2); break; > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4: > + this->emitBlockReadWriteImageInst(I, CS, false, 4); break; > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8: > + this->emitBlockReadWriteImageInst(I, CS, false, 8); break; > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE: > + this->emitBlockReadWriteImageInst(I, CS, true, 1); break; > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2: > + this->emitBlockReadWriteImageInst(I, CS, true, 2); break; > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4: > + this->emitBlockReadWriteImageInst(I, CS, true, 4); break; > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8: > + 
this->emitBlockReadWriteImageInst(I, CS, true, 8); break; > default: break; > } > } > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 003be91..456ab58 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -219,6 +219,14 @@ > DECL_LLVM_GEN_FUNCTION(SUB_GROUP_SCAN_INCLUSIVE_MIN, > __gen_ocl_sub_group_scan_in > > DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_MEM, > __gen_ocl_sub_group_block_read_mem) > DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_MEM, > __gen_ocl_sub_group_block_write_mem) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE, > __gen_ocl_sub_group_block_read_image) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE2, > __gen_ocl_sub_group_block_read_image2) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE4, > __gen_ocl_sub_group_block_read_image4) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_READ_IMAGE8, > __gen_ocl_sub_group_block_read_image8) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE, > __gen_ocl_sub_group_block_write_image) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE2, > __gen_ocl_sub_group_block_write_image2) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE4, > __gen_ocl_sub_group_block_write_image4) > +DECL_LLVM_GEN_FUNCTION(SUB_GROUP_BLOCK_WRITE_IMAGE8, > __gen_ocl_sub_group_block_write_image8) > > // common function > DECL_LLVM_GEN_FUNCTION(LRP, __gen_ocl_lrp) > diff --git a/backend/src/llvm/llvm_scalarize.cpp > b/backend/src/llvm/llvm_scalarize.cpp > index 53fd320..e60bf4b 100644 > --- a/backend/src/llvm/llvm_scalarize.cpp > +++ b/backend/src/llvm/llvm_scalarize.cpp > @@ -682,7 +682,21 @@ namespace gbe { > *CI = InsertToVector(call, *CI); > break; > } > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE2: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE4: > + case GEN_OCL_SUB_GROUP_BLOCK_WRITE_IMAGE8: > + { > + ++CI; > + ++CI; > + if 
((*CI)->getType()->isVectorTy()) > + *CI = InsertToVector(call, *CI); > + break; > + } > case GEN_OCL_VME: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE2: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE4: > + case GEN_OCL_SUB_GROUP_BLOCK_READ_IMAGE8: > setAppendPoint(call); > extractFromVector(call); > break; > -- > 2.7.4 > > _______________________________________________ > Beignet mailing list > [email protected] > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
