From: Pan Xiuli <[email protected]> Some of the sizes need to be converted from OWords to registers, and the existing parameter names may be misleading. Rename the parameters and add a new helper function to address these problems.
Signed-off-by: Pan Xiuli <[email protected]> --- backend/src/backend/gen8_encoder.cpp | 40 ++++++++---------- backend/src/backend/gen_context.cpp | 8 ++-- backend/src/backend/gen_encoder.cpp | 66 ++++++++++++++++++------------ backend/src/backend/gen_encoder.hpp | 15 ++++--- backend/src/backend/gen_insn_selection.cpp | 26 ++++++------ 5 files changed, 83 insertions(+), 72 deletions(-) diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 8f73346..39dcfd3 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -840,20 +840,15 @@ namespace gbe gen8_insn->bits3.gen8_block_rw_a64.header_present = 1; } - void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { - GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t ow_size) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); const uint32_t msg_length = 1; - uint32_t rsize = size / 2; - uint32_t msgsize = size; - // When size is 1 OWord, which means half a reg, we need to know which half to use - if (size == 1) { - if (dst.subnr == 0) - msgsize = 0; - else - msgsize = 1; - } - rsize = rsize == 0 ? 1 : rsize; - const uint32_t response_length = rsize; // Size is in regs + uint32_t sizeinreg = ow_size / 2; + // half reg should also have size 1 + sizeinreg = sizeinreg == 0 ? 
1 : sizeinreg; + const uint32_t block_size = getOBlockSize(ow_size, dst.subnr == 0); + const uint32_t response_length = sizeinreg; // Size is in reg + this->setHeader(insn); this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); @@ -861,21 +856,22 @@ namespace gbe setOBlockRWA64(this, insn, bti, - msgsize, + block_size, GEN8_P1_BLOCK_READ_A64, msg_length, response_length); } - void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t size) { - GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); - uint32_t rsize = size / 2; - rsize = rsize == 0 ? 1 : rsize; - const uint32_t msg_length = 1 + rsize; // Size is in owords + void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t ow_size) { + GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); + uint32_t sizeinreg = ow_size / 2; + // half reg should also have size 1 + sizeinreg = sizeinreg == 0 ? 1 : sizeinreg; + const uint32_t msg_length = 1 + sizeinreg; // Size is in reg and header const uint32_t response_length = 0; - uint32_t msgsize = size; - msgsize = msgsize == 1 ? 
0 : msgsize; + const uint32_t block_size = getOBlockSize(ow_size); + this->setHeader(insn); this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); @@ -883,7 +879,7 @@ namespace gbe setOBlockRWA64(this, insn, bti, - msgsize, + block_size, GEN8_P1_BLOCK_WRITE_A64, msg_length, response_length); diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 7ab5770..791e607 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -3576,14 +3576,14 @@ namespace gbe void GenContext::emitMBReadInstruction(const SelectionInstruction &insn) { const GenRegister dst = ra->genReg(insn.dst(0)); const GenRegister header = ra->genReg(insn.src(0)); - const size_t rsize = insn.extra.elem; - p->MBREAD(dst, header, insn.getbti(), rsize); + const size_t response_size = insn.extra.elem; + p->MBREAD(dst, header, insn.getbti(), response_size); } void GenContext::emitMBWriteInstruction(const SelectionInstruction &insn) { const GenRegister header = ra->genReg(insn.dst(0)); - const size_t msgsize = insn.extra.elem; - p->MBWRITE(header, insn.getbti(), msgsize); + const size_t data_size = insn.extra.elem; + p->MBWRITE(header, insn.getbti(), data_size); } BVAR(OCL_OUTPUT_REG_ALLOC, false); diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 49d93e8..e678136 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -260,20 +260,36 @@ namespace gbe static void setOBlockRW(GenEncoder *p, GenNativeInstruction *insn, uint32_t bti, - uint32_t size, + uint32_t block_size, uint32_t msg_type, uint32_t msg_length, uint32_t response_length) { const GenMessageTarget sfid = GEN_SFID_DATAPORT_DATA; p->setMessageDescriptor(insn, sfid, msg_length, response_length); - assert(size == 0 || size == 1 || size == 2 || size == 4 || size == 8); insn->bits3.gen7_oblock_rw.msg_type = msg_type; insn->bits3.gen7_oblock_rw.bti = bti; - 
insn->bits3.gen7_oblock_rw.block_size = size <= 2 ? size : (size == 4 ? 3 : 4); + insn->bits3.gen7_oblock_rw.block_size = block_size; insn->bits3.gen7_oblock_rw.header_present = 1; } + uint32_t GenEncoder::getOBlockSize(uint32_t oword_size, bool low_half) + { + /* 000: 1 OWord, read into or written from the low 128 bits of the destination register. + * 001: 1 OWord, read into or written from the high 128 bits of the destination register. + * 010: 2 OWords + * 011: 4 OWords + * 100: 8 OWords */ + switch(oword_size) + { + case 1: return low_half ? 0 : 1; + case 2: return 2; + case 4: return 3; + case 8: return 4; + default: NOT_SUPPORTED; + } + } + static void setMBlockRW(GenEncoder *p, GenNativeInstruction *insn, uint32_t bti, @@ -1312,20 +1328,15 @@ namespace gbe setScratchMessage(this, insn, offset, block_size, channel_mode, GEN_SCRATCH_READ, 1, dst_num); } - void GenEncoder::OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { + void GenEncoder::OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t ow_size) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); const uint32_t msg_length = 1; - uint32_t rsize = size / 2; - uint32_t msgsize = size; - // When size is 1 OWord, which means half a reg, we need to know which half to use - if (size == 1) { - if (dst.subnr == 0) - msgsize = 0; - else - msgsize = 1; - } - rsize = rsize == 0 ? 1 : rsize; - const uint32_t response_length = rsize; // Size is in regs + uint32_t sizeinreg = ow_size / 2; + // half reg should also have size 1 + sizeinreg = sizeinreg == 0 ? 
1 : sizeinreg; + const uint32_t block_size = getOBlockSize(ow_size, dst.subnr == 0); + const uint32_t response_length = sizeinreg; // Size is in reg + this->setHeader(insn); this->setDst(insn, GenRegister::uw16grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); @@ -1333,20 +1344,21 @@ namespace gbe setOBlockRW(this, insn, bti, - msgsize, + block_size, GEN7_UNALIGNED_OBLOCK_READ, msg_length, response_length); } - void GenEncoder::OBWRITE(GenRegister header, uint32_t bti, uint32_t size) { + void GenEncoder::OBWRITE(GenRegister header, uint32_t bti, uint32_t ow_size) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); - uint32_t rsize = size / 2; - rsize = rsize == 0 ? 1 : rsize; - const uint32_t msg_length = 1 + rsize; // Size is in owords + uint32_t sizeinreg = ow_size / 2; + // half reg should also have size 1 + sizeinreg = sizeinreg == 0 ? 1 : sizeinreg; + const uint32_t msg_length = 1 + sizeinreg; // Size is in reg and header const uint32_t response_length = 0; - uint32_t msgsize = size; - msgsize = msgsize == 1 ? 
0 : msgsize; + const uint32_t block_size = getOBlockSize(ow_size); + this->setHeader(insn); this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); this->setSrc1(insn, GenRegister::immud(0)); @@ -1354,16 +1366,16 @@ namespace gbe setOBlockRW(this, insn, bti, - msgsize, + block_size, GEN7_OBLOCK_WRITE, msg_length, response_length); } - void GenEncoder::MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t size) { + void GenEncoder::MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t response_size) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); const uint32_t msg_length = 1; - const uint32_t response_length = size; // Size of registers + const uint32_t response_length = response_size; // Size of registers this->setHeader(insn); this->setDst(insn, GenRegister::ud8grf(dst.nr, 0)); this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0)); @@ -1376,9 +1388,9 @@ namespace gbe response_length); } - void GenEncoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t size) { + void GenEncoder::MBWRITE(GenRegister header, uint32_t bti, uint32_t data_size) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); - const uint32_t msg_length = 1 + size; + const uint32_t msg_length = 1 + data_size; const uint32_t response_length = 0; // Size of registers this->setHeader(insn); this->setDst(insn, GenRegister::retype(GenRegister::null(), GEN_TYPE_UW)); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index e5eb2e2..c916064 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -286,18 +286,21 @@ namespace gbe virtual bool canHandleLong(uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null()); virtual void handleDouble(GenEncoder *p, uint32_t opcode, GenRegister dst, GenRegister src0, GenRegister src1 = GenRegister::null()); + + /*! OBlock helper function */ + uint32_t getOBlockSize(uint32_t oword_size, bool low_half = false); /*! 
OBlock read */ - void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); + void OBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t ow_size); /*! OBlock write */ - void OBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize); + void OBWRITE(GenRegister header, uint32_t bti, uint32_t ow_size); /*! MBlock read */ - virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); + virtual void MBREAD(GenRegister dst, GenRegister header, uint32_t bti, uint32_t response_size); /*! MBlock write */ - virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t elemSize); + virtual void MBWRITE(GenRegister header, uint32_t bti, uint32_t data_size); /*! A64 OBlock read */ - virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t elemSize); + virtual void OBREADA64(GenRegister dst, GenRegister header, uint32_t bti, uint32_t ow_size); /*! A64 OBlock write */ - virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t elemSize); + virtual void OBWRITEA64(GenRegister header, uint32_t bti, uint32_t ow_size); GBE_CLASS(GenEncoder); //!< Use custom allocators virtual void alu3(uint32_t opcode, GenRegister dst, diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 31effd1..e6b6210 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -763,9 +763,9 @@ namespace gbe /*! Oblock write */ void OBWRITE(GenRegister header, GenRegister* values, uint32_t tmp_size, uint32_t bti, uint32_t ow_size); /*! Media block read */ - void MBREAD(GenRegister* dsts, uint32_t tmp_size, GenRegister header, uint32_t bti, uint32_t block_size); + void MBREAD(GenRegister* dsts, uint32_t tmp_size, GenRegister header, uint32_t bti, uint32_t response_size); /*! 
Media block write */ - void MBWRITE(GenRegister header, GenRegister* values, uint32_t tmp_size, uint32_t bti, uint32_t block_size); + void MBWRITE(GenRegister header, GenRegister* values, uint32_t tmp_size, uint32_t bti, uint32_t data_size); /* common functions for both binary instruction and sel_cmp and compare instruction. It will handle the IMM or normal register assignment, and will try to avoid LOADI @@ -2310,12 +2310,12 @@ namespace gbe uint32_t tmp_size, GenRegister header, uint32_t bti, - uint32_t block_size) { + uint32_t response_size) { SelectionInstruction *insn = this->appendInsn(SEL_OP_MBREAD, tmp_size, 1); insn->src(0) = header; insn->setbti(bti); - insn->extra.elem = block_size; // vector size + insn->extra.elem = response_size; // send response length for (uint32_t i = 0; i < tmp_size; ++i) { insn->dst(i) = dsts[i]; @@ -2331,14 +2331,14 @@ namespace gbe GenRegister* values, uint32_t tmp_size, uint32_t bti, - uint32_t block_size) { + uint32_t data_size) { SelectionInstruction *insn = this->appendInsn(SEL_OP_MBWRITE, 0, 1 + tmp_size); SelectionVector *vector = this->appendVector(); insn->src(0) = header; for (uint32_t i = 0; i < tmp_size; ++i) insn->src(1 + i) = values[i]; insn->setbti(bti); - insn->extra.elem = block_size; // msg size + insn->extra.elem = data_size; // msg data part size // We need to put the header and the data together vector->regNum = 1 + tmp_size; @@ -7752,9 +7752,9 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp uint32_t simdWidth = sel.curr.execWidth; const Type type = insn.getType(); const uint32_t typeSize = type == TYPE_U32 ? 4 : 2; - // ushort in simd8 will have half reg, but response lenght is still 1 - uint32_t rsize = simdWidth * vec_size * typeSize / 32; - rsize = rsize ? rsize : 1; + uint32_t response_size = simdWidth * vec_size * typeSize / 32; + // ushort in simd8 will have half reg thus 0.5 reg size, but response lenght is still 1 + response_size = response_size ? 
response_size : 1; uint32_t block_width = typeSize * simdWidth; uint32_t blocksize = (block_width - 1) % 32 | (vec_size - 1) << 16; @@ -7795,7 +7795,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; // Now read the data - sel.MBREAD(&valuesVec[0], vec_size, header, insn.getImageIndex(), rsize); + sel.MBREAD(&valuesVec[0], vec_size, header, insn.getImageIndex(), response_size); sel.pop(); } else if (simdWidth * typeSize == 64) { sel.push(); @@ -7840,8 +7840,8 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp const RegisterFamily family = getFamily(type); const uint32_t typeSize = type == TYPE_U32 ? 4 : 2; // ushort in simd8 will have half reg, but response lenght is still 1 - uint32_t msgsize = simdWidth * vec_size * typeSize / 32; - msgsize = msgsize ? msgsize : 1; + uint32_t data_size = simdWidth * vec_size * typeSize / 32; + data_size = data_size? data_size : 1; uint32_t block_width = typeSize * simdWidth; uint32_t blocksize = (block_width - 1) % 32 | (vec_size - 1) << 16; @@ -7887,7 +7887,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp sel.curr.predicate = GEN_PREDICATE_NONE; sel.curr.noMask = 1; // Now read the data - sel.MBWRITE(header, &tmpVec[0], vec_size, insn.getImageIndex(), msgsize); + sel.MBWRITE(header, &tmpVec[0], vec_size, insn.getImageIndex(), data_size); sel.pop(); } else if (simdWidth * typeSize == 64) { sel.push(); -- 2.7.4 _______________________________________________ Beignet mailing list [email protected] https://lists.freedesktop.org/mailman/listinfo/beignet
