Orignally, the IR instruction only has 64 bits thus we don't have enough room for coordinate number. Now the IR instructions has been extent to 128 bits, we don't need to use the invalid register to implicitly pass the coordinate number any more.
Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/backend/gen_insn_selection.cpp | 40 +++++++++++------------------- backend/src/ir/instruction.cpp | 20 ++++++++------- backend/src/ir/instruction.hpp | 4 +-- backend/src/llvm/llvm_gen_backend.cpp | 31 ++++++----------------- 4 files changed, 36 insertions(+), 59 deletions(-) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 81fa5f8..106ea82 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -4318,10 +4318,6 @@ namespace gbe using namespace ir; GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT); uint32_t srcNum = insn.getSrcNum(); - if (insn.getSrc(1) == ir::ocl::invalid) //not 3D - srcNum = 1; - else if (insn.getSrc(2) == ir::ocl::invalid) - srcNum = 2; msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType()); msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); sel.MOV(msgPayloads[1], GenRegister::immud(0)); @@ -4364,12 +4360,6 @@ namespace gbe for (valueID = 0; valueID < insn.getDstNum(); ++valueID) dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); - GBE_ASSERT(srcNum == 3); - if (insn.getSrc(1) == ir::ocl::invalid) //not 3D - srcNum = 1; - else if (insn.getSrc(2) == ir::ocl::invalid) - srcNum = 2; - if (insn.getSamplerOffset() != 0) { if(sel.getLdMsgOrder() < LD_MSG_ORDER_SKL) this->emitLd_ivb(sel, insn, msgPayloads, msgLen); @@ -4405,7 +4395,7 @@ namespace gbe const uint32_t simdWidth = sel.ctx.getSimdWidth(); GenRegister msgs[9]; // (header + U + V + R + LOD + 4) const uint32_t msgNum = (8 / (simdWidth / 8)) + 1; - const uint32_t coordNum = 3; + const uint32_t dim = insn.getSrcNum() - 4; if (simdWidth == 16) { for(uint32_t i = 0; i < msgNum; i++) @@ -4413,18 +4403,18 @@ namespace gbe } else { uint32_t valueID = 0; msgs[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); - for(uint32_t msgID = 1; msgID < 1 + coordNum; msgID++, valueID++) + for(uint32_t msgID = 1; msgID < 1 + dim; msgID++, valueID++) msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), insn.getCoordType()); - // fake u. - if (insn.getSrc(1) == ir::ocl::invalid) + // fake v. + if (dim < 2) msgs[2] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // fake w. - if (insn.getSrc(2) == ir::ocl::invalid) + if (dim < 3) msgs[3] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); // LOD. msgs[4] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); - for(uint32_t msgID = 5; valueID < insn.getSrcNum(); msgID++, valueID++) + for(uint32_t msgID = dim + 2; valueID < insn.getSrcNum(); msgID++, valueID++) msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType()); } @@ -4441,14 +4431,14 @@ namespace gbe sel.curr.execWidth = 8; // Set zero LOD. if (simdWidth == 8) - sel.MOV(msgs[4], GenRegister::immud(0)); + sel.MOV(msgs[dim + 1], GenRegister::immud(0)); else sel.MOV(GenRegister::Qn(msgs[2], 0), GenRegister::immud(0)); sel.pop(); uint32_t bti = insn.getImageIndex(); if (simdWidth == 8) - sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::ocl::invalid); + sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3); else { sel.push(); sel.curr.execWidth = 8; @@ -4464,16 +4454,16 @@ namespace gbe sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : GEN_COMPRESSION_Q2; // Set U,V,W QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), insn.getCoordType())); - if (insn.getSrc(1) != ir::ocl::invalid) //not 2D + if (dim > 1) QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), insn.getCoordType())); - if (insn.getSrc(2) != ir::ocl::invalid) //not 3D + if (dim > 2) QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), insn.getCoordType())); // Set R, G, B, A - QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(3), insn.getSrcType())); - QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(4), insn.getSrcType())); - QUARTER_MOV1(msgs, 7, sel.selReg(insn.getSrc(5), insn.getSrcType())); - QUARTER_MOV1(msgs, 8, sel.selReg(insn.getSrc(6), insn.getSrcType())); - sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != ir::ocl::invalid); + QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(dim), insn.getSrcType())); + QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(dim + 1), insn.getSrcType())); + QUARTER_MOV1(msgs, 7, sel.selReg(insn.getSrc(dim + 2), insn.getSrcType())); + QUARTER_MOV1(msgs, 8, sel.selReg(insn.getSrc(dim + 3), insn.getSrcType())); + sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3); #undef QUARTER_MOV0 #undef QUARTER_MOV1 } diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index 795ff07..928e365 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -506,10 +506,11 @@ namespace ir { public TupleDstPolicy<SampleInstruction> { public: - SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { + SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, uint8_t srcNum, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { this->opcode = OP_SAMPLE; this->dst = dstTuple; this->src = srcTuple; + this->srcNum = srcNum; this->dstIsFloat = dstIsFloat; this->srcIsFloat = srcIsFloat; this->samplerIdx = sampler; @@ -544,7 +545,7 @@ namespace ir { uint8_t samplerIdx:4; uint8_t samplerOffset:2; uint8_t imageIdx; - static const uint32_t srcNum = 3; + uint8_t srcNum; static const uint32_t dstNum = 4; }; @@ -555,9 +556,10 @@ namespace ir { { public: - INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type srcType, Type coordType) { + INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, uint8_t srcNum, Type srcType, Type coordType) { this->opcode = OP_TYPED_WRITE; this->src = srcTuple; + this->srcNum = srcNum; this->coordType = coordType; this->srcType = srcType; this->imageIdx = imageIdx; @@ -580,12 +582,12 @@ namespace ir { uint8_t srcType; uint8_t coordType; uint8_t imageIdx; + // bti, u, [v], [w], 4 data elements + uint8_t srcNum; INLINE uint8_t getImageIndex(void) const { return this->imageIdx; } INLINE Type getSrcType(void) const { return (Type)this->srcType; } INLINE Type getCoordType(void) const { return (Type)this->coordType; } - // bti, u, v, w, 4 data elements - static const uint32_t srcNum = 7; Register dst[0]; //!< No dest register }; @@ -1774,12 +1776,12 @@ DECL_MEM_FN(GetImageInfoInstruction, uint8_t, getImageIndex(void), getImageIndex } // SAMPLE - Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { - return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, srcIsFloat, sampler, samplerOffset).convert(); + Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t srcNum, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { + return internal::SampleInstruction(imageIndex, dst, src, srcNum, dstIsFloat, srcIsFloat, sampler, samplerOffset).convert(); } - Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType) { - return internal::TypedWriteInstruction(imageIndex, src, srcType, coordType).convert(); + Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type srcType, Type coordType) { + return internal::TypedWriteInstruction(imageIndex, src, srcNum, srcType, coordType).convert(); } Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg) { diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index 484e7d1..6963111 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -706,9 +706,9 @@ namespace ir { Instruction READ_ARF(Type type, Register dst, ARFRegister arf); Instruction REGION(Register dst, Register src, uint32_t offset); /*! typed write */ - Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type coordType); + Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, uint8_t srcNum, Type srcType, Type coordType); /*! sample textures */ - Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset); + Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, uint8_t srcNum, bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset); /*! get image information , such as width/height/depth/... */ Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, Register infoReg); /*! label labelIndex */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 04f3092..3e22526 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -3460,16 +3460,8 @@ error: GBE_ASSERT(isFloatCoord == requiredFloatCoord); vector<ir::Register> dstTupleData, srcTupleData; - for (uint32_t elemID = 0; elemID < 3; elemID++) { - ir::Register reg; - - if (elemID < imageDim) - reg = this->getRegister(coordVal, elemID); - else - reg = ir::ocl::invalid; - - srcTupleData.push_back(reg); - } + for (uint32_t elemID = 0; elemID < imageDim; elemID++) + srcTupleData.push_back(this->getRegister(coordVal, elemID)); uint32_t elemNum; ir::Type dstType = getVectorInfo(ctx, &I, elemNum); @@ -3480,9 +3472,9 @@ error: dstTupleData.push_back(reg); } const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum); - const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 3); + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], imageDim); - ctx.SAMPLE(imageID, dstTuple, srcTuple, dstType == ir::TYPE_FLOAT, + ctx.SAMPLE(imageID, dstTuple, srcTuple, imageDim, dstType == ir::TYPE_FLOAT, requiredFloatCoord, sampler, samplerOffset); break; } @@ -3501,16 +3493,9 @@ error: vector<ir::Register> srcTupleData; GBE_ASSERT(imageDim >= 1 && imageDim <= 3); - for (uint32_t elemID = 0; elemID < 3; elemID++) { - ir::Register reg; + for (uint32_t elemID = 0; elemID < imageDim; elemID++) + srcTupleData.push_back(this->getRegister(*AI, elemID)); - if (elemID < imageDim) - reg = this->getRegister(*AI, elemID); - else - reg = ir::ocl::invalid; - - srcTupleData.push_back(reg); - } ++AI; GBE_ASSERT(AI != AE); uint32_t elemNum; ir::Type srcType = getVectorInfo(ctx, *AI, elemNum); @@ -3520,8 +3505,8 @@ error: const ir::Register reg = this->getRegister(*AI, elemID); srcTupleData.push_back(reg); } - const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7); - ctx.TYPED_WRITE(imageID, srcTuple, srcType, ir::TYPE_U32); + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], imageDim + 4); + ctx.TYPED_WRITE(imageID, srcTuple, imageDim + 4, srcType, ir::TYPE_U32); break; } case GEN_OCL_MUL_HI_INT: -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
