The relatively big problem in this patch is that you already defined an ir::ocl::invalid register in profile.hpp, but you also use another ir::InvalidRegister in many places.
And you hard-coded InvalidRegister to Register(27). If any subsequent patch changes the register number for ir::ocl::invalid, it will cause an inconsistency bug. No need to submit a new patch; I will fix it. On Thu, May 15, 2014 at 04:43:30PM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > 1. Delete the is3D member in instruction class. Because we need more > than 1 bit to represent 1D 2D and 3D. We now add an invalid register > in ir profile, and comparing the coords to it to judge the dimension. > 2. Rename all the xxx_image to xxx_image2D to make its meaning clear. > 3. Update the according Sampler and Typed_Write instruction in selection > and Gen IR generation. > > Signed-off-by: Junyan He <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 27 +++-- > backend/src/ir/instruction.cpp | 21 ++-- > backend/src/ir/instruction.hpp | 6 +- > backend/src/ir/profile.cpp | 1 + > backend/src/ir/profile.hpp | 3 +- > backend/src/ir/register.cpp | 2 + > backend/src/ir/register.hpp | 3 + > backend/src/llvm/llvm_gen_backend.cpp | 162 > ++++++++++++++++++++--------- > backend/src/llvm/llvm_gen_ocl_function.hxx | 28 +++-- > backend/src/llvm/llvm_scalarize.cpp | 31 ++++-- > backend/src/ocl_stdlib.tmpl.h | 82 +++++++++++++-- > 11 files changed, 257 insertions(+), 109 deletions(-) > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index 88ec408..a7a7982 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -3265,15 +3265,19 @@ namespace gbe > for (valueID = 0; valueID < insn.getDstNum(); ++valueID) > dst[valueID] = sel.selReg(insn.getDst(valueID), insn.getDstType()); > > - if (!insn.is3D()) > - srcNum--; > + GBE_ASSERT(srcNum == 3); > + if (insn.getSrc(1) == ir::InvalidRegister) //not 3D > + srcNum = 1; > + else if (insn.getSrc(2) == ir::InvalidRegister) > + srcNum = 2; > > if (insn.getSamplerOffset() != 0) { > 
- // U, lod, V, [W] > + // U, lod, [V], [W] > GBE_ASSERT(insn.getSrcType() != TYPE_FLOAT); > msgPayloads[0] = sel.selReg(insn.getSrc(0), insn.getSrcType()); > msgPayloads[1] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > - msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType()); > + if (srcNum > 1) > + msgPayloads[2] = sel.selReg(insn.getSrc(1), insn.getSrcType()); > if (srcNum > 2) > msgPayloads[3] = sel.selReg(insn.getSrc(2), insn.getSrcType()); > // Clear the lod to zero. > @@ -3314,8 +3318,12 @@ namespace gbe > msgs[0] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > for(uint32_t msgID = 1; msgID < 1 + coordNum; msgID++, valueID++) > msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), > insn.getCoordType()); > + > + // fake u. > + if (insn.getSrc(1) == ir::InvalidRegister) > + msgs[2] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > // fake w. > - if (!insn.is3D()) > + if (insn.getSrc(2) == ir::InvalidRegister) > msgs[3] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > // LOD. > msgs[4] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32); > @@ -3343,7 +3351,7 @@ namespace gbe > > uint32_t bti = insn.getImageIndex(); > if (simdWidth == 8) > - sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D()); > + sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != > ir::InvalidRegister); > else { > sel.push(); > sel.curr.execWidth = 8; > @@ -3359,15 +3367,16 @@ namespace gbe > sel.curr.quarterControl = (quarter == 0) ? 
GEN_COMPRESSION_Q1 : > GEN_COMPRESSION_Q2; > // Set U,V,W > QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), > insn.getCoordType())); > - QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), > insn.getCoordType())); > - if (insn.is3D()) > + if (insn.getSrc(1) != ir::InvalidRegister) //not 2D > + QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), > insn.getCoordType())); > + if (insn.getSrc(2) != ir::InvalidRegister) //not 3D > QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), > insn.getCoordType())); > // Set R, G, B, A > QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(3), > insn.getSrcType())); > QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(4), > insn.getSrcType())); > QUARTER_MOV1(msgs, 7, sel.selReg(insn.getSrc(5), > insn.getSrcType())); > QUARTER_MOV1(msgs, 8, sel.selReg(insn.getSrc(6), > insn.getSrcType())); > - sel.TYPED_WRITE(msgs, msgNum, bti, insn.is3D()); > + sel.TYPED_WRITE(msgs, msgNum, bti, insn.getSrc(2) != > ir::InvalidRegister); > #undef QUARTER_MOV0 > #undef QUARTER_MOV1 > } > diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp > index 2d2b34b..b351324 100644 > --- a/backend/src/ir/instruction.cpp > +++ b/backend/src/ir/instruction.cpp > @@ -491,7 +491,7 @@ namespace ir { > public TupleDstPolicy<SampleInstruction> > { > public: > - SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, > bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, > bool is3D) { > + SampleInstruction(uint8_t imageIdx, Tuple dstTuple, Tuple srcTuple, > bool dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { > this->opcode = OP_SAMPLE; > this->dst = dstTuple; > this->src = srcTuple; > @@ -500,7 +500,6 @@ namespace ir { > this->samplerIdx = sampler; > this->imageIdx = imageIdx; > this->samplerOffset = samplerOffset; > - this->is3DRead = is3D; > } > INLINE bool wellFormed(const Function &fn, std::string &why) const; > INLINE void out(std::ostream &out, const Function &fn) const { > @@ -525,12 +524,10 @@ 
namespace ir { > INLINE Type getDstType(void) const { return this->dstIsFloat ? > TYPE_FLOAT : TYPE_U32; } > INLINE const uint8_t getSamplerIndex(void) const { return > this->samplerIdx; } > INLINE const uint8_t getSamplerOffset(void) const { return > this->samplerOffset; } > - INLINE const bool is3D(void) const { return !!this->is3DRead; } > uint8_t srcIsFloat:1; > uint8_t dstIsFloat:1; > uint8_t samplerIdx:4; > uint8_t samplerOffset:1; > - uint8_t is3DRead:1; > uint8_t imageIdx; > static const uint32_t srcNum = 3; > static const uint32_t dstNum = 4; > @@ -543,13 +540,12 @@ namespace ir { > { > public: > > - INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type > srcType, Type coordType, bool is3D) { > + INLINE TypedWriteInstruction(uint8_t imageIdx, Tuple srcTuple, Type > srcType, Type coordType) { > this->opcode = OP_TYPED_WRITE; > this->src = srcTuple; > this->coordType = coordType; > this->srcType = srcType; > this->imageIdx = imageIdx; > - this->is3DWrite = is3D; > } > INLINE bool wellFormed(const Function &fn, std::string &why) const; > INLINE void out(std::ostream &out, const Function &fn) const { > @@ -569,9 +565,6 @@ namespace ir { > uint8_t srcType; > uint8_t coordType; > uint8_t imageIdx; > - uint8_t is3DWrite; > - > - INLINE const bool is3D(void) const { return !!this->is3DWrite; } > > INLINE const uint8_t getImageIndex(void) const { return > this->imageIdx; } > INLINE Type getSrcType(void) const { return (Type)this->srcType; } > @@ -1452,13 +1445,11 @@ DECL_MEM_FN(SyncInstruction, uint32_t, > getParameters(void), getParameters()) > DECL_MEM_FN(SampleInstruction, Type, getSrcType(void), getSrcType()) > DECL_MEM_FN(SampleInstruction, Type, getDstType(void), getDstType()) > DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerIndex(void), > getSamplerIndex()) > -DECL_MEM_FN(SampleInstruction, const bool, is3D(void), is3D()) > DECL_MEM_FN(SampleInstruction, const uint8_t, getSamplerOffset(void), > getSamplerOffset()) > 
DECL_MEM_FN(SampleInstruction, const uint8_t, getImageIndex(void), > getImageIndex()) > DECL_MEM_FN(TypedWriteInstruction, Type, getSrcType(void), getSrcType()) > DECL_MEM_FN(TypedWriteInstruction, Type, getCoordType(void), getCoordType()) > DECL_MEM_FN(TypedWriteInstruction, const uint8_t, getImageIndex(void), > getImageIndex()) > -DECL_MEM_FN(TypedWriteInstruction, const bool, is3D(void), is3D()) > DECL_MEM_FN(GetImageInfoInstruction, uint32_t, getInfoType(void), > getInfoType()) > DECL_MEM_FN(GetImageInfoInstruction, const uint8_t, getImageIndex(void), > getImageIndex()) > > @@ -1638,12 +1629,12 @@ DECL_MEM_FN(GetImageInfoInstruction, const uint8_t, > getImageIndex(void), getImag > } > > // SAMPLE > - Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool > dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool > is3D) { > - return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, > srcIsFloat, sampler, samplerOffset, is3D).convert(); > + Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool > dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset) { > + return internal::SampleInstruction(imageIndex, dst, src, dstIsFloat, > srcIsFloat, sampler, samplerOffset).convert(); > } > > - Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type > coordType, bool is3D) { > - return internal::TypedWriteInstruction(imageIndex, src, srcType, > coordType, is3D).convert(); > + Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type > coordType) { > + return internal::TypedWriteInstruction(imageIndex, src, srcType, > coordType).convert(); > } > > Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, > Register infoReg) { > diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp > index 582e22d..a29a734 100644 > --- a/backend/src/ir/instruction.hpp > +++ b/backend/src/ir/instruction.hpp > @@ -351,7 +351,6 @@ namespace ir { > class 
TypedWriteInstruction : public Instruction { > public: > /*! Return true if the given instruction is an instance of this class */ > - const bool is3D() const; > static bool isClassOf(const Instruction &insn); > const uint8_t getImageIndex() const; > Type getSrcType(void) const; > @@ -361,7 +360,6 @@ namespace ir { > /*! Load texels from a texture */ > class SampleInstruction : public Instruction { > public: > - const bool is3D() const; > const uint8_t getImageIndex() const; > const uint8_t getSamplerIndex(void) const; > const uint8_t getSamplerOffset(void) const; > @@ -662,9 +660,9 @@ namespace ir { > /*! sync.params... (see Sync instruction) */ > Instruction SYNC(uint32_t parameters); > /*! typed write */ > - Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type > coordType, bool is3D); > + Instruction TYPED_WRITE(uint8_t imageIndex, Tuple src, Type srcType, Type > coordType); > /*! sample textures */ > - Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool > dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset, bool > is3D); > + Instruction SAMPLE(uint8_t imageIndex, Tuple dst, Tuple src, bool > dstIsFloat, bool srcIsFloat, uint8_t sampler, uint8_t samplerOffset); > /*! get image information , such as width/height/depth/... */ > Instruction GET_IMAGE_INFO(int infoType, Register dst, uint8_t imageIndex, > Register infoReg); > /*! 
label labelIndex */ > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp > index ddf53d1..4fbf87e 100644 > --- a/backend/src/ir/profile.cpp > +++ b/backend/src/ir/profile.cpp > @@ -81,6 +81,7 @@ namespace ir { > DECL_NEW_REG(FAMILY_DWORD, zero, 1); > DECL_NEW_REG(FAMILY_DWORD, one, 1); > DECL_NEW_REG(FAMILY_WORD, retVal, 1); > + DECL_NEW_REG(FAMILY_DWORD, invalid, 1); > } > #undef DECL_NEW_REG > > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp > index 0652a25..81547d8 100644 > --- a/backend/src/ir/profile.hpp > +++ b/backend/src/ir/profile.hpp > @@ -68,7 +68,8 @@ namespace ir { > static const Register zero = Register(24); // scalar register holds > zero. > static const Register one = Register(25); // scalar register holds > one. > static const Register retVal = Register(26); // helper register to do > data flow analysis. > - static const uint32_t regNum = 27; // number of special > registers > + static const Register invalid = Register(27); // used for valid > comparation. > + static const uint32_t regNum = 28; // number of special > registers > extern const char *specialRegMean[]; // special register name. > } /* namespace ocl */ > > diff --git a/backend/src/ir/register.cpp b/backend/src/ir/register.cpp > index 471bfbd..aed3e46 100644 > --- a/backend/src/ir/register.cpp > +++ b/backend/src/ir/register.cpp > @@ -27,6 +27,8 @@ > namespace gbe { > namespace ir { > > + const Register InvalidRegister = Register(27); > + > std::ostream &operator<< (std::ostream &out, const RegisterData &regData) > { > switch (regData.family) { > diff --git a/backend/src/ir/register.hpp b/backend/src/ir/register.hpp > index 340ebc8..ba15b2d 100644 > --- a/backend/src/ir/register.hpp > +++ b/backend/src/ir/register.hpp > @@ -161,6 +161,9 @@ namespace ir { > /*! Output the register file string in the given stream */ > std::ostream &operator<< (std::ostream &out, const RegisterFile &file); > > + /*! The valid register used for comparation. 
*/ > + extern const Register InvalidRegister; > + > } /* namespace ir */ > } /* namespace gbe */ > > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index 8489c87..c23fd49 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -2205,22 +2205,32 @@ namespace gbe > case GEN_OCL_LGBARRIER: > ctx.getFunction().setUseSLM(true); > break; > - case GEN_OCL_WRITE_IMAGE_I: > - case GEN_OCL_WRITE_IMAGE_UI: > - case GEN_OCL_WRITE_IMAGE_F: > + case GEN_OCL_WRITE_IMAGE_I_1D: > + case GEN_OCL_WRITE_IMAGE_UI_1D: > + case GEN_OCL_WRITE_IMAGE_F_1D: > + case GEN_OCL_WRITE_IMAGE_I_2D: > + case GEN_OCL_WRITE_IMAGE_UI_2D: > + case GEN_OCL_WRITE_IMAGE_F_2D: > case GEN_OCL_WRITE_IMAGE_I_3D: > case GEN_OCL_WRITE_IMAGE_UI_3D: > case GEN_OCL_WRITE_IMAGE_F_3D: > break; > - case GEN_OCL_READ_IMAGE_I: > - case GEN_OCL_READ_IMAGE_UI: > - case GEN_OCL_READ_IMAGE_F: > + case GEN_OCL_READ_IMAGE_I_1D: > + case GEN_OCL_READ_IMAGE_UI_1D: > + case GEN_OCL_READ_IMAGE_F_1D: > + case GEN_OCL_READ_IMAGE_I_2D: > + case GEN_OCL_READ_IMAGE_UI_2D: > + case GEN_OCL_READ_IMAGE_F_2D: > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > case GEN_OCL_READ_IMAGE_F_3D: > - case GEN_OCL_READ_IMAGE_I_I: > - case GEN_OCL_READ_IMAGE_UI_I: > - case GEN_OCL_READ_IMAGE_F_I: > + > + case GEN_OCL_READ_IMAGE_I_1D_I: > + case GEN_OCL_READ_IMAGE_UI_1D_I: > + case GEN_OCL_READ_IMAGE_F_1D_I: > + case GEN_OCL_READ_IMAGE_I_2D_I: > + case GEN_OCL_READ_IMAGE_UI_2D_I: > + case GEN_OCL_READ_IMAGE_F_2D_I: > case GEN_OCL_READ_IMAGE_I_3D_I: > case GEN_OCL_READ_IMAGE_UI_3D_I: > case GEN_OCL_READ_IMAGE_F_3D_I: > @@ -2401,6 +2411,7 @@ namespace gbe > default: NOT_IMPLEMENTED; > } > } else { > + int image_dim; > // Get the name of the called function and handle it > Value *Callee = I.getCalledValue(); > const std::string fnName = Callee->getName(); > @@ -2504,18 +2515,31 @@ namespace gbe > ctx.GET_IMAGE_INFO(infoType, reg, surfaceID, 
infoReg); > break; > } > - case GEN_OCL_READ_IMAGE_I: > - case GEN_OCL_READ_IMAGE_UI: > - case GEN_OCL_READ_IMAGE_F: > + > + case GEN_OCL_READ_IMAGE_I_1D: > + case GEN_OCL_READ_IMAGE_UI_1D: > + case GEN_OCL_READ_IMAGE_F_1D: > + case GEN_OCL_READ_IMAGE_I_1D_I: > + case GEN_OCL_READ_IMAGE_UI_1D_I: > + case GEN_OCL_READ_IMAGE_F_1D_I: > + image_dim = 1; > + goto handle_read_image; > + case GEN_OCL_READ_IMAGE_I_2D: > + case GEN_OCL_READ_IMAGE_UI_2D: > + case GEN_OCL_READ_IMAGE_F_2D: > + case GEN_OCL_READ_IMAGE_I_2D_I: > + case GEN_OCL_READ_IMAGE_UI_2D_I: > + case GEN_OCL_READ_IMAGE_F_2D_I: > + image_dim = 2; > + goto handle_read_image; > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > case GEN_OCL_READ_IMAGE_F_3D: > - case GEN_OCL_READ_IMAGE_I_I: > - case GEN_OCL_READ_IMAGE_UI_I: > - case GEN_OCL_READ_IMAGE_F_I: > case GEN_OCL_READ_IMAGE_I_3D_I: > case GEN_OCL_READ_IMAGE_UI_3D_I: > case GEN_OCL_READ_IMAGE_F_3D_I: > + image_dim = 3; > +handle_read_image: > { > GBE_ASSERT(AI != AE); const ir::Register surfaceReg = > this->getRegister(*AI); ++AI; > const uint8_t surfaceID = > ctx.getFunction().getImageSet()->getIdx(surfaceReg); > @@ -2523,20 +2547,26 @@ namespace gbe > const uint8_t sampler = this->appendSampler(AI); > ++AI; > > - GBE_ASSERT(AI != AE); const ir::Register ucoord = > this->getRegister(*AI); ++AI; > - GBE_ASSERT(AI != AE); const ir::Register vcoord = > this->getRegister(*AI); ++AI; > + ir::Register ucoord; > + ir::Register vcoord; > ir::Register wcoord; > - bool is3D = false; > - if (it->second == GEN_OCL_READ_IMAGE_I_3D || > - it->second == GEN_OCL_READ_IMAGE_UI_3D || > - it->second == GEN_OCL_READ_IMAGE_F_3D || > - it->second == GEN_OCL_READ_IMAGE_I_3D_I || > - it->second == GEN_OCL_READ_IMAGE_UI_3D_I || > - it->second == GEN_OCL_READ_IMAGE_F_3D_I) { > - GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; > - is3D = true; > - } else > - wcoord = ucoord; // not used, just a padding. 
> + > + GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; > + if (image_dim > 1) { > + GBE_ASSERT(AI != AE); > + vcoord = this->getRegister(*AI); > + ++AI; > + } else { > + vcoord = ir::InvalidRegister; > + } > + > + if (image_dim > 2) { > + GBE_ASSERT(AI != AE); > + wcoord = this->getRegister(*AI); > + ++AI; > + } else { > + wcoord = ir::InvalidRegister; > + } > > vector<ir::Register> dstTupleData, srcTupleData; > const uint32_t elemNum = 4; > @@ -2561,19 +2591,25 @@ namespace gbe > ir::Type dstType = ir::TYPE_U32; > > switch(it->second) { > - case GEN_OCL_READ_IMAGE_I: > - case GEN_OCL_READ_IMAGE_UI: > + case GEN_OCL_READ_IMAGE_I_1D: > + case GEN_OCL_READ_IMAGE_UI_1D: > + case GEN_OCL_READ_IMAGE_I_2D: > + case GEN_OCL_READ_IMAGE_UI_2D: > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > - case GEN_OCL_READ_IMAGE_I_I: > - case GEN_OCL_READ_IMAGE_UI_I: > + case GEN_OCL_READ_IMAGE_I_1D_I: > + case GEN_OCL_READ_IMAGE_UI_1D_I: > + case GEN_OCL_READ_IMAGE_I_2D_I: > + case GEN_OCL_READ_IMAGE_UI_2D_I: > case GEN_OCL_READ_IMAGE_I_3D_I: > case GEN_OCL_READ_IMAGE_UI_3D_I: > dstType = ir::TYPE_U32; > break; > - case GEN_OCL_READ_IMAGE_F: > + case GEN_OCL_READ_IMAGE_F_1D: > + case GEN_OCL_READ_IMAGE_F_2D: > case GEN_OCL_READ_IMAGE_F_3D: > - case GEN_OCL_READ_IMAGE_F_I: > + case GEN_OCL_READ_IMAGE_F_1D_I: > + case GEN_OCL_READ_IMAGE_F_2D_I: > case GEN_OCL_READ_IMAGE_F_3D_I: > dstType = ir::TYPE_FLOAT; > break; > @@ -2584,27 +2620,48 @@ namespace gbe > bool isFloatCoord = it->second <= GEN_OCL_READ_IMAGE_F_3D; > > ctx.SAMPLE(surfaceID, dstTuple, srcTuple, dstType == > ir::TYPE_FLOAT, > - isFloatCoord, sampler, samplerOffset, is3D); > + isFloatCoord, sampler, samplerOffset); > break; > } > - case GEN_OCL_WRITE_IMAGE_I: > - case GEN_OCL_WRITE_IMAGE_UI: > - case GEN_OCL_WRITE_IMAGE_F: > + > + case GEN_OCL_WRITE_IMAGE_I_1D: > + case GEN_OCL_WRITE_IMAGE_UI_1D: > + case GEN_OCL_WRITE_IMAGE_F_1D: > + image_dim = 1; > + goto handle_write_image; > + case 
GEN_OCL_WRITE_IMAGE_I_2D: > + case GEN_OCL_WRITE_IMAGE_UI_2D: > + case GEN_OCL_WRITE_IMAGE_F_2D: > + image_dim = 2; > + goto handle_write_image; > case GEN_OCL_WRITE_IMAGE_I_3D: > case GEN_OCL_WRITE_IMAGE_UI_3D: > case GEN_OCL_WRITE_IMAGE_F_3D: > + image_dim = 3; > +handle_write_image: > { > GBE_ASSERT(AI != AE); const ir::Register surfaceReg = > this->getRegister(*AI); ++AI; > const uint8_t surfaceID = > ctx.getFunction().getImageSet()->getIdx(surfaceReg); > - GBE_ASSERT(AI != AE); const ir::Register ucoord = > this->getRegister(*AI); ++AI; > - GBE_ASSERT(AI != AE); const ir::Register vcoord = > this->getRegister(*AI); ++AI; > - ir::Register wcoord; > - bool is3D = false; > - if(it->second >= GEN_OCL_WRITE_IMAGE_I_3D) { > - GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI; > - is3D = true; > - } else > - wcoord = ucoord; // not used, just padding. > + ir::Register ucoord, vcoord, wcoord; > + > + GBE_ASSERT(AI != AE); ucoord = this->getRegister(*AI); ++AI; > + > + if (image_dim > 1) { > + GBE_ASSERT(AI != AE); > + vcoord = this->getRegister(*AI); > + ++AI; > + } else { > + vcoord = ir::InvalidRegister; > + } > + > + if (image_dim > 2) { > + GBE_ASSERT(AI != AE); > + wcoord = this->getRegister(*AI); > + ++AI; > + } else { > + wcoord = ir::InvalidRegister; > + } > + > GBE_ASSERT(AI != AE); > vector<ir::Register> srcTupleData; > > @@ -2622,13 +2679,16 @@ namespace gbe > ir::Type srcType = ir::TYPE_U32; > > switch(it->second) { > - case GEN_OCL_WRITE_IMAGE_I: > - case GEN_OCL_WRITE_IMAGE_UI: > + case GEN_OCL_WRITE_IMAGE_I_1D: > + case GEN_OCL_WRITE_IMAGE_UI_1D: > + case GEN_OCL_WRITE_IMAGE_I_2D: > + case GEN_OCL_WRITE_IMAGE_UI_2D: > case GEN_OCL_WRITE_IMAGE_I_3D: > case GEN_OCL_WRITE_IMAGE_UI_3D: > srcType = ir::TYPE_U32; > break; > - case GEN_OCL_WRITE_IMAGE_F: > + case GEN_OCL_WRITE_IMAGE_F_1D: > + case GEN_OCL_WRITE_IMAGE_F_2D: > case GEN_OCL_WRITE_IMAGE_F_3D: > srcType = ir::TYPE_FLOAT; > break; > @@ -2636,7 +2696,7 @@ namespace gbe > GBE_ASSERT(0); // 
never been here. > } > > - ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32, > is3D); > + ctx.TYPED_WRITE(surfaceID, srcTuple, srcType, ir::TYPE_U32); > break; > } > case GEN_OCL_MUL_HI_INT: > diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx > b/backend/src/llvm/llvm_gen_ocl_function.hxx > index 4236298..e6f25b3 100644 > --- a/backend/src/llvm/llvm_gen_ocl_function.hxx > +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx > @@ -46,24 +46,34 @@ DECL_LLVM_GEN_FUNCTION(FORCE_SIMD8, > __gen_ocl_force_simd8) > DECL_LLVM_GEN_FUNCTION(FORCE_SIMD16, __gen_ocl_force_simd16) > > // To read_image functions. > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I, _Z21__gen_ocl_read_imageijtffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI, _Z22__gen_ocl_read_imageuijtffj) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F, _Z21__gen_ocl_read_imagefjtffj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D, _Z21__gen_ocl_read_imageijtfj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D, _Z22__gen_ocl_read_imageuijtfj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D, _Z21__gen_ocl_read_imagefjtfj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D, _Z21__gen_ocl_read_imageijtffj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D, _Z22__gen_ocl_read_imageuijtffj) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D, _Z21__gen_ocl_read_imagefjtffj) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D, _Z21__gen_ocl_read_imageijtfffj) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D, _Z22__gen_ocl_read_imageuijtfffj) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D, _Z21__gen_ocl_read_imagefjtfffj) > // work around read image with the LD message. The coords are integer type. 
> -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_I, _Z21__gen_ocl_read_imageijtiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_I, _Z22__gen_ocl_read_imageuijtiij) > -DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_I, _Z21__gen_ocl_read_imagefjtiij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_1D_I, _Z21__gen_ocl_read_imageijtij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_1D_I, _Z22__gen_ocl_read_imageuijtij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_1D_I, _Z21__gen_ocl_read_imagefjtij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_2D_I, _Z21__gen_ocl_read_imageijtiij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_2D_I, _Z22__gen_ocl_read_imageuijtiij) > +DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_2D_I, _Z21__gen_ocl_read_imagefjtiij) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_I_3D_I, _Z21__gen_ocl_read_imageijtiiij) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_UI_3D_I, _Z22__gen_ocl_read_imageuijtiiij) > DECL_LLVM_GEN_FUNCTION(READ_IMAGE_F_3D_I, _Z21__gen_ocl_read_imagefjtiiij) > > // To write_image functions. > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I, _Z22__gen_ocl_write_imageijiiDv4_i) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI, _Z23__gen_ocl_write_imageuijiiDv4_j) > -DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F, _Z22__gen_ocl_write_imagefjiiDv4_f) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_1D, _Z22__gen_ocl_write_imageijiDv4_i) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_1D, _Z23__gen_ocl_write_imageuijiDv4_j) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_1D, _Z22__gen_ocl_write_imagefjiDv4_f) > + > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_2D, _Z22__gen_ocl_write_imageijiiDv4_i) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_2D, > _Z23__gen_ocl_write_imageuijiiDv4_j) > +DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_F_2D, _Z22__gen_ocl_write_imagefjiiDv4_f) > > DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_I_3D, _Z22__gen_ocl_write_imageijiiiDv4_i) > DECL_LLVM_GEN_FUNCTION(WRITE_IMAGE_UI_3D, > _Z23__gen_ocl_write_imageuijiiiDv4_j) > diff --git a/backend/src/llvm/llvm_scalarize.cpp > b/backend/src/llvm/llvm_scalarize.cpp > index 73817e2..f1513f8 100644 > --- 
a/backend/src/llvm/llvm_scalarize.cpp > +++ b/backend/src/llvm/llvm_scalarize.cpp > @@ -639,19 +639,26 @@ namespace gbe { > > // Get the function arguments > CallSite CS(call); > - CallSite::arg_iterator CI = CS.arg_begin() + 3; > + CallSite::arg_iterator CI = CS.arg_begin() + 2; > > switch (it->second) { > default: break; > - case GEN_OCL_READ_IMAGE_I: > - case GEN_OCL_READ_IMAGE_UI: > - case GEN_OCL_READ_IMAGE_F: > + case GEN_OCL_READ_IMAGE_I_1D: > + case GEN_OCL_READ_IMAGE_UI_1D: > + case GEN_OCL_READ_IMAGE_F_1D: > + case GEN_OCL_READ_IMAGE_I_2D: > + case GEN_OCL_READ_IMAGE_UI_2D: > + case GEN_OCL_READ_IMAGE_F_2D: > case GEN_OCL_READ_IMAGE_I_3D: > case GEN_OCL_READ_IMAGE_UI_3D: > case GEN_OCL_READ_IMAGE_F_3D: > - case GEN_OCL_READ_IMAGE_I_I: > - case GEN_OCL_READ_IMAGE_UI_I: > - case GEN_OCL_READ_IMAGE_F_I: > + > + case GEN_OCL_READ_IMAGE_I_1D_I: > + case GEN_OCL_READ_IMAGE_UI_1D_I: > + case GEN_OCL_READ_IMAGE_F_1D_I: > + case GEN_OCL_READ_IMAGE_I_2D_I: > + case GEN_OCL_READ_IMAGE_UI_2D_I: > + case GEN_OCL_READ_IMAGE_F_2D_I: > case GEN_OCL_READ_IMAGE_I_3D_I: > case GEN_OCL_READ_IMAGE_UI_3D_I: > case GEN_OCL_READ_IMAGE_F_3D_I: > @@ -666,9 +673,13 @@ namespace gbe { > case GEN_OCL_WRITE_IMAGE_UI_3D: > case GEN_OCL_WRITE_IMAGE_F_3D: > CI++; > - case GEN_OCL_WRITE_IMAGE_I: > - case GEN_OCL_WRITE_IMAGE_UI: > - case GEN_OCL_WRITE_IMAGE_F: > + case GEN_OCL_WRITE_IMAGE_I_2D: > + case GEN_OCL_WRITE_IMAGE_UI_2D: > + case GEN_OCL_WRITE_IMAGE_F_2D: > + CI++; > + case GEN_OCL_WRITE_IMAGE_I_1D: > + case GEN_OCL_WRITE_IMAGE_UI_1D: > + case GEN_OCL_WRITE_IMAGE_F_1D: > { > *CI = InsertToVector(call, *CI); > break; > diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h > index cd8b918..8ab8b31 100755 > --- a/backend/src/ocl_stdlib.tmpl.h > +++ b/backend/src/ocl_stdlib.tmpl.h > @@ -83,12 +83,15 @@ DEF(double); > // This is a transitional hack to bypass the LLVM 3.3 built-in types. > // See the Khronos SPIR specification for handling of these types. 
> #define __texture __attribute__((address_space(4))) > +struct _image1d_t; > +typedef __texture struct _image1d_t* __image1d_t; > struct _image2d_t; > typedef __texture struct _image2d_t* __image2d_t; > struct _image3d_t; > typedef __texture struct _image3d_t* __image3d_t; > typedef const ushort __sampler_t; > typedef size_t __event_t; > +#define image1d_t __image1d_t > #define image2d_t __image2d_t > #define image3d_t __image3d_t > #define sampler_t __sampler_t > @@ -4545,6 +4548,15 @@ int __gen_ocl_force_simd16(void); > // Image access functions > ///////////////////////////////////////////////////////////////////////////// > > +// 1D read > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, uint sampler_offset); > +OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > int u, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, uint sampler_offset); > +OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, int u, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, uint sampler_offset); > +OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, uint sampler_offset); > + > +// 2D read > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, float v, uint sampler_offset); > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > int u, int v, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > @@ -4552,6 +4564,7 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint > surface_id, sampler_t sampler, in > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, 
sampler_t > sampler, int u, int v, uint sampler_offset); > > +// 3D read > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > float u, float v, float w, uint sampler_offset); > OVERLOADABLE int4 __gen_ocl_read_imagei(uint surface_id, sampler_t sampler, > int u, int v, int w, uint sampler_offset); > OVERLOADABLE uint4 __gen_ocl_read_imageui(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > @@ -4559,28 +4572,37 @@ OVERLOADABLE uint4 __gen_ocl_read_imageui(uint > surface_id, sampler_t sampler, in > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, float u, float v, float w, uint sampler_offset); > OVERLOADABLE float4 __gen_ocl_read_imagef(uint surface_id, sampler_t > sampler, int u, int v, int w, uint sampler_offset); > > +// 1D write > +OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int4 color); > +OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, uint4 > color); > +OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, float4 > color); > + > +// 2D write > OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int4 > color); > OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, > uint4 color); > OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, > float4 color); > > +// 3D write > OVERLOADABLE void __gen_ocl_write_imagei(uint surface_id, int u, int v, int > w, int4 color); > OVERLOADABLE void __gen_ocl_write_imageui(uint surface_id, int u, int v, int > w, uint4 color); > OVERLOADABLE void __gen_ocl_write_imagef(uint surface_id, int u, int v, int > w, float4 color); > + > int __gen_ocl_get_image_width(uint surface_id); > int __gen_ocl_get_image_height(uint surface_id); > int __gen_ocl_get_image_channel_data_type(uint surface_id); > int __gen_ocl_get_image_channel_order(uint surface_id); > int __gen_ocl_get_image_depth(uint surface_id); > > -#define GET_IMAGE(cl_image, 
surface_id) \ > - uint surface_id = (uint)cl_image > - > +// 2D 3D Image Common Macro > #ifdef GEN7_SAMPLER_CLAMP_BORDER_WORKAROUND > #define GEN_FIX_1 1 > #else > #define GEN_FIX_1 0 > #endif > > +#define GET_IMAGE(cl_image, surface_id) \ > + uint surface_id = (uint)cl_image > + > #define DECL_READ_IMAGE0(int_clamping_fix, \ > image_type, type, suffix, coord_type, n) > \ > INLINE_OVERLOADABLE type read_image ##suffix(image_type cl_image, > \ > @@ -4646,6 +4668,52 @@ int __gen_ocl_get_image_depth(uint surface_id); > __gen_ocl_write_image ##suffix(EXPEND_WRITE_COORD(surface_id, coord, > color));\ > } > > + > +// 1D > +#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix) > \ > + DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int, 1) > \ > + DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, > float, 1) \ > + DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int, 1) > \ > + DECL_WRITE_IMAGE(image_type, type, suffix, int) > \ > + DECL_WRITE_IMAGE(image_type, type, suffix, float) > + > +#define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord > +#define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord < 0 > ? 
-1 : coord) > +#define DENORMALIZE_COORD(id, dstCoord, srcCoord) dstCoord = srcCoord * > __gen_ocl_get_image_width(id); > +#define EXPEND_WRITE_COORD(id, coord, color) id, coord, color > + > +#define OUT_OF_BOX(coord, surface, normalized) \ > + (coord < 0 || \ > + ((normalized == 0) \ > + && (coord >= __gen_ocl_get_image_width(surface))) \ > + || ((normalized != 0) && (coord > 0x1p0))) > + > +#define FIXUP_FLOAT_COORD(tmpCoord) \ > + { \ > + if (tmpCoord < 0 && tmpCoord > -0x1p-20f) \ > + tmpCoord += -0x1p-9; \ > + } > + > +DECL_IMAGE(GEN_FIX_1, image1d_t, int4, i) > +DECL_IMAGE(GEN_FIX_1, image1d_t, uint4, ui) > +DECL_IMAGE(0, image1d_t, float4, f) > + > +#undef EXPEND_READ_COORD > +#undef EXPEND_READ_COORD1 > +#undef DENORMALIZE_COORD > +#undef EXPEND_WRITE_COORD > +#undef OUT_OF_BOX > +#undef FIXUP_FLOAT_COORD > +#undef DECL_IMAGE > +// End of 1D > + > +#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) > \ > + DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) > \ > + DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, > float ##n, n) \ > + DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) > \ > + DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) > \ > + DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) > +// 2D > #define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, coord.s1 > #define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int)(coord.s0 < > 0 ? -1 : coord.s0), \ > (int)(coord.s1 < 0 ? 
-1 : > coord.s1) > @@ -4668,13 +4736,6 @@ int __gen_ocl_get_image_depth(uint surface_id); > tmpCoord.s1 += -0x1p-9f; \ > } > > -#define DECL_IMAGE(int_clamping_fix, image_type, type, suffix, n) > \ > - DECL_READ_IMAGE0(int_clamping_fix, image_type, type, suffix, int ##n, n) > \ > - DECL_READ_IMAGE1(GEN_FIX_1, int_clamping_fix, image_type, type, suffix, > float ##n, n) \ > - DECL_READ_IMAGE_NOSAMPLER(image_type, type, suffix, int ##n, n) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, int ## n) > \ > - DECL_WRITE_IMAGE(image_type, type, suffix, float ## n) > - > DECL_IMAGE(GEN_FIX_1, image2d_t, int4, i, 2) > DECL_IMAGE(GEN_FIX_1, image2d_t, uint4, ui, 2) > DECL_IMAGE(0, image2d_t, float4, f, 2) > @@ -4686,6 +4747,7 @@ DECL_IMAGE(0, image2d_t, float4, f, 2) > #undef OUT_OF_BOX > #undef FIXUP_FLOAT_COORD > > +// 3D > #define EXPEND_READ_COORD(id, sampler, coord) id, sampler, coord.s0, > coord.s1, coord.s2 > #define EXPEND_READ_COORD1(id, sampler, coord) id, sampler, (int) (coord.s0 > < 0 ? -1 : coord.s0), \ > (int)(coord.s1 < 0 ? -1 : > coord.s1), (int)(coord.s2 < 0 ? -1 : coord.s2) > -- > 1.8.3.2 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
