LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > Guo Yejun > Sent: Tuesday, August 25, 2015 04:04 > To: [email protected] > Cc: Guo, Yejun > Subject: [Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in > payload > > initialize the data inside kernel with packed integer vector > > V2: call functions from ctx, instead of ctx.registerAllocator > Signed-off-by: Guo Yejun <[email protected]> > --- > backend/src/backend/context.cpp | 10 ++++----- > backend/src/backend/context.hpp | 2 +- > backend/src/backend/gen75_context.cpp | 4 +++- > backend/src/backend/gen_context.cpp | 33 > ++++++++++++++++++++++++++++-- > backend/src/backend/gen_context.hpp | 2 ++ > backend/src/backend/gen_reg_allocation.cpp | 27 +++++++++++++++------- > -- > backend/src/backend/program.h | 1 - > backend/src/backend/program.hpp | 2 +- > src/cl_command_queue_gen7.c | 9 -------- > 9 files changed, 60 insertions(+), 30 deletions(-) > > diff --git a/backend/src/backend/context.cpp > b/backend/src/backend/context.cpp index b8dfa8c..33b2409 100644 > --- a/backend/src/backend/context.cpp > +++ b/backend/src/backend/context.cpp > @@ -373,8 +373,8 @@ namespace gbe > return this->kernel; > } > > - int16_t Context::allocate(int16_t size, int16_t alignment) { > - return registerAllocator->allocate(size, alignment); > + int16_t Context::allocate(int16_t size, int16_t alignment, bool bFwd) { > + return registerAllocator->allocate(size, alignment, bFwd); > } > > void Context::deallocate(int16_t offset) { registerAllocator- > >deallocate(offset); } @@ -396,10 +396,10 @@ namespace gbe > > void Context::buildStack(void) { > const auto &stackUse = dag->getUse(ir::ocl::stackptr); > - if (stackUse.size() == 0) // no stack is used if stackptr is unused > + if (stackUse.size() == 0) { // no stack is used if stackptr is unused > + this->kernel->stackSize = 0; > return; > - // Be sure that the stack pointer is set > - // GBE_ASSERT(this->kernel- > >getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0); > + } > uint32_t stackSize = 128; > while (stackSize < fn.getStackSize()) { > stackSize *= 3; > diff --git a/backend/src/backend/context.hpp > b/backend/src/backend/context.hpp index faa7c8a..079967d 100644 > --- a/backend/src/backend/context.hpp > +++ b/backend/src/backend/context.hpp > @@ -85,7 +85,7 @@ namespace gbe > return JIPs.find(insn) != JIPs.end(); > } > /*! Allocate some memory in the register file */ > - int16_t allocate(int16_t size, int16_t alignment); > + int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0); > /*! Deallocate previously allocated memory */ > void deallocate(int16_t offset); > /*! Spilt a block into 2 blocks, for some registers allocate together but > deallocate seperate */ diff --git a/backend/src/backend/gen75_context.cpp > b/backend/src/backend/gen75_context.cpp > index b9dfb18..7d407c3 100644 > --- a/backend/src/backend/gen75_context.cpp > +++ b/backend/src/backend/gen75_context.cpp > @@ -67,7 +67,7 @@ namespace gbe > using namespace ir; > > // Only emit stack pointer computation if we use a stack > - if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0) > + if (kernel->getStackSize() == 0) > return; > > // Check that everything is consistent in the kernel code @@ -80,6 +80,8 > @@ namespace gbe > GenRegister::ud16grf(ir::ocl::stackptr); > const GenRegister stackptr = ra->genReg(selStatckPtr); > > + loadLaneID(stackptr); > + > // We compute the per-lane stack pointer here > // private address start from zero > p->push(); > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index 0c301dd..25fdf08 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -176,11 +176,39 @@ namespace gbe > p->pop(); > } > > + void GenContext::loadLaneID(GenRegister dst) { > + const GenRegister laneID = GenRegister::immv(0x76543210); > + GenRegister dst_; > + if (dst.type == GEN_TYPE_UW) > + dst_ = dst; > + else > + dst_ = GenRegister::uw16grf(126,0); > + > + p->push(); > + uint32_t execWidth = p->curr.execWidth; > + p->curr.predicate = GEN_PREDICATE_NONE; > + p->curr.noMask = 1; > + if (execWidth == 8) > + p->MOV(dst_, laneID); > + else { > + p->curr.execWidth = 8; > + p->MOV(dst_, laneID); > + //Packed Unsigned Half-Byte Integer Vector does not work > + //have to mock by adding 8 to the singed vector > + const GenRegister eight = GenRegister::immuw(8); > + p->ADD(GenRegister::offset(dst_, 0, 16), dst_, eight); > + p->curr.execWidth = 16; > + } > + if (dst.type != GEN_TYPE_UW) > + p->MOV(dst, dst_); > + p->pop(); > + } > + > void GenContext::emitStackPointer(void) { > using namespace ir; > > // Only emit stack pointer computation if we use a stack > - if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0) > + if (kernel->getStackSize() == 0) > return; > > // Check that everything is consistent in the kernel code @@ -193,6 > +221,8 > @@ namespace gbe > GenRegister::ud16grf(ir::ocl::stackptr); > const GenRegister stackptr = ra->genReg(selStatckPtr); > > + loadLaneID(stackptr); > + > // We compute the per-lane stack pointer here > // threadId * perThreadSize + laneId*perLaneSize > // let private address start from zero @@ -2254,7 +2284,6 @@ namespace > gbe > INSERT_REG(numgroup0, GROUP_NUM_X) > INSERT_REG(numgroup1, GROUP_NUM_Y) > INSERT_REG(numgroup2, GROUP_NUM_Z) > - INSERT_REG(stackptr, STACK_POINTER) > INSERT_REG(printfbptr, PRINTF_BUF_POINTER) > INSERT_REG(printfiptr, PRINTF_INDEX_POINTER) > do {} while(0); > diff --git a/backend/src/backend/gen_context.hpp > b/backend/src/backend/gen_context.hpp > index 8ef725f..34f9293 100644 > --- a/backend/src/backend/gen_context.hpp > +++ b/backend/src/backend/gen_context.hpp > @@ -107,6 +107,8 @@ namespace gbe > return this->liveness->getLiveIn(bb); > } > > + void loadLaneID(GenRegister dst); > + > void collectShifter(GenRegister dest, GenRegister src); > void loadTopHalf(GenRegister dest, GenRegister src); > void storeTopHalf(GenRegister dest, GenRegister src); diff --git > a/backend/src/backend/gen_reg_allocation.cpp > b/backend/src/backend/gen_reg_allocation.cpp > index 4cb88e9..39f1934 100644 > --- a/backend/src/backend/gen_reg_allocation.cpp > +++ b/backend/src/backend/gen_reg_allocation.cpp > @@ -133,8 +133,8 @@ namespace gbe > void validateFlag(Selection &selection, SelectionInstruction &insn); > /*! Allocate the GRF registers */ > bool allocateGRFs(Selection &selection); > - /*! Create gen registers for all preallocated curbe registers. */ > - void allocatePayloadRegs(void); > + /*! Create gen registers for all preallocated special registers. */ > + void allocateSpecialRegs(void); > /*! Create a Gen register from a register set in the payload */ > void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t > subOffset = > 0); > /*! Create the intervals for each register */ @@ -228,7 +228,7 @@ > namespace gbe > this->intervals[reg].maxID = 0; > } > > - INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) { > + INLINE void GenRegAllocator::Opaque::allocateSpecialRegs(void) { > using namespace ir; > for(auto &it : this->ctx.curbeRegs) > allocatePayloadReg(it.first, it.second); @@ -248,6 +248,19 @@ > namespace gbe > allocatePayloadReg(reg, it->second, subOffset); > ctx.splitBlock(it->second, subOffset); > } > + > + if (RA.contains(ocl::stackbuffer)) { > + uint32_t regSize = 0; > + this->getRegAttrib(ocl::stackptr, regSize); > + uint32_t offset = this->ctx.allocate(regSize, regSize, 1); > + RA.insert(std::make_pair(ocl::stackptr, offset)); > + } > + > + // Group and barrier IDs are always allocated by the hardware in r0 > + RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1 > + RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6 > + RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7 > + RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2 > } > > bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, > const GenRegInterval &interval) { @@ -1001,13 +1014,7 @@ namespace gbe > this->intervals.push_back(ir::Register(regID)); > > // Allocate the special registers (only those which are actually used) > - this->allocatePayloadRegs(); > - > - // Group and barrier IDs are always allocated by the hardware in r0 > - RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1 > - RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6 > - RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7 > - RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2 > + this->allocateSpecialRegs(); > > // block IP used to handle the mask in SW is always allocated > > diff --git a/backend/src/backend/program.h > b/backend/src/backend/program.h index fa75052..af19732 100644 > --- a/backend/src/backend/program.h > +++ b/backend/src/backend/program.h > @@ -91,7 +91,6 @@ enum gbe_curbe_type { > GBE_CURBE_GROUP_NUM_Z, > GBE_CURBE_WORK_DIM, > GBE_CURBE_IMAGE_INFO, > - GBE_CURBE_STACK_POINTER, > GBE_CURBE_PRINTF_BUF_POINTER, > GBE_CURBE_PRINTF_INDEX_POINTER, > GBE_CURBE_KERNEL_ARGUMENT, > diff --git a/backend/src/backend/program.hpp > b/backend/src/backend/program.hpp index cff2463..efe192f 100644 > --- a/backend/src/backend/program.hpp > +++ b/backend/src/backend/program.hpp > @@ -223,7 +223,7 @@ namespace gbe { > uint32_t argNum; //!< Number of function arguments > uint32_t curbeSize; //!< Size of the data to push > uint32_t simdWidth; //!< SIMD size for the kernel (lane number) > - uint32_t stackSize; //!< Stack size (may be 0 if unused) > + uint32_t stackSize; //!< Stack size (0 if unused) > uint32_t scratchSize; //!< Scratch memory size (may be 0 if unused) > bool useSLM; //!< SLM requires a special HW config > uint32_t slmSize; //!< slm size for kernel variable > diff --git a/src/cl_command_queue_gen7.c > b/src/cl_command_queue_gen7.c index 4adbd2b..0e60528 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -210,15 +210,6 @@ cl_curbe_fill(cl_kernel ker, > UPLOAD(GBE_CURBE_WORK_DIM, work_dim); #undef UPLOAD > > - /* Write identity for the stack pointer. This is required by the stack > pointer > - * computation in the kernel > - */ > - if ((offset = interp_kernel_get_curbe_offset(ker->opaque, > GBE_CURBE_STACK_POINTER, 0)) >= 0) { > - const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque); > - uint32_t *stackptr = (uint32_t *) (ker->curbe + offset); > - int32_t i; > - for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i; > - } > /* Handle the various offsets to SLM */ > const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque); > int32_t arg, slm_offset = interp_kernel_get_slm_size(ker->opaque); > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
