Concentrate the register allocation in one place, and don't use hard-coded sizes when doing curbe register allocation. All register size allocation should use the same method.
Signed-off-by: Zhigang Gong <[email protected]> --- backend/src/backend/context.cpp | 93 +----------------------- backend/src/backend/context.hpp | 3 +- backend/src/backend/gen_context.cpp | 109 +++++++++++++++++++++++++++++ backend/src/backend/gen_context.hpp | 6 ++ backend/src/backend/gen_reg_allocation.cpp | 6 ++ backend/src/backend/gen_reg_allocation.hpp | 2 + backend/src/backend/program.hpp | 1 + 7 files changed, 126 insertions(+), 94 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index b8f4171..dc27d83 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -353,7 +353,6 @@ namespace gbe Kernel *Context::compileKernel(void) { this->kernel = this->allocateKernel(); this->kernel->simdWidth = this->simdWidth; - this->buildPatchList(); this->buildArgList(); this->buildUsedLabels(); this->buildJIPs(); @@ -417,7 +416,7 @@ namespace gbe if (stackUse.size() == 0) // no stack is used if stackptr is unused return; // Be sure that the stack pointer is set - GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0); + // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0); uint32_t stackSize = 1*KB; while (stackSize < fn.getStackSize()) { stackSize <<= 1; @@ -453,100 +452,10 @@ namespace gbe return offset + GEN_REG_SIZE; } - void Context::insertCurbeReg(ir::Register reg, uint32_t offset) { curbeRegs.insert(std::make_pair(reg, offset)); } - void Context::buildPatchList(void) { - const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 
4u : 8u; - kernel->curbeSize = 0u; - - // We insert the block IP mask first - this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t))); - this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0, sizeof(uint16_t))); - this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t))); - this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t))); - - // Go over the arguments and find the related patch locations - const uint32_t argNum = fn.argNum(); - for (uint32_t argID = 0u; argID < argNum; ++argID) { - const ir::FunctionArgument &arg = fn.getArg(argID); - // For pointers and values, we have nothing to do. We just push the values - if (arg.type == ir::FunctionArgument::GLOBAL_POINTER || - arg.type == ir::FunctionArgument::LOCAL_POINTER || - arg.type == ir::FunctionArgument::CONSTANT_POINTER || - arg.type == ir::FunctionArgument::VALUE || - arg.type == ir::FunctionArgument::STRUCTURE || - arg.type == ir::FunctionArgument::IMAGE || - arg.type == ir::FunctionArgument::SAMPLER) - this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize)); - } - - // Already inserted registers go here - const size_t localIDSize = sizeof(uint32_t) * this->simdWidth; - insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize)); - insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize)); - insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize)); - - // Go over all the instructions and find the special register we need - // to push -#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \ - if (reg == ir::ocl::SPECIAL_REG) { \ - if (curbeRegs.find(reg) != curbeRegs.end()) continue; \ - insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \ - } else - - bool useStackPtr = 
false; - fn.foreachInstruction([&](ir::Instruction &insn) { - const uint32_t srcNum = insn.getSrcNum(); - for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { - const ir::Register reg = insn.getSrc(srcID); - if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { - if (srcID != 0) continue; - const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); - const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; - ir::ImageInfoKey key(bti, type); - const ir::Register imageInfo = insn.getSrc(0); - if (curbeRegs.find(imageInfo) == curbeRegs.end()) { - uint32_t offset = this->getImageInfoCurbeOffset(key, 4); - insertCurbeReg(imageInfo, offset); - } - continue; - } - if (fn.isSpecialReg(reg) == false) continue; - if (curbeRegs.find(reg) != curbeRegs.end()) continue; - if (reg == ir::ocl::stackptr) useStackPtr = true; - INSERT_REG(lsize0, LOCAL_SIZE_X, 1) - INSERT_REG(lsize1, LOCAL_SIZE_Y, 1) - INSERT_REG(lsize2, LOCAL_SIZE_Z, 1) - INSERT_REG(gsize0, GLOBAL_SIZE_X, 1) - INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1) - INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1) - INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1) - INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1) - INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1) - INSERT_REG(workdim, WORK_DIM, 1) - INSERT_REG(numgroup0, GROUP_NUM_X, 1) - INSERT_REG(numgroup1, GROUP_NUM_Y, 1) - INSERT_REG(numgroup2, GROUP_NUM_Z, 1) - INSERT_REG(stackptr, STACK_POINTER, this->simdWidth) - do {} while(0); - } - }); -#undef INSERT_REG - - // Insert the stack buffer if used - if (useStackPtr) - insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); - - // After this point the vector is immutable. 
Sorting it will make - // research faster - std::sort(kernel->patches.begin(), kernel->patches.end()); - - kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); - } - void Context::buildArgList(void) { kernel->argNum = fn.argNum(); if (kernel->argNum) diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp index 384a2fb..26167a0 100644 --- a/backend/src/backend/context.hpp +++ b/backend/src/backend/context.hpp @@ -105,8 +105,6 @@ namespace gbe virtual Kernel *allocateKernel(void) = 0; /*! Look if a stack is needed and allocate it */ void buildStack(void); - /*! Build the curbe patch list for the given kernel */ - void buildPatchList(void); /*! Build the list of arguments to set to launch the kernel */ void buildArgList(void); /*! Build the sets of used labels */ @@ -121,6 +119,7 @@ namespace gbe * of the entry */ void insertCurbeReg(ir::Register, uint32_t grfOffset); + /*! allocate a curbe entry. */ uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0); /*! 
Provide for each branch and label the label index target */ typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap; diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8bcf454..51c6c97 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -31,6 +31,7 @@ #include "backend/gen_reg_allocation.hpp" #include "backend/gen/gen_mesa_disasm.h" #include "ir/function.hpp" +#include "ir/value.hpp" #include "sys/cvar.hpp" #include <cstring> #include <iostream> @@ -1860,8 +1861,116 @@ namespace gbe BVAR(OCL_OUTPUT_REG_ALLOC, false); BVAR(OCL_OUTPUT_ASM, false); + + void GenContext::allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue) { + uint32_t regSize; + regSize = this->ra->getRegSize(reg); + insertCurbeReg(reg, newCurbeEntry(value, subValue, regSize)); + } + + void GenContext::buildPatchList(void) { + const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u; + kernel->curbeSize = 0u; + auto &stackUse = dag->getUse(ir::ocl::stackptr); + + // We insert the block IP mask first +#if 0 + this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth * sizeof(uint16_t))); + this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0, this->simdWidth * sizeof(uint16_t))); + this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t))); + this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t))); + // Already inserted registers go here + const size_t localIDSizde = sizeof(uint32_t) * this->simdWidth; + insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize)); + insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize)); + insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize)); + // Insert the stack buffer if used + if 
(stackUse.size() != 0) + insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); +#else + using namespace ir::ocl; + allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP); + allocCurbeReg(emask, GBE_CURBE_EMASK); + allocCurbeReg(notemask, GBE_CURBE_NOT_EMASK); + allocCurbeReg(barriermask, GBE_CURBE_BARRIER_MASK); + allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X); + allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y); + allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z); + if (stackUse.size() != 0) + allocCurbeReg(stackbuffer, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER); +#endif + // Go over the arguments and find the related patch locations + const uint32_t argNum = fn.argNum(); + for (uint32_t argID = 0u; argID < argNum; ++argID) { + const ir::FunctionArgument &arg = fn.getArg(argID); + // For pointers and values, we have nothing to do. We just push the values + if (arg.type == ir::FunctionArgument::GLOBAL_POINTER || + arg.type == ir::FunctionArgument::LOCAL_POINTER || + arg.type == ir::FunctionArgument::CONSTANT_POINTER || + arg.type == ir::FunctionArgument::VALUE || + arg.type == ir::FunctionArgument::STRUCTURE || + arg.type == ir::FunctionArgument::IMAGE || + arg.type == ir::FunctionArgument::SAMPLER) + this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize)); + } + + // Go over all the instructions and find the special register we need + // to push + #define INSERT_REG(SPECIAL_REG, PATCH) \ + if (reg == ir::ocl::SPECIAL_REG) { \ + if (curbeRegs.find(reg) != curbeRegs.end()) continue; \ + allocCurbeReg(reg, GBE_CURBE_##PATCH); \ + } else + + fn.foreachInstruction([&](ir::Instruction &insn) { + const uint32_t srcNum = insn.getSrcNum(); + for (uint32_t srcID = 0; srcID < srcNum; ++srcID) { + const ir::Register reg = insn.getSrc(srcID); + if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) { + if (srcID != 0) continue; + const unsigned char bti = 
ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex(); + const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();; + ir::ImageInfoKey key(bti, type); + const ir::Register imageInfo = insn.getSrc(0); + if (curbeRegs.find(imageInfo) == curbeRegs.end()) { + uint32_t offset = this->getImageInfoCurbeOffset(key, 4); + insertCurbeReg(imageInfo, offset); + } + continue; + } + if (fn.isSpecialReg(reg) == false) continue; + if (curbeRegs.find(reg) != curbeRegs.end()) continue; + if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0); + INSERT_REG(lsize0, LOCAL_SIZE_X) + INSERT_REG(lsize1, LOCAL_SIZE_Y) + INSERT_REG(lsize2, LOCAL_SIZE_Z) + INSERT_REG(gsize0, GLOBAL_SIZE_X) + INSERT_REG(gsize1, GLOBAL_SIZE_Y) + INSERT_REG(gsize2, GLOBAL_SIZE_Z) + INSERT_REG(goffset0, GLOBAL_OFFSET_X) + INSERT_REG(goffset1, GLOBAL_OFFSET_Y) + INSERT_REG(goffset2, GLOBAL_OFFSET_Z) + INSERT_REG(workdim, WORK_DIM) + INSERT_REG(numgroup0, GROUP_NUM_X) + INSERT_REG(numgroup1, GROUP_NUM_Y) + INSERT_REG(numgroup2, GROUP_NUM_Z) + INSERT_REG(stackptr, STACK_POINTER) + do {} while(0); + } + }); +#undef INSERT_REG + + + // After this point the vector is immutable. Sorting it will make + // research faster + std::sort(kernel->patches.begin(), kernel->patches.end()); + + kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE); + } + bool GenContext::emitCode(void) { GenKernel *genKernel = static_cast<GenKernel*>(this->kernel); + buildPatchList(); sel->select(); schedulePreRegAllocation(*this, *this->sel); if (UNLIKELY(ra->allocate(*this->sel) == false)) diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp index 642301c..6ec43cc 100644 --- a/backend/src/backend/gen_context.hpp +++ b/backend/src/backend/gen_context.hpp @@ -162,6 +162,12 @@ namespace gbe * regenerating the code */ bool limitRegisterPressure; + private: + /*! Build the curbe patch list for the given kernel */ + void buildPatchList(void); + /*! 
allocate a new curbe register and insert to curbe pool. */ + void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0); + }; } /* namespace gbe */ diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp index f446a5b..2ba9495 100644 --- a/backend/src/backend/gen_reg_allocation.cpp +++ b/backend/src/backend/gen_reg_allocation.cpp @@ -1001,5 +1001,11 @@ namespace gbe this->opaque->outputAllocation(); } + uint32_t GenRegAllocator::getRegSize(ir::Register reg) { + uint32_t regSize; + this->opaque->getRegAttrib(reg, regSize); + return regSize; + } + } /* namespace gbe */ diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp index bccccc8..a2a1d40 100644 --- a/backend/src/backend/gen_reg_allocation.hpp +++ b/backend/src/backend/gen_reg_allocation.hpp @@ -57,6 +57,8 @@ namespace gbe GenRegister genReg(const GenRegister ®); /*! Output the register allocation */ void outputAllocation(void); + /*! Get register actual size in byte. */ + uint32_t getRegSize(ir::Register reg); private: /*! Actual implementation of the register allocator (use Pimpl) */ class Opaque; diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp index e6fc411..83fb0b4 100644 --- a/backend/src/backend/program.hpp +++ b/backend/src/backend/program.hpp @@ -180,6 +180,7 @@ namespace gbe { protected: friend class Context; //!< Owns the kernels + friend class GenContext; std::string name; //!< Kernel name KernelArgument *args; //!< Each argument vector<PatchInfo> patches; //!< Indicates how to build the curbe -- 1.8.3.2 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
