This patchset is for ocl2.0 branch only.
> -----Original Message----- > From: Beignet [mailto:[email protected]] On Behalf Of > [email protected] > Sent: Tuesday, January 19, 2016 11:29 AM > To: [email protected] > Cc: Luo, Xionghu <[email protected]>; Song, Ruiling > <[email protected]> > Subject: [Beignet] [PATCH 1/4] GBE: Implement new constant solution for > ocl2 > > From: Ruiling Song <[email protected]> > > Different from ocl 1.2, which gather all constant into one surface. > ocl2 only gather program global/constant into one surface. But > keep other constant passed through kernel argument in separate buffer. > > Signed-off-by: Ruiling Song <[email protected]> > --- > backend/src/backend/gen_insn_selection.cpp | 15 ++++-- > backend/src/backend/program.cpp | 21 +++++++- > backend/src/backend/program.h | 6 +++ > backend/src/backend/program.hpp | 4 ++ > backend/src/gbe_bin_interpreter.cpp | 2 + > backend/src/ir/profile.cpp | 4 +- > backend/src/ir/profile.hpp | 3 +- > backend/src/ir/unit.hpp | 50 ++++++++++++++++++ > backend/src/llvm/llvm_gen_backend.cpp | 84 ++++++++++++++++++---- > -------- > kernels/compiler_program_global.cl | 68 ++++++++++++++++++++++++ > src/cl_command_queue.c | 2 +- > src/cl_command_queue_gen7.c | 16 ++++++ > src/cl_gbe_loader.cpp | 10 ++++ > src/cl_gbe_loader.h | 2 + > src/cl_program.c | 46 ++++++++++++++++ > src/cl_program.h | 2 + > utests/CMakeLists.txt | 1 + > utests/compiler_program_global.cpp | 80 > ++++++++++++++++++++++++++++ > 18 files changed, 374 insertions(+), 42 deletions(-) > create mode 100644 kernels/compiler_program_global.cl > create mode 100644 utests/compiler_program_global.cpp > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index d19f985..6ef077b 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -3596,6 +3596,13 @@ namespace gbe > LoadInstructionPattern(void) : SelectionPattern(1, 1) { > this->opcodes.push_back(ir::OP_LOAD); > } > + bool isReadConstantLegacy(const ir::LoadInstruction &load) const { > + ir::AddressMode AM = load.getAddressMode(); > + ir::AddressSpace AS = load.getAddressSpace(); > + if (AM != ir::AM_Stateless && AS == ir::MEM_CONSTANT) > + return true; > + return false; > + } > void untypedReadStateless(Selection::Opaque &sel, > GenRegister addr, > vector<GenRegister> &dst > @@ -3678,7 +3685,7 @@ namespace gbe > unsigned SI = insn.getSurfaceIndex(); > sel.UNTYPED_READ(addr, dst.data(), valueNum, > GenRegister::immud(SI), btiTemp); > } > - } else if (addrSpace == ir::MEM_LOCAL || addrSpace == > ir::MEM_CONSTANT ) { > + } else if (addrSpace == ir::MEM_LOCAL || isReadConstantLegacy(insn) ) > { > // stateless mode, local/constant still use bti access > unsigned bti = addrSpace == ir::MEM_CONSTANT ? BTI_CONSTANT : > 0xfe; > GenRegister addrDW = addr; > @@ -3842,7 +3849,7 @@ namespace gbe > b = GenRegister::immud(insn.getSurfaceIndex()); > } > read64Legacy(sel, addr, dst, b, btiTemp); > - } else if (addrSpace == MEM_LOCAL || addrSpace == MEM_CONSTANT) > { > + } else if (addrSpace == MEM_LOCAL || isReadConstantLegacy(insn)) { > GenRegister b = GenRegister::immud(addrSpace == MEM_LOCAL? > 0xfe : BTI_CONSTANT); > GenRegister addrDW = addr; > if (addrBytes == 8) > @@ -4063,7 +4070,7 @@ namespace gbe > unsigned SI = insn.getSurfaceIndex(); > sel.BYTE_GATHER(dst, addr, elemSize, GenRegister::immud(SI), > btiTemp); > } > - } else if (addrSpace == ir::MEM_LOCAL || addrSpace == > ir::MEM_CONSTANT) { > + } else if (addrSpace == ir::MEM_LOCAL || isReadConstantLegacy(insn)) { > unsigned bti = addrSpace == ir::MEM_CONSTANT ? BTI_CONSTANT : > 0xfe; > GenRegister addrDW = addr; > if (addrBytes == 8) { > @@ -4207,7 +4214,7 @@ namespace gbe > const Type type = insn.getValueType(); > const uint32_t elemSize = getByteScatterGatherSize(sel, type); > > - if (addrSpace == MEM_CONSTANT) { > + if (isReadConstantLegacy(insn)) { > // XXX TODO read 64bit constant through constant cache > // Per HW Spec, constant cache messages can read at least DWORD data. > // So, byte/short data type, we have to read through data cache. > diff --git a/backend/src/backend/program.cpp > b/backend/src/backend/program.cpp > index 36af95f..ce4f927 100644 > --- a/backend/src/backend/program.cpp > +++ b/backend/src/backend/program.cpp > @@ -104,11 +104,13 @@ namespace gbe { > return it->offset; // we found it! > } > > - Program::Program(void) : constantSet(NULL) {} > + Program::Program(void) : constantSet(NULL), > + relocTable(NULL) {} > Program::~Program(void) { > for (map<std::string, Kernel*>::iterator it = kernels.begin(); it != > kernels.end(); ++it) > GBE_DELETE(it->second); > if (constantSet) delete constantSet; > + if (relocTable) delete relocTable; > } > > #ifdef GBE_COMPILER_AVAILABLE > @@ -151,6 +153,7 @@ namespace gbe { > > bool Program::buildFromUnit(const ir::Unit &unit, std::string &error) { > constantSet = new ir::ConstantSet(unit.getConstantSet()); > + relocTable = new ir::RelocTable(unit.getRelocTable()); > const auto &set = unit.getFunctionSet(); > const uint32_t kernelNum = set.size(); > if (OCL_OUTPUT_GEN_IR) std::cout << unit; > @@ -978,6 +981,18 @@ namespace gbe { > program->getGlobalConstantData(mem); > } > > + static size_t programGetGlobalRelocCount(gbe_program gbeProgram) { > + if (gbeProgram == NULL) return 0; > + const gbe::Program *program = (const gbe::Program*) gbeProgram; > + return program->getGlobalRelocCount(); > + } > + > + static void programGetGlobalRelocTable(gbe_program gbeProgram, char > *mem) { > + if (gbeProgram == NULL) return; > + const gbe::Program *program = (const gbe::Program*) gbeProgram; > + program->getGlobalRelocTable(mem); > + } > + > static uint32_t programGetKernelNum(gbe_program gbeProgram) { > if (gbeProgram == NULL) return 0; > const gbe::Program *program = (const gbe::Program*) gbeProgram; > @@ -1220,6 +1235,8 @@ GBE_EXPORT_SYMBOL > gbe_program_link_from_llvm_cb *gbe_program_link_from_llvm = NU > GBE_EXPORT_SYMBOL gbe_program_build_from_llvm_cb > *gbe_program_build_from_llvm = NULL; > GBE_EXPORT_SYMBOL gbe_program_get_global_constant_size_cb > *gbe_program_get_global_constant_size = NULL; > GBE_EXPORT_SYMBOL gbe_program_get_global_constant_data_cb > *gbe_program_get_global_constant_data = NULL; > +GBE_EXPORT_SYMBOL gbe_program_get_global_reloc_count_cb > *gbe_program_get_global_reloc_count = NULL; > +GBE_EXPORT_SYMBOL gbe_program_get_global_reloc_table_cb > *gbe_program_get_global_reloc_table = NULL; > GBE_EXPORT_SYMBOL gbe_program_clean_llvm_resource_cb > *gbe_program_clean_llvm_resource = NULL; > GBE_EXPORT_SYMBOL gbe_program_delete_cb *gbe_program_delete = > NULL; > GBE_EXPORT_SYMBOL gbe_program_get_kernel_num_cb > *gbe_program_get_kernel_num = NULL; > @@ -1269,6 +1286,8 @@ namespace gbe > gbe_program_check_opt = gbe::programCheckOption; > gbe_program_get_global_constant_size = > gbe::programGetGlobalConstantSize; > gbe_program_get_global_constant_data = > gbe::programGetGlobalConstantData; > + gbe_program_get_global_reloc_count = > gbe::programGetGlobalRelocCount; > + gbe_program_get_global_reloc_table = > gbe::programGetGlobalRelocTable; > gbe_program_clean_llvm_resource = gbe::programCleanLlvmResource; > gbe_program_delete = gbe::programDelete; > gbe_program_get_kernel_num = gbe::programGetKernelNum; > diff --git a/backend/src/backend/program.h > b/backend/src/backend/program.h > index 86b3177..03150bc 100644 > --- a/backend/src/backend/program.h > +++ b/backend/src/backend/program.h > @@ -99,6 +99,7 @@ enum gbe_curbe_type { > GBE_CURBE_BLOCK_IP, > GBE_CURBE_DW_BLOCK_IP, > GBE_CURBE_THREAD_NUM, > + GBE_CURBE_CONSTANT_ADDRSPACE, > GBE_GEN_REG, > }; > > @@ -243,6 +244,11 @@ extern gbe_program_get_global_constant_size_cb > *gbe_program_get_global_constant_ > typedef void (gbe_program_get_global_constant_data_cb)(gbe_program > gbeProgram, char *mem); > extern gbe_program_get_global_constant_data_cb > *gbe_program_get_global_constant_data; > > +typedef size_t (gbe_program_get_global_reloc_count_cb)(gbe_program > gbeProgram); > +extern gbe_program_get_global_reloc_count_cb > *gbe_program_get_global_reloc_count; > + > +typedef void (gbe_program_get_global_reloc_table_cb)(gbe_program > gbeProgram, char *mem); > +extern gbe_program_get_global_reloc_table_cb > *gbe_program_get_global_reloc_table; > /*! Get the size of defined samplers */ > typedef size_t (gbe_kernel_get_sampler_size_cb)(gbe_kernel gbeKernel); > extern gbe_kernel_get_sampler_size_cb *gbe_kernel_get_sampler_size; > diff --git a/backend/src/backend/program.hpp > b/backend/src/backend/program.hpp > index efe192f..e58ddf0 100644 > --- a/backend/src/backend/program.hpp > +++ b/backend/src/backend/program.hpp > @@ -280,6 +280,8 @@ namespace gbe { > /*! Get the content of global constant arrays */ > void getGlobalConstantData(char *mem) const { constantSet- > >getData(mem); } > > + uint32_t getGlobalRelocCount(void) const { return relocTable- > >getCount(); } > + void getGlobalRelocTable(char *p) const { relocTable->getData(p); } > static const uint32_t magic_begin = TO_MAGIC('P', 'R', 'O', 'G'); > static const uint32_t magic_end = TO_MAGIC('G', 'O', 'R', 'P'); > > @@ -309,6 +311,8 @@ namespace gbe { > map<std::string, Kernel*> kernels; > /*! Global (constants) outside any kernel */ > ir::ConstantSet *constantSet; > + /*! relocation table */ > + ir::RelocTable *relocTable; > /*! Use custom allocators */ > GBE_CLASS(Program); > }; > diff --git a/backend/src/gbe_bin_interpreter.cpp > b/backend/src/gbe_bin_interpreter.cpp > index 4594a0a..0957092 100644 > --- a/backend/src/gbe_bin_interpreter.cpp > +++ b/backend/src/gbe_bin_interpreter.cpp > @@ -61,6 +61,8 @@ struct BinInterpCallBackInitializer > gbe_program_get_global_constant_size = > gbe::programGetGlobalConstantSize; > gbe_program_delete = gbe::programDelete; > gbe_program_get_global_constant_data = > gbe::programGetGlobalConstantData; > + gbe_program_get_global_reloc_count = > gbe::programGetGlobalRelocCount; > + gbe_program_get_global_reloc_table = > gbe::programGetGlobalRelocTable; > gbe_kernel_get_sampler_data = gbe::kernelGetSamplerData; > gbe_kernel_get_image_data = gbe::kernelGetImageData; > gbe_kernel_get_arg_info = gbe::kernelGetArgInfo; > diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp > index 3ead8a7..0699167 100644 > --- a/backend/src/ir/profile.cpp > +++ b/backend/src/ir/profile.cpp > @@ -43,7 +43,8 @@ namespace ir { > "zero", "one", > "retVal", > "printf_buffer_pointer", "printf_index_buffer_pointer", > - "dwblockip" > + "dwblockip", > + "constant_addrspace_start" > }; > > #if GBE_DEBUG > @@ -86,6 +87,7 @@ namespace ir { > DECL_NEW_REG(FAMILY_QWORD, printfbptr, 1, > GBE_CURBE_PRINTF_BUF_POINTER); > DECL_NEW_REG(FAMILY_QWORD, printfiptr, 1, > GBE_CURBE_PRINTF_INDEX_POINTER); > DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, > GBE_CURBE_DW_BLOCK_IP); > + DECL_NEW_REG(FAMILY_QWORD, constant_addrspace, 1, > GBE_CURBE_CONSTANT_ADDRSPACE); > } > #undef DECL_NEW_REG > > diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp > index a8445c4..79761d4 100644 > --- a/backend/src/ir/profile.hpp > +++ b/backend/src/ir/profile.hpp > @@ -71,7 +71,8 @@ namespace ir { > static const Register printfbptr = Register(27); // printf buffer > address . > static const Register printfiptr = Register(28); // printf index buffer > address. > static const Register dwblockip = Register(29); // blockip > - static const uint32_t regNum = 30; // number of special > registers > + static const Register constant_addrspace = Register(30); // starting > address of program-scope constant > + static const uint32_t regNum = 31; // number of special > registers > extern const char *specialRegMean[]; // special register name. > } /* namespace ocl */ > > diff --git a/backend/src/ir/unit.hpp b/backend/src/ir/unit.hpp > index 8ff858d..b8df145 100644 > --- a/backend/src/ir/unit.hpp > +++ b/backend/src/ir/unit.hpp > @@ -27,6 +27,7 @@ > #include "ir/constant.hpp" > #include "ir/register.hpp" > #include "sys/map.hpp" > +#include <string.h> > > namespace gbe { > namespace ir { > @@ -37,6 +38,52 @@ namespace ir { > /*! Complete unit of compilation. It contains a set of functions and a set > of > * constant the functions may refer to. > */ > + struct RelocEntry { > + RelocEntry(unsigned int rO, unsigned int dO): > + refOffset(rO), > + defOffset(dO) {} > + > + unsigned int refOffset; > + unsigned int defOffset; > + }; > + > + class RelocTable : public NonCopyable, public Serializable > + { > + public: > + void addEntry(unsigned refOffset, unsigned defOffset) { > + entries.push_back(RelocEntry(refOffset, defOffset)); > + } > + RelocTable() {} > + RelocTable(const RelocTable& other) : Serializable(other), > + entries(other.entries) {} > + uint32_t getCount() { return entries.size(); } > + void getData(char *p) { > + if (entries.size() > 1 && p) > + memcpy(p, entries.data(), entries.size()*sizeof(RelocEntry)); > + } > + static const uint32_t magic_begin = TO_MAGIC('R', 'E', 'L', 'C'); > + static const uint32_t magic_end = TO_MAGIC('C', 'L', 'E', 'R'); > + > + /* format: > + magic_begin | > + const_data_size | > + const_data | > + constant_1_size | > + constant_1 | > + ........ | > + constant_n_size | > + constant_n | > + magic_end | > + total_size > + */ > + > + /*! Implements the serialization. */ > + virtual size_t serializeToBin(std::ostream& outs) { return 0;} > + virtual size_t deserializeFromBin(std::istream& ins) { return 0; } > + private: > + vector<RelocEntry> entries; > + GBE_CLASS(RelocTable); > + }; > class Unit : public NonCopyable > { > public: > @@ -70,6 +117,8 @@ namespace ir { > } > /*! Return the constant set */ > ConstantSet& getConstantSet(void) { return constantSet; } > + const RelocTable& getRelocTable(void) const { return relocTable; } > + RelocTable& getRelocTable(void) { return relocTable; } > /*! Return the constant set */ > const ConstantSet& getConstantSet(void) const { return constantSet; } > void setValid(bool value) { valid = value; } > @@ -78,6 +127,7 @@ namespace ir { > friend class ContextInterface; //!< Can free modify the unit > FunctionSet functions; //!< All the defined functions > ConstantSet constantSet; //!< All the constants defined in the unit > + RelocTable relocTable; > PointerSize pointerSize; //!< Size shared by all pointers > GBE_CLASS(Unit); > bool valid; > diff --git a/backend/src/llvm/llvm_gen_backend.cpp > b/backend/src/llvm/llvm_gen_backend.cpp > index cb47097..d23a598 100644 > --- a/backend/src/llvm/llvm_gen_backend.cpp > +++ b/backend/src/llvm/llvm_gen_backend.cpp > @@ -506,7 +506,7 @@ namespace gbe > > virtual bool doInitialization(Module &M); > /*! helper function for parsing global constant data */ > - void getConstantData(const Constant * c, void* mem, uint32_t& offset) > const; > + void getConstantData(const Constant * c, void* mem, uint32_t& offset, > vector<ir::RelocEntry> &) const; > void collectGlobalConstant(void) const; > ir::ImmediateIndex processConstantImmIndex(Constant *CPV, int32_t > index = 0u); > const ir::Immediate &processConstantImm(Constant *CPV, int32_t index > = 0u); > @@ -1111,8 +1111,9 @@ namespace gbe > break; > } > case 2: > - new_bti = BTI_CONSTANT; > - > + // ocl 2.0, constant pointer use separate bti > + new_bti = btiBase; > + incBtiBase(); > break; > case 3: > new_bti = BTI_LOCAL; > @@ -1349,22 +1350,34 @@ namespace gbe > return; > } > > - void GenWriter::getConstantData(const Constant * c, void* mem, > uint32_t& offset) const { > + void GenWriter::getConstantData(const Constant * c, void* mem, > uint32_t& offset, vector<ir::RelocEntry> &relocs) const { > Type * type = c->getType(); > Type::TypeID id = type->getTypeID(); > > GBE_ASSERT(c); > + if (isa<GlobalVariable>(c)) { > + const GlobalVariable *GV = cast<GlobalVariable>(c); > + > + unsigned valueAddrSpace = GV->getType()->getAddressSpace(); > + ir::Constant cc = unit.getConstantSet().getConstant(c->getName()); > + unsigned int defOffset = cc.getOffset(); > + > + relocs.push_back(ir::RelocEntry(offset, defOffset)); > + uint32_t size = getTypeByteSize(unit, type); > + memset((char*)mem+offset, 0, size); > + offset += size; > + return; > + } > if(isa<UndefValue>(c)) { > uint32_t size = getTypeByteSize(unit, type); > offset += size; > return; > - } else if(isa<ConstantAggregateZero>(c)) { > + } else if(isa<ConstantAggregateZero>(c) || isa<ConstantPointerNull>(c)) { > uint32_t size = getTypeByteSize(unit, type); > memset((char*)mem+offset, 0, size); > offset += size; > return; > } > - > switch(id) { > case Type::TypeID::StructTyID: > { > @@ -1382,7 +1395,7 @@ namespace gbe > offset += padding/8; > const Constant* sub = cast<Constant>(c->getOperand(op)); > GBE_ASSERT(sub); > - getConstantData(sub, mem, offset); > + getConstantData(sub, mem, offset, relocs); > } > break; > } > @@ -1401,7 +1414,7 @@ namespace gbe > uint32_t ops = c->getNumOperands(); > for(uint32_t op = 0; op < ops; ++op) { > Constant * ca = dyn_cast<Constant>(c->getOperand(op)); > - getConstantData(ca, mem, offset); > + getConstantData(ca, mem, offset, relocs); > offset += padding; > } > } > @@ -1449,21 +1462,34 @@ namespace gbe > const Module::GlobalListType &globalList = TheModule->getGlobalList(); > for(auto i = globalList.begin(); i != globalList.end(); i ++) { > const GlobalVariable &v = *i; > - if(!v.isConstantUsed()) continue; > const char *name = v.getName().data(); > unsigned addrSpace = v.getType()->getAddressSpace(); > - if(addrSpace == ir::AddressSpace::MEM_CONSTANT || v.isConstant()) { > - GBE_ASSERT(v.hasInitializer()); > - const Constant *c = v.getInitializer(); > - Type * type = c->getType(); > + > + vector<ir::RelocEntry> relocs; > + if(addrSpace == 2 /* __constant */ > + || addrSpace == 1 > + || addrSpace == 0) { > + Type * type = v.getValueType(); > > uint32_t size = getTypeByteSize(unit, type); > void* mem = malloc(size); > uint32_t offset = 0; > - getConstantData(c, mem, offset); > + if (v.hasInitializer()) { > + const Constant *c = v.getInitializer(); > + getConstantData(c, mem, offset, relocs); > + } else { > + memset(mem, 0, size); > + } > uint32_t alignment = getAlignmentByte(unit, type); > unit.newConstant((char *)mem, name, size, alignment); > free(mem); > + uint32_t refOffset = > unit.getConstantSet().getConstant(name).getOffset(); > + for (uint32_t k = 0; k < relocs.size(); k++) { > + unit.getRelocTable().addEntry( > + refOffset + relocs[k].refOffset, > + relocs[k].defOffset > + ); > + } > } > } > } > @@ -2562,33 +2588,23 @@ namespace gbe > this->newRegister(const_cast<GlobalVariable*>(&v)); > ir::Register reg = > regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0); > ctx.LOADI(getType(ctx, v.getType()), reg, > ctx.newIntegerImmediate(oldSlm + padding/8, getType(ctx, v.getType()))); > - } else if(addrSpace == ir::MEM_CONSTANT || v.isConstant()) { > - GBE_ASSERT(v.hasInitializer()); > - this->newRegister(const_cast<GlobalVariable*>(&v)); > - ir::Register reg = > regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0); > - ir::Constant &con = unit.getConstantSet().getConstant(v.getName()); > - ctx.LOADI(getType(ctx, v.getType()), reg, > ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType()))); > - } else { > + } else if(addrSpace == ir::MEM_CONSTANT > + || addrSpace == ir::MEM_GLOBAL > + || v.isConstant()) { > if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) { > ctx.getFunction().getPrintfSet()- > >setBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second); > regTranslator.newScalarProxy(ir::ocl::printfbptr, > const_cast<GlobalVariable*>(&v)); > } else > if(v.getName().equals(StringRef("__gen_ocl_printf_index_buf"))) > { > ctx.getFunction().getPrintfSet()- > >setIndexBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second); > regTranslator.newScalarProxy(ir::ocl::printfiptr, > const_cast<GlobalVariable*>(&v)); > - } else if(v.getName().str().substr(0, 4) == ".str") { > - /* When there are multi printf statements in multi kernel fucntions > within the same > - translate unit, if they have the same sting parameter, such as > - kernel_func1 () { > - printf("Line is %d\n", line_num1); > - } > - kernel_func2 () { > - printf("Line is %d\n", line_num2); > - } > - The Clang will just generate one global string named .strXXX to > represent "Line is %d\n" > - So when translating the kernel_func1, we can not unref that > global > var, so we will > - get here. Just ignore it to avoid assert. */ > } else { > - GBE_ASSERT(0 && "Unsupported private memory access pattern"); > + this->newRegister(const_cast<GlobalVariable*>(&v)); > + ir::Register reg = > regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0); > + ir::Constant &con = unit.getConstantSet().getConstant(v.getName()); > + ctx.LOADI(getType(ctx, v.getType()), reg, > ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType()))); > + if (!legacyMode) { > + ctx.ADD(getType(ctx, v.getType()), reg, > ir::ocl::constant_addrspace, > reg); > + } > } > } > } > diff --git a/kernels/compiler_program_global.cl > b/kernels/compiler_program_global.cl > new file mode 100644 > index 0000000..405c53f > --- /dev/null > +++ b/kernels/compiler_program_global.cl > @@ -0,0 +1,68 @@ > +struct config{ > + int s0; > + global short *s1; > +}; > + > +global int i = 5; > +global int bb = 4; > +global int *global p; > + > +/* array */ > +global int ba[12]; > + > +/* short/long data type */ > +global short s; > +global short s2; > +global long l; > + > +/* pointer in constant AS to global */ > +global int * constant px =&i; > + > +/* constant pointer relocation */ > +constant int x = 2; > +constant int y =1; > +constant int *constant z[2] = {&x, &y}; > + > +/* structure with pointer field */ > +global struct config c[2] = {{1, &s}, {2, &s2} }; > + > + > +global int a = 1; > +global int b = 2; > +global int * constant gArr[2]= {&a, &b}; > + > +__kernel void compiler_program_global0(const global int *src, int dynamic) > { > + size_t gid = get_global_id(0); > + /* global read/write */ > + p = &i; > + *p += 1; > + > + /* pointer in struct memory access */ > + *c[gid&1].s1 += 2; > + > + s = 2; > + l = 3; > + > + /* constant AS pointer (points to global) memory access */ > + *px += *z[dynamic]; > + > + p = &bb; > + /* array */ > + if (gid < 11) > + ba[gid] = src[gid]; > +} > + > +__kernel void compiler_program_global1(global int *dst, int dynamic) { > + size_t gid = get_global_id(0); > +// static global sg; > + > + dst[11] = i; > + dst[12] = *p; > + dst[13] = s; > + dst[14] = l; > + dst[15] = *gArr[dynamic]; > + > + if (gid < 11) > + dst[gid] = ba[gid]; > +} > + > diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c > index 9dc3fe6..442c6a2 100644 > --- a/src/cl_command_queue.c > +++ b/src/cl_command_queue.c > @@ -161,7 +161,7 @@ > cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) > for (i = 0; i < k->arg_n; ++i) { > int32_t offset; // location of the address in the curbe > arg_type = interp_kernel_get_arg_type(k->opaque, i); > - if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem) > + if (!(arg_type == GBE_ARG_GLOBAL_PTR || arg_type == > GBE_ARG_CONSTANT_PTR) || !k->args[i].mem) > continue; > offset = interp_kernel_get_curbe_offset(k->opaque, > GBE_CURBE_KERNEL_ARGUMENT, i); > if (offset < 0) > diff --git a/src/cl_command_queue_gen7.c > b/src/cl_command_queue_gen7.c > index 2edc3be..61ffe7e 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -29,6 +29,7 @@ > #include <assert.h> > #include <stdio.h> > #include <string.h> > +#include <unistd.h> > > #define MAX_GROUP_SIZE_IN_HALFSLICE 512 > static INLINE size_t cl_kernel_compute_batch_sz(cl_kernel k) { return > 256+256; } > @@ -117,6 +118,11 @@ cl_upload_constant_buffer(cl_command_queue > queue, cl_kernel ker) > * we need raw_size & aligned_size > */ > GET_QUEUE_THREAD_GPGPU(queue); > + // TODO this is only valid for OpenCL 1.2, > + // under ocl1.2 we gather all constant into one dedicated surface. > + // but in 2.0 we put program global into one surface, but constants > + // pass through kernel argument in each separate buffer > +#if 0 > int32_t arg; > size_t offset = 0; > uint32_t raw_size = 0, aligned_size =0; > @@ -185,6 +191,16 @@ cl_upload_constant_buffer(cl_command_queue > queue, cl_kernel ker) > } > } > cl_buffer_unmap(bo); > +#endif > + // pass the starting of constant address space > + int32_t constant_addrspace = interp_kernel_get_curbe_offset(ker- > >opaque, GBE_CURBE_CONSTANT_ADDRSPACE, 0); > + if (constant_addrspace >= 0) { > + size_t global_const_size = > interp_program_get_global_constant_size(ker->program->opaque); > + if (global_const_size > 0) { > + *(uint64_t*)(ker->curbe + constant_addrspace) = (uint64_t)ker- > >program->global_data_ptr; > + cl_gpgpu_bind_buf(gpgpu, ker->program->global_data, > constant_addrspace, 0, ALIGN(global_const_size, getpagesize()), > BTI_CONSTANT); > + } > + } > return 0; > } > > diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp > index e832a53..d75c92c 100644 > --- a/src/cl_gbe_loader.cpp > +++ b/src/cl_gbe_loader.cpp > @@ -38,6 +38,8 @@ gbe_program_clean_llvm_resource_cb > *compiler_program_clean_llvm_resource = NULL; > gbe_program_new_from_binary_cb *interp_program_new_from_binary = > NULL; > gbe_program_get_global_constant_size_cb > *interp_program_get_global_constant_size = NULL; > gbe_program_get_global_constant_data_cb > *interp_program_get_global_constant_data = NULL; > +gbe_program_get_global_reloc_count_cb > *interp_program_get_global_reloc_count = NULL; > +gbe_program_get_global_reloc_table_cb > *interp_program_get_global_reloc_table = NULL; > gbe_program_delete_cb *interp_program_delete = NULL; > gbe_program_get_kernel_num_cb *interp_program_get_kernel_num = > NULL; > gbe_program_get_kernel_by_name_cb > *interp_program_get_kernel_by_name = NULL; > @@ -109,6 +111,14 @@ struct GbeLoaderInitializer > if (interp_program_get_global_constant_data == NULL) > return false; > > + interp_program_get_global_reloc_count = > *(gbe_program_get_global_reloc_count_cb**)dlsym(dlhInterp, > "gbe_program_get_global_reloc_count"); > + if (interp_program_get_global_reloc_count == NULL) > + return false; > + > + interp_program_get_global_reloc_table = > *(gbe_program_get_global_reloc_table_cb**)dlsym(dlhInterp, > "gbe_program_get_global_reloc_table"); > + if (interp_program_get_global_reloc_table == NULL) > + return false; > + > interp_program_delete = *(gbe_program_delete_cb**)dlsym(dlhInterp, > "gbe_program_delete"); > if (interp_program_delete == NULL) > return false; > diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h > index de91c85..28741ff 100644 > --- a/src/cl_gbe_loader.h > +++ b/src/cl_gbe_loader.h > @@ -38,6 +38,8 @@ extern gbe_program_clean_llvm_resource_cb > *compiler_program_clean_llvm_resource; > extern gbe_program_new_from_binary_cb > *interp_program_new_from_binary; > extern gbe_program_get_global_constant_size_cb > *interp_program_get_global_constant_size; > extern gbe_program_get_global_constant_data_cb > *interp_program_get_global_constant_data; > +extern gbe_program_get_global_reloc_count_cb > *interp_program_get_global_reloc_count; > +extern gbe_program_get_global_reloc_table_cb > *interp_program_get_global_reloc_table; > extern gbe_program_delete_cb *interp_program_delete; > extern gbe_program_get_kernel_num_cb > *interp_program_get_kernel_num; > extern gbe_program_get_kernel_by_name_cb > *interp_program_get_kernel_by_name; > diff --git a/src/cl_program.c b/src/cl_program.c > index 98b6d51..ffdb2a1 100644 > --- a/src/cl_program.c > +++ b/src/cl_program.c > @@ -97,6 +97,9 @@ cl_program_delete(cl_program p) > cl_kernel_delete(p->ker[i]); > cl_free(p->ker); > > + cl_free(p->global_data_ptr); > + if (p->global_data_ptr) > + cl_buffer_unreference(p->global_data); > /* Program belongs to their parent context */ > cl_context_delete(p->ctx); > > @@ -191,6 +194,42 @@ LOCAL cl_bool headerCompare(const unsigned char > *BufPtr, BINARY_HEADER_INDEX ind > #define isLLVM_LIB(BufPtr) headerCompare(BufPtr, BHI_LIBRARY) > #define isGenBinary(BufPtr) headerCompare(BufPtr, BHI_GEN_BINARY) > > +static cl_int get_program_global_data(cl_program prog) { > + cl_buffer_mgr bufmgr = NULL; > + bufmgr = cl_context_get_bufmgr(prog->ctx); > + assert(bufmgr); > + size_t const_size = interp_program_get_global_constant_size(prog- > >opaque); > + if (const_size == 0) return CL_SUCCESS; > + > + int page_size = getpagesize(); > + size_t alignedSz = ALIGN(const_size, page_size); > + char * p = (char*)cl_aligned_malloc(alignedSz, page_size); > + prog->global_data_ptr = p; > + interp_program_get_global_constant_data(prog->opaque, (char*)p); > + > + prog->global_data = cl_buffer_alloc_userptr(bufmgr, "program global > data", p, alignedSz, 0); > + cl_buffer_set_softpin_offset(prog->global_data, (size_t)p); > + > + uint32_t reloc_count = interp_program_get_global_reloc_count(prog- > >opaque); > + if (reloc_count > 0) { > + uint32_t x; > + struct RelocEntry {int refOffset; int defOffset;}; > + char *temp = (char*) malloc(reloc_count *sizeof(int)*2); > + interp_program_get_global_reloc_table(prog->opaque, temp); > + for (x = 0; x < reloc_count; x++) { > + int ref_offset = ((struct RelocEntry *)temp)[x].refOffset; > + *(uint64_t*)&(p[ref_offset]) = ((struct RelocEntry *)temp)[x].defOffset > + (uint64_t)p; > + } > + free(temp); > + } > +#if 0 > + int x = 0; > + for (x = 0; x < const_size; x++) { > + printf("offset %d data: %x\n", x, (unsigned)p[x]); > + } > +#endif > + return CL_SUCCESS; > +} > LOCAL cl_program > cl_program_create_from_binary(cl_context ctx, > cl_uint num_devices, > @@ -603,6 +642,9 @@ cl_program_build(cl_program p, const char *options) > memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz); > copyed += sz; > } > + if ((err = get_program_global_data(p)) != CL_SUCCESS) > + goto error; > + > p->is_built = 1; > p->build_status = CL_BUILD_SUCCESS; > return CL_SUCCESS; > @@ -697,6 +739,10 @@ cl_program_link(cl_context context, > memcpy(p->bin + copyed, interp_kernel_get_code(opaque), sz); > copyed += sz; > } > + > + if ((err = get_program_global_data(p)) != CL_SUCCESS) > + goto error; > + > done: > if(p) p->is_built = 1; > if(p) p->build_status = CL_BUILD_SUCCESS; > diff --git a/src/cl_program.h b/src/cl_program.h > index 63ad16d..083d66a 100644 > --- a/src/cl_program.h > +++ b/src/cl_program.h > @@ -54,6 +54,8 @@ struct _cl_program { > cl_kernel *ker; /* All kernels included by the OCL file */ > cl_program prev, next; /* We chain the programs together */ > cl_context ctx; /* Its parent context */ > + cl_buffer global_data; > + char * global_data_ptr; > char *bin; /* The program copied verbatim */ > size_t bin_sz; /* Its size in memory */ > char *source; /* Program sources */ > diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt > index 78442cb..0fca450 100644 > --- a/utests/CMakeLists.txt > +++ b/utests/CMakeLists.txt > @@ -191,6 +191,7 @@ set (utests_sources > compiler_bool_cross_basic_block.cpp > compiler_private_const.cpp > compiler_private_data_overflow.cpp > + compiler_program_global.cpp > compiler_getelementptr_bitcast.cpp > compiler_sub_group_any.cpp > compiler_sub_group_all.cpp > diff --git a/utests/compiler_program_global.cpp > b/utests/compiler_program_global.cpp > new file mode 100644 > index 0000000..ef7c655 > --- /dev/null > +++ b/utests/compiler_program_global.cpp > @@ -0,0 +1,80 @@ > +#include "utest_helper.hpp" > +#include "utest_file_map.hpp" > + > +static int init_program(const char* name, cl_context ctx, cl_program *pg ) > +{ > + cl_int err; > + char* ker_path = cl_do_kiss_path(name, device); > + > + cl_file_map_t *fm = cl_file_map_new(); > + err = cl_file_map_open(fm, ker_path); > + if(err != CL_FILE_MAP_SUCCESS) > + OCL_ASSERT(0); > + const char *src = cl_file_map_begin(fm); > + > + *pg = clCreateProgramWithSource(ctx, 1, &src, NULL, &err); > + free(ker_path); > + cl_file_map_delete(fm); > + return 0; > + > +} > + > +void compiler_program_global() > +{ > + const int n = 16; > + int cpu_src[16]; > + cl_int err; > + > + // Setup kernel and buffers > + cl_program program; > + init_program("compiler_program_global.cl", ctx, &program); > + OCL_CALL (clBuildProgram, program, 1, &device, "-cl-std=CL2.0", NULL, > NULL); > + > + cl_kernel k0 = clCreateKernel(program, "compiler_program_global0", > &err); > + assert(err == CL_SUCCESS); > + cl_kernel k1 = clCreateKernel(program, "compiler_program_global1", > &err); > + assert(err == CL_SUCCESS); > + > + OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int), NULL); > + OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int), NULL); > + > + OCL_CALL (clSetKernelArg, k0, 0, sizeof(cl_mem), &buf[0]); > + OCL_CALL (clSetKernelArg, k1, 0, sizeof(cl_mem), &buf[1]); > + > + int dynamic = 1; > + OCL_CALL (clSetKernelArg, k0, 1, sizeof(cl_int), &dynamic); > + OCL_CALL (clSetKernelArg, k1, 1, sizeof(cl_int), &dynamic); > + > + globals[0] = 16; > + locals[0] = 16; > + > + OCL_MAP_BUFFER(0); > + for (int i = 0; i < n; ++i) > + cpu_src[i] = ((int*)buf_data[0])[i] = i; > + OCL_UNMAP_BUFFER(0); > + > + // Run the kernel on GPU > + OCL_CALL (clEnqueueNDRangeKernel, queue, k0, 1, NULL, globals, locals, 0, > NULL, NULL); > + OCL_CALL (clEnqueueNDRangeKernel, queue, k1, 1, NULL, globals, locals, 0, > NULL, NULL); > + > + // Compare > + OCL_MAP_BUFFER(1); > + for (int32_t i = 0; i < n; ++i) { > +// printf("i=%d dst=%d\n", i, ((int*)buf_data[1])[i]); > + switch(i) { > + default: OCL_ASSERT(((int*)buf_data[1])[i] == i); break; > + case 11: OCL_ASSERT(((int*)buf_data[1])[i] == 7); break; > + case 12: OCL_ASSERT(((int*)buf_data[1])[i] == 4); break; > + case 13: OCL_ASSERT(((int*)buf_data[1])[i] == 2); break; > + case 14: OCL_ASSERT(((int*)buf_data[1])[i] == 3); break; > + case 15: OCL_ASSERT(((int*)buf_data[1])[i] == 2); break; > + } > + } > + OCL_UNMAP_BUFFER(1); > + clReleaseKernel(k0); > + clReleaseKernel(k1); > + clReleaseProgram(program); > +} > + > +MAKE_UTEST_FROM_FUNCTION(compiler_program_global); > + > -- > 2.4.1 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
