LGTM, pushed, thanks.
On Tue, Oct 14, 2014 at 03:52:04PM +0800, [email protected] wrote: > From: Junyan He <[email protected]> > > The static analyse for printf can not totally work > when the printf inst is within the loop and the loop > can not be unrolled. This causes the printf just to > print one info for a loop and to lose all the others. > We now increment the exec number every time the printf > inst is triggered. The number is stored for output all > the message later. > The problem is that we can not caculate the exact loops > number for each printf inst. The wrong loop number will > cause the data overwritten. We now assume all the printf > inst are in loop and store the data like this: > | PRINTF1_DATA PRINTF2_DATA ... | PRINTF1_DATA PRINTF2_DATA ... | ... > | DATA_LOOP_ONE | DATA_LOOP_TWO | ... > Although this may cause some space wasted. > > Another problem is that we need to decide the size of printf buffer > because the loop upbound can not be caculated. We just set > it yo 1M for small info slot request and 4M for big one. > > Signed-off-by: Junyan He <[email protected]> > --- > backend/src/ir/printf.cpp | 13 +- > backend/src/llvm/llvm_printf_parser.cpp | 376 > +++++++++++++++++++++---------- > kernels/test_printf.cl | 27 ++- > src/cl_command_queue_gen7.c | 8 + > 4 files changed, 283 insertions(+), 141 deletions(-) > > diff --git a/backend/src/ir/printf.cpp b/backend/src/ir/printf.cpp > index e99aad5..093bfc6 100644 > --- a/backend/src/ir/printf.cpp > +++ b/backend/src/ir/printf.cpp > @@ -106,8 +106,9 @@ namespace gbe > if (!vec_i) \ > pf_str = pf_str + std::string(#conv); \ > printf(pf_str.c_str(), \ > - ((target_ty *)((char *)buf_addr + > slot.state->out_buf_sizeof_offset * \ > - global_wk_sz0 * global_wk_sz1 * global_wk_sz2)) \ > + ((target_ty *)((char *)buf_addr + sizeOfSize * global_wk_sz0 * > global_wk_sz1 * global_wk_sz2 * n \ > + + > slot.state->out_buf_sizeof_offset * \ > + global_wk_sz0 * > global_wk_sz1 * global_wk_sz2)) \ > [(k*global_wk_sz0*global_wk_sz1 + j*global_wk_sz0 + i) * > vec_num + vec_i]);\ > } while (0) > > @@ -124,10 +125,9 @@ namespace gbe > for (i = 0; i < global_wk_sz0; i++) { > for (j = 0; j < global_wk_sz1; j++) { > for (k = 0; k < global_wk_sz2; k++) { > - > - int flag = ((int > *)index_addr)[stmt*global_wk_sz0*global_wk_sz1*global_wk_sz2 > - + k*global_wk_sz0*global_wk_sz1 > + j*global_wk_sz0 + i]; > - if (flag) { > + int loop_num = ((int > *)index_addr)[stmt*global_wk_sz0*global_wk_sz1*global_wk_sz2 > + + > k*global_wk_sz0*global_wk_sz1 + j*global_wk_sz0 + i]; > + for (int n = 0; n < loop_num; n++) { > for (auto &slot : pf) { > pf_str = ""; > int vec_num; > @@ -225,6 +225,7 @@ namespace gbe > > pf_str = ""; > } > + > } > } > } > diff --git a/backend/src/llvm/llvm_printf_parser.cpp > b/backend/src/llvm/llvm_printf_parser.cpp > index 29684ba..bb6c26d 100644 > --- a/backend/src/llvm/llvm_printf_parser.cpp > +++ b/backend/src/llvm/llvm_printf_parser.cpp > @@ -330,9 +330,17 @@ error: > Type* intTy; > Value* pbuf_ptr; > Value* index_buf_ptr; > + Value* g1Xg2Xg3; > + Value* wg_offset; > int out_buf_sizeof_offset; > static map<CallInst*, PrintfSet::PrintfFmt*> printfs; > int printf_num; > + int totalSizeofSize; > + > + struct PrintfParserInfo { > + llvm::CallInst* call; > + PrintfSet::PrintfFmt* printf_fmt; > + }; > > PrintfParser(void) : FunctionPass(ID) > { > @@ -343,7 +351,10 @@ error: > printfs.clear(); > pbuf_ptr = NULL; > index_buf_ptr = NULL; > + g1Xg2Xg3 = NULL; > + wg_offset = NULL; > printf_num = 0; > + totalSizeofSize = 0; > } > > ~PrintfParser(void) > @@ -355,9 +366,9 @@ error: > printfs.clear(); > } > > - > - bool parseOnePrintfInstruction(CallInst *& call); > + bool parseOnePrintfInstruction(CallInst * call, PrintfParserInfo& info, > int& sizeof_size); > bool generateOneParameterInst(PrintfSlot& slot, Value*& arg, Type*& > dst_type, int& sizeof_size); > + bool generateOnePrintfInstruction(PrintfParserInfo& pInfo); > > virtual const char *getPassName() const > { > @@ -367,119 +378,38 @@ error: > virtual bool runOnFunction(llvm::Function &F); > }; > > - bool PrintfParser::parseOnePrintfInstruction(CallInst *& call) > + bool PrintfParser::generateOnePrintfInstruction(PrintfParserInfo& pInfo) > { > - CallSite CS(call); > - CallSite::arg_iterator CI_FMT = CS.arg_begin(); > - int param_num = 0; > - > - llvm::Constant* arg0 = dyn_cast<llvm::ConstantExpr>(*CI_FMT); > - llvm::Constant* arg0_ptr = dyn_cast<llvm::Constant>(arg0->getOperand(0)); > - if (!arg0_ptr) { > - return false; > - } > - > - ConstantDataSequential* fmt_arg = > dyn_cast<ConstantDataSequential>(arg0_ptr->getOperand(0)); > - if (!fmt_arg || !fmt_arg->isCString()) { > - return false; > - } > - > - std::string fmt = fmt_arg->getAsCString(); > - > - PrintfSet::PrintfFmt* printf_fmt = NULL; > - > - if (!(printf_fmt = parser_printf_fmt((char *)fmt.c_str(), param_num))) > {//at lease print something > - return false; > - } > - > - /* iff parameter more than %, error. */ > - /* str_fmt arg0 arg1 ... NULL */ > - if (param_num + 2 < static_cast<int>(call->getNumOperands())) { > - delete printf_fmt; > - return false; > - } > - > - /* FIXME: Because the OpenCL language do not support va macro, and we do > not want > - to introduce the va_list, va_start and va_end into our code, we just > simulate > - the function calls to caculate the offset caculation here. */ > -#define BUILD_CALL_INST(name) \ > - CallInst* name = > builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \ > - "__gen_ocl_get_"#name, > \ > - IntegerType::getInt32Ty(module->getContext()), > \ > - NULL))) > - > - BUILD_CALL_INST(group_id2); > - BUILD_CALL_INST(group_id1); > - BUILD_CALL_INST(group_id0); > - BUILD_CALL_INST(global_size2); > - BUILD_CALL_INST(global_size1); > - BUILD_CALL_INST(global_size0); > - BUILD_CALL_INST(local_id2); > - BUILD_CALL_INST(local_id1); > - BUILD_CALL_INST(local_id0); > - BUILD_CALL_INST(local_size2); > - BUILD_CALL_INST(local_size1); > - BUILD_CALL_INST(local_size0); > - > -#undef BUILD_CALL_INST > - > Value* op0 = NULL; > Value* val = NULL; > - /* calculate offset for later usage. > - offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * > global_size0) > - + (local_id1 + local_size1 * group_id1) * global_size0 > - + (local_id0 + local_size0 * group_id0)) * sizeof(type) */ > - > - // local_size2 * group_id2 > - val = builder->CreateMul(local_size2, group_id2); > - // local_id2 + local_size2 * group_id2 > - val = builder->CreateAdd(local_id2, val); > - // global_size1 * global_size0 > - op0 = builder->CreateMul(global_size1, global_size0); > - // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0) > - Value* offset1 = builder->CreateMul(val, op0); > - // local_size1 * group_id1 > - val = builder->CreateMul(local_size1, group_id1); > - // local_id1 + local_size1 * group_id1 > - val = builder->CreateAdd(local_id1, val); > - // (local_id1 + local_size1 * group_id1) * global_size_0 > - Value* offset2 = builder->CreateMul(val, global_size0); > - // local_size0 * group_id0 > - val = builder->CreateMul(local_size0, group_id0); > - // local_id0 + local_size0 * group_id0 > - val = builder->CreateAdd(local_id0, val); > - // The total sum > - val = builder->CreateAdd(val, offset1); > - Value* offset = builder->CreateAdd(val, offset2); > > ///////////////////////////////////////////////////// > /* calculate index address. > - index_addr = (index_offset + offset )* sizeof(int) + index_buf_ptr > + index_addr = (index_offset + wg_offset )* sizeof(int) + index_buf_ptr > index_offset = global_size2 * global_size1 * global_size0 * > printf_num */ > > - // global_size2 * global_size1 > - op0 = builder->CreateMul(global_size2, global_size1); > - // global_size2 * global_size1 * global_size0 > - Value* glXg2Xg3 = builder->CreateMul(op0, global_size0); > - Value* index_offset = builder->CreateMul(glXg2Xg3, > ConstantInt::get(intTy, printf_num)); > + Value* index_offset = builder->CreateMul(g1Xg2Xg3, > ConstantInt::get(intTy, printf_num)); > // index_offset + offset > - op0 = builder->CreateAdd(index_offset, offset); > + op0 = builder->CreateAdd(index_offset, wg_offset); > // (index_offset + offset)* sizeof(int) > op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int))); > // Final index address = index_buf_ptr + (index_offset + offset)* > sizeof(int) > op0 = builder->CreateAdd(index_buf_ptr, op0); > Value* index_addr = builder->CreateIntToPtr(op0, > Type::getInt32PtrTy(module->getContext(), 1)); > - builder->CreateStore(ConstantInt::get(intTy, 1), index_addr);// The flag > + // Load the printf num first, printf may be in loop. > + Value* loop_num = builder->CreateLoad(index_addr); > + val = builder->CreateAdd(loop_num, ConstantInt::get(intTy, 1)); > + builder->CreateStore(val, index_addr);// The loop number. > > int i = 1; > Value* data_addr = NULL; > - for (auto &s : *printf_fmt) { > + for (auto &s : *pInfo.printf_fmt) { > if (s.type == PRINTF_SLOT_TYPE_STRING) > continue; > > - assert(i < static_cast<int>(call->getNumOperands()) - 1); > + assert(i < static_cast<int>(pInfo.call->getNumOperands()) - 1); > > - Value *out_arg = call->getOperand(i); > + Value *out_arg = pInfo.call->getOperand(i); > Type *dst_type = NULL; > int sizeof_size = 0; > if (!generateOneParameterInst(s, out_arg, dst_type, sizeof_size)) { > @@ -499,16 +429,23 @@ error: > > ///////////////////////////////////////////////////// > /* Calculate the data address. > - data_addr = data_offset + pbuf_ptr + offset * sizeof(specify) > + data_addr = (data_offset + pbuf_ptr + offset * sizeof(specify)) + > + totalSizeofSize * global_size2 * global_size1 * global_size0 > * loop_num > data_offset = global_size2 * global_size1 * global_size0 * > out_buf_sizeof_offset > > //global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset */ > - op0 = builder->CreateMul(glXg2Xg3, ConstantInt::get(intTy, > out_buf_sizeof_offset)); > + op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, > out_buf_sizeof_offset)); > //offset * sizeof(specify) > - val = builder->CreateMul(offset, ConstantInt::get(intTy, sizeof_size)); > + val = builder->CreateMul(wg_offset, ConstantInt::get(intTy, > sizeof_size)); > //data_offset + pbuf_ptr > op0 = builder->CreateAdd(pbuf_ptr, op0); > op0 = builder->CreateAdd(op0, val); > + //totalSizeofSize * global_size2 * global_size1 * global_size0 > + val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, > totalSizeofSize)); > + //totalSizeofSize * global_size2 * global_size1 * global_size0 * > loop_num > + val = builder->CreateMul(val, loop_num); > + //final > + op0 = builder->CreateAdd(op0, val); > data_addr = builder->CreateIntToPtr(op0, dst_type); > builder->CreateStore(out_arg, data_addr); > > @@ -520,14 +457,101 @@ error: > "__gen_ocl_printf", > Type::getVoidTy(module->getContext()), > NULL))); > assert(printfs[printf_inst] == NULL); > - printfs[printf_inst] = printf_fmt; > + printfs[printf_inst] = pInfo.printf_fmt; > printf_num++; > return true; > } > > + bool PrintfParser::parseOnePrintfInstruction(CallInst * call, > PrintfParserInfo& info, int& sizeof_size) > + { > + CallSite CS(call); > + CallSite::arg_iterator CI_FMT = CS.arg_begin(); > + int param_num = 0; > + > + llvm::Constant* arg0 = dyn_cast<llvm::ConstantExpr>(*CI_FMT); > + llvm::Constant* arg0_ptr = dyn_cast<llvm::Constant>(arg0->getOperand(0)); > + if (!arg0_ptr) { > + return false; > + } > + > + ConstantDataSequential* fmt_arg = > dyn_cast<ConstantDataSequential>(arg0_ptr->getOperand(0)); > + if (!fmt_arg || !fmt_arg->isCString()) { > + return false; > + } > + > + std::string fmt = fmt_arg->getAsCString(); > + > + PrintfSet::PrintfFmt* printf_fmt = NULL; > + > + if (!(printf_fmt = parser_printf_fmt((char *)fmt.c_str(), param_num))) > {//at lease print something > + return false; > + } > + > + /* iff parameter more than %, error. */ > + /* str_fmt arg0 arg1 ... NULL */ > + if (param_num + 2 < static_cast<int>(call->getNumOperands())) { > + delete printf_fmt; > + return false; > + } > + > + info.call = call; > + info.printf_fmt = printf_fmt; > + > + sizeof_size = 0; > + int i = 1; > + for (auto &s : *printf_fmt) { > + int sz = 0; > + if (s.type == PRINTF_SLOT_TYPE_STRING) > + continue; > + > + assert(i < static_cast<int>(call->getNumOperands()) - 1); > + > + switch (s.state->conversion_specifier) { > + case PRINTF_CONVERSION_I: > + case PRINTF_CONVERSION_D: > + case PRINTF_CONVERSION_O: > + case PRINTF_CONVERSION_U: > + case PRINTF_CONVERSION_x: > + case PRINTF_CONVERSION_X: > + case PRINTF_CONVERSION_P: > + if (s.state->length_modifier == PRINTF_LM_L) > + sz = sizeof(int64_t); > + else > + sz = sizeof(int); > + break; > + case PRINTF_CONVERSION_C: > + sz = sizeof(char); > + break; > + case PRINTF_CONVERSION_F: > + case PRINTF_CONVERSION_f: > + case PRINTF_CONVERSION_E: > + case PRINTF_CONVERSION_e: > + case PRINTF_CONVERSION_G: > + case PRINTF_CONVERSION_g: > + case PRINTF_CONVERSION_A: > + case PRINTF_CONVERSION_a: > + sz = sizeof(float); > + break; > + default: > + sz = 0; > + break; > + } > + > + if (s.state->vector_n) { > + sz = sz * s.state->vector_n; > + } > + > + sizeof_size += ((sz + 3) / 4) * 4; > + } > + > + return true; > + } > + > + > bool PrintfParser::runOnFunction(llvm::Function &F) > { > bool changed = false; > + bool hasPrintf = false; > switch (F.getCallingConv()) { > #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 > case CallingConv::PTX_Device: > @@ -541,6 +565,8 @@ error: > GBE_ASSERTM(false, "Unsupported calling convention"); > } > > + std::vector<PrintfParserInfo> infoVect; > + totalSizeofSize = 0; > module = F.getParent(); > intTy = IntegerType::get(module->getContext(), 32); > > @@ -550,11 +576,14 @@ error: > > builder = new IRBuilder<>(module->getContext()); > > - /* Iter the function and find printf. */ > + /* First find printfs and caculate all slots size of one loop. */ > for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) > { > for (BasicBlock::iterator instI = B->begin(), > instE = B->end(); instI != instE; ++instI) { > > + PrintfParserInfo pInfo; > + int sizeof_size = 0; > + > llvm::CallInst* call = dyn_cast<llvm::CallInst>(instI); > if (!call) { > continue; > @@ -569,26 +598,107 @@ error: > if (fnName != "__gen_ocl_printf_stub") > continue; > > - changed = true; > - > - builder->SetInsertPoint(call); > - > - if (!pbuf_ptr) { > - /* alloc a new buffer ptr to collect the print output. */ > - Type *ptrTy = Type::getInt32PtrTy(module->getContext()); > - llvm::Constant * pBuf = > module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy); > - pbuf_ptr = builder->CreatePtrToInt(pBuf, > Type::getInt32Ty(module->getContext())); > - } > - if (!index_buf_ptr) { > - Type *ptrTy = Type::getInt32PtrTy(module->getContext()); > - llvm::Constant * pBuf = > module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy); > - index_buf_ptr = builder->CreatePtrToInt(pBuf, > Type::getInt32Ty(module->getContext())); > + if (!parseOnePrintfInstruction(call, pInfo, sizeof_size)) { > + printf("Parse One printf inst failed, may have some error\n"); > + // Just kill this printf instruction. > + deadprintfs.push_back(PrintfInst(cast<Instruction>(call),0)); > + continue; > } > > - > deadprintfs.push_back(PrintfInst(cast<Instruction>(call),parseOnePrintfInstruction(call))); > + hasPrintf = true; > + > + infoVect.push_back(pInfo); > + totalSizeofSize += sizeof_size; > } > } > > + if (!hasPrintf) > + return changed; > + > + if (!pbuf_ptr) { > + /* alloc a new buffer ptr to collect the print output. */ > + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); > + llvm::Constant * pBuf = > module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy); > + pbuf_ptr = builder->CreatePtrToInt(pBuf, > Type::getInt32Ty(module->getContext())); > + } > + if (!index_buf_ptr) { > + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); > + llvm::Constant * pBuf = > module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy); > + index_buf_ptr = builder->CreatePtrToInt(pBuf, > Type::getInt32Ty(module->getContext())); > + } > + > + if (!wg_offset || !g1Xg2Xg3) { > + Value* op0 = NULL; > + Value* val = NULL; > + > + builder->SetInsertPoint(F.begin()->begin());// Insert the common var > in the begin. > + > + /* FIXME: Because the OpenCL language do not support va macro, and we > do not want > + to introduce the va_list, va_start and va_end into our code, we > just simulate > + the function calls to caculate the offset caculation here. */ > +#define BUILD_CALL_INST(name) \ > + CallInst* name = > builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \ > + "__gen_ocl_get_"#name, > \ > + IntegerType::getInt32Ty(module->getContext()), > \ > + NULL))) > + > + BUILD_CALL_INST(group_id2); > + BUILD_CALL_INST(group_id1); > + BUILD_CALL_INST(group_id0); > + BUILD_CALL_INST(global_size2); > + BUILD_CALL_INST(global_size1); > + BUILD_CALL_INST(global_size0); > + BUILD_CALL_INST(local_id2); > + BUILD_CALL_INST(local_id1); > + BUILD_CALL_INST(local_id0); > + BUILD_CALL_INST(local_size2); > + BUILD_CALL_INST(local_size1); > + BUILD_CALL_INST(local_size0); > + > +#undef BUILD_CALL_INST > + > + /* calculate offset for later usage. > + wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 > * global_size0) > + + (local_id1 + local_size1 * group_id1) * global_size0 > + + (local_id0 + local_size0 * group_id0)) */ > + > + // local_size2 * group_id2 > + val = builder->CreateMul(local_size2, group_id2); > + // local_id2 + local_size2 * group_id2 > + val = builder->CreateAdd(local_id2, val); > + // global_size1 * global_size0 > + op0 = builder->CreateMul(global_size1, global_size0); > + // (local_id2 + local_size2 * group_id2) * (global_size1 * > global_size0) > + Value* offset1 = builder->CreateMul(val, op0); > + // local_size1 * group_id1 > + val = builder->CreateMul(local_size1, group_id1); > + // local_id1 + local_size1 * group_id1 > + val = builder->CreateAdd(local_id1, val); > + // (local_id1 + local_size1 * group_id1) * global_size_0 > + Value* offset2 = builder->CreateMul(val, global_size0); > + // local_size0 * group_id0 > + val = builder->CreateMul(local_size0, group_id0); > + // local_id0 + local_size0 * group_id0 > + val = builder->CreateAdd(local_id0, val); > + // The total sum > + val = builder->CreateAdd(val, offset1); > + wg_offset = builder->CreateAdd(val, offset2); > + > + // global_size2 * global_size1 > + op0 = builder->CreateMul(global_size2, global_size1); > + // global_size2 * global_size1 * global_size0 > + g1Xg2Xg3 = builder->CreateMul(op0, global_size0); > + } > + > + > + /* Now generate the instructions. */ > + for (auto pInfo : infoVect) { > + builder->SetInsertPoint(pInfo.call); > + deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), > generateOnePrintfInstruction(pInfo))); > + } > + > + assert(out_buf_sizeof_offset == totalSizeofSize); > + > /* Replace the instruction's operand if using printf's return value. */ > for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) > { > for (BasicBlock::iterator instI = B->begin(), > @@ -775,6 +885,7 @@ error: > bool sign = false; > > if (vec_num != slot.state->vector_n) { > + printf("Error The printf vector number is not match!\n"); > return false; > } > > @@ -785,26 +896,37 @@ error: > case PRINTF_CONVERSION_O: > case PRINTF_CONVERSION_U: > case PRINTF_CONVERSION_x: > - case PRINTF_CONVERSION_X: > - if (elt_type->getTypeID() != Type::IntegerTyID) > + case PRINTF_CONVERSION_X: { > + if (elt_type->getTypeID() != Type::IntegerTyID) { > + printf("Do not support type conversion between float and int > in vector printf!\n"); > return false; > + } > + > + Type* elt_dst_type = NULL; > + if (slot.state->length_modifier == PRINTF_LM_L) { > + elt_dst_type = Type::getInt64Ty(elt_type->getContext()); > + } else { > + elt_dst_type = Type::getInt32Ty(elt_type->getContext()); > + } > > /* If the bits change, we need to consider the signed. */ > - if (elt_type != Type::getInt32Ty(elt_type->getContext())) { > + if (elt_type != elt_dst_type) { > Value *II = NULL; > for (int i = 0; i < vec_num; i++) { > - Value *vec = II ? II : > UndefValue::get(VectorType::get(Type::getInt32Ty(elt_type->getContext()), > vec_num)); > + Value *vec = II ? II : > UndefValue::get(VectorType::get(elt_dst_type, vec_num)); > Value *cv = > ConstantInt::get(Type::getInt32Ty(elt_type->getContext()), i); > Value *org = builder->CreateExtractElement(arg, cv); > - Value *cvt = builder->CreateIntCast(org, > Type::getInt32Ty(module->getContext()), sign); > + Value *cvt = builder->CreateIntCast(org, elt_dst_type, sign); > II = builder->CreateInsertElement(vec, cvt, cv); > } > arg = II; > } > > dst_type = arg->getType()->getPointerTo(1); > - sizeof_size = sizeof(int) * vec_num; > + sizeof_size = (elt_dst_type == > Type::getInt32Ty(elt_type->getContext()) ? > + sizeof(int) * vec_num : sizeof(int64_t) * > vec_num); > return true; > + } > > case PRINTF_CONVERSION_F: > case PRINTF_CONVERSION_f: > @@ -814,8 +936,10 @@ error: > case PRINTF_CONVERSION_g: > case PRINTF_CONVERSION_A: > case PRINTF_CONVERSION_a: > - if (elt_type->getTypeID() != Type::DoubleTyID && > elt_type->getTypeID() != Type::FloatTyID) > + if (elt_type->getTypeID() != Type::DoubleTyID && > elt_type->getTypeID() != Type::FloatTyID) { > + printf("Do not support type conversion between float and int > in vector printf!\n"); > return false; > + } > > if (elt_type->getTypeID() != Type::FloatTyID) { > Value *II = NULL; > @@ -828,10 +952,14 @@ error: > } > arg = II; > } > + > + dst_type = arg->getType()->getPointerTo(1); > + sizeof_size = sizeof(int) * vec_num; > + return true; > + > + default: > + return false; > } > - dst_type = arg->getType()->getPointerTo(1); > - sizeof_size = sizeof(int) * vec_num; > - return true; > } > > default: > diff --git a/kernels/test_printf.cl b/kernels/test_printf.cl > index c2844f4..0a59e88 100644 > --- a/kernels/test_printf.cl > +++ b/kernels/test_printf.cl > @@ -4,6 +4,8 @@ test_printf(void) > int x = (int)get_global_id(0); > int y = (int)get_global_id(1); > int z = (int)get_global_id(2); > + int g0 = (int)get_global_size(0); > + int g1 = (int)get_global_size(1); > uint a = 'x'; > float f = 5.0f; > int3 vec; > @@ -14,28 +16,31 @@ test_printf(void) > > if (x == 0 && y == 0 && z == 0) { > printf("--- Welcome to the printf test of %s ---\n", "Intel Beignet"); > - > printf("### output a char is %c\n", a); > - > printf("@@@ A long value is %ld\n", cc); > } > > - if (x % 15 == 0) > - if (y % 3 == 0) > - if (z % 7 == 0) > - printf("######## global_id(x, y, z) = %v3d, global_size(d0, d1, d3) > = (%d, %d, %d)\n", > - vec, get_global_size(0), get_global_size(1), > get_global_size(2)); > + for(int i = 0; i < g0/2; i++) > + for(int j = 0; j < g1/2; j++) > + if(x == 0 && y == 0 && z == 0) > + printf("loops: i = %d, j = %d\n", i, j); > > - if (x == 1) > + if (x == 0) { > if (y == 0) { > if (z % 2 == 0) > - printf("#### output a float is %f\n", f); > + printf("!!! output a float is %f\n", f); > else > - printf("#### output a float to int is %d\n", f); > + printf("!!! output a float to int is %d\n", f); > } > + } > + > + if (x % 15 == 0) > + if (y % 3 == 0) > + if (z % 7 == 0) > + printf("######## global_id(x, y, z) = %v3d, global_size(d0, d1, d3) > = (%d, %d, %d)\n", > + vec, get_global_size(0), get_global_size(1), > get_global_size(2)); > > if (x == 0 && y == 0 && z == 0) { > printf("--- End to the printf test ---\n"); > } > - > } > diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c > index 330f0f9..b020540 100644 > --- a/src/cl_command_queue_gen7.c > +++ b/src/cl_command_queue_gen7.c > @@ -271,6 +271,14 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* > printf_info, int printf_num, > value = GBE_CURBE_PRINTF_BUF_POINTER; > offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0); > buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz; > + /* because of the printf may exist in a loop, which loop number can not be > gotten by > + static analysis. So we set the data buffer as big as we can. Out of > bound printf > + info will be discarded. */ > + if (buf_size < 1*1024) > + buf_size = 1*1024*1024; > + else > + buf_size = 4*1024*1024; //at most. > + > if (offset > 0) { > if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, > interp_get_printf_buf_bti(printf_info)) != 0) > return -1; > -- > 1.7.9.5 > > _______________________________________________ > Beignet mailing list > [email protected] > http://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
