Add a DebugLoc whenever LLVM instructions are inserted; the DebugLoc is inherited from the adjacent instruction.
Signed-off-by: Bai Yannan <[email protected]> --- backend/src/backend/program.cpp | 7 + backend/src/llvm/llvm_gen_backend.cpp | 25 ++ backend/src/llvm/llvm_loadstore_optimization.cpp | 18 ++ backend/src/llvm/llvm_printf_parser.cpp | 20 ++ backend/src/llvm/llvm_sampler_fix.cpp | 17 ++ backend/src/llvm/llvm_scalarize.cpp | 18 ++ backend/src/llvm/llvm_timestamp.cpp | 337 +++++++++++++++++++++++ backend/src/llvm/llvm_to_gen.cpp | 10 +- 8 files changed, 451 insertions(+), 1 deletion(-) create mode 100644 backend/src/llvm/llvm_timestamp.cpp diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index f5865c2..af817de 100644 --- a/backend/src/backend/program.cpp +++ b/backend/src/backend/program.cpp @@ -49,6 +49,7 @@ #include <iostream> #include <unistd.h> #include <mutex> +#include <cstdlib> #ifdef GBE_COMPILER_AVAILABLE /* Not defined for LLVM 3.0 */ @@ -554,6 +555,12 @@ namespace gbe { args.push_back("stringInput.cl"); args.push_back("-ffp-contract=off"); + if(getenv("OCL_PROFILING")) { + char * isProfiling = getenv("OCL_PROFILING"); + if(*isProfiling == '1') + args.push_back("-g"); + } + // The compiler invocation needs a DiagnosticsEngine so it can report problems std::string ErrorString; llvm::raw_string_ostream ErrorInfo(ErrorString); diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 4905415..238370a 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -108,6 +108,8 @@ #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5 #include "llvm/IR/Mangler.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfo.h" #else #include "llvm/Target/Mangler.h" #endif @@ -178,6 +180,20 @@ using namespace llvm; +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) +#define SETDEBUGLOCATION(BUILDER, INSN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb = INSN->getParent(); \ + llvm::BasicBlock::iterator iter =bb->begin(); \ + 
while(!(iter++)->isIdenticalTo(INSN)) ; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + while(!N) N = (++iter)->getMetadata("dbg"); \ + BUILDER.SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + + namespace gbe { /*! Gen IR manipulates only scalar types */ @@ -977,6 +993,7 @@ namespace gbe Value *trueVal = getPointerBase((*iter).second[0]); Value *falseVal = getPointerBase((*iter).second[1]); Builder.SetInsertPoint(si); + SETDEBUGLOCATION(Builder, si); Value *base = Builder.CreateSelect(si->getCondition(), trueVal, falseVal); pointerBaseMap.insert(std::make_pair(ptr, base)); return base; @@ -984,6 +1001,7 @@ namespace gbe PHINode *phi = dyn_cast<PHINode>(ptr); IRBuilder<> Builder(phi->getParent()); Builder.SetInsertPoint(phi); + SETDEBUGLOCATION(Builder, phi); PHINode *basePhi = Builder.CreatePHI(ptr->getType(), phi->getNumIncomingValues()); unsigned srcNum = pointers.size(); @@ -997,7 +1015,10 @@ namespace gbe IRBuilder<> Builder2(phi->getIncomingBlock(x)); BasicBlock *predBB = phi->getIncomingBlock(x); if (predBB->getTerminator()) + { Builder2.SetInsertPoint(predBB->getTerminator()); + SETDEBUGLOCATION(Builder2, predBB->getTerminator()); + } #if (LLVM_VERSION_MAJOR== 3 && LLVM_VERSION_MINOR < 6) // llvm 3.5 and older version don't have CreateBitOrPointerCast() define @@ -1065,6 +1086,7 @@ namespace gbe Value *trueVal = getBtiRegister((*iter).second[0]); Value *falseVal = getBtiRegister((*iter).second[1]); Builder.SetInsertPoint(si); + SETDEBUGLOCATION(Builder, si); Value *bti = Builder.CreateSelect(si->getCondition(), trueVal, falseVal); BtiValueMap.insert(std::make_pair(Val, bti)); return bti; @@ -1072,6 +1094,7 @@ namespace gbe PHINode *phi = dyn_cast<PHINode>(Val); IRBuilder<> Builder(phi->getParent()); Builder.SetInsertPoint(phi); + SETDEBUGLOCATION(Builder, phi); PHINode *btiPhi = Builder.CreatePHI(IntegerType::get(Val->getContext(), 32), phi->getNumIncomingValues()); PtrOrigMapIter iter = 
pointerOrigMap.find(Val); @@ -1240,6 +1263,7 @@ namespace gbe isLoad = false; } Builder.SetInsertPoint(cast<Instruction>(theUser)); + SETDEBUGLOCATION(Builder, cast<Instruction>(theUser)); Type *int32Ty = Type::getInt32Ty(ptr->getContext()); Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty); @@ -1351,6 +1375,7 @@ namespace gbe IRBuilder<> Builder(&entry); Builder.SetInsertPoint(bbIter); + SETDEBUGLOCATION(Builder, bbIter); PointerType * AITy = cast<AllocaInst>(base)->getType(); Value * btiArray = Builder.CreateAlloca(AITy->getElementType(), ArraySize, base->getName() + ".bti"); diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp b/backend/src/llvm/llvm_loadstore_optimization.cpp index c6349fa..989b539 100644 --- a/backend/src/llvm/llvm_loadstore_optimization.cpp +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp @@ -41,6 +41,8 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfo.h" #endif /* LLVM_VERSION_MINOR <= 2 */ #include "llvm/Pass.h" #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1 @@ -55,6 +57,20 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" using namespace llvm; + +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) +#define SETDEBUGLOCATION(BUILDER, INSN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb = INSN->getParent(); \ + llvm::BasicBlock::iterator iter =bb->begin(); \ + while(!(iter++)->isIdenticalTo(INSN)) ; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + while(!N) N = (++iter)->getMetadata("dbg"); \ + BUILDER.SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + namespace gbe { class GenLoadStoreOptimization : public BasicBlockPass { @@ -167,6 +183,7 @@ namespace gbe { unsigned addrSpace = ld->getPointerAddressSpace(); // insert before first load Builder.SetInsertPoint(ld); + SETDEBUGLOCATION(Builder, ld); VectorType *vecTy = 
VectorType::get(ld->getType(), size); Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(), PointerType::get(vecTy, addrSpace)); @@ -226,6 +243,7 @@ namespace gbe { unsigned align = st->getAlignment(); // insert before the last store Builder.SetInsertPoint(merged[size-1]); + SETDEBUGLOCATION(Builder, merged[size-1]); Type *dataTy = st->getValueOperand()->getType(); VectorType *vecTy = VectorType::get(dataTy, size); diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp index 3d84457..3848bfa 100644 --- a/backend/src/llvm/llvm_printf_parser.cpp +++ b/backend/src/llvm/llvm_printf_parser.cpp @@ -59,6 +59,9 @@ #if LLVM_VERSION_MINOR >= 5 #include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfo.h" + #else #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" @@ -73,6 +76,21 @@ using namespace llvm; +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) +#define SETDEBUGLOCATION(INSN, ISBEGIN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb=INSN->getParent(); \ + llvm::BasicBlock::iterator iter = bb->begin(); \ + if(ISBEGIN)while(!(iter++)->isIdenticalTo(INSN)) ; \ + else while(!iter->isIdenticalTo(INSN)) iter++; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + if(ISBEGIN)while(!N) {N = (++iter)->getMetadata("dbg");} \ + else while(!N) {iter++;N = iter->getMetadata("dbg");} \ + builder->SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + namespace gbe { using namespace ir; @@ -659,6 +677,7 @@ error: Value* val = NULL; builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin. + SETDEBUGLOCATION(F.begin()->begin(), true); /* FIXME: Because the OpenCL language do not support va macro, and we do not want to introduce the va_list, va_start and va_end into our code, we just simulate @@ -721,6 +740,7 @@ error: /* Now generate the instructions. 
*/ for (auto pInfo : infoVect) { builder->SetInsertPoint(pInfo.call); + SETDEBUGLOCATION(pInfo.call, false); deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), generateOnePrintfInstruction(pInfo))); } diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp index 8c76324..a1146d3 100644 --- a/backend/src/llvm/llvm_sampler_fix.cpp +++ b/backend/src/llvm/llvm_sampler_fix.cpp @@ -36,6 +36,8 @@ #include "llvm/IR/IRBuilder.h" #if LLVM_VERSION_MINOR >= 5 #include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfo.h" #else #include "llvm/Support/CFG.h" #endif @@ -47,6 +49,19 @@ using namespace llvm; +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) +#define SETDEBUGLOCATION(BUILDER, INSN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb = INSN->getParent(); \ + llvm::BasicBlock::iterator iter =bb->begin(); \ + while(!(iter++)->isIdenticalTo(INSN)) ; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + while(!N) N = (++iter)->getMetadata("dbg"); \ + BUILDER.SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + namespace gbe { class SamplerFix : public FunctionPass { @@ -82,6 +97,7 @@ namespace gbe { IRBuilder<> Builder(I->getParent()); Builder.SetInsertPoint(I); + SETDEBUGLOCATION(Builder, I); Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK); Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask); Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP); @@ -108,6 +124,7 @@ namespace gbe { } else { IRBuilder<> Builder(I->getParent()); Builder.SetInsertPoint(I); + SETDEBUGLOCATION(Builder, I); Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE); Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask); needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0)); diff --git a/backend/src/llvm/llvm_scalarize.cpp 
b/backend/src/llvm/llvm_scalarize.cpp index bc985c6..d4e87af 100644 --- a/backend/src/llvm/llvm_scalarize.cpp +++ b/backend/src/llvm/llvm_scalarize.cpp @@ -87,6 +87,8 @@ #if LLVM_VERSION_MINOR >= 5 #include "llvm/IR/CallSite.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DebugInfo.h" #else #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" @@ -98,6 +100,19 @@ using namespace llvm; +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) +#define SETDEBUGLOCATION(BUILDER, INSN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb = INSN->getParent(); \ + llvm::BasicBlock::iterator iter =bb->begin(); \ + while(!(iter++)->isIdenticalTo(INSN)) ; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + while(!N) N = (++iter)->getMetadata("dbg"); \ + BUILDER->SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + namespace gbe { struct VectorValues { @@ -231,6 +246,7 @@ namespace gbe { void setAppendPoint(Instruction *insn) { BasicBlock::iterator next(insn); builder->SetInsertPoint(++next); + SETDEBUGLOCATION(builder, next); } DenseMap<Value*, VectorValues> vectorVals; @@ -526,6 +542,7 @@ namespace gbe { assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) && "Scalarizing an op whose arguments haven't been scalarized "); builder->SetInsertPoint(inst); + SETDEBUGLOCATION(builder, inst); if (IsPerComponentOp(inst)) return scalarizePerComponent(inst); @@ -793,6 +810,7 @@ namespace gbe { ReversePostOrderTraversal<Function*> rpot(&F); BasicBlock::iterator instI = (*rpot.begin())->begin(); builder->SetInsertPoint(instI); + SETDEBUGLOCATION(builder, instI); Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); diff --git a/backend/src/llvm/llvm_timestamp.cpp b/backend/src/llvm/llvm_timestamp.cpp new file mode 100644 index 0000000..f847d38 --- /dev/null +++ b/backend/src/llvm/llvm_timestamp.cpp @@ -0,0 +1,337 @@ + + +/** + * \file llvm_timestamp.cpp + * + */ + + +#include 
<stdio.h> +#include <stdlib.h> + +#include "llvm/Config/llvm-config.h" +#if LLVM_VERSION_MINOR <= 2 +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#else +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#endif /* LLVM_VERSION_MINOR <= 2 */ +#include "llvm/Pass.h" +#if LLVM_VERSION_MINOR <= 1 +#include "llvm/Support/IRBuilder.h" +#elif LLVM_VERSION_MINOR == 2 +#include "llvm/IRBuilder.h" +#else +#include "llvm/IR/IRBuilder.h" +#endif /* LLVM_VERSION_MINOR <= 1 */ + +#if LLVM_VERSION_MINOR >= 5 +#include "llvm/IR/CallSite.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugLoc.h" //ynbai +#include "llvm/IR/DebugInfo.h" + +#else +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CFG.h" +#endif + +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Attributes.h" + +#include "llvm/llvm_gen_backend.hpp" +#include "sys/map.hpp" +#include "ir/timestamp.hpp" + +#include <iostream> +#include <vector> +#include <fstream> + +using namespace llvm; +using std::vector; + +#define OCL_PROFILING (bool)(getenv("OCL_PROFILING")[0]-48) + +namespace gbe +{ + using namespace ir; + + class TimestampParser : public FunctionPass + { + public: + static char ID; + typedef std::pair<Instruction*, bool> TimestampInst; + Module* module; + IRBuilder<>* builder; + Type* intTy; + Value* tbuf_ptr; + Value* g1Xg2Xg3; + Value* wg_offset; + int tm_num; + + TimestampParser(void) : FunctionPass(ID) + { + module = NULL; + builder = NULL; + intTy = NULL; + tbuf_ptr = NULL; + g1Xg2Xg3 = NULL; + wg_offset = NULL; + tm_num = 0; + } + + ~TimestampParser(void) + { + } + + INLINE void storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt); + bool generateOneTimestampInstruction(llvm::BasicBlock *BB); + + virtual const char 
*getPassName() const + { + return "Timestamp Parser"; + } + + virtual bool runOnFunction(llvm::Function &F); + }; + + INLINE void TimestampParser::storeRegionOffsetI(Value* addr_base, int idx, Value* param[], std::vector<Type *> ParamTys, bool isSt){ + /* timestamp format in the buffer: + tm_start_region_2, tm_start_region_1, tm_start_region_0, tm_end_region_2, tm_end_region_1, tm_end_region_0 + ^ + | + addr_base + */ + param[0] = ConstantInt::get(IntegerType::get(module->getContext(), 16), idx); + SmallVector<Value *, 2> Args(param, param+2); + Value* region = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( + "__gen_ocl_region", FunctionType::get(IntegerType::getInt32Ty(module->getContext()), ParamTys, false))), + Args); + Value* bi = builder->CreateAdd(addr_base, ConstantInt::get(intTy, ((!isSt)*3 + 2 - idx)*sizeof(uint))); + Value* data_addr = builder->CreateIntToPtr(bi, Type::getInt32PtrTy(module->getContext(), 1)); + builder->CreateStore(region, data_addr); + + } + + bool TimestampParser::generateOneTimestampInstruction(llvm::BasicBlock *BB) + { + +#define SETDEBUGLOCATION(INSN, ASC) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock *bb = INSN->getParent(); \ + llvm::BasicBlock::iterator iter = ASC?bb->begin():bb->end(); \ + while(!(ASC?iter++:iter--)->isIdenticalTo(INSN)) ; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + while(!N) N = (ASC?++iter:--iter)->getMetadata("dbg"); \ + builder->SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + + Value * op0 = NULL; + Value * val = NULL; + + ///////////////////////////////////////////////////// + /* Calculate the data address. 
+ data_addr = (data_offset + tbuf_ptr + wg_offset * sizeof(uint32)) + + totalSizeofSize * global_size2 * global_size1 * global_size0 * tm_num + data_offset = global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset + totalSizeofSize = tm_num * sizeof(uint32) + */ + CallInst *read_tm_inst_st = NULL, *read_tm_inst_end = NULL; + + // ecch basic block has 2 timestamps, + //and tm_num is even when it is at the begin of bb + int bbNum = (tm_num-tm_num%2)/2; + Value* data_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, bbNum)); + // index_offset + wg_offset + op0 = builder->CreateAdd(data_offset, wg_offset); + // (index_offset + wg_offset)* sizeof(int) + op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(uint)*6)); + Value* b = builder->CreateAdd(tbuf_ptr,op0); + + read_tm_inst_st = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( + "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()), + NULL))); + tm_num++; + + llvm::BasicBlock::iterator BI = BB->end(); + builder->SetInsertPoint(--BI); + SETDEBUGLOCATION(BI, false); + read_tm_inst_end = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( + "__gen_ocl_read_tm", IntegerType::getInt32Ty(module->getContext()), + NULL))); + tm_num++; + + Value* para[2]; + std::vector<Type *> ParamTys(2); + ParamTys[0]=IntegerType::get(module->getContext(), 16); + ParamTys[1]=IntegerType::getInt32Ty(module->getContext()); + + for(int i=0; i<3; i++) + { + para[1]=read_tm_inst_st; + storeRegionOffsetI(b, i, para, ParamTys, true); + para[1]=read_tm_inst_end; + storeRegionOffsetI(b, i, para, ParamTys, false); + } + + CallInst* timestamp_inst = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( + "__gen_ocl_timestamp", Type::getVoidTy(module->getContext()), + NULL))); + return true; + } + + bool TimestampParser::runOnFunction(llvm::Function &F) + { + +#define SETDEBUGLOCATION(INSN, ISBEGIN) \ + if(OCL_PROFILING) { \ + llvm::BasicBlock 
*bb=INSN->getParent(); \ + llvm::BasicBlock::iterator iter = bb->begin(); \ + if(ISBEGIN)while(!(iter++)->isIdenticalTo(INSN)) ; \ + else while(!iter->isIdenticalTo(INSN)) iter++; \ + llvm::MDNode *N = iter->getMetadata("dbg"); \ + llvm::DebugLoc dg = iter->getDebugLoc(); \ + if(ISBEGIN)while(!N) {N = (++iter)->getMetadata("dbg");} \ + else while(!N) {iter++;N = iter->getMetadata("dbg");} \ + builder->SetCurrentDebugLocation(dg); \ + } +// end define SETDEBUGLOCATION + + bool hasTimestamp = false; + switch (F.getCallingConv()) { +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2 + case CallingConv::PTX_Device: + return false; + case CallingConv::PTX_Kernel: +#else + case CallingConv::C: + case CallingConv::Fast: + case CallingConv::SPIR_KERNEL: +#endif + break; + default: + GBE_ASSERTM(false, "Unsupported calling convention"); + } + + module = F.getParent(); + intTy = IntegerType::get(module->getContext(), 32); + + // As we inline all function calls, so skip non-kernel functions + bool bKernel = isKernelFunction(F); + if(!bKernel) return false; + + builder = new IRBuilder<>(module->getContext()); + + if (!tbuf_ptr) { + /* alloc a new buffer ptr to collect the timestamps. */ + Type *ptrTy = Type::getInt32PtrTy(module->getContext()); + llvm::Constant *tBuf = new GlobalVariable(*module, ptrTy, false, + GlobalVariable::ExternalLinkage, + nullptr, + StringRef("__gen_ocl_timestamp_buf"), + nullptr, + GlobalVariable::NotThreadLocal, + 1); + tbuf_ptr = builder->CreatePtrToInt(tBuf, Type::getInt32Ty(module->getContext())); + } + + //if (!wg_offset || !g1Xg2Xg3) { + Value* op0 = NULL; + Value* val = NULL; + + builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin. + SETDEBUGLOCATION(F.begin()->begin(), true); + + /* FIXME: Because the OpenCL language do not support va macro, and we do not want + to introduce the va_list, va_start and va_end into our code, we just simulate + the function calls to caculate the offset caculation here. 
*/ + + + +#define BUILD_CALL_INST(name) \ + CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \ + "__gen_ocl_get_"#name, \ + IntegerType::getInt32Ty(module->getContext()), \ + NULL))) + + BUILD_CALL_INST(group_id2); + BUILD_CALL_INST(group_id1); + BUILD_CALL_INST(group_id0); + BUILD_CALL_INST(global_size2); + BUILD_CALL_INST(global_size1); + BUILD_CALL_INST(global_size0); + BUILD_CALL_INST(local_id2); + BUILD_CALL_INST(local_id1); + BUILD_CALL_INST(local_id0); + BUILD_CALL_INST(local_size2); + BUILD_CALL_INST(local_size1); + BUILD_CALL_INST(local_size0); + +#undef BUILD_CALL_INST + + /* calculate offset for later usage. + wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * global_size0) + + (local_id1 + local_size1 * group_id1) * global_size0 + + (local_id0 + local_size0 * group_id0)) */ + + + // local_size2 * group_id2 + val = builder->CreateMul(local_size2, group_id2); + // local_id2 + local_size2 * group_id2 + val = builder->CreateAdd(local_id2, val); + // global_size1 * global_size0 + op0 = builder->CreateMul(global_size1, global_size0); + // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0) + Value* offset1 = builder->CreateMul(val, op0); + // local_size1 * group_id1 + val = builder->CreateMul(local_size1, group_id1); + // local_id1 + local_size1 * group_id1 + val = builder->CreateAdd(local_id1, val); + // (local_id1 + local_size1 * group_id1) * global_size_0 + Value* offset2 = builder->CreateMul(val, global_size0); + // local_size0 * group_id0 + val = builder->CreateMul(local_size0, group_id0); + // local_id0 + local_size0 * group_id0 + val = builder->CreateAdd(local_id0, val); + // The total sum + val = builder->CreateAdd(val, offset1); + wg_offset = builder->CreateAdd(val, offset2); + + // global_size2 * global_size1 + op0 = builder->CreateMul(global_size2, global_size1); + // global_size2 * global_size1 * global_size0 + g1Xg2Xg3 = builder->CreateMul(op0, global_size0); + + 
generateOneTimestampInstruction(F.begin());// first basic block + + // } + + + for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) + if(B!=F.begin()){ + builder->SetInsertPoint(B->getFirstInsertionPt());// insert at the beginning of each basicblock. except first basicblock + SETDEBUGLOCATION(B->getFirstInsertionPt(),false); + generateOneTimestampInstruction(B); + } + + delete builder; + + return false; + } + + FunctionPass* createTimestampParserPass() + { + return new TimestampParser(); + } + char TimestampParser::ID = 0; + +} // end namespace diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp index 891f2a1..012b754 100644 --- a/backend/src/llvm/llvm_to_gen.cpp +++ b/backend/src/llvm/llvm_to_gen.cpp @@ -71,6 +71,8 @@ #include <fcntl.h> #include <memory> +#include <stdlib.h> + namespace gbe { BVAR(OCL_OUTPUT_CFG, false); @@ -226,7 +228,7 @@ namespace gbe { std::string errInfo; std::unique_ptr<llvm::raw_fd_ostream> o = NULL; - if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN) + //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || OCL_OUTPUT_LLVM_AFTER_GEN) o = std::unique_ptr<llvm::raw_fd_ostream>(new llvm::raw_fd_ostream(fileno(stdout), false)); // Get the module from its file @@ -300,6 +302,12 @@ namespace gbe passes.add(createLowerSwitchPass()); // simplify cfg will generate switch-case instruction passes.add(createScalarizePass()); // Expand all vector ops + //setenv("OCL_PROFILING","0",0); + if(OCL_PROFILING) { + passes.add(createTimestampParserPass()); // by ynbai + passes.add(createExpandConstantExprPass()); + } + if(OCL_OUTPUT_CFG) passes.add(createCFGPrinterPass()); if(OCL_OUTPUT_CFG_ONLY) -- 1.9.1 _______________________________________________ Beignet mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/beignet
