add debugloc when llvm instructions are inserted; the debugloc is inherited from
the adjacent instruction.

Signed-off-by: Bai Yannan <[email protected]>
---
 backend/src/backend/program.cpp                  |   7 +
 backend/src/llvm/llvm_gen_backend.cpp            |  25 ++
 backend/src/llvm/llvm_loadstore_optimization.cpp |  18 ++
 backend/src/llvm/llvm_printf_parser.cpp          |  20 ++
 backend/src/llvm/llvm_sampler_fix.cpp            |  17 ++
 backend/src/llvm/llvm_scalarize.cpp              |  18 ++
 backend/src/llvm/llvm_timestamp.cpp              | 337 +++++++++++++++++++++++
 backend/src/llvm/llvm_to_gen.cpp                 |  10 +-
 8 files changed, 451 insertions(+), 1 deletion(-)
 create mode 100644 backend/src/llvm/llvm_timestamp.cpp

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index f5865c2..af817de 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -49,6 +49,7 @@
 #include <iostream>
 #include <unistd.h>
 #include <mutex>
+#include <cstdlib>
 
 #ifdef GBE_COMPILER_AVAILABLE
 /* Not defined for LLVM 3.0 */
@@ -554,6 +555,12 @@ namespace gbe {
     args.push_back("stringInput.cl");
     args.push_back("-ffp-contract=off");
 
+    // Add -g to the compiler arguments only when OCL_PROFILING starts with '1'.
+    {
+      const char *profilingEnv = getenv("OCL_PROFILING");
+      if (profilingEnv != NULL && *profilingEnv == '1') args.push_back("-g");
+    }
+
     // The compiler invocation needs a DiagnosticsEngine so it can report 
problems
     std::string ErrorString;
     llvm::raw_string_ostream ErrorInfo(ErrorString);
diff --git a/backend/src/llvm/llvm_gen_backend.cpp 
b/backend/src/llvm/llvm_gen_backend.cpp
index 4905415..238370a 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -108,6 +108,8 @@
 
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >=5
 #include "llvm/IR/Mangler.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Target/Mangler.h"
 #endif
@@ -178,6 +180,20 @@
 
 using namespace llvm;
 
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+// Give BUILDER the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      (BUILDER).SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
+
 namespace gbe
 {
   /*! Gen IR manipulates only scalar types */
@@ -977,6 +993,7 @@ namespace gbe
           Value *trueVal = getPointerBase((*iter).second[0]);
           Value *falseVal = getPointerBase((*iter).second[1]);
           Builder.SetInsertPoint(si);
+                 SETDEBUGLOCATION(Builder, si);
           Value *base = Builder.CreateSelect(si->getCondition(), trueVal, 
falseVal);
           pointerBaseMap.insert(std::make_pair(ptr, base));
         return base;
@@ -984,6 +1001,7 @@ namespace gbe
           PHINode *phi = dyn_cast<PHINode>(ptr);
           IRBuilder<> Builder(phi->getParent());
           Builder.SetInsertPoint(phi);
+                 SETDEBUGLOCATION(Builder, phi);
 
           PHINode *basePhi = Builder.CreatePHI(ptr->getType(), 
phi->getNumIncomingValues());
           unsigned srcNum = pointers.size();
@@ -997,7 +1015,10 @@ namespace gbe
             IRBuilder<> Builder2(phi->getIncomingBlock(x));
             BasicBlock *predBB = phi->getIncomingBlock(x);
             if (predBB->getTerminator())
+            {
               Builder2.SetInsertPoint(predBB->getTerminator());
+                         SETDEBUGLOCATION(Builder2, predBB->getTerminator());
+            }
 
 #if (LLVM_VERSION_MAJOR== 3 && LLVM_VERSION_MINOR < 6)
   // llvm 3.5 and older version don't have CreateBitOrPointerCast() define
@@ -1065,6 +1086,7 @@ namespace gbe
           Value *trueVal = getBtiRegister((*iter).second[0]);
           Value *falseVal = getBtiRegister((*iter).second[1]);
           Builder.SetInsertPoint(si);
+                 SETDEBUGLOCATION(Builder, si);
           Value *bti = Builder.CreateSelect(si->getCondition(), trueVal, 
falseVal);
           BtiValueMap.insert(std::make_pair(Val, bti));
           return bti;
@@ -1072,6 +1094,7 @@ namespace gbe
           PHINode *phi = dyn_cast<PHINode>(Val);
           IRBuilder<> Builder(phi->getParent());
           Builder.SetInsertPoint(phi);
+                 SETDEBUGLOCATION(Builder, phi);
 
           PHINode *btiPhi = 
Builder.CreatePHI(IntegerType::get(Val->getContext(), 32), 
phi->getNumIncomingValues());
           PtrOrigMapIter iter = pointerOrigMap.find(Val);
@@ -1240,6 +1263,7 @@ namespace gbe
             isLoad = false;
           }
           Builder.SetInsertPoint(cast<Instruction>(theUser));
+                 SETDEBUGLOCATION(Builder, cast<Instruction>(theUser));
 
           Type *int32Ty = Type::getInt32Ty(ptr->getContext());
           Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
@@ -1351,6 +1375,7 @@ namespace gbe
 
       IRBuilder<> Builder(&entry);
       Builder.SetInsertPoint(bbIter);
+         SETDEBUGLOCATION(Builder, bbIter);
 
       PointerType * AITy = cast<AllocaInst>(base)->getType();
       Value * btiArray = Builder.CreateAlloca(AITy->getElementType(), 
ArraySize, base->getName() + ".bti");
diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp 
b/backend/src/llvm/llvm_loadstore_optimization.cpp
index c6349fa..989b539 100644
--- a/backend/src/llvm/llvm_loadstore_optimization.cpp
+++ b/backend/src/llvm/llvm_loadstore_optimization.cpp
@@ -41,6 +41,8 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #endif  /* LLVM_VERSION_MINOR <= 2 */
 #include "llvm/Pass.h"
 #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 1
@@ -55,6 +57,20 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 
 using namespace llvm;
+
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+// Give BUILDER the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      (BUILDER).SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
   class GenLoadStoreOptimization : public BasicBlockPass {
 
@@ -167,6 +183,7 @@ namespace gbe {
     unsigned addrSpace = ld->getPointerAddressSpace();
     // insert before first load
     Builder.SetInsertPoint(ld);
+       SETDEBUGLOCATION(Builder, ld);
     VectorType *vecTy = VectorType::get(ld->getType(), size);
     Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(),
                                         PointerType::get(vecTy, addrSpace));
@@ -226,6 +243,7 @@ namespace gbe {
     unsigned align = st->getAlignment();
     // insert before the last store
     Builder.SetInsertPoint(merged[size-1]);
+       SETDEBUGLOCATION(Builder, merged[size-1]);
 
     Type *dataTy = st->getValueOperand()->getType();
     VectorType *vecTy = VectorType::get(dataTy, size);
diff --git a/backend/src/llvm/llvm_printf_parser.cpp 
b/backend/src/llvm/llvm_printf_parser.cpp
index 3d84457..3848bfa 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -59,6 +59,9 @@
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
+
 #else
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
@@ -73,6 +76,21 @@
 
 using namespace llvm;
 
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+// Give `builder` the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+// ISBEGIN is accepted for call-site compatibility; the search always starts at INSN itself.
+#define SETDEBUGLOCATION(INSN, ISBEGIN) \
+  do { \
+    if (OCL_PROFILING) { \
+      (void)(ISBEGIN); \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      builder->SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe
 {
   using namespace ir;
@@ -659,6 +677,7 @@ error:
       Value* val = NULL;
 
       builder->SetInsertPoint(F.begin()->begin());// Insert the common var in 
the begin.
+      SETDEBUGLOCATION(F.begin()->begin(), true);
 
       /* FIXME: Because the OpenCL language do not support va macro, and we do 
not want
          to introduce the va_list, va_start and va_end into our code, we just 
simulate
@@ -721,6 +740,7 @@ error:
     /* Now generate the instructions. */
     for (auto pInfo : infoVect) {
       builder->SetInsertPoint(pInfo.call);
+         SETDEBUGLOCATION(pInfo.call, false);
       deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), 
generateOnePrintfInstruction(pInfo)));
     }
 
diff --git a/backend/src/llvm/llvm_sampler_fix.cpp 
b/backend/src/llvm/llvm_sampler_fix.cpp
index 8c76324..a1146d3 100644
--- a/backend/src/llvm/llvm_sampler_fix.cpp
+++ b/backend/src/llvm/llvm_sampler_fix.cpp
@@ -36,6 +36,8 @@
 #include "llvm/IR/IRBuilder.h"
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Support/CFG.h"
 #endif
@@ -47,6 +49,19 @@
 
 using namespace llvm;
 
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+// Give BUILDER the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      (BUILDER).SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
 
   class SamplerFix : public FunctionPass {
@@ -82,6 +97,7 @@ namespace gbe {
           IRBuilder<> Builder(I->getParent());
 
           Builder.SetInsertPoint(I);
+                 SETDEBUGLOCATION(Builder, I);
           Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
           Value *addressMode = Builder.CreateAnd(I->getOperand(0), 
addressMask);
           Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
@@ -108,6 +124,7 @@ namespace gbe {
         } else {
           IRBuilder<> Builder(I->getParent());
           Builder.SetInsertPoint(I);
+                 SETDEBUGLOCATION(Builder, I);
           Value *normalizeMask = ConstantInt::get(i32Ty, 
CLK_NORMALIZED_COORDS_TRUE);
           Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), 
normalizeMask);
           needFixVal = Builder.CreateICmpEQ(normalizeMode, 
ConstantInt::get(i32Ty, 0));
diff --git a/backend/src/llvm/llvm_scalarize.cpp 
b/backend/src/llvm/llvm_scalarize.cpp
index bc985c6..d4e87af 100644
--- a/backend/src/llvm/llvm_scalarize.cpp
+++ b/backend/src/llvm/llvm_scalarize.cpp
@@ -87,6 +87,8 @@
 #if LLVM_VERSION_MINOR >= 5
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DebugInfo.h"
 #else
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
@@ -98,6 +100,19 @@
 
 using namespace llvm;
 
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+// Give BUILDER (a pointer) the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+#define SETDEBUGLOCATION(BUILDER, INSN) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      (BUILDER)->SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
 namespace gbe {
 
   struct VectorValues {
@@ -231,6 +246,7 @@ namespace gbe {
     void setAppendPoint(Instruction *insn)  {
       BasicBlock::iterator next(insn);
       builder->SetInsertPoint(++next);
+         SETDEBUGLOCATION(builder, next);
     }
 
     DenseMap<Value*, VectorValues> vectorVals;
@@ -526,6 +542,7 @@ namespace gbe {
     assert((canGetComponentArgs(inst) || isa<PHINode>(inst)) &&
            "Scalarizing an op whose arguments haven't been scalarized ");
     builder->SetInsertPoint(inst);
+       SETDEBUGLOCATION(builder, inst);
 
     if (IsPerComponentOp(inst))
       return scalarizePerComponent(inst);
@@ -793,6 +810,7 @@ namespace gbe {
     ReversePostOrderTraversal<Function*> rpot(&F);
     BasicBlock::iterator instI = (*rpot.begin())->begin();
     builder->SetInsertPoint(instI);
+       SETDEBUGLOCATION(builder, instI);
 
     Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
 
diff --git a/backend/src/llvm/llvm_timestamp.cpp 
b/backend/src/llvm/llvm_timestamp.cpp
new file mode 100644
index 0000000..f847d38
--- /dev/null
+++ b/backend/src/llvm/llvm_timestamp.cpp
@@ -0,0 +1,337 @@
+
+
+/**
+ * \file llvm_timestamp.cpp
+ *
+ */
+
+ 
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/Config/llvm-config.h"
+#if LLVM_VERSION_MINOR <= 2
+#include "llvm/Function.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#else
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#endif  /* LLVM_VERSION_MINOR <= 2 */
+#include "llvm/Pass.h"
+#if LLVM_VERSION_MINOR <= 1
+#include "llvm/Support/IRBuilder.h"
+#elif LLVM_VERSION_MINOR == 2
+#include "llvm/IRBuilder.h"
+#else
+#include "llvm/IR/IRBuilder.h"
+#endif /* LLVM_VERSION_MINOR <= 1 */
+
+#if LLVM_VERSION_MINOR >= 5
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugLoc.h" //ynbai
+#include "llvm/IR/DebugInfo.h"
+
+#else
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#endif
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/Attributes.h"
+
+#include "llvm/llvm_gen_backend.hpp"
+#include "sys/map.hpp"
+#include "ir/timestamp.hpp"
+
+#include <iostream>
+#include <vector>
+#include <fstream>
+
+using namespace llvm;
+using std::vector;
+
+// Profiling is enabled only when OCL_PROFILING is set and begins with '1' (unset no longer crashes).
+#define OCL_PROFILING (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1')
+namespace gbe
+{
+  using namespace ir;
+ 
+  class TimestampParser : public FunctionPass
+  {
+  public:
+    static char ID;
+       typedef std::pair<Instruction*, bool> TimestampInst;
+    Module* module;
+    IRBuilder<>* builder;
+    Type* intTy;
+    Value* tbuf_ptr;
+       Value* g1Xg2Xg3;
+    Value* wg_offset;
+       int tm_num;
+
+    TimestampParser(void) : FunctionPass(ID)
+    {
+      module = NULL;
+      builder = NULL;
+      intTy = NULL;
+      tbuf_ptr = NULL;
+         g1Xg2Xg3 = NULL;
+      wg_offset = NULL;
+         tm_num = 0;
+    }
+
+    ~TimestampParser(void)
+    {
+    }
+
+       INLINE void storeRegionOffsetI(Value* addr_base, int idx, Value* 
param[], std::vector<Type *> ParamTys, bool isSt);
+       bool generateOneTimestampInstruction(llvm::BasicBlock *BB);
+
+    virtual const char *getPassName() const
+    {
+      return "Timestamp Parser";
+    }
+
+    virtual bool runOnFunction(llvm::Function &F);
+  };
+
+  INLINE void TimestampParser::storeRegionOffsetI(Value* addr_base, int idx, 
Value* param[], std::vector<Type *> ParamTys, bool isSt){
+       /* timestamp format in the buffer:
+               tm_start_region_2, tm_start_region_1, tm_start_region_0, 
tm_end_region_2, tm_end_region_1, tm_end_region_0
+               ^
+               |
+               addr_base
+       */
+       param[0] = ConstantInt::get(IntegerType::get(module->getContext(), 16), 
idx);
+       SmallVector<Value *, 2> Args(param, param+2);
+       Value* region = 
builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(   
+       "__gen_ocl_region", 
FunctionType::get(IntegerType::getInt32Ty(module->getContext()), ParamTys, 
false))),        
+                              Args);
+       Value* bi = builder->CreateAdd(addr_base, ConstantInt::get(intTy, 
((!isSt)*3 + 2 - idx)*sizeof(uint)));
+       Value* data_addr = builder->CreateIntToPtr(bi, 
Type::getInt32PtrTy(module->getContext(), 1));   
+       builder->CreateStore(region, data_addr);
+
+  }
+  
+  bool TimestampParser::generateOneTimestampInstruction(llvm::BasicBlock *BB)
+  {
+
+// Give `builder` the debug location of INSN or of the nearest instruction with "dbg" metadata, searching forward when ASC is true and backward otherwise.
+#define SETDEBUGLOCATION(INSN, ASC) \
+  do { \
+    if (OCL_PROFILING) { \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock *dbgBB = dbgIt->getParent(); \
+      if (ASC) { while (dbgIt != dbgBB->end() && !dbgIt->getMetadata("dbg")) ++dbgIt; } \
+      else { while (dbgIt != dbgBB->begin() && !dbgIt->getMetadata("dbg")) --dbgIt; } \
+      builder->SetCurrentDebugLocation(dbgIt != dbgBB->end() ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
+       Value * op0 = NULL;
+       Value * val = NULL;
+               
+       /////////////////////////////////////////////////////
+      /* Calculate the data address.
+      data_addr = (data_offset + tbuf_ptr + wg_offset * sizeof(uint32)) + 
+                       totalSizeofSize * global_size2 * global_size1 * 
global_size0 * tm_num 
+      data_offset = global_size2 * global_size1 * global_size0 * 
out_buf_sizeof_offset
+      totalSizeofSize = tm_num * sizeof(uint32)
+      */
+    CallInst *read_tm_inst_st = NULL, *read_tm_inst_end = NULL;
+               
+       // each basic block has 2 timestamps,
+       // and tm_num is even when it is at the beginning of a basic block
+       int bbNum = (tm_num-tm_num%2)/2; 
+       Value* data_offset = builder->CreateMul(g1Xg2Xg3, 
ConstantInt::get(intTy, bbNum));
+    // index_offset + wg_offset
+    op0 = builder->CreateAdd(data_offset, wg_offset);
+    // (index_offset + wg_offset)* sizeof(int)
+    op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(uint)*6));
+       Value* b = builder->CreateAdd(tbuf_ptr,op0);
+       
+       read_tm_inst_st = 
builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                          "__gen_ocl_read_tm", 
IntegerType::getInt32Ty(module->getContext()),
+                          NULL)));
+       tm_num++;
+       
+       llvm::BasicBlock::iterator BI = BB->end();
+       builder->SetInsertPoint(--BI);
+       SETDEBUGLOCATION(BI, false);
+       read_tm_inst_end = 
builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                          "__gen_ocl_read_tm", 
IntegerType::getInt32Ty(module->getContext()),
+                          NULL)));
+       tm_num++;
+       
+       Value* para[2];
+       std::vector<Type *> ParamTys(2);
+       ParamTys[0]=IntegerType::get(module->getContext(), 16);
+       ParamTys[1]=IntegerType::getInt32Ty(module->getContext());
+
+       for(int i=0; i<3; i++)
+       {
+               para[1]=read_tm_inst_st;
+               storeRegionOffsetI(b, i, para, ParamTys, true);
+               para[1]=read_tm_inst_end;
+               storeRegionOffsetI(b, i, para, ParamTys, false);
+       }
+
+       CallInst* timestamp_inst = 
builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
+                              "__gen_ocl_timestamp", 
Type::getVoidTy(module->getContext()),
+                              NULL)));
+       return true;
+  }
+
+  bool TimestampParser::runOnFunction(llvm::Function &F)
+  {
+
+#undef SETDEBUGLOCATION
+// Give `builder` the debug location of INSN or, failing that, of the nearest later instruction in INSN's block with "dbg" metadata (empty location if none).
+// ISBEGIN is accepted for call-site compatibility; the search always starts at INSN itself.
+#define SETDEBUGLOCATION(INSN, ISBEGIN) \
+  do { \
+    if (OCL_PROFILING) { \
+      (void)(ISBEGIN); \
+      llvm::BasicBlock::iterator dbgIt(INSN); \
+      llvm::BasicBlock::iterator dbgEnd = dbgIt->getParent()->end(); \
+      while (dbgIt != dbgEnd && !dbgIt->getMetadata("dbg")) ++dbgIt; \
+      builder->SetCurrentDebugLocation(dbgIt != dbgEnd ? dbgIt->getDebugLoc() : llvm::DebugLoc()); \
+    } \
+  } while (0)
+
+    bool hasTimestamp = false;
+    switch (F.getCallingConv()) {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
+      case CallingConv::PTX_Device:
+        return false;
+      case CallingConv::PTX_Kernel:
+#else
+      case CallingConv::C:
+      case CallingConv::Fast:
+      case CallingConv::SPIR_KERNEL:
+#endif
+        break;
+      default:
+        GBE_ASSERTM(false, "Unsupported calling convention");
+    }
+
+    module = F.getParent();
+    intTy = IntegerType::get(module->getContext(), 32);
+
+    // As we inline all function calls, so skip non-kernel functions
+    bool bKernel = isKernelFunction(F);
+    if(!bKernel) return false;
+
+    builder = new IRBuilder<>(module->getContext());
+
+       if (!tbuf_ptr) {
+      /* alloc a new buffer ptr to collect the timestamps. */
+      Type *ptrTy = Type::getInt32PtrTy(module->getContext());
+         llvm::Constant *tBuf = new GlobalVariable(*module, ptrTy, false,
+                                GlobalVariable::ExternalLinkage,
+                                nullptr,
+                                StringRef("__gen_ocl_timestamp_buf"),
+                                nullptr,
+                                GlobalVariable::NotThreadLocal,
+                                1);
+      tbuf_ptr = builder->CreatePtrToInt(tBuf, 
Type::getInt32Ty(module->getContext()));
+    }
+
+       //if (!wg_offset || !g1Xg2Xg3) {
+      Value* op0 = NULL;
+      Value* val = NULL;
+
+      builder->SetInsertPoint(F.begin()->begin());// Insert the common var in 
the begin.
+      SETDEBUGLOCATION(F.begin()->begin(), true);
+         
+      /* FIXME: Because the OpenCL language does not support va macros, and we do not want
+         to introduce va_list, va_start and va_end into our code, we just simulate
+         the function calls to calculate the offsets here. */
+
+       
+ 
+#define BUILD_CALL_INST(name) \
+       CallInst* name = 
builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
+                                "__gen_ocl_get_"#name,                         
                \
+                                IntegerType::getInt32Ty(module->getContext()), 
                \
+                                NULL)))
+
+      BUILD_CALL_INST(group_id2);
+      BUILD_CALL_INST(group_id1);
+      BUILD_CALL_INST(group_id0);
+      BUILD_CALL_INST(global_size2);
+      BUILD_CALL_INST(global_size1);
+      BUILD_CALL_INST(global_size0);
+      BUILD_CALL_INST(local_id2);
+      BUILD_CALL_INST(local_id1);
+      BUILD_CALL_INST(local_id0);
+      BUILD_CALL_INST(local_size2);
+      BUILD_CALL_INST(local_size1);
+      BUILD_CALL_INST(local_size0);
+
+#undef BUILD_CALL_INST
+
+      /* calculate offset for later usage.
+         wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * 
global_size0)
+         + (local_id1 + local_size1 * group_id1) * global_size0
+         + (local_id0 + local_size0 * group_id0))  */
+
+
+      // local_size2 * group_id2
+      val = builder->CreateMul(local_size2, group_id2);
+      // local_id2 + local_size2 * group_id2
+      val = builder->CreateAdd(local_id2, val);
+      // global_size1 * global_size0
+      op0 = builder->CreateMul(global_size1, global_size0);
+      // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
+      Value* offset1 = builder->CreateMul(val, op0);
+      // local_size1 * group_id1
+      val = builder->CreateMul(local_size1, group_id1);
+      // local_id1 + local_size1 * group_id1
+      val = builder->CreateAdd(local_id1, val);
+      // (local_id1 + local_size1 * group_id1) * global_size_0
+      Value* offset2 = builder->CreateMul(val, global_size0);
+      // local_size0 * group_id0
+      val = builder->CreateMul(local_size0, group_id0);
+      // local_id0 + local_size0 * group_id0
+      val = builder->CreateAdd(local_id0, val);
+      // The total sum
+      val = builder->CreateAdd(val, offset1);
+      wg_offset = builder->CreateAdd(val, offset2);
+
+      // global_size2 * global_size1
+      op0 = builder->CreateMul(global_size2, global_size1);
+      // global_size2 * global_size1 * global_size0
+      g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
+
+         generateOneTimestampInstruction(F.begin());// first basic block
+
+   // }
+
+       
+       for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; 
B++) 
+               if(B!=F.begin()){
+                       builder->SetInsertPoint(B->getFirstInsertionPt());// 
insert at the beginning of each basicblock. except first basicblock
+                       SETDEBUGLOCATION(B->getFirstInsertionPt(),false);
+                       generateOneTimestampInstruction(B);
+       }
+
+    delete builder;
+
+    return false;
+  }
+
+  FunctionPass* createTimestampParserPass()
+  {
+    return new TimestampParser();
+  }
+  char TimestampParser::ID = 0;
+
+} // end namespace
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 891f2a1..012b754 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -71,6 +71,8 @@
 #include <fcntl.h>
 #include <memory>
 
+#include <stdlib.h>
+
 namespace gbe
 {
   BVAR(OCL_OUTPUT_CFG, false);
@@ -226,7 +228,7 @@ namespace gbe
   {
     std::string errInfo;
     std::unique_ptr<llvm::raw_fd_ostream> o = NULL;
-    if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || 
OCL_OUTPUT_LLVM_AFTER_GEN)
+    //if (OCL_OUTPUT_LLVM_BEFORE_LINK || OCL_OUTPUT_LLVM_AFTER_LINK || 
OCL_OUTPUT_LLVM_AFTER_GEN)
       o = std::unique_ptr<llvm::raw_fd_ostream>(new 
llvm::raw_fd_ostream(fileno(stdout), false));
 
     // Get the module from its file
@@ -300,6 +302,12 @@ namespace gbe
     passes.add(createLowerSwitchPass());           // simplify cfg will 
generate switch-case instruction
     passes.add(createScalarizePass());             // Expand all vector ops
 
+    // Timestamp instrumentation is opt-in: OCL_PROFILING must be set and start with '1'.
+    if (getenv("OCL_PROFILING") != NULL && getenv("OCL_PROFILING")[0] == '1') {
+      passes.add(createTimestampParserPass());
+      passes.add(createExpandConstantExprPass());
+    }
+
     if(OCL_OUTPUT_CFG)
       passes.add(createCFGPrinterPass());
     if(OCL_OUTPUT_CFG_ONLY)
-- 
1.9.1

_______________________________________________
Beignet mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to