efwright created this revision.
efwright added a reviewer: jdoerfert.
Herald added subscribers: sunshaoce, guansong, hiraditya, yaxunl.
Herald added a project: All.
efwright requested review of this revision.
Herald added subscribers: llvm-commits, openmp-commits, cfe-commits, jplehr, 
sstefan1.
Herald added projects: clang, OpenMP, LLVM.

This is a portion of the work for implementing OpenMP's "simd" loop directive 
for GPUs. For now only working on upstreaming the code generation portion. 
Right now the runtime just runs the loop sequentially.

Style-wise we're using a similar methodology of other directives in 
libomptarget where the "parallel region" is outlined, and passed as an argument 
into the appropriate runtime function. These changes are in the OMPIRBuilder 
and right now are only enabled if OMPIRBuilder is enabled. The code also 
depends on the OMPCanonicalLoop class existing in the AST, which currently only 
happens when the OMPIRBuilder is enabled.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D154568

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/CodeGen/CodeGenFunction.cpp
  clang/lib/Parse/ParseOpenMP.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  openmp/libomptarget/DeviceRTL/src/Workshare.cpp

Index: openmp/libomptarget/DeviceRTL/src/Workshare.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Workshare.cpp
+++ openmp/libomptarget/DeviceRTL/src/Workshare.cpp
@@ -658,6 +658,19 @@
 void __kmpc_distribute_static_fini(IdentTy *loc, int32_t global_tid) {
   FunctionTracingRAII();
 }
+
+void __kmpc_simd_51(
+  IdentTy *ident, void *WorkFn, uint64_t TripCount,
+  void **Args, uint32_t nargs
+) {
+  FunctionTracingRAII();
+
+  ASSERT(WorkFn); 
+  for(uint64_t omp_iv = 0; omp_iv < TripCount; omp_iv++) {
+    ((void (*)(uint64_t, void**))WorkFn)(omp_iv, Args);
+  }
+  
+}
 }
 
 #pragma omp end declare target
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -454,6 +454,7 @@
   for (OutlineInfo &OI : OutlineInfos) {
     // Skip functions that have not finalized yet; may happen with nested
     // function generation.
+    
     if (Fn && OI.getFunction() != Fn) {
       DeferredOutlines.push_back(OI);
       continue;
@@ -462,7 +463,6 @@
     ParallelRegionBlockSet.clear();
     Blocks.clear();
     OI.collectBlocks(ParallelRegionBlockSet, Blocks);
-
     Function *OuterFn = OI.getFunction();
     CodeExtractorAnalysisCache CEAC(*OuterFn);
     CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
@@ -481,11 +481,10 @@
     assert(Extractor.isEligible() &&
            "Expected OpenMP outlining to be possible!");
 
-    for (auto *V : OI.ExcludeArgsFromAggregate)
+    for (auto *V : OI.ExcludeArgsFromAggregate) {
       Extractor.excludeArgFromAggregate(V);
-
+    }
     Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
-
     LLVM_DEBUG(dbgs() << "After      outlining: " << *OuterFn << "\n");
     LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
     assert(OutlinedFn->getReturnType()->isVoidTy() &&
@@ -1230,6 +1229,308 @@
   return AfterIP;
 }
 
+
+IRBuilder<>::InsertPoint OpenMPIRBuilder::createSimdLoop(
+  const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
+  LoopBodyCallbackTy BodyGenCB,
+  TripCountCallbackTy DistanceCB,
+  PrivatizeCallbackTy PrivCB,
+  FinalizeCallbackTy FiniCB,
+  bool SPMDMode
+)
+{
+  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
+
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Value *ThreadID = getOrCreateThreadID(Ident);
+
+  BasicBlock *InsertBB = Builder.GetInsertBlock();
+  Function *OuterFn = InsertBB->getParent();
+
+  LLVM_DEBUG(dbgs() << "At the start of createSimdLoop: " << *OuterFn << "\n");
+
+  // Save the outer alloca block because the insertion iterator may get
+  // invalidated and we still need this later.
+  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
+
+  // Vector to remember instructions we used only during the modeling but which
+  // we want to delete at the end.
+  SmallVector<Instruction *, 16> ToBeDeleted;
+
+  // Create an artificial insertion point that will also ensure the blocks we
+  // are about to split are not degenerated.
+  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);
+
+  Instruction *ThenTI = UI, *ElseTI = nullptr;
+
+  BasicBlock *ThenBB = ThenTI->getParent();
+  BasicBlock *LRegDistanceBB = ThenBB->splitBasicBlock(ThenTI, "omp.loop.distance");
+  BasicBlock *PRegEntryBB = LRegDistanceBB->splitBasicBlock(ThenTI, "omp.loop.entry");
+  BasicBlock *PRegBodyBB =
+      PRegEntryBB->splitBasicBlock(ThenTI, "omp.loop.region");
+  BasicBlock *PRegPreFiniBB =
+      PRegBodyBB->splitBasicBlock(ThenTI, "omp.loop.pre_finalize");
+  BasicBlock *PRegExitBB =
+      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.loop.exit");
+
+
+  auto FiniCBWrapper = [&](InsertPointTy IP) {
+    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
+    // target to the region exit blocks
+    if (IP.getBlock()->end() == IP.getPoint()) {
+      IRBuilder<>::InsertPointGuard IPG(Builder);
+      Builder.restoreIP(IP);
+      Instruction *I = Builder.CreateBr(PRegExitBB);
+      IP = InsertPointTy(I->getParent(), I->getIterator());
+    }
+    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
+           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
+           "Unexpected insertion point for finalization call!");
+    return FiniCB(IP);
+  };
+
+  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, false});
+
+  // Compute the loop trip count
+  // Insert after the outer alloca to ensure all variables need
+  // in its calculation are ready
+  InsertPointTy DistanceIP(LRegDistanceBB, LRegDistanceBB->begin());
+  Value *DistVal;
+  bool IsTripCountSigned;
+  assert(DistanceCB && "expected loop trip count callback function!");
+  DistanceCB(DistanceIP, DistVal, IsTripCountSigned);
+  assert(DistVal && "trip count call back should return integer trip count");
+  Type *DistValType = DistVal->getType();
+  assert(DistValType->isIntegerTy() && "trip count should be integer type");
+
+  // Possibly need to cast DistVal to Int64
+  // FIXME if the integer is signed it needs to be converted to
+  // unsigned. If the tripcount is <0 it's fine to just convert
+  // it to 0.
+  if(!DistValType->isIntegerTy(64)) {
+    Builder.SetInsertPoint(LRegDistanceBB->getTerminator());
+    DistVal = Builder.CreateIntCast(
+     DistVal, Int64, /*IsTripCountSigned*/ false, DistVal->getName()+".casted");
+  }
+
+  LLVM_DEBUG(dbgs() << "After DistanceCB: " << *LRegDistanceBB << "\n");
+  LLVM_DEBUG(dbgs() << "Trip count variable: " << *DistVal << "\n");
+
+  // Create the virtual iteration variable that will be pulled into
+  // the outlined function.
+  Builder.restoreIP(OuterAllocaIP);
+  AllocaInst *OMPIVAlloca = Builder.CreateAlloca(Int64, nullptr, "omp.iv.tmp");
+  LoadInst *OMPIVLoad = Builder.CreateLoad(Int64, OMPIVAlloca, "omp.iv");
+
+  // Generate the privatization allocas in the block that will become the entry
+  // of the outlined function.
+  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
+  InsertPointTy InnerAllocaIP = Builder.saveIP();
+
+  // Use omp.iv in the outlined region. Cast it if needed.
+  Instruction *OMPIV;
+  if(!DistValType->isIntegerTy(64)) {
+    // Cast omp.iv to the same type as the trip count.
+    // If the cast is needed, keep it in the outlined region
+    OMPIV = dyn_cast<Instruction>(
+      Builder.CreateTrunc(OMPIVLoad, DistValType, "omp.iv.casted"));
+  } else {
+    // If cast is unneeded, we still need to generate a fake use of
+    // omp.iv so the outlined function picks it up as the first arg
+    Instruction *OMPIVUse = dyn_cast<Instruction>(
+     Builder.CreateAdd(OMPIVLoad, Builder.getInt64(0), "omp.iv.tobedeleted"));
+    OMPIV = OMPIVLoad;
+    ToBeDeleted.push_back(OMPIVUse);
+  }
+
+  // Order matters
+  ToBeDeleted.push_back(OMPIVLoad);
+  ToBeDeleted.push_back(OMPIVAlloca);
+
+  LLVM_DEBUG(llvm::dbgs() << "omp.iv variable generated: " << *OuterFn << "\n");
+
+  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");
+  assert(BodyGenCB && "Expected body generation callback!");
+  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
+
+  BodyGenCB(InnerAllocaIP, CodeGenIP, OMPIV);
+
+  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
+
+  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_simd_51);
+
+  OutlineInfo OI;
+
+  // Adjust the finalization stack, verify the adjustment, and call the
+  // finalize function a last time to finalize values between the pre-fini
+  // block and the exit block if we left the parallel "the normal way".
+  auto FiniInfo = FinalizationStack.pop_back_val();
+  (void)FiniInfo;
+  assert(FiniInfo.DK == OMPD_parallel &&
+         "Unexpected finalization stack state!");
+
+  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
+
+  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
+  FiniCB(PreFiniIP);
+
+  OI.OuterAllocaBB = OuterAllocaBlock;
+  OI.EntryBB = PRegEntryBB;
+  OI.ExitBB = PRegExitBB;
+
+  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
+  SmallVector<BasicBlock *, 32> Blocks;
+  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
+
+  // Ensure a single exit node for the outlined region by creating one.
+  // We might have multiple incoming edges to the exit now due to finalizations,
+  // e.g., cancel calls that cause the control flow to leave the region.
+  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
+  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
+  PRegOutlinedExitBB->setName("omp.loop.outlined.exit");
+  Blocks.push_back(PRegOutlinedExitBB);
+
+  CodeExtractorAnalysisCache CEAC(*OuterFn);
+
+  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
+                          /* AggregateArgs */ true,
+                          /* BlockFrequencyInfo */ nullptr,
+                          /* BranchProbabilityInfo */ nullptr,
+                          /* AssumptionCache */ nullptr,
+                          /* AllowVarArgs */ false,
+                          /* AllowAlloca */ true,
+                          /* AllocationBlock */ OuterAllocaBlock,
+                          /* Suffix */ ".omp_simd");
+
+  BasicBlock *CommonExit = nullptr;
+  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);
+
+  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
+
+  auto PrivHelper = [&](Value &V) {
+    // Exclude omp.iv from aggregate
+    if (&V == OMPIVLoad) {
+      OI.ExcludeArgsFromAggregate.push_back(&V);
+      return;
+    }
+
+    // Get all uses of value that are inside of the outlined region
+    SetVector<Use *> Uses;
+    for (Use &U : V.uses())
+      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
+        if (ParallelRegionBlockSet.count(UserI->getParent()))
+          Uses.insert(&U);
+
+    Value *Inner = &V;
+
+    // If the value isn't a pointer type, store it in a pointer
+    // Unpack it inside the outlined region
+    if (!V.getType()->isPointerTy()) {
+      IRBuilder<>::InsertPointGuard Guard(Builder);
+      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
+
+      Builder.restoreIP(OuterAllocaIP);
+      Value *Ptr = Builder.CreateAlloca(
+        V.getType(), nullptr, V.getName() + ".reloaded");
+
+      // Store to stack at end of the block that currently branches to the entry
+      // block of the to-be-outlined region.
+      Builder.SetInsertPoint(
+        InsertBB, InsertBB->getTerminator()->getIterator());
+      Builder.CreateStore(&V, Ptr);
+
+      // Load back next to allocations in the to-be-outlined region.
+      Builder.restoreIP(InnerAllocaIP);
+      Inner = Builder.CreateLoad(V.getType(), Ptr);
+    }
+
+    Value *ReplacementValue = nullptr;
+    Builder.restoreIP(
+      PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
+    assert(ReplacementValue &&
+      "Expected copy/create callback to set replacement value!");
+    if (ReplacementValue == &V)
+      return;
+
+    for (Use *UPtr : Uses)
+      UPtr->set(ReplacementValue);
+
+  };
+
+  InnerAllocaIP = IRBuilder<>::InsertPoint(
+      OMPIV->getParent(), OMPIV->getNextNode()->getIterator());
+
+  // Reset the outer alloca insertion point to the entry of the relevant block
+  // in case it was invalidated.
+  OuterAllocaIP = IRBuilder<>::InsertPoint(
+    OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
+
+  for (Value *Input : Inputs) {
+    PrivHelper(*Input);
+  }
+
+  assert(Outputs.empty() &&
+    "OpenMP outlining should not produce live-out values!");
+
+  LLVM_DEBUG(dbgs() << "After  privatization: " << *OuterFn << "\n");
+  for (auto *BB : Blocks) {
+    LLVM_DEBUG(dbgs() << " PBR: " << BB->getName() << "\n");
+  }
+
+  int NumInputs = Inputs.size()-1; // One argument is always omp.iv
+  OI.PostOutlineCB = [=](Function &OutlinedFn) {
+    OutlinedFn.addFnAttr(Attribute::NoUnwind);
+    OutlinedFn.addFnAttr(Attribute::NoRecurse);
+
+    assert(OutlinedFn.arg_size() == 2 &&
+           "Expected omp.iv & structArg as arguments");
+
+    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
+    BasicBlock *CallBlock = CI->getParent();
+    CallBlock->setName("omp_loop");
+    Builder.SetInsertPoint(CI);
+
+    Value * StructArg = CI->getArgOperand(1); // 0 should be omp.iv
+
+    Value *SimdArgs[] = {
+        Ident,
+        Builder.CreateBitCast(&OutlinedFn, LoopTaskPtr),
+        DistVal,
+        Builder.CreateCast(Instruction::BitCast, StructArg, Int8PtrPtr),
+        Builder.getInt32(NumInputs)};
+
+    SmallVector<Value *, 16> RealArgs;
+    RealArgs.append(std::begin(SimdArgs), std::end(SimdArgs));
+
+    CallInst *Simd51Call = Builder.CreateCall(RTLFn, RealArgs);
+
+    LLVM_DEBUG(dbgs() << "With runtime call placed: " << *Builder.GetInsertBlock()->getParent() << "\n");
+
+    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());
+
+    CI->eraseFromParent();
+
+    for (Instruction *I : ToBeDeleted)
+      I->eraseFromParent();
+
+  };
+
+  addOutlineInfo(std::move(OI));
+
+  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
+  UI->eraseFromParent();
+
+  return AfterIP;
+
+}
+
 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
   // Build call void __kmpc_flush(ident_t *loc)
   uint32_t SrcLocStrSize;
Index: llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -119,6 +119,8 @@
 __OMP_FUNCTION_TYPE(InterWarpCopy, false, Void, VoidPtr, Int32)
 __OMP_FUNCTION_TYPE(GlobalList, false, Void, VoidPtr, Int32, VoidPtr)
 
+__OMP_FUNCTION_TYPE(LoopTask, false, Void, Int64, VoidPtrPtr)
+
 #undef __OMP_FUNCTION_TYPE
 #undef OMP_FUNCTION_TYPE
 
@@ -484,6 +486,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_simd_51, false, Void, IdentPtr, LoopTaskPtr, Int64, VoidPtrPtr, Int32)
+
 __OMP_RTL(__last, false, Void, )
 
 #undef __OMP_RTL
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -506,6 +506,16 @@
   using BodyGenCallbackTy =
       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
+  using LoopBodyCallbackTy =
+      function_ref<void(
+        InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *IterationNum
+      )>;
+
+  using TripCountCallbackTy =
+      function_ref<
+        void(InsertPointTy CodeGenIP, Value *&TripCount, bool &Signed)
+      >;
+
   // This is created primarily for sections construct as llvm::function_ref
   // (BodyGenCallbackTy) is not storable (as described in the comments of
   // function_ref class - function_ref contains non-ownable reference
@@ -605,6 +615,15 @@
                  Value *NumThreads, omp::ProcBindKind ProcBind,
                  bool IsCancellable);
 
+  IRBuilder<>::InsertPoint
+  createSimdLoop(const LocationDescription &Loc, InsertPointTy AllocaIP,
+                 LoopBodyCallbackTy BodyGenCB,
+                 TripCountCallbackTy DistanceCB,
+                 PrivatizeCallbackTy PrivCB,
+                 FinalizeCallbackTy FiniCB,
+                 bool SPMDMode);
+
+
   /// Generator for the control flow structure of an OpenMP canonical loop.
   ///
   /// This generator operates on the logical iteration space of the loop, i.e.
Index: clang/lib/Parse/ParseOpenMP.cpp
===================================================================
--- clang/lib/Parse/ParseOpenMP.cpp
+++ clang/lib/Parse/ParseOpenMP.cpp
@@ -2953,7 +2953,7 @@
         Sema::CompoundScopeRAII Scope(Actions);
         AssociatedStmt = ParseStatement();
 
-        if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind) &&
+        if (AssociatedStmt.isUsable() && isOpenMPLoopDirective(DKind)  &&
             getLangOpts().OpenMPIRBuilder)
           AssociatedStmt = Actions.ActOnOpenMPLoopnest(AssociatedStmt.get());
       }
Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -94,7 +94,8 @@
   // seems to be a reasonable spot. We do it here, as opposed to the deletion
   // time of the CodeGenModule, because we have to ensure the IR has not yet
   // been "emitted" to the outside, thus, modifications are still sensible.
-  if (CGM.getLangOpts().OpenMPIRBuilder && CurFn)
+  //if (CGM.getLangOpts().OpenMPIRBuilder && CurFn)
+  if (CurFn)
     CGM.getOpenMPRuntime().getOMPBuilder().finalize(CurFn);
 }
 
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2664,59 +2664,102 @@
 }
 
 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
-  bool UseOMPIRBuilder =
-      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
+  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
+  //bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIsDevice;
   if (UseOMPIRBuilder) {
-    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
-                                                          PrePostActionTy &) {
-      // Use the OpenMPIRBuilder if enabled.
-      if (UseOMPIRBuilder) {
-        llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
-            GetAlignedMapping(S, CGF);
-        // Emit the associated statement and get its loop representation.
-        const Stmt *Inner = S.getRawStmt();
-        llvm::CanonicalLoopInfo *CLI =
-            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
-
-        llvm::OpenMPIRBuilder &OMPBuilder =
-            CGM.getOpenMPRuntime().getOMPBuilder();
-        // Add SIMD specific metadata
-        llvm::ConstantInt *Simdlen = nullptr;
-        if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
-          RValue Len =
-              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
-                                /*ignoreResult=*/true);
-          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
-          Simdlen = Val;
-        }
-        llvm::ConstantInt *Safelen = nullptr;
-        if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
-          RValue Len =
-              this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
-                                /*ignoreResult=*/true);
-          auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
-          Safelen = Val;
-        }
-        llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
-        if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
-          if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
-            Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
+    auto *CS = dyn_cast<CapturedStmt>(S.getAssociatedStmt());
+    auto *CL = dyn_cast<OMPCanonicalLoop>(CS->getCapturedStmt());
+    CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+
+    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+      AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+
+    const auto *For = dyn_cast<ForStmt>(CL->getLoopStmt());
+    if(const Stmt *InitStmt = For->getInit())
+      EmitStmt(InitStmt);
+    const DeclRefExpr *LoopVarRef = CL->getLoopVarRef();
+    LValue LCVal = EmitLValue(LoopVarRef);
+    Address LoopVarAddress = LCVal.getAddress(*this);
+    llvm::AllocaInst *LoopVar = dyn_cast<llvm::AllocaInst>(LoopVarAddress.getPointer());
+
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    // FIXME check if trip count is signed
+    auto DistanceCB = [this, CL, LoopVar](InsertPointTy CodeGenIP, llvm::Value *&TripCount, bool &Signed) -> void {
+      Builder.restoreIP(CodeGenIP);
+
+      const CapturedStmt *DistanceFunc = CL->getDistanceFunc();
+      EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
+
+      QualType LogicalTy = DistanceFunc->getCapturedDecl()
+                           ->getParam(0)
+                           ->getType()
+                           .getNonReferenceType();
+      Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
+      emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
+      TripCount = Builder.CreateLoad(CountAddr, ".count");
+
+      return;
+    };
+
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
+      ReplVal = &Val;
+      return CodeGenIP;
+    };
+
+    const Stmt *loopBody = S.getBody();
+    auto BodyGenCB = [loopBody, this, CL, LoopVar, &S]
+                     (InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                      llvm::Value *Virtual) {
+      llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+
+      OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
+          *this,
+          loopBody,
+          AllocaIP,
+          CodeGenIP,
+          "simd");
+
+      Builder.restoreIP(AllocaIP);
+      llvm::AllocaInst *NewLoopVar =
+            Builder.CreateAlloca(LoopVar->getAllocatedType(), LoopVar->getAddressSpace(),
+                                 LoopVar->getArraySize(), LoopVar->getName()+".loopvar");
+
+      for(llvm::User *U : LoopVar->users()) {
+        if(auto I = dyn_cast<llvm::Instruction>(U)) {
+          if(I->getParent() == CodeGenIPBB) {
+            U->replaceUsesOfWith(LoopVar, NewLoopVar);
           }
         }
-        // Add simd metadata to the collapsed loop. Do not generate
-        // another loop for if clause. Support for if clause is done earlier.
-        OMPBuilder.applySimd(CLI, AlignedVars,
-                             /*IfCond*/ nullptr, Order, Simdlen, Safelen);
-        return;
       }
+
+      const CapturedStmt *LoopVarFunc = CL->getLoopVarFunc();
+      EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
+      Builder.SetInsertPoint(CodeGenIPBB, CodeGenIPBB->begin());
+      emitCapturedStmtCall(*this, LoopVarClosure,
+                           {NewLoopVar, Virtual});
+
     };
-    {
-      auto LPCRegion =
-          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
-      OMPLexicalScope Scope(*this, S, OMPD_unknown);
-      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
-                                                  CodeGenIRBuilder);
-    }
+
+    Builder.restoreIP(
+      OMPBuilder.createSimdLoop(
+        Builder,
+        AllocaIP,
+        BodyGenCB,
+        DistanceCB,
+        PrivCB,
+        FiniCB,
+        /*SPMD*/ true
+    ));
+
     return;
   }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to