ABataev updated this revision to Diff 338107.
ABataev added a comment.

Rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99432/new/

https://reviews.llvm.org/D99432

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp

Index: clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
===================================================================
--- clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -28,16 +28,19 @@
 int main(int argc, char **argv) {
   int b[10], c[10], d[10];
 #pragma omp target teams map(tofrom:a)
+  {
+    double escaped = 0;
 #pragma omp distribute parallel for firstprivate(b) lastprivate(c) if(a)
   for (int i= 0; i < argc; ++i)
-    a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]);
+    a = foo(&i) + foo(&a) + foo(&b[i]) + foo(&c[i]) + foo(&d[i]) + escaped;
+  }
   return 0;
 }
 
 // SEQ: [[MEM_TY:%.+]] = type { [128 x i8] }
 // SEQ-DAG: [[SHARED_GLOBAL_RD:@.+]] = weak addrspace(3) global [[MEM_TY]] undef
 // SEQ-DAG: [[KERNEL_PTR:@.+]] = internal addrspace(3) global i8* undef
-// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 40
+// SEQ-DAG: [[KERNEL_SIZE:@.+]] = internal unnamed_addr constant i{{64|32}} 48
 // SEQ-DAG: [[KERNEL_SHARED:@.+]] = internal unnamed_addr constant i16 1
 // CHECK-DAG: @__omp_offloading_{{.*}}_main_[[LINE:l.+]]_exec_mode = weak constant i8 0
 
@@ -47,9 +50,10 @@
 // SEQ: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([[MEM_TY]], [[MEM_TY]] addrspace(3)* [[SHARED_GLOBAL_RD]], i32 0, i32 0, i32 0) to i8*), i{{64|32}} [[SIZE]], i16 [[SHARED]], i8** addrspacecast (i8* addrspace(3)* [[KERNEL_PTR]] to i8**))
 // SEQ: [[PTR:%.+]] = load i8*, i8* addrspace(3)* [[KERNEL_PTR]],
 // SEQ: [[GEP:%.+]] = getelementptr inbounds i8, i8* [[PTR]], i{{64|32}} 0
-// PAR: [[GEP:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{32|64}} 40, i16 1)
+// PAR: [[GEP:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{32|64}} 48, i16 1)
 // CHECK: [[STACK:%.+]] = bitcast i8* [[GEP]] to %struct._globalized_locals_ty*
-// CHECK: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0
+// CHECK-DAG: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 1
+// CHECK-DAG: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]], i{{32|64}} 0, i{{32|64}} 0
 // CHECK-NOT: getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[STACK]],
 // CHECK: call void @__kmpc_for_static_init_4(
 
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -229,6 +229,7 @@
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
   bool AllEscaped = false;
   bool IsForCombinedParallelRegion = false;
+  bool IsInSPMDKernel = false;
 
   void markAsEscaped(const ValueDecl *VD) {
     // Do not globalize declare target variables.
@@ -242,6 +243,9 @@
     // Variables captured by value must be globalized.
     if (auto *CSI = CGF.CapturedStmtInfo) {
       if (const FieldDecl *FD = CSI->lookup(cast<VarDecl>(VD))) {
+        // Do not globalize captured vars in SPMD mode.
+        if (IsInSPMDKernel)
+          return;
         // Check if need to capture the variable that was already captured by
         // value in the outer region.
         if (!IsForCombinedParallelRegion) {
@@ -351,9 +355,10 @@
 
 public:
   CheckVarsEscapingDeclContext(CodeGenFunction &CGF,
-                               ArrayRef<const ValueDecl *> TeamsReductions)
-      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()) {
-  }
+                               ArrayRef<const ValueDecl *> TeamsReductions,
+                               bool IsInSPMDKernel = false)
+      : CGF(CGF), EscapedDecls(TeamsReductions.begin(), TeamsReductions.end()),
+        IsInSPMDKernel(IsInSPMDKernel) {}
   virtual ~CheckVarsEscapingDeclContext() = default;
   void VisitDeclStmt(const DeclStmt *S) {
     if (!S)
@@ -1631,65 +1636,30 @@
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
   SourceLocation Loc = D.getBeginLoc();
 
-  const RecordDecl *GlobalizedRD = nullptr;
-  llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
-  llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
-  unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
-  // Globalize team reductions variable unconditionally in all modes.
-  if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
-    getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
-  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD) {
-    getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
-    if (!LastPrivatesReductions.empty()) {
-      GlobalizedRD = ::buildRecordForGlobalizedVars(
-          CGM.getContext(), llvm::None, LastPrivatesReductions,
-          MappedDeclsFields, WarpSize);
-    }
-  } else if (!LastPrivatesReductions.empty()) {
-    assert(!TeamAndReductions.first &&
-           "Previous team declaration is not expected.");
-    TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
-    std::swap(TeamAndReductions.second, LastPrivatesReductions);
-  }
+  assert(!TeamAndReductions.first &&
+         "Previous team declaration is not expected.");
+  if (getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
+    getDistributeLastprivateVars(CGM.getContext(), D, TeamAndReductions.second);
+  else
+    getTeamsReductionVars(CGM.getContext(), D, TeamAndReductions.second);
+  TeamAndReductions.first = D.getCapturedStmt(OMPD_teams)->getCapturedDecl();
 
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
     SourceLocation &Loc;
-    const RecordDecl *GlobalizedRD;
-    llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-        &MappedDeclsFields;
 
   public:
-    NVPTXPrePostActionTy(
-        SourceLocation &Loc, const RecordDecl *GlobalizedRD,
-        llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
-            &MappedDeclsFields)
-        : Loc(Loc), GlobalizedRD(GlobalizedRD),
-          MappedDeclsFields(MappedDeclsFields) {}
+    NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
     void Enter(CodeGenFunction &CGF) override {
       auto &Rt =
           static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
-      if (GlobalizedRD) {
-        auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
-        I->getSecond().GlobalRecord = GlobalizedRD;
-        I->getSecond().MappedParams =
-            std::make_unique<CodeGenFunction::OMPMapVars>();
-        DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
-        for (const auto &Pair : MappedDeclsFields) {
-          assert(Pair.getFirst()->isCanonicalDecl() &&
-                 "Expected canonical declaration");
-          Data.insert(std::make_pair(Pair.getFirst(),
-                                     MappedVarData(Pair.getSecond(),
-                                                   /*IsOnePerTeam=*/true)));
-        }
-      }
       Rt.emitGenericVarsProlog(CGF, Loc);
     }
     void Exit(CodeGenFunction &CGF) override {
       static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime())
           .emitGenericVarsEpilog(CGF);
     }
-  } Action(Loc, GlobalizedRD, MappedDeclsFields);
+  } Action(Loc);
   CodeGen.setAction(Action);
   llvm::Function *OutlinedFun = CGOpenMPRuntime::emitTeamsOutlinedFunction(
       D, ThreadIDVar, InnermostKind, CodeGen);
@@ -4311,7 +4281,8 @@
 
 void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
                                               const Decl *D) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic &&
+      getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
     return;
 
   assert(D && "Expected function or captured|block decl.");
@@ -4328,13 +4299,16 @@
   } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
     Body = CD->getBody();
     NeedToDelayGlobalization = CGF.CapturedStmtInfo->getKind() == CR_OpenMP;
-    if (NeedToDelayGlobalization &&
+    if (NeedToDelayGlobalization && !IsInTTDRegion &&
         getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
       return;
   }
   if (!Body)
     return;
-  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
+  bool IsInSPMDKernel = NeedToDelayGlobalization &&
+                        getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD;
+  CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second,
+                                          IsInSPMDKernel);
   VarChecker.Visit(Body);
   const RecordDecl *GlobalizedVarsRecord =
       VarChecker.getGlobalizedRecord(IsInTTDRegion);
@@ -4359,6 +4333,8 @@
     const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
     Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion)));
   }
+  if (IsInSPMDKernel)
+    return;
   if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) {
     CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None);
     VarChecker.Visit(Body);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to