ABataev created this revision.
ABataev added a reviewer: jdoerfert.
Herald added subscribers: guansong, yaxunl.
ABataev requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: clang.

The compiler supports generic code emission, but in some cases it may
erroneously treat the function context as an SPMD context or a Non-SPMD
parallel context. The context must be cleared on entry to a function and
restored on exit from it to avoid incorrect codegen.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D99762

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
  clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
  clang/test/OpenMP/declare_target_codegen_globalization.cpp
  clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
  clang/test/OpenMP/remarks_parallel_in_target_state_machine.c

Index: clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
===================================================================
--- clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
+++ clang/test/OpenMP/remarks_parallel_in_target_state_machine.c
@@ -44,4 +44,4 @@
 }
 
 // expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to}}
-// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
+// expected-remark@* 2 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
Index: clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
===================================================================
--- clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
+++ clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c
@@ -98,5 +98,5 @@
   }
 }
 
-// all-remark@* 3 {{OpenMP runtime call __kmpc_global_thread_num moved to}}
-// all-remark@* 3 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
+// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to}}
+// all-remark@* 12 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}}
Index: clang/test/OpenMP/declare_target_codegen_globalization.cpp
===================================================================
--- clang/test/OpenMP/declare_target_codegen_globalization.cpp
+++ clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -37,11 +37,14 @@
 // CHECK: define {{.*}}[[BAR]]()
 // CHECK: alloca i32,
 // CHECK: [[A_LOCAL_ADDR:%.+]] = alloca i32,
+// CHECK: [[PL:%.+]] = call i16 @__kmpc_parallel_level(
+// CHECK: [[IS_IN_PARALLEL:%.+]] = icmp eq i16 [[PL]], 0
 // CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
 // CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
 // CHECK: br i1 [[IS_SPMD]], label
 // CHECK: br label
-// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 128, i16 0)
+// CHECK: [[SZ:%.+]] = select i1 [[IS_IN_PARALLEL]], i64 4, i64 128
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[SZ]], i16 0)
 // CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%.+]]*
 // CHECK: br label
 // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[GLOBALS]], {{.+}} ]
@@ -49,7 +52,9 @@
 // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 // CHECK: [[LID:%.+]] = and i32 [[TID]], 31
 // CHECK: [[A_GLOBAL_ADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A_ADDR]], i32 0, i32 [[LID]]
-// CHECK: [[A_ADDR:%.+]] = select i1 [[IS_SPMD]], i32* [[A_LOCAL_ADDR]], i32* [[A_GLOBAL_ADDR]]
+// CHECK: [[A_GLOBAL_PARALLEL_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0
+// CHECK: [[A_PARALLEL_ADDR:%.+]] = select i1 [[IS_IN_PARALLEL]], i32* [[A_GLOBAL_PARALLEL_ADDR]], i32* [[A_GLOBAL_ADDR]]
+// CHECK: [[A_ADDR:%.+]] = select i1 [[IS_SPMD]], i32* [[A_LOCAL_ADDR]], i32* [[A_PARALLEL_ADDR]]
 // CHECK: call {{.*}}[[FOO]](i32* nonnull align {{[0-9]+}} dereferenceable{{.*}} [[A_ADDR]])
 // CHECK: br i1 [[IS_SPMD]], label
 // CHECK: [[BC:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to i8*
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -427,6 +427,20 @@
   /// true if we're definitely in the parallel region.
   bool IsInParallelRegion = false;
 
+  struct StateMode {
+    ExecutionMode SavedExecutionMode = EM_Unknown;
+    bool SavedIsInTargetMasterThreadRegion = false;
+    bool SavedIsInTTDRegion = false;
+    bool SavedIsInParallelRegion = false;
+    StateMode(ExecutionMode SavedExecutionMode,
+              bool SavedIsInTargetMasterThreadRegion, bool SavedIsInTTDRegion,
+              bool SavedIsInParallelRegion)
+        : SavedExecutionMode(SavedExecutionMode),
+          SavedIsInTargetMasterThreadRegion(SavedIsInTargetMasterThreadRegion),
+          SavedIsInTTDRegion(SavedIsInTTDRegion),
+          SavedIsInParallelRegion(SavedIsInParallelRegion) {}
+  };
+  llvm::DenseMap<CanonicalDeclPtr<const Decl>, StateMode> SavedExecutionModes;
   /// Map between an outlined function and its wrapper.
   llvm::DenseMap<llvm::Function *, llvm::Function *> WrapperFunctionsMap;
 
Index: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -4311,9 +4311,6 @@
 
 void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF,
                                               const Decl *D) {
-  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
-    return;
-
   assert(D && "Expected function or captured|block decl.");
   assert(FunctionGlobalizedDecls.count(CGF.CurFn) == 0 &&
          "Function is registered already.");
@@ -4332,6 +4329,18 @@
         getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD)
       return;
   }
+  if (!NeedToDelayGlobalization) {
+    SavedExecutionModes.try_emplace(D, CurrentExecutionMode,
+                                    IsInTargetMasterThreadRegion, IsInTTDRegion,
+                                    IsInParallelRegion);
+    CurrentExecutionMode = EM_Unknown;
+    IsInTargetMasterThreadRegion = false;
+    IsInTTDRegion = false;
+    IsInParallelRegion = false;
+  }
+  if (getDataSharingMode(CGM) != CGOpenMPRuntimeGPU::Generic)
+    return;
+
   if (!Body)
     return;
   CheckVarsEscapingDeclContext VarChecker(CGF, TeamAndReductions.second);
@@ -4457,6 +4466,14 @@
 }
 
 void CGOpenMPRuntimeGPU::functionFinished(CodeGenFunction &CGF) {
+  auto I = SavedExecutionModes.find(CGF.CurCodeDecl);
+  if (I != SavedExecutionModes.end()) {
+    CurrentExecutionMode = I->second.SavedExecutionMode;
+    IsInTargetMasterThreadRegion = I->second.SavedIsInTargetMasterThreadRegion;
+    IsInTTDRegion = I->second.SavedIsInTTDRegion;
+    IsInParallelRegion = I->second.SavedIsInParallelRegion;
+    SavedExecutionModes.erase(CGF.CurCodeDecl);
+  }
   FunctionGlobalizedDecls.erase(CGF.CurFn);
   CGOpenMPRuntime::functionFinished(CGF);
 }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D99762: [OPENMP]Fix ... Alexey Bataev via Phabricator via cfe-commits

Reply via email to