tianshilei1992 created this revision.
Herald added subscribers: cfe-commits, guansong, yaxunl.
Herald added a reviewer: jdoerfert.
Herald added a project: clang.

Previously, for a nowait target, CG emitted a function call to
`__tgt_target_nowait`, etc. However, in the OpenMP RTL, these functions just
directly call the non-nowait versions, which means nowait is not working as
expected.

The OpenMP specification says a target is actually a target task, which is an
untied and detachable task. It is natural to go in the direction of generating a
task for a nowait target. However, an OpenMP task has a problem: it must be
within a parallel region; otherwise the task will be executed immediately. As a
result, if we directly wrap it into a regular task, a nowait target outside of a
parallel region is still executed synchronously.

In D77609 <https://reviews.llvm.org/D77609>, I added support for unshackled
tasks in the OpenMP RTL. Basically, an unshackled task is a task that is not
bound to any parallel region. So every nowait target will be transformed into an
unshackled task. In order to distinguish it from a regular task, a new flag bit
is set for an unshackled task. This flag will be used by the RTL for later
processing.

Signed-off-by: Shilei Tian <tianshilei1...@gmail.com>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D78075

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGStmtOpenMP.cpp

Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3615,6 +3615,8 @@
   OMPTaskDataTy Data;
   // The task is not final.
   Data.Final.setInt(/*IntVal=*/false);
+  // The task is unshackled
+  Data.Unshackled = true;
   // Get list of firstprivate variables.
   for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
     auto IRef = C->varlist_begin();
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -118,6 +118,7 @@
   unsigned NumberOfParts = 0;
   bool Tied = true;
   bool Nogroup = false;
+  bool Unshackled = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5045,6 +5045,7 @@
     DestructorsFlag = 0x8,
     PriorityFlag = 0x20,
     DetachableFlag = 0x40,
+    UnshackledFlag = 0x80,
   };
   unsigned Flags = Data.Tied ? TiedFlag : 0;
   bool NeedsCleanup = false;
@@ -5057,6 +5058,8 @@
     Flags = Flags | PriorityFlag;
   if (D.hasClausesOfKind<OMPDetachClause>())
     Flags = Flags | DetachableFlag;
+  if (Data.Unshackled)
+    Flags |= UnshackledFlag;
   llvm::Value *TaskFlags =
       Data.Final.getPointer()
           ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
@@ -9789,6 +9792,7 @@
   assert(OutlinedFn && "Invalid outlined function!");
 
   const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
+  const bool HasNowaitClause = D.hasClausesOfKind<OMPNowaitClause>();
   llvm::SmallVector<llvm::Value *, 16> CapturedVars;
   const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
   auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
@@ -9801,7 +9805,8 @@
   llvm::Value *MapTypesArray = nullptr;
   // Fill up the pointer arrays and transfer execution to the device.
   auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
-                    &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+                    &MapTypesArray, &CS, RequiresOuterTask, HasNowaitClause,
+                    &CapturedVars,
                     SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
     if (Device.getInt() == OMPC_DEVICE_ancestor) {
       // Reverse offloading is not supported, so just execute on the host.
@@ -9851,7 +9856,6 @@
     // Emit tripcount for the target loop-based directive.
     emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
 
-    bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
     // The target region is an outlined function launched by the runtime
     // via calls __tgt_target() or __tgt_target_teams().
     //
@@ -9897,9 +9901,7 @@
                                        NumTeams,
                                        NumThreads};
       Return = CGF.EmitRuntimeCall(
-          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
-                                          : OMPRTL__tgt_target_teams),
-          OffloadingArgs);
+          createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
     } else {
       llvm::Value *OffloadingArgs[] = {DeviceID,
                                        OutlinedFnID,
@@ -9908,10 +9910,8 @@
                                        InputInfo.PointersArray.getPointer(),
                                        InputInfo.SizesArray.getPointer(),
                                        MapTypesArray};
-      Return = CGF.EmitRuntimeCall(
-          createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
-                                          : OMPRTL__tgt_target),
-          OffloadingArgs);
+      Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+                                   OffloadingArgs);
     }
 
     // Check the error code and execute the host version if required.
@@ -9923,7 +9923,7 @@
     CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
 
     CGF.EmitBlock(OffloadFailedBlock);
-    if (RequiresOuterTask) {
+    if (RequiresOuterTask || HasNowaitClause) {
       CapturedVars.clear();
       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
     }
@@ -9934,10 +9934,9 @@
   };
 
   // Notify that the host version must be executed.
-  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
-                    RequiresOuterTask](CodeGenFunction &CGF,
-                                       PrePostActionTy &) {
-    if (RequiresOuterTask) {
+  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask,
+                    HasNowaitClause](CodeGenFunction &CGF, PrePostActionTy &) {
+    if (RequiresOuterTask || HasNowaitClause) {
       CapturedVars.clear();
       CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
     }
@@ -9945,7 +9944,7 @@
   };
 
   auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
-                          &CapturedVars, RequiresOuterTask,
+                          &CapturedVars, RequiresOuterTask, HasNowaitClause,
                           &CS](CodeGenFunction &CGF, PrePostActionTy &) {
     // Fill up the arrays with all the captured variables.
     MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
@@ -10035,15 +10034,16 @@
         Address(Info.PointersArray, CGM.getPointerAlign());
     InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
     MapTypesArray = Info.MapTypesArray;
-    if (RequiresOuterTask)
+    if (RequiresOuterTask || HasNowaitClause)
       CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
     else
       emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
   };
 
-  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
-                             CodeGenFunction &CGF, PrePostActionTy &) {
-    if (RequiresOuterTask) {
+  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask,
+                          HasNowaitClause](CodeGenFunction &CGF,
+                                           PrePostActionTy &) {
+    if (RequiresOuterTask || HasNowaitClause) {
       CodeGenFunction::OMPTargetDataInfo InputInfo;
       CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
     } else {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to