tianshilei1992 created this revision. Herald added subscribers: cfe-commits, guansong, yaxunl. Herald added a reviewer: jdoerfert. Herald added a project: clang.
Previously, for a nowait target, CG emitted a function call to `__tgt_target_nowait`, etc. However, in the OpenMP RTL, these functions just directly call the non-nowait versions, which means nowait does not work as expected. The OpenMP specification says a target is actually a target task, which is an untied and detachable task. It is natural to go in the direction of generating a task for a nowait target. However, an OpenMP task has the problem that it must be within a parallel region; otherwise the task will be executed immediately. As a result, if we directly wrap the nowait target in a regular task, a nowait target outside of a parallel region is still executed synchronously. In D77609 <https://reviews.llvm.org/D77609>, I added support for unshackled tasks in the OpenMP RTL. Basically, an unshackled task is a task that is not bound to any parallel region. So all nowait targets will be transformed into unshackled tasks. In order to distinguish them from regular tasks, a new flag bit is set for unshackled tasks. This flag will be used by the RTL for later processing. Signed-off-by: Shilei Tian <tianshilei1...@gmail.com> Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D78075 Files: clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/CodeGen/CGOpenMPRuntime.h clang/lib/CodeGen/CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- clang/lib/CodeGen/CGStmtOpenMP.cpp +++ clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3615,6 +3615,8 @@ OMPTaskDataTy Data; // The task is not final. Data.Final.setInt(/*IntVal=*/false); + // The task is unshackled + Data.Unshackled = true; // Get list of firstprivate variables. for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); Index: clang/lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.h +++ clang/lib/CodeGen/CGOpenMPRuntime.h @@ -118,6 +118,7 @@ unsigned NumberOfParts = 0; bool Tied = true; bool Nogroup = false; + bool Unshackled = false; }; /// Class intended to support codegen of all kind of the reduction clauses. Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -5045,6 +5045,7 @@ DestructorsFlag = 0x8, PriorityFlag = 0x20, DetachableFlag = 0x40, + UnshackledFlag = 0x80, }; unsigned Flags = Data.Tied ? TiedFlag : 0; bool NeedsCleanup = false; @@ -5057,6 +5058,8 @@ Flags = Flags | PriorityFlag; if (D.hasClausesOfKind<OMPDetachClause>()) Flags = Flags | DetachableFlag; + if (Data.Unshackled) + Flags |= UnshackledFlag; llvm::Value *TaskFlags = Data.Final.getPointer() ? 
CGF.Builder.CreateSelect(Data.Final.getPointer(), @@ -9789,6 +9792,7 @@ assert(OutlinedFn && "Invalid outlined function!"); const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>(); + const bool HasNowaitClause = D.hasClausesOfKind<OMPNowaitClause>(); llvm::SmallVector<llvm::Value *, 16> CapturedVars; const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, @@ -9801,7 +9805,8 @@ llvm::Value *MapTypesArray = nullptr; // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars, + &MapTypesArray, &CS, RequiresOuterTask, HasNowaitClause, + &CapturedVars, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { if (Device.getInt() == OMPC_DEVICE_ancestor) { // Reverse offloading is not supported, so just execute on the host. @@ -9851,7 +9856,6 @@ // Emit tripcount for the target loop-based directive. emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); - bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); // The target region is an outlined function launched by the runtime // via calls __tgt_target() or __tgt_target_teams(). // @@ -9897,9 +9901,7 @@ NumTeams, NumThreads}; Return = CGF.EmitRuntimeCall( - createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait - : OMPRTL__tgt_target_teams), - OffloadingArgs); + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = {DeviceID, OutlinedFnID, @@ -9908,10 +9910,8 @@ InputInfo.PointersArray.getPointer(), InputInfo.SizesArray.getPointer(), MapTypesArray}; - Return = CGF.EmitRuntimeCall( - createRuntimeFunction(HasNowait ? 
OMPRTL__tgt_target_nowait - : OMPRTL__tgt_target), - OffloadingArgs); + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); } // Check the error code and execute the host version if required. @@ -9923,7 +9923,7 @@ CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); CGF.EmitBlock(OffloadFailedBlock); - if (RequiresOuterTask) { + if (RequiresOuterTask || HasNowaitClause) { CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } @@ -9934,10 +9934,9 @@ }; // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, - RequiresOuterTask](CodeGenFunction &CGF, - PrePostActionTy &) { - if (RequiresOuterTask) { + auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, RequiresOuterTask, + HasNowaitClause](CodeGenFunction &CGF, PrePostActionTy &) { + if (RequiresOuterTask || HasNowaitClause) { CapturedVars.clear(); CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } @@ -9945,7 +9944,7 @@ }; auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, - &CapturedVars, RequiresOuterTask, + &CapturedVars, RequiresOuterTask, HasNowaitClause, &CS](CodeGenFunction &CGF, PrePostActionTy &) { // Fill up the arrays with all the captured variables. 
MappableExprsHandler::MapBaseValuesArrayTy BasePointers; @@ -10035,15 +10034,16 @@ Address(Info.PointersArray, CGM.getPointerAlign()); InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); MapTypesArray = Info.MapTypesArray; - if (RequiresOuterTask) + if (RequiresOuterTask || HasNowaitClause) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); }; - auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( - CodeGenFunction &CGF, PrePostActionTy &) { - if (RequiresOuterTask) { + auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask, + HasNowaitClause](CodeGenFunction &CGF, + PrePostActionTy &) { + if (RequiresOuterTask || HasNowaitClause) { CodeGenFunction::OMPTargetDataInfo InputInfo; CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); } else {
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits