ABataev created this revision.
ABataev added a reviewer: jdoerfert.
Herald added subscribers: sstefan1, guansong, yaxunl.
Herald added a project: clang.
In untied tasks, the space for local variables declared in the task region has to be allocated when the memory for the task data is allocated: the outlined function can be interrupted, and we can exit from it at an untied task switch, so the state of those local variables must be preserved across such switches. Also, the compiler must not emit cleanups when exiting at an untied task switch; destructors should run only when the real exit from the declaration scope is reached during execution.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D84457

Files:
  clang/lib/CodeGen/CGOpenMPRuntime.cpp
  clang/lib/CodeGen/CGOpenMPRuntime.h
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/test/OpenMP/task_codegen.cpp
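For reference, a minimal illustration of the situation the patch addresses, loosely adapted from the untied-task test updated below (the struct name S and the exact statements are illustrative, not verbatim from the test): the untied task body may be suspended at each task scheduling point and resumed later, possibly by a different thread, so s1 must live in the task's private data rather than on the stack of the outlined function, and its destructor must not run at an intermediate task switch.

// Illustrative only; not part of the patch.
struct S {
  int a = 0;
  ~S() {} // non-trivial destructor: cleanup must wait for the real scope exit
};

void foo() {
#pragma omp task untied
  {
    S s1;                 // local declared inside the untied task region
    s1.a = 1;
#pragma omp taskyield     // possible task switch: the outlined function may return here
    s1 = S();
#pragma omp taskwait      // another possible task switch
    s1.a = 4;
  }                       // only here should ~S() run for s1
}

With this change, the storage for such locals becomes part of the task's private data (hence the kmp_task_t allocation size growing from 40 to 48 bytes in the test below), and no destructor calls are emitted on the early returns taken at the switch points.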
Index: clang/test/OpenMP/task_codegen.cpp
===================================================================
--- clang/test/OpenMP/task_codegen.cpp
+++ clang/test/OpenMP/task_codegen.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix UNTIEDRT
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
 //
@@ -259,7 +259,7 @@
   a = 4;
   c = 5;
 }
-// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 40, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 48, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
 // CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
 #pragma omp task untied
 {
@@ -296,26 +296,54 @@
 // CHECK: store i32 4, i32* [[A_PTR]]
 // CHECK: define internal i32 [[TASK_ENTRY6]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1)
-// CHECK: switch i32 %{{.+}}, label
+// UNTIEDRT: [[S1_ADDR_PTR:%.+]] = alloca %struct.S*,
+// UNTIEDRT: call void (i8*, ...) %{{.+}}(i8* %{{.+}}, %struct.S** [[S1_ADDR_PTR]])
+// UNTIEDRT: [[S1_ADDR:%.+]] = load %struct.S*, %struct.S** [[S1_ADDR_PTR]],
+// CHECK: switch i32 %{{.+}}, label %[[DONE:.+]] [
+
+// CHECK: [[DONE]]:
+// CHECK: br label %[[CLEANUP:[^,]+]]
+
 // CHECK: load i32*, i32** %
 // CHECK: store i32 1, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
+// UNTIEDRT: br label %[[EXIT:[^,]+]]
+// UNTIEDRT: call void [[CONSTR:@.+]](%struct.S* [[S1_ADDR]])
 // CHECK: call i8* @__kmpc_omp_task_alloc(
 // CHECK: call i32 @__kmpc_omp_task(%
 // CHECK: load i32*, i32** %
 // CHECK: store i32 2, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
+// UNTIEDRT: br label %[[EXIT]]
 // CHECK: call i32 @__kmpc_omp_taskyield(%
 // CHECK: load i32*, i32** %
 // CHECK: store i32 3, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
+// UNTIEDRT: br label %[[EXIT]]
+
+// s1 = S();
+// UNTIEDRT: call void [[CONSTR]](%struct.S* [[TMP:%.+]])
+// UNTIEDRT: [[DST:%.+]] = bitcast %struct.S* [[S1_ADDR]] to i8*
+// UNTIEDRT: [[SRC:%.+]] = bitcast %struct.S* [[TMP]] to i8*
+// UNTIEDRT: call void @llvm.memcpy.{{.+}}(i8* {{.*}}[[DST]], i8* {{.*}}[[SRC]], i64 4, i1 false)
+// UNTIEDRT: call void [[DESTR:@.+]](%struct.S* [[TMP]])
 // CHECK: call i32 @__kmpc_omp_taskwait(%
 // CHECK: load i32*, i32** %
 // CHECK: store i32 4, i32* %
 // CHECK: call i32 @__kmpc_omp_task(%
+// UNTIEDRT: br label %[[EXIT]]
+
+// UNTIEDRT: call void [[DESTR]](%struct.S* [[S1_ADDR]])
+// CHECK: br label %[[CLEANUP]]
+
+// CHECK: [[CLEANUP]]:
+// UNTIEDRT: br label %[[EXIT]]
+
+// UNTIEDRT: [[EXIT]]:
+// UNTIEDRT-NEXT: ret i32 0
 struct S1 {
   int a;
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -21,6 +21,7 @@
 #include "clang/AST/OpenMPClause.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtOpenMP.h"
+#include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/OpenMPKinds.h"
 #include "clang/Basic/PrettyStackTrace.h"
"clang/Basic/PrettyStackTrace.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -3785,6 +3786,42 @@ checkForLastprivateConditionalUpdate(*this, S); } +namespace { +/// Get the list of variables declared in the context of the untied tasks. +class CheckVarsEscapingUntiedTaskDeclContext final + : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { + llvm::SmallVector<const VarDecl *, 4> PrivateDecls; + +public: + explicit CheckVarsEscapingUntiedTaskDeclContext() = default; + virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; + void VisitDeclStmt(const DeclStmt *S) { + if (!S) + return; + // Need to privatize only local vars, static locals can be processed as is. + for (const Decl *D : S->decls()) { + if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) + if (VD->hasLocalStorage()) + PrivateDecls.push_back(VD); + } + } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } + void VisitCapturedStmt(const CapturedStmt *) { return; } + void VisitLambdaExpr(const LambdaExpr *) { return; } + void VisitBlockExpr(const BlockExpr *) { return; } + void VisitStmt(const Stmt *S) { + if (!S) + return; + for (const Stmt *Child : S->children()) + if (Child) + Visit(Child); + } + + /// Swaps list of vars with the provided one. + ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } +}; +} // anonymous namespace + void CodeGenFunction::EmitOMPTaskBasedDirective( const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, @@ -3885,14 +3922,22 @@ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); } + // Get list of local vars for untied tasks. + if (!Data.Tied) { + CheckVarsEscapingUntiedTaskDeclContext Checker; + Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); + Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), + Checker.getPrivateDecls().end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { + llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> UntiedLocalVars; // Set proper addresses for generated private copies. 
     OMPPrivateScope Scope(CGF);
     llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
     if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
-        !Data.LastprivateVars.empty()) {
+        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
       llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
           CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
       enum { PrivatesParam = 2, CopyFnParam = 3 };
@@ -3928,6 +3973,15 @@
         PrivatePtrs.emplace_back(VD, PrivatePtr);
         CallArgs.push_back(PrivatePtr.getPointer());
       }
+      for (const VarDecl *VD : Data.PrivateLocals) {
+        QualType Ty = VD->getType().getNonReferenceType();
+        if (VD->getType()->isLValueReferenceType())
+          Ty = CGF.getContext().getPointerType(Ty);
+        Address PrivatePtr = CGF.CreateMemTemp(
+            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
+        UntiedLocalVars.try_emplace(VD, PrivatePtr);
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
       CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
           CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
       for (const auto &Pair : LastprivateDstsOrigs) {
@@ -3946,6 +4000,13 @@
                             CGF.getContext().getDeclAlign(Pair.first));
         Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
       }
+      // Adjust mapping for internal locals by mapping actual memory instead of
+      // a pointer to this memory.
+      for (auto &Pair : UntiedLocalVars) {
+        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+                            CGF.getContext().getDeclAlign(Pair.first));
+        Pair.getSecond() = Replacement;
+      }
     }
     if (Data.Reductions) {
       OMPPrivateScope FirstprivateScope(CGF);
@@ -4040,6 +4101,8 @@
     }
     (void)InRedScope.Privatize();
 
+    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF.CGM,
+                                                             UntiedLocalVars);
     Action.Enter(CGF);
     BodyGen(CGF);
   };
Index: clang/lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.h
+++ clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -105,6 +105,7 @@
   SmallVector<const Expr *, 4> ReductionOrigs;
   SmallVector<const Expr *, 4> ReductionCopies;
   SmallVector<const Expr *, 4> ReductionOps;
+  SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals;
   struct DependData {
     OpenMPDependClauseKind DepKind = OMPC_DEPEND_unknown;
     const Expr *IteratorExpr = nullptr;
@@ -245,6 +246,19 @@
     ~NontemporalDeclsRAII();
   };
 
+  /// Manages list of nontemporal decls for the specified directive.
+  class UntiedTaskLocalDeclsRAII {
+    CodeGenModule &CGM;
+    const bool NeedToPush;
+
+  public:
+    UntiedTaskLocalDeclsRAII(
+        CodeGenModule &CGM,
+        const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>
+            &LocalVars);
+    ~UntiedTaskLocalDeclsRAII();
+  };
+
   /// Maps the expression for the lastprivate variable to the global copy used
   /// to store new value because original variables are not mapped in inner
   /// parallel regions. Only private copies are captured but we need also to
@@ -715,6 +729,10 @@
   /// The set is the union of all current stack elements.
   llvm::SmallVector<NontemporalDeclsSet, 4> NontemporalDeclsStack;
 
+  using UntiedLocalVarsAddressesMap =
+      llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address>;
+  llvm::SmallVector<UntiedLocalVarsAddressesMap, 4> UntiedLocalVarsStack;
+
   /// Stack for list of addresses of declarations in current context marked as
   /// lastprivate conditional. The set is the union of all current stack
   /// elements.
Index: clang/lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -181,7 +181,7 @@
       UntiedCodeGen(CGF);
       CodeGenFunction::JumpDest CurPoint =
           CGF.getJumpDestInCurrentScope(".untied.next.");
-      CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+      CGF.EmitBranch(CGF.ReturnBlock.getBlock());
      CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
      UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                            CGF.Builder.GetInsertBlock());
@@ -3436,6 +3436,7 @@
                    const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
       : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
         PrivateElemInit(PrivateElemInit) {}
+  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
   const Expr *OriginalRef = nullptr;
   const VarDecl *Original = nullptr;
   const VarDecl *PrivateCopy = nullptr;
@@ -3456,6 +3457,10 @@
   for (const auto &Pair : Privates) {
     const VarDecl *VD = Pair.second.Original;
     QualType Type = VD->getType().getNonReferenceType();
+    // If the private variable is a local variable with lvalue ref type,
+    // allocate the pointer instead of the pointee type.
+    if (!Pair.second.OriginalRef && VD->getType()->isLValueReferenceType())
+      Type = C.getPointerType(Type);
     FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
     if (VD->hasAttrs()) {
       for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@@ -3709,10 +3714,7 @@
 /// \endcode
 static llvm::Value *
 emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
-                               ArrayRef<const Expr *> PrivateVars,
-                               ArrayRef<const Expr *> FirstprivateVars,
-                               ArrayRef<const Expr *> LastprivateVars,
-                               QualType PrivatesQTy,
+                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                                ArrayRef<PrivateDataTy> Privates) {
   ASTContext &C = CGM.getContext();
   FunctionArgList Args;
@@ -3721,9 +3723,9 @@
       C.getPointerType(PrivatesQTy).withConst().withRestrict(),
       ImplicitParamDecl::Other);
   Args.push_back(&TaskPrivatesArg);
-  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
+  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
   unsigned Counter = 1;
-  for (const Expr *E : PrivateVars) {
+  for (const Expr *E : Data.PrivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
         C.getPointerType(C.getPointerType(E->getType()))
@@ -3734,7 +3736,7 @@
     PrivateVarsPos[VD] = Counter;
     ++Counter;
   }
-  for (const Expr *E : FirstprivateVars) {
+  for (const Expr *E : Data.FirstprivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
         C.getPointerType(C.getPointerType(E->getType()))
@@ -3745,7 +3747,7 @@
     PrivateVarsPos[VD] = Counter;
     ++Counter;
   }
-  for (const Expr *E : LastprivateVars) {
+  for (const Expr *E : Data.LastprivateVars) {
     Args.push_back(ImplicitParamDecl::Create(
         C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
         C.getPointerType(C.getPointerType(E->getType()))
@@ -3756,6 +3758,17 @@
     PrivateVarsPos[VD] = Counter;
     ++Counter;
   }
+  for (const VarDecl *VD : Data.PrivateLocals) {
+    QualType Ty = VD->getType().getNonReferenceType();
+    if (VD->getType()->isLValueReferenceType())
+      Ty = C.getPointerType(Ty);
+    Args.push_back(ImplicitParamDecl::Create(
+        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
+        ImplicitParamDecl::Other));
+    PrivateVarsPos[VD] = Counter;
+    ++Counter;
+  }
   const auto &TaskPrivatesMapFnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   llvm::FunctionType *TaskPrivatesMapTy =
@@ -4011,16 +4024,16 @@
 /// Checks if destructor function is required to be generated.
 /// \return true if cleanups are required, false otherwise.
 static bool
-checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
-  bool NeedsCleanup = false;
-  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
-  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
-  for (const FieldDecl *FD : PrivateRD->fields()) {
-    NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
-    if (NeedsCleanup)
-      break;
+checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
+                         ArrayRef<PrivateDataTy> Privates) {
+  for (const PrivateDataTy &P : Privates) {
+    if (!P.second.OriginalRef)
+      continue;
+    QualType Ty = P.second.Original->getType().getNonReferenceType();
+    if (Ty.isDestructedType())
+      return true;
   }
-  return NeedsCleanup;
+  return false;
 }
 
 namespace {
@@ -4190,9 +4203,12 @@
                                /*PrivateElemInit=*/nullptr));
     ++I;
   }
-  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
-    return L.first > R.first;
-  });
+  for (const VarDecl *VD : Data.PrivateLocals)
+    Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
+  llvm::stable_sort(Privates,
+                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
+                      return L.first > R.first;
+                    });
   QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
   // Build type kmp_routine_entry_t (if not built yet).
   emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4234,9 +4250,8 @@
       std::next(TaskFunction->arg_begin(), 3)->getType();
   if (!Privates.empty()) {
     auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
-    TaskPrivatesMap = emitTaskPrivateMappingFunction(
-        CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
-        FI->getType(), Privates);
+    TaskPrivatesMap =
+        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
     TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
         TaskPrivatesMap, TaskPrivatesMapTy);
   } else {
@@ -4266,7 +4281,8 @@
   unsigned Flags = Data.Tied ? TiedFlag : 0;
   bool NeedsCleanup = false;
   if (!Privates.empty()) {
-    NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
+    NeedsCleanup =
+        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
     if (NeedsCleanup)
       Flags = Flags | DestructorsFlag;
   }
@@ -11218,56 +11234,64 @@
   if (!VD)
     return Address::invalid();
   const VarDecl *CVD = VD->getCanonicalDecl();
-  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
+    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+    // Use the default allocation.
+    if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
+         AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+        !AA->getAllocator())
+      return Address::invalid();
+    llvm::Value *Size;
+    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+    if (CVD->getType()->isVariablyModifiedType()) {
+      Size = CGF.getTypeSize(CVD->getType());
+      // Align the size: ((size + align - 1) / align) * align
+      Size = CGF.Builder.CreateNUWAdd(
+          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
+    } else {
+      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+      Size = CGM.getSize(Sz.alignTo(Align));
+    }
+    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+    assert(AA->getAllocator() &&
+           "Expected allocator expression for non-default allocator.");
+    llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+    // According to the standard, the original allocator type is a enum
+    // (integer). Convert to pointer type, if required.
+    if (Allocator->getType()->isIntegerTy())
+      Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
+    else if (Allocator->getType()->isPointerTy())
+      Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+          Allocator, CGM.VoidPtrTy);
+    llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+    llvm::Value *Addr =
+        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                                CGM.getModule(), OMPRTL___kmpc_alloc),
+                            Args, getName({CVD->getName(), ".void.addr"}));
+    llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
+                                                                Allocator};
+    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+        CGM.getModule(), OMPRTL___kmpc_free);
+
+    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
+                                                  llvm::makeArrayRef(FiniArgs));
+    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        Addr,
+        CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+        getName({CVD->getName(), ".addr"}));
+    return Address(Addr, Align);
+  }
+  if (UntiedLocalVarsStack.empty())
     return Address::invalid();
-  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
-  // Use the default allocation.
-  if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
-       AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
-      !AA->getAllocator())
+  const UntiedLocalVarsAddressesMap &UntiedData = UntiedLocalVarsStack.back();
+  auto It = UntiedData.find(VD);
+  if (It == UntiedData.end())
    return Address::invalid();
-  llvm::Value *Size;
-  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
-  if (CVD->getType()->isVariablyModifiedType()) {
-    Size = CGF.getTypeSize(CVD->getType());
-    // Align the size: ((size + align - 1) / align) * align
-    Size = CGF.Builder.CreateNUWAdd(
-        Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
-    Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
-    Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
-  } else {
-    CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
-    Size = CGM.getSize(Sz.alignTo(Align));
-  }
-  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
-  assert(AA->getAllocator() &&
-         "Expected allocator expression for non-default allocator.");
-  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
-  // According to the standard, the original allocator type is a enum (integer).
-  // Convert to pointer type, if required.
-  if (Allocator->getType()->isIntegerTy())
-    Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
-  else if (Allocator->getType()->isPointerTy())
-    Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
-                                                                CGM.VoidPtrTy);
-  llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
-  llvm::Value *Addr =
-      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
-                              CGM.getModule(), OMPRTL___kmpc_alloc),
-                          Args, getName({CVD->getName(), ".void.addr"}));
-  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
-                                                              Allocator};
-  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
-      CGM.getModule(), OMPRTL___kmpc_free);
-  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
-                                                llvm::makeArrayRef(FiniArgs));
-  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      Addr,
-      CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
-      getName({CVD->getName(), ".addr"}));
-  return Address(Addr, Align);
+  return It->second;
 }
 
 CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@@ -11302,6 +11326,21 @@
   CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
 }
 
+CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
+    CodeGenModule &CGM,
+    const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, Address> &LocalVars)
+    : CGM(CGM), NeedToPush(!LocalVars.empty()) {
+  if (!NeedToPush)
+    return;
+  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
+}
+
+CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
+  if (!NeedToPush)
+    return;
+  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
+}
+
 bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
   assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
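Roughly, the untied task entry behaves like the following hand-written sketch (illustrative only; this is not the emitted IR, and the names TaskPrivates/PartId/taskEntrySketch are made up for the example): the body is split at task scheduling points, a part id kept in the task's private data selects where to resume, and locals declared in the task region live in that private data instead of on the stack of the outlined function.

// Hypothetical sketch, not generated code.
struct TaskPrivates {
  int PartId = 0;
  // storage for locals declared in the untied task region, e.g. S s1;
};

int taskEntrySketch(TaskPrivates &P) {
  switch (P.PartId) {
  case 0:
    // ... code up to the first scheduling point ...
    P.PartId = 1;  // remember where to resume
    return 0;      // plain return: no destructors run at this early exit
  case 1:
    // ... code between the first and second scheduling points ...
    P.PartId = 2;
    return 0;
  default:
    // ... final part: the real scope exit, where destructors for the locals run ...
    return 0;
  }
}

This is also why the patch replaces EmitBranchThroughCleanup(ReturnBlock) with a plain EmitBranch at the untied task switch: branching through the cleanup scope would run the destructors at every switch instead of only at the real exit from the declaration scope.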