https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088
>From dc9e64a29d6d1fd84ad630cb002d1129ea6a0a31 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Sat, 15 Jun 2024 02:00:48 -0500 Subject: [PATCH 01/13] checkpoint commit. Use emitOffloadingArrays from OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 241 +++++++++++++++++- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 + .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 22 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 + 4 files changed, 265 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f6d12d46cfc07..9632ef912ebfe 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -45,6 +46,8 @@ #include <numeric> #include <optional> +#define DEBUG_TYPE "clang-openmp-codegen" + using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -8831,9 +8834,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, } PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, + auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); + LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n"); + return Str; } /// Emit the arrays used to pass the captures and map information to the @@ -9447,8 +9452,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, } return DynCGroupMem; } +static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, + const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> 
&CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; + CodeGenModule &CGM = CGF.CGM; + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto *CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapCombinedInfoTy CurInfo; + MappableExprsHandler::StructRangeInfoTy PartialStruct; -static void emitTargetCallKernelLaunch( + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurInfo.Exprs.push_back(nullptr); + CurInfo.BasePointers.push_back(*CV); + CurInfo.DevicePtrDecls.push_back(nullptr); + CurInfo.DevicePointers.push_back( + MappableExprsHandler::DeviceInfoTy::None); + CurInfo.Pointers.push_back(*CV); + CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); + // Copy to the device as an argument. No need to retrieve it. + CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CurInfo.Mappers.push_back(nullptr); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. 
+ MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); + if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurInfo, LambdaPointers); + } + // We expect to have at least an element of information for this capture. + assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && + "Non-existing map pointer for capture!"); + assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && + CurInfo.BasePointers.size() == CurInfo.Sizes.size() && + CurInfo.BasePointers.size() == CurInfo.Types.size() && + CurInfo.BasePointers.size() == CurInfo.Mappers.size() && + "Inconsistent map information sizes!"); + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) { + CombinedInfo.append(PartialStruct.PreliminaryMapData); + MEHandler.emitCombinedEntry( + CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), + OMPBuilder, nullptr, + !PartialStruct.PreliminaryMapData.BasePointers.empty()); + } + + // We need to append the results of this capture to what we already have. + CombinedInfo.append(CurInfo); + } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures( + OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, + CombinedInfo.Pointers, CombinedInfo.Types); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. 
+ MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); + + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); + } +} +static void emitTargetCallKernelLaunchNew( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, @@ -9464,8 +9557,139 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays with all the captured variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + CGOpenMPRuntime::TargetDataInfo Info; - // Get mappable expression information. + auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) + -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); + genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); + return CombinedInfo; + }; + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + // Fill up the arrays and create the arguments. 
+ LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); + OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()), + CGF.Builder.saveIP(), Info, + GenMapInfoCB, /*IsNonContiguous=*/true, + DeviceAddrCB, CustomMapperCB); + bool EmitDebug = !CombinedInfo.Names.empty(); + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); + + LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + InputInfo.MappersArray = + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; + + auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { + bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; + + if (IsReverseOffloading) { + // Reverse offloading is not supported, so just execute on the host. + // FIXME: This fallback solution is incorrect since it ignores the + // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to + // assert here and ensure SEMA emits an error. 
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return; + } + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); + unsigned NumTargetItems = InputInfo.NumberOfTargetItems; + + llvm::Value *BasePointersArray = + InputInfo.BasePointersArray.emitRawPointer(CGF); + llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); + llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); + llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); + + auto &&EmitTargetCallFallbackCB = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) + -> llvm::OpenMPIRBuilder::InsertPointTy { + CGF.Builder.restoreIP(IP); + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return CGF.Builder.saveIP(); + }; + + llvm::Value *DeviceID = emitDeviceID(Device, CGF); + llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = + OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); + llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); + llvm::Value *NumIterations = + OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); + llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); + + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( + BasePointersArray, PointersArray, SizesArray, MapTypesArray, + nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); + + llvm::OpenMPIRBuilder::TargetKernelArgs Args( + NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, + DynCGGroupMem, HasNoWait); + + CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( + CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, + 
DeviceID, RTLoc, AllocaIP)); + }; + + if (RequiresOuterTask) { + if (NewClangTargetTaskCodeGen) { + llvm::errs() << "Using OMPIRBuilder for target task codegen\n"; + } else { + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + } + } else + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); +} +static void emitTargetCallKernelLaunch( + CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, + const CapturedStmt &CS, bool OffloadingMandatory, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, + llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter, + CodeGenFunction &CGF, CodeGenModule &CGM) { + llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); + + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; +// Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; @@ -9542,6 +9766,7 @@ static void emitTargetCallKernelLaunch( CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. 
+ LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; @@ -9549,6 +9774,7 @@ static void emitTargetCallKernelLaunch( EmitDebug, /*ForEndCall=*/false); + LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); @@ -9688,7 +9914,13 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + if (OpenMPClangTargetCodegen) + emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, + Device, OutlinedFnID, InputInfo, MapTypesArray, + MapNamesArray, SizeEmitter, CGF, CGM); + else + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, CGF, CGM); @@ -9711,6 +9943,7 @@ void CGOpenMPRuntime::emitTargetCall( } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); + LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n"); } } else { RegionCodeGenTy ElseRCG(TargetElseGen); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f73d32de7c484..123cfbe1b229d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -40,6 +40,7 @@ using namespace clang; using namespace CodeGen; using namespace llvm::omp; +#define DEBUG_TYPE "clang-openmp-codegen" #define TTL_CODEGEN_TYPE 
"target-teams-loop-codegen" diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index bff49dab4a313..035639b10e31a 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1778,6 +1778,22 @@ class OpenMPIRBuilder { MapInfosTy &CombinedInfo, TargetDataInfo &Info); + /// Callback type for creating the map infos for the kernel parameters. + /// \param CodeGenIP is the insertion point where code should be generated, + /// if any. + using GenMapInfoCallbackTy = + function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; + + /// Emit the arrays used to pass the captures and map information to the + /// offloading runtime library. If there is no map or capture information, + /// return nullptr by reference. + void emitOffloadingArrays( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. @@ -1787,6 +1803,7 @@ class OpenMPIRBuilder { function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, @@ -2190,11 +2207,6 @@ class OpenMPIRBuilder { /// duplicating the body code. enum BodyGenTy { Priv, DupNoPriv, NoPriv }; - /// Callback type for creating the map infos for the kernel parameters. - /// \param CodeGenIP is the insertion point where code should be generated, - /// if any. 
- using GenMapInfoCallbackTy = - function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; /// Generator for '#omp target data' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 92213e19c9d9d..7c0dbc0925306 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5562,6 +5562,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } } +void OpenMPIRBuilder::emitOffloadingArrays( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous, + function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + + OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP); + emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo, + Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB); +} void OpenMPIRBuilder::emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous, >From 0bb7eaaee6ca6301cd7e9a9285ad9959ca801613 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Tue, 25 Jun 2024 16:07:37 -0500 Subject: [PATCH 02/13] emitOffloadingArraysArgument and some other prints --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 50 +++++++++---------- clang/lib/CodeGen/CGOpenMPRuntime.h | 39 +++++++++++++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 + clang/lib/CodeGen/CodeGenFunction.h | 1 + .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 12 ++++- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 42 ++++++++++------ .../Frontend/OpenMPIRBuilderTest.cpp | 4 +- 7 files changed, 104 insertions(+), 46 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9632ef912ebfe..ee03183f3f5a3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3002,6 +3002,10 @@ 
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); llvm::FunctionType *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n"); + LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = " + << KmpTaskTWithPrivatesPtrQTy << "\n"); + LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n"); std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); auto *TaskEntry = llvm::Function::Create( TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -3706,6 +3710,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); @@ -9582,15 +9587,14 @@ static void emitTargetCallKernelLaunchNew( }; // Fill up the arrays and create the arguments. 
LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()), - CGF.Builder.saveIP(), Info, - GenMapInfoCB, /*IsNonContiguous=*/true, - DeviceAddrCB, CustomMapperCB); - bool EmitDebug = !CombinedInfo.Names.empty(); - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, - /*ForEndCall=*/false); + + llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); + + OMPBuilder.emitOffloadingArraysAndArgs( + OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, + GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, + DeviceAddrCB, CustomMapperCB); LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; @@ -9664,13 +9668,9 @@ static void emitTargetCallKernelLaunchNew( DeviceID, RTLoc, AllocaIP)); }; - if (RequiresOuterTask) { - if (NewClangTargetTaskCodeGen) { - llvm::errs() << "Using OMPIRBuilder for target task codegen\n"; - } else { - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - } - } else + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); } static void emitTargetCallKernelLaunch( @@ -9768,10 +9768,9 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays and create the arguments. 
LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, /*ForEndCall=*/false); LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); @@ -9914,16 +9913,16 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - if (OpenMPClangTargetCodegen) + // if (OpenMPClangTargetCodegen) emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, CGF, CGM); - else - emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, - Device, OutlinedFnID, InputInfo, MapTypesArray, - MapNamesArray, SizeEmitter, CGF, CGM); + // else + // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + // RequiresOuterTask, CS, OffloadingMandatory, + // Device, OutlinedFnID, InputInfo, MapTypesArray, + // MapNamesArray, SizeEmitter, CGF, CGM); }; auto &&TargetElseGen = @@ -10684,10 +10683,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( /*IsNonContiguous=*/true); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; 
InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 522ae3d35d22d..b9303a9414b22 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -122,6 +122,45 @@ struct OMPTaskDataTy final { bool IsReductionWithTaskMod = false; bool IsWorksharingReduction = false; bool HasNowaitClause = false; + void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const { + auto &&printSVHelper = + [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void { + for (auto &v : V) { + v->dump(os, Ctx); + } + }; + auto &&printSV = + [&os, printSVHelper](std::string s, + const SmallVector<const Expr *, 4> &V) -> void { + os << s << ":[\n"; + printSVHelper(V); + os << "]\n"; + }; + // SmallVector<const Expr *, 4> PrivateVars; + // SmallVector<const Expr *, 4> PrivateCopies; + // SmallVector<const Expr *, 4> FirstprivateVars; + // SmallVector<const Expr *, 4> FirstprivateCopies; + // SmallVector<const Expr *, 4> FirstprivateInits; + // SmallVector<const Expr *, 4> LastprivateVars; + // SmallVector<const Expr *, 4> LastprivateCopies; + // SmallVector<const Expr *, 4> ReductionVars; + // SmallVector<const Expr *, 4> ReductionOrigs; + // SmallVector<const Expr *, 4> ReductionCopies; + // SmallVector<const Expr *, 4> ReductionOps; + // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals; + + printSV("PrivateVars", PrivateVars); + printSV("PrivateCopies", PrivateCopies); + printSV("FirstprivateVars", FirstprivateVars); + printSV("FirstprivateCopies", FirstprivateCopies); + printSV("FirstprivateInits", FirstprivateInits); + printSV("LastprivateVars", LastprivateVars); + printSV("LastprivateCopies", LastprivateCopies); + printSV("ReductionVars", ReductionVars); + printSV("ReductionOrigs", ReductionOrigs); + printSV("ReductionCopies", ReductionCopies); + printSV("ReductionOps", ReductionOps); + } }; /// Class intended to support codegen of 
all kind of the reduction clauses. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 123cfbe1b229d..bd6743666826b 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5181,6 +5181,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } BodyGen(CGF); }; + LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n"); + LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n"); llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, Data.NumberOfParts); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 06fc7259b5901..6092ab1684267 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -441,6 +441,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; public: + void printLocalDeclMap(); /// Return PostAllocaInsertPt. If it is not yet created, then insert it /// immediately after AllocaInsertPt. llvm::Instruction *getPostAllocaInsertPoint() { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 035639b10e31a..03573b4e02029 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1671,6 +1671,8 @@ class OpenMPIRBuilder { /// The total number of pointers passed to the runtime library. 
unsigned NumberOfPtrs = 0u; + bool EmitDebug = false; + explicit TargetDataInfo() {} explicit TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls) @@ -1769,7 +1771,6 @@ class OpenMPIRBuilder { void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, - bool EmitDebug = false, bool ForEndCall = false); /// Emit an array of struct descriptors to be assigned to the offload args. @@ -1789,7 +1790,7 @@ class OpenMPIRBuilder { /// return nullptr by reference. void emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); @@ -1804,6 +1805,13 @@ class OpenMPIRBuilder { function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + void emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous = false, bool ForEndCall = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. 
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7c0dbc0925306..8d6e6a354a1d8 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4923,8 +4923,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( CustomMapperCB); TargetDataRTArgs RTArgs; - emitOffloadingArraysArgument(Builder, RTArgs, Info, - !MapInfo->Names.empty()); + emitOffloadingArraysArgument(Builder, RTArgs, Info); // Emit the number of elements in the offloading arrays. Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs); @@ -4977,8 +4976,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( // Generate code for the closing of the data region. auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { TargetDataRTArgs RTArgs; - emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(), - /*ForEndCall=*/true); + Info.EmitDebug = !MapInfo->Names.empty(); + emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true); // Emit the number of elements in the offloading arrays. 
Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs); @@ -5234,7 +5233,18 @@ static void emitTargetOutlinedFunction( OutlinedFn, OutlinedFnID); } -static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, +void OpenMPIRBuilder::emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous, bool ForEndCall, + function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous, + DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); + } + + static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads, @@ -5245,13 +5255,11 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, /*RequiresDevicePointerInfo=*/false, /*SeparateBeginEndCalls=*/true); - OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); - OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info, - /*IsNonContiguous=*/true); - OpenMPIRBuilder::TargetDataRTArgs RTArgs; - OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, - !MapInfo.Names.empty()); + OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, + RTArgs, GenMapInfoCB, + /*IsNonContiguous=*/true, + /*ForEndCall=*/false); // emitKernelLaunch auto &&EmitTargetCallFallbackCB = @@ -5261,7 +5269,7 @@ static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, return Builder.saveIP(); }; - unsigned NumTargetItems = MapInfo.BasePointers.size(); + unsigned NumTargetItems = Info.NumberOfPtrs; // TODO: Use correct device ID Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); Value 
*NumTeamsVal = Builder.getInt32(NumTeams); @@ -5438,7 +5446,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc, void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder, TargetDataRTArgs &RTArgs, TargetDataInfo &Info, - bool EmitDebug, bool ForEndCall) { assert((!ForEndCall || Info.separateBeginEndCalls()) && "expected region end call to runtime only when end call is separate"); @@ -5478,7 +5485,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder, // Only emit the mapper information arrays if debug information is // requested. - if (!EmitDebug) + if (!Info.EmitDebug) RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy); else RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32( @@ -5563,8 +5570,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } void OpenMPIRBuilder::emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, + bool IsNonContiguous, function_ref<void(unsigned int, Value *)> DeviceAddrCB, function_ref<Value *(unsigned int)> CustomMapperCB) { @@ -5677,9 +5685,11 @@ void OpenMPIRBuilder::emitOffloadingArrays( auto *MapNamesArrayGbl = createOffloadMapnames(CombinedInfo.Names, MapnamesName); Info.RTArgs.MapNamesArray = MapNamesArrayGbl; + Info.EmitDebug = true; } else { Info.RTArgs.MapNamesArray = Constant::getNullValue(PointerType::getUnqual(Builder.getContext())); + Info.EmitDebug = false; } // If there's a present map type modifier, it must not be applied to the end diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 3ed3034f489ce..54070a1ae35f8 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6891,8 +6891,8 @@ 
TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { Info.RTArgs.MappersArray = ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); Info.NumberOfPtrs = 4; - - OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false); + Info.EmitDebug = false; + OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false); EXPECT_NE(RTArgs.BasePointersArray, nullptr); EXPECT_NE(RTArgs.PointersArray, nullptr); >From af98fabd5685e42dade598caf3c1279ccfab7fba Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 11:33:53 -0500 Subject: [PATCH 03/13] clean up, clean up, everybody clean up --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +------------------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 39 ----- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 - clang/lib/CodeGen/CodeGenFunction.h | 1 - 4 files changed, 5 insertions(+), 245 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ee03183f3f5a3..b2fa50d16437e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -38,7 +38,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -46,8 +45,6 @@ #include <numeric> #include <optional> -#define DEBUG_TYPE "clang-openmp-codegen" - using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -3002,10 +2999,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); llvm::FunctionType *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); - LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n"); - LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = " - << KmpTaskTWithPrivatesPtrQTy << "\n"); - LLVM_DEBUG(llvm::dbgs() << 
"TaskTypeArg = " << TaskTypeArg << "\n"); std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); auto *TaskEntry = llvm::Function::Create( TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -3710,7 +3703,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); - LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); @@ -8839,11 +8831,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, } PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); - LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n"); - return Str; } /// Emit the arrays used to pass the captures and map information to the @@ -9546,7 +9536,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, FillInfoMap); } } -static void emitTargetCallKernelLaunchNew( +static void emitTargetCallKernelLaunch( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, @@ -9585,9 +9575,8 @@ static void emitTargetCallKernelLaunchNew( } return MFunc; }; - // Fill up the arrays and create the arguments. - LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - + // Fill up the basepointers, pointers and mapper arrays and create the + // arguments. 
llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); @@ -9596,184 +9585,6 @@ static void emitTargetCallKernelLaunchNew( GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, DeviceAddrCB, CustomMapperCB); - LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, - CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.SizesArray = - Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); - InputInfo.MappersArray = - Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - MapTypesArray = Info.RTArgs.MapTypesArray; - MapNamesArray = Info.RTArgs.MapNamesArray; - - auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, - RequiresOuterTask, &CS, OffloadingMandatory, Device, - OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, - SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; - - if (IsReverseOffloading) { - // Reverse offloading is not supported, so just execute on the host. - // FIXME: This fallback solution is incorrect since it ignores the - // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to - // assert here and ensure SEMA emits an error. 
- emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, CGF); - return; - } - - bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - unsigned NumTargetItems = InputInfo.NumberOfTargetItems; - - llvm::Value *BasePointersArray = - InputInfo.BasePointersArray.emitRawPointer(CGF); - llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); - llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); - llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); - - auto &&EmitTargetCallFallbackCB = - [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, - OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) - -> llvm::OpenMPIRBuilder::InsertPointTy { - CGF.Builder.restoreIP(IP); - emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, CGF); - return CGF.Builder.saveIP(); - }; - - llvm::Value *DeviceID = emitDeviceID(Device, CGF); - llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); - llvm::Value *NumThreads = - OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); - llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *NumIterations = - OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); - llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); - llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( - CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - - llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( - BasePointersArray, PointersArray, SizesArray, MapTypesArray, - nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); - - llvm::OpenMPIRBuilder::TargetKernelArgs Args( - NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, - DynCGGroupMem, HasNoWait); - - CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( - CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, - 
DeviceID, RTLoc, AllocaIP)); - }; - - if (RequiresOuterTask) - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - else - OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); -} -static void emitTargetCallKernelLaunch( - CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, - const OMPExecutableDirective &D, - llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, - const CapturedStmt &CS, bool OffloadingMandatory, - llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, - llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, - llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter, - CodeGenFunction &CGF, CodeGenModule &CGM) { - llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); - - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; -// Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; - - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto *CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - MappableExprsHandler::MapCombinedInfoTy CurInfo; - MappableExprsHandler::StructRangeInfoTy PartialStruct; - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. 
- if (CI->capturesVariableArrayType()) { - CurInfo.Exprs.push_back(nullptr); - CurInfo.BasePointers.push_back(*CV); - CurInfo.DevicePtrDecls.push_back(nullptr); - CurInfo.DevicePointers.push_back( - MappableExprsHandler::DeviceInfoTy::None); - CurInfo.Pointers.push_back(*CV); - CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); - // Copy to the device as an argument. No need to retrieve it. - CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); - CurInfo.Mappers.push_back(nullptr); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); - if (!CI->capturesThis()) - MappedVarSet.insert(CI->getCapturedVar()); - else - MappedVarSet.insert(nullptr); - if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); - // Generate correct mapping for variables captured by reference in - // lambdas. - if (CI->capturesVariable()) - MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, - CurInfo, LambdaPointers); - } - // We expect to have at least an element of information for this capture. - assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && - "Non-existing map pointer for capture!"); - assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && - CurInfo.BasePointers.size() == CurInfo.Sizes.size() && - CurInfo.BasePointers.size() == CurInfo.Types.size() && - CurInfo.BasePointers.size() == CurInfo.Mappers.size() && - "Inconsistent map information sizes!"); - - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. 
- if (PartialStruct.Base.isValid()) { - CombinedInfo.append(PartialStruct.PreliminaryMapData); - MEHandler.emitCombinedEntry( - CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), - OMPBuilder, nullptr, - !PartialStruct.PreliminaryMapData.BasePointers.empty()); - } - - // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); - } - // Adjust MEMBER_OF flags for the lambdas captures. - MEHandler.adjustMemberOfForLambdaCaptures( - OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, - CombinedInfo.Pointers, CombinedInfo.Types); - // Map any list items in a map clause that were not captures because they - // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); - - CGOpenMPRuntime::TargetDataInfo Info; - // Fill up the arrays and create the arguments. - LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - /*ForEndCall=*/false); - - LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); @@ -9913,16 +9724,10 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - // if (OpenMPClangTargetCodegen) - emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, 
CGF, CGM); - // else - // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, - // RequiresOuterTask, CS, OffloadingMandatory, - // Device, OutlinedFnID, InputInfo, MapTypesArray, - // MapNamesArray, SizeEmitter, CGF, CGM); }; auto &&TargetElseGen = @@ -9942,7 +9747,6 @@ void CGOpenMPRuntime::emitTargetCall( } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); - LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n"); } } else { RegionCodeGenTy ElseRCG(TargetElseGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index b9303a9414b22..522ae3d35d22d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -122,45 +122,6 @@ struct OMPTaskDataTy final { bool IsReductionWithTaskMod = false; bool IsWorksharingReduction = false; bool HasNowaitClause = false; - void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const { - auto &&printSVHelper = - [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void { - for (auto &v : V) { - v->dump(os, Ctx); - } - }; - auto &&printSV = - [&os, printSVHelper](std::string s, - const SmallVector<const Expr *, 4> &V) -> void { - os << s << ":[\n"; - printSVHelper(V); - os << "]\n"; - }; - // SmallVector<const Expr *, 4> PrivateVars; - // SmallVector<const Expr *, 4> PrivateCopies; - // SmallVector<const Expr *, 4> FirstprivateVars; - // SmallVector<const Expr *, 4> FirstprivateCopies; - // SmallVector<const Expr *, 4> FirstprivateInits; - // SmallVector<const Expr *, 4> LastprivateVars; - // SmallVector<const Expr *, 4> LastprivateCopies; - // SmallVector<const Expr *, 4> ReductionVars; - // SmallVector<const Expr *, 4> ReductionOrigs; - // SmallVector<const Expr *, 4> ReductionCopies; - // SmallVector<const Expr *, 4> ReductionOps; - // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals; - - printSV("PrivateVars", PrivateVars); - printSV("PrivateCopies", PrivateCopies); - 
printSV("FirstprivateVars", FirstprivateVars); - printSV("FirstprivateCopies", FirstprivateCopies); - printSV("FirstprivateInits", FirstprivateInits); - printSV("LastprivateVars", LastprivateVars); - printSV("LastprivateCopies", LastprivateCopies); - printSV("ReductionVars", ReductionVars); - printSV("ReductionOrigs", ReductionOrigs); - printSV("ReductionCopies", ReductionCopies); - printSV("ReductionOps", ReductionOps); - } }; /// Class intended to support codegen of all kind of the reduction clauses. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index bd6743666826b..c85135978e2b3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -35,12 +35,10 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Debug.h" #include <optional> using namespace clang; using namespace CodeGen; using namespace llvm::omp; -#define DEBUG_TYPE "clang-openmp-codegen" #define TTL_CODEGEN_TYPE "target-teams-loop-codegen" @@ -5181,8 +5179,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } BodyGen(CGF); }; - LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n"); - LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n"); llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, Data.NumberOfParts); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6092ab1684267..06fc7259b5901 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -441,7 +441,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; public: - void printLocalDeclMap(); /// Return PostAllocaInsertPt. If it is not yet created, then insert it /// immediately after AllocaInsertPt. 
llvm::Instruction *getPostAllocaInsertPoint() { >From c99d13fb3bd3da60d8b7362e7135cb160917d800 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 11:39:07 -0500 Subject: [PATCH 04/13] Add Debug.h include in CGStmtOpenMP.cpp because removal is not related to my change --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index c85135978e2b3..f73d32de7c484 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Debug.h" #include <optional> using namespace clang; using namespace CodeGen; >From ae97854579cf0d966c766c211f65c647c2e9fa4a Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 14:13:36 -0500 Subject: [PATCH 05/13] Document emitOffladingArrays and emitOffloadingArraysAndArgs in OMPIRBuilder.h --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 03573b4e02029..38d90983c2817 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1787,24 +1787,36 @@ class OpenMPIRBuilder { /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. + /// return nullptr by reference. 
This is the first of two overloads - this + /// one accepts a reference to a MapInfosTy object that contains combined + /// information generated for mappable clauses, including base pointers, + /// pointers, sizes, map types, user-defined mappers, and non-contiguous + /// information. void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous = false, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, + TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. + /// return nullptr by reference. This is the second of two overloads - Instead + /// of accepting a reference to a MapInfosTy object, this overload accepts + /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object + /// with mapping information. void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, - TargetDataInfo &Info, bool IsNonContiguous = false, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, + bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - + /// Allocates memory for and populates the arrays required for offloading + /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it + /// emits their base addresses as arguments to be passed to the runtime + /// library. 
In essence, this function is a combination of + /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably + /// be preferred by clients of OpenMPIRBuilder. void emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, >From 88a47b0449cb332f7cc835214efcdaea2c3a8a9f Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 15:29:44 -0500 Subject: [PATCH 06/13] refactor genMapInfo --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b2fa50d16437e..b3493324a27a1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9447,14 +9447,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, } return DynCGroupMem; } -static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, - const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, - llvm::OpenMPIRBuilder &OMPBuilder, - MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { - // Get mappable expression information. 
- MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; +static void genMapInfoForCaptures( + MappableExprsHandler &MEHandler, CodeGenFunction &CGF, + const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, + llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + CodeGenModule &CGM = CGF.CGM; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); @@ -9522,9 +9522,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, MEHandler.adjustMemberOfForLambdaCaptures( OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, CombinedInfo.Types); +} +static void +genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder, + const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet = + llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) { + + CodeGenModule &CGM = CGF.CGM; // Map any list items in a map clause that were not captures because they // weren't referenced within the construct. 
- MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet); auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { return emitMappingInformation(CGF, OMPBuilder, MapExpr); @@ -9536,6 +9545,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, FillInfoMap); } } + +static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, + const CapturedStmt &CS, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; + + genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder, + LambdaPointers, MappedVarSet, CombinedInfo); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); +} static void emitTargetCallKernelLaunch( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, >From e97cd161933d07a08aad52e37b506ae27be27560 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Thu, 27 Jun 2024 13:51:34 -0500 Subject: [PATCH 07/13] Use CGOpenMPRuntime::emitTargetDataStandAloneCall --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index b3493324a27a1..5372bbbbc2da1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10500,21 +10500,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( PrePostActionTy &) { // Fill up the arrays with all the mapped variables. 
MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + CGOpenMPRuntime::TargetDataInfo Info; // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); + auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) + -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); + MappableExprsHandler MEHandler(D, CGF); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); + return CombinedInfo; + }; - CGOpenMPRuntime::TargetDataInfo Info; - // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, - /*IsNonContiguous=*/true); + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + + // Fill up the basepointers, pointers and mapper arrays and create the + // arguments. 
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + + OMPBuilder.emitOffloadingArraysAndArgs( + OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, + GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, + DeviceAddrCB, CustomMapperCB); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); >From 200bd07a9384242cd7999442860bc90e2ba9b69b Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 10:49:35 -0500 Subject: [PATCH 08/13] Use static function emitOffloadingArraysAndArgs in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 67 ++++++++++--------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 13 ++++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 9 +++ 3 files changed, 59 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 5372bbbbc2da1..c0e9eb3b6a07e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8884,6 +8884,40 @@ static void emitOffloadingArrays( /*IsNonContiguous=*/true, DeviceAddrCB, CustomMapperCB); } +/// Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. 
+static void emitOffloadingArraysAndArgs( + CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, + bool IsNonContiguous = false, bool ForEndCall = false) { + CodeGenModule &CGM = CGF.CGM; + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); + + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + OMPBuilder.emitOffloadingArraysAndArgs( + AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous, + ForEndCall, DeviceAddrCB, CustomMapperCB); +} /// Check for inner distribute directive. static const OMPExecutableDirective * @@ -9577,37 +9611,10 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays with all the captured variables. 
MappableExprsHandler::MapCombinedInfoTy CombinedInfo; CGOpenMPRuntime::TargetDataInfo Info; + genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); - auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) - -> llvm::OpenMPIRBuilder::MapInfosTy & { - CGF.Builder.restoreIP(CodeGenIP); - genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); - return CombinedInfo; - }; - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) { - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - // Fill up the basepointers, pointers and mapper arrays and create the - // arguments. - llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( - CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - - OMPBuilder.emitOffloadingArraysAndArgs( - OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, - GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, - DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true, /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 38d90983c2817..2cb3da09a97c1 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1824,6 +1824,19 @@ class OpenMPIRBuilder { function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = 
nullptr); + /// Allocates memory for and populates the arrays required for offloading + /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it + /// emits their base addresses as arguments to be passed to the runtime + /// library. In essence, this function is a combination of + /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably + /// be preferred by clients of OpenMPIRBuilder. + void emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, + bool IsNonContiguous = false, bool ForEndCall = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8d6e6a354a1d8..abf21da0fa7d4 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5243,6 +5243,15 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs( DeviceAddrCB, CustomMapperCB); emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); } + void OpenMPIRBuilder::emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous, + bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, + IsNonContiguous, DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); + } static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, 
OpenMPIRBuilder::InsertPointTy AllocaIP, >From 879cfa1cf2714a0bfa0e42152634ec841b94c3ce Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 11:27:58 -0500 Subject: [PATCH 09/13] Use static function emitOffloadingArraysAndArgs in emitTargetDataStandAloneCall in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++----------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c0e9eb3b6a07e..14590146ceb51 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10508,44 +10508,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; CGOpenMPRuntime::TargetDataInfo Info; + MappableExprsHandler MEHandler(D, CGF); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); + emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true, /*ForEndCall=*/false); - // Get map clause information. 
- auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) - -> llvm::OpenMPIRBuilder::MapInfosTy & { - CGF.Builder.restoreIP(CodeGenIP); - MappableExprsHandler MEHandler(D, CGF); - genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); - return CombinedInfo; - }; - - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) { - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - - // Fill up the basepointers, pointers and mapper arrays and create the - // arguments. - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()); - - OMPBuilder.emitOffloadingArraysAndArgs( - OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, - GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, - DeviceAddrCB, CustomMapperCB); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); >From 178be4f9b97226523d43f7ae9e11a438348774fc Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 11:30:10 -0500 Subject: [PATCH 10/13] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp because it is not used anymore. 
Use emitOffloadingArraysAndArgs --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 --------------------------- 1 file changed, 49 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 14590146ceb51..9fbc06e89f017 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8835,55 +8835,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); } - -/// Emit the arrays used to pass the captures and map information to the -/// offloading runtime library. If there is no map or capture information, -/// return nullptr by reference. -static void emitOffloadingArrays( - CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, - CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, - bool IsNonContiguous = false) { - CodeGenModule &CGM = CGF.CGM; - - // Reset the array information. - Info.clearArrayInfo(); - Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); - - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()); - InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), - CGF.Builder.GetInsertPoint()); - - auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { - return emitMappingInformation(CGF, OMPBuilder, MapExpr); - }; - if (CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo) { - CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); - llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), - FillInfoMap); - } - - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) 
{ - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, - /*IsNonContiguous=*/true, DeviceAddrCB, - CustomMapperCB); -} /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. >From d21f7f6f0ba9063260a08bc9d770bb0f1e0761bf Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 12:05:02 -0500 Subject: [PATCH 11/13] Remove overloads of emitOffloadingArrays and emitOffloadingArraysAndArgs that accept GenMapInfoCallbackTy --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1 - .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 34 ++----------------- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 32 ++++------------- 3 files changed, 9 insertions(+), 58 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9fbc06e89f017..262bbd988e1e3 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9440,7 +9440,6 @@ static void genMapInfoForCaptures( llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { - CodeGenModule &CGM = CGF.CGM; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 2cb3da09a97c1..a0b54e25124d7 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1787,43 +1787,15 @@ class OpenMPIRBuilder { /// Emit the arrays used to pass the captures and map information to the /// offloading 
runtime library. If there is no map or capture information, - /// return nullptr by reference. This is the first of two overloads - this - /// one accepts a reference to a MapInfosTy object that contains combined - /// information generated for mappable clauses, including base pointers, - /// pointers, sizes, map types, user-defined mappers, and non-contiguous - /// information. + /// return nullptr by reference. Accepts a reference to a MapInfosTy object + /// that contains information generated for mappable clauses, + /// including base pointers, pointers, sizes, map types, user-defined mappers. void emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - /// Emit the arrays used to pass the captures and map information to the - /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. This is the second of two overloads - Instead - /// of accepting a reference to a MapInfosTy object, this overload accepts - /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object - /// with mapping information. - void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous = false, - function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, - function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - - /// Allocates memory for and populates the arrays required for offloading - /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it - /// emits their base addresses as arguments to be passed to the runtime - /// library. 
In essence, this function is a combination of - /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably - /// be preferred by clients of OpenMPIRBuilder. - void emitOffloadingArraysAndArgs( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, - bool IsNonContiguous = false, bool ForEndCall = false, - function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, - function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - /// Allocates memory for and populates the arrays required for offloading /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it /// emits their base addresses as arguments to be passed to the runtime diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index abf21da0fa7d4..df48eb430e097 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -5235,23 +5235,13 @@ static void emitTargetOutlinedFunction( void OpenMPIRBuilder::emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, - bool IsNonContiguous, bool ForEndCall, - function_ref<void(unsigned int, Value *)> DeviceAddrCB, + TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous, + bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB, function_ref<Value *(unsigned int)> CustomMapperCB) { - emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous, + emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB); emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); - } - void OpenMPIRBuilder::emitOffloadingArraysAndArgs( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool 
IsNonContiguous, - bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB, - function_ref<Value *(unsigned int)> CustomMapperCB) { - emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, - IsNonContiguous, DeviceAddrCB, CustomMapperCB); - emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); - } +} static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, @@ -5264,9 +5254,10 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs( /*RequiresDevicePointerInfo=*/false, /*SeparateBeginEndCalls=*/true); + OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); OpenMPIRBuilder::TargetDataRTArgs RTArgs; OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, - RTArgs, GenMapInfoCB, + RTArgs, MapInfo, /*IsNonContiguous=*/true, /*ForEndCall=*/false); @@ -5578,17 +5569,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } } -void OpenMPIRBuilder::emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous, - function_ref<void(unsigned int, Value *)> DeviceAddrCB, - function_ref<Value *(unsigned int)> CustomMapperCB) { - - OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP); - emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo, - Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB); -} void OpenMPIRBuilder::emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous, >From bff9d7b276df201baa48b4739dabfa9329c71dd0 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 12:16:41 -0500 Subject: [PATCH 12/13] Undo an unnecessary change in the location of the declaration of GenMapInfoCallbackTy in OMPIRBuilder.h --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------ 1 file changed, 5 
insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a0b54e25124d7..db748611ac501 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1779,12 +1779,6 @@ class OpenMPIRBuilder { MapInfosTy &CombinedInfo, TargetDataInfo &Info); - /// Callback type for creating the map infos for the kernel parameters. - /// \param CodeGenIP is the insertion point where code should be generated, - /// if any. - using GenMapInfoCallbackTy = - function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; - /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. Accepts a reference to a MapInfosTy object @@ -2212,6 +2206,11 @@ class OpenMPIRBuilder { /// duplicating the body code. enum BodyGenTy { Priv, DupNoPriv, NoPriv }; + /// Callback type for creating the map infos for the kernel parameters. + /// \param CodeGenIP is the insertion point where code should be generated, + /// if any. 
+ using GenMapInfoCallbackTy = + function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; /// Generator for '#omp target data' /// >From 9d5c42b905b308b84c54dea81518bc84aa1d79b4 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Mon, 22 Jul 2024 13:23:00 -0500 Subject: [PATCH 13/13] Address review comments --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 262bbd988e1e3..4ab9195bded2a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9436,10 +9436,10 @@ static void genMapInfoForCaptures( MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, - llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), @@ -9537,11 +9537,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { // Get mappable expression information. 
MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder, - LambdaPointers, MappedVarSet, CombinedInfo); + MappedVarSet, CombinedInfo); genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } static void emitTargetCallKernelLaunch( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits