https://github.com/bhandarkar-pranav updated https://github.com/llvm/llvm-project/pull/97088
>From ad6ef960b14c23bde1460a0977b6401dc21dfea4 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Sat, 15 Jun 2024 02:00:48 -0500 Subject: [PATCH 01/13] checkpoint commit. Use emitOffloadinArrays from OMPIRBuilder in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 241 +++++++++++++++++- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 + .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 22 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 10 + 4 files changed, 265 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a6a87ec88ee8a..4c95aab3c33c1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -40,6 +40,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -47,6 +48,8 @@ #include <numeric> #include <optional> +#define DEBUG_TYPE "clang-openmp-codegen" + using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -8868,9 +8871,11 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, } PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, + auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); + LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n"); + return Str; } /// Emit the arrays used to pass the captures and map information to the @@ -9484,8 +9489,96 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, } return DynCGroupMem; } +static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, + const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; + CodeGenModule &CGM = CGF.CGM; + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto *CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapCombinedInfoTy CurInfo; + MappableExprsHandler::StructRangeInfoTy PartialStruct; -static void emitTargetCallKernelLaunch( + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurInfo.Exprs.push_back(nullptr); + CurInfo.BasePointers.push_back(*CV); + CurInfo.DevicePtrDecls.push_back(nullptr); + CurInfo.DevicePointers.push_back( + MappableExprsHandler::DeviceInfoTy::None); + CurInfo.Pointers.push_back(*CV); + CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); + // Copy to the device as an argument. No need to retrieve it. + CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CurInfo.Mappers.push_back(nullptr); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); + if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurInfo, LambdaPointers); + } + // We expect to have at least an element of information for this capture. + assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && + "Non-existing map pointer for capture!"); + assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && + CurInfo.BasePointers.size() == CurInfo.Sizes.size() && + CurInfo.BasePointers.size() == CurInfo.Types.size() && + CurInfo.BasePointers.size() == CurInfo.Mappers.size() && + "Inconsistent map information sizes!"); + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) { + CombinedInfo.append(PartialStruct.PreliminaryMapData); + MEHandler.emitCombinedEntry( + CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), + OMPBuilder, nullptr, + !PartialStruct.PreliminaryMapData.BasePointers.empty()); + } + + // We need to append the results of this capture to what we already have. + CombinedInfo.append(CurInfo); + } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures( + OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, + CombinedInfo.Pointers, CombinedInfo.Types); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); + + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); + } +} +static void emitTargetCallKernelLaunchNew( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, @@ -9501,8 +9594,139 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays with all the captured variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + CGOpenMPRuntime::TargetDataInfo Info; - // Get mappable expression information. + auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) + -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); + genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); + return CombinedInfo; + }; + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + // Fill up the arrays and create the arguments. + LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); + OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()), + CGF.Builder.saveIP(), Info, + GenMapInfoCB, /*IsNonContiguous=*/true, + DeviceAddrCB, CustomMapperCB); + bool EmitDebug = !CombinedInfo.Names.empty(); + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); + + LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + InputInfo.MappersArray = + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; + + auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { + bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; + + if (IsReverseOffloading) { + // Reverse offloading is not supported, so just execute on the host. + // FIXME: This fallback solution is incorrect since it ignores the + // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to + // assert here and ensure SEMA emits an error. + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return; + } + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); + unsigned NumTargetItems = InputInfo.NumberOfTargetItems; + + llvm::Value *BasePointersArray = + InputInfo.BasePointersArray.emitRawPointer(CGF); + llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); + llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); + llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); + + auto &&EmitTargetCallFallbackCB = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) + -> llvm::OpenMPIRBuilder::InsertPointTy { + CGF.Builder.restoreIP(IP); + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return CGF.Builder.saveIP(); + }; + + llvm::Value *DeviceID = emitDeviceID(Device, CGF); + llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = + OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); + llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); + llvm::Value *NumIterations = + OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); + llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); + + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( + BasePointersArray, PointersArray, SizesArray, MapTypesArray, + nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); + + llvm::OpenMPIRBuilder::TargetKernelArgs Args( + NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, + DynCGGroupMem, HasNoWait); + + CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( + CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, + DeviceID, RTLoc, AllocaIP)); + }; + + if (RequiresOuterTask) { + if (NewClangTargetTaskCodeGen) { + llvm::errs() << "Using OMPIRBuilder for target task codegen\n"; + } else { + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + } + } else + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); +} +static void emitTargetCallKernelLaunch( + CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, + const CapturedStmt &CS, bool OffloadingMandatory, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, + llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter, + CodeGenFunction &CGF, CodeGenModule &CGM) { + llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); + + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; +// Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; @@ -9579,6 +9803,7 @@ static void emitTargetCallKernelLaunch( CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. + LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; @@ -9586,6 +9811,7 @@ static void emitTargetCallKernelLaunch( EmitDebug, /*ForEndCall=*/false); + LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); @@ -9725,7 +9951,13 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + if (OpenMPClangTargetCodegen) + emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, + Device, OutlinedFnID, InputInfo, MapTypesArray, + MapNamesArray, SizeEmitter, CGF, CGM); + else + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, CGF, CGM); @@ -9748,6 +9980,7 @@ void CGOpenMPRuntime::emitTargetCall( } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); + LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n"); } } else { RegionCodeGenTy ElseRCG(TargetElseGen); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 853046bf43495..379c7f16db9c5 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -40,6 +40,7 @@ using namespace clang; using namespace CodeGen; using namespace llvm::omp; +#define DEBUG_TYPE "clang-openmp-codegen" #define TTL_CODEGEN_TYPE "target-teams-loop-codegen" diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a6995888de7d4..f4449f3c0a44f 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2358,6 +2358,22 @@ class OpenMPIRBuilder { MapInfosTy &CombinedInfo, TargetDataInfo &Info); + /// Callback type for creating the map infos for the kernel parameters. + /// \param CodeGenIP is the insertion point where code should be generated, + /// if any. + using GenMapInfoCallbackTy = + function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; + + /// Emit the arrays used to pass the captures and map information to the + /// offloading runtime library. If there is no map or capture information, + /// return nullptr by reference. + void emitOffloadingArrays( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. @@ -2367,6 +2383,7 @@ class OpenMPIRBuilder { function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, @@ -2770,11 +2787,6 @@ class OpenMPIRBuilder { /// duplicating the body code. enum BodyGenTy { Priv, DupNoPriv, NoPriv }; - /// Callback type for creating the map infos for the kernel parameters. - /// \param CodeGenIP is the insertion point where code should be generated, - /// if any. - using GenMapInfoCallbackTy = - function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; /// Generator for '#omp target data' /// diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 98da6e2efcb5c..7b9e585d58664 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -7403,6 +7403,16 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } } +void OpenMPIRBuilder::emitOffloadingArrays( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous, + function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + + OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP); + emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo, + Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB); +} void OpenMPIRBuilder::emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous, >From 449d1b46690659950f4d3c164ccc5fe5acd0128c Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Tue, 25 Jun 2024 16:07:37 -0500 Subject: [PATCH 02/13] emitOffloadingArraysArgument and some other prints --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 50 +++++++++---------- clang/lib/CodeGen/CGOpenMPRuntime.h | 39 +++++++++++++++ clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 + clang/lib/CodeGen/CodeGenFunction.h | 1 + .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 12 ++++- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 39 +++++++++------ .../Frontend/OpenMPIRBuilderTest.cpp | 4 +- 7 files changed, 102 insertions(+), 45 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 4c95aab3c33c1..9495a122cbe34 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3013,6 +3013,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); llvm::FunctionType *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); + LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n"); + LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = " + << KmpTaskTWithPrivatesPtrQTy << "\n"); + LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n"); std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); auto *TaskEntry = llvm::Function::Create( TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -3717,6 +3721,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); @@ -9619,15 +9624,14 @@ static void emitTargetCallKernelLaunchNew( }; // Fill up the arrays and create the arguments. LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - OMPBuilder.emitOffloadingArrays(llvm::OpenMPIRBuilder::InsertPointTy(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()), - CGF.Builder.saveIP(), Info, - GenMapInfoCB, /*IsNonContiguous=*/true, - DeviceAddrCB, CustomMapperCB); - bool EmitDebug = !CombinedInfo.Names.empty(); - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, - /*ForEndCall=*/false); + + llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); + + OMPBuilder.emitOffloadingArraysAndArgs( + OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, + GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, + DeviceAddrCB, CustomMapperCB); LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; @@ -9701,13 +9705,9 @@ static void emitTargetCallKernelLaunchNew( DeviceID, RTLoc, AllocaIP)); }; - if (RequiresOuterTask) { - if (NewClangTargetTaskCodeGen) { - llvm::errs() << "Using OMPIRBuilder for target task codegen\n"; - } else { - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - } - } else + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); } static void emitTargetCallKernelLaunch( @@ -9805,10 +9805,9 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays and create the arguments. LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, /*ForEndCall=*/false); LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); @@ -9951,16 +9950,16 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - if (OpenMPClangTargetCodegen) + // if (OpenMPClangTargetCodegen) emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, CGF, CGM); - else - emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, - Device, OutlinedFnID, InputInfo, MapTypesArray, - MapNamesArray, SizeEmitter, CGF, CGM); + // else + // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + // RequiresOuterTask, CS, OffloadingMandatory, + // Device, OutlinedFnID, InputInfo, MapTypesArray, + // MapNamesArray, SizeEmitter, CGF, CGM); }; auto &&TargetElseGen = @@ -10723,10 +10722,9 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( /*IsNonContiguous=*/true); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != llvm::codegenoptions::NoDebugInfo; OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - EmitDebug, /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index f65314d014c08..f6e3677232f07 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -122,6 +122,45 @@ struct OMPTaskDataTy final { bool IsReductionWithTaskMod = false; bool IsWorksharingReduction = false; bool HasNowaitClause = false; + void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const { + auto &&printSVHelper = + [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void { + for (auto &v : V) { + v->dump(os, Ctx); + } + }; + auto &&printSV = + [&os, printSVHelper](std::string s, + const SmallVector<const Expr *, 4> &V) -> void { + os << s << ":[\n"; + printSVHelper(V); + os << "]\n"; + }; + // SmallVector<const Expr *, 4> PrivateVars; + // SmallVector<const Expr *, 4> PrivateCopies; + // SmallVector<const Expr *, 4> FirstprivateVars; + // SmallVector<const Expr *, 4> FirstprivateCopies; + // SmallVector<const Expr *, 4> FirstprivateInits; + // SmallVector<const Expr *, 4> LastprivateVars; + // SmallVector<const Expr *, 4> LastprivateCopies; + // SmallVector<const Expr *, 4> ReductionVars; + // SmallVector<const Expr *, 4> ReductionOrigs; + // SmallVector<const Expr *, 4> ReductionCopies; + // SmallVector<const Expr *, 4> ReductionOps; + // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals; + + printSV("PrivateVars", PrivateVars); + printSV("PrivateCopies", PrivateCopies); + printSV("FirstprivateVars", FirstprivateVars); + printSV("FirstprivateCopies", FirstprivateCopies); + printSV("FirstprivateInits", FirstprivateInits); + printSV("LastprivateVars", LastprivateVars); + printSV("LastprivateCopies", LastprivateCopies); + printSV("ReductionVars", ReductionVars); + printSV("ReductionOrigs", ReductionOrigs); + printSV("ReductionCopies", ReductionCopies); + printSV("ReductionOps", ReductionOps); + } }; /// Class intended to support codegen of all kind of the reduction clauses. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 379c7f16db9c5..0a060324c60a7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5264,6 +5264,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } BodyGen(CGF); }; + LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n"); + LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n"); llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, Data.NumberOfParts); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 67e3019565cd0..09ffe7a68a64f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -440,6 +440,7 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; public: + void printLocalDeclMap(); /// Return PostAllocaInsertPt. If it is not yet created, then insert it /// immediately after AllocaInsertPt. llvm::Instruction *getPostAllocaInsertPoint() { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index f4449f3c0a44f..7782ad5998917 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2231,6 +2231,8 @@ class OpenMPIRBuilder { /// The total number of pointers passed to the runtime library. unsigned NumberOfPtrs = 0u; + bool EmitDebug = false; + explicit TargetDataInfo() {} explicit TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls) @@ -2349,7 +2351,6 @@ class OpenMPIRBuilder { void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, - bool EmitDebug = false, bool ForEndCall = false); /// Emit an array of struct descriptors to be assigned to the offload args. @@ -2369,7 +2370,7 @@ class OpenMPIRBuilder { /// return nullptr by reference. void emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); @@ -2384,6 +2385,13 @@ class OpenMPIRBuilder { function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + void emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous = false, bool ForEndCall = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 7b9e585d58664..88c04c3803e21 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -6372,8 +6372,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( CustomMapperCB); TargetDataRTArgs RTArgs; - emitOffloadingArraysArgument(Builder, RTArgs, Info, - !MapInfo->Names.empty()); + emitOffloadingArraysArgument(Builder, RTArgs, Info); // Emit the number of elements in the offloading arrays. Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs); @@ -6426,8 +6425,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( // Generate code for the closing of the data region. auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { TargetDataRTArgs RTArgs; - emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(), - /*ForEndCall=*/true); + Info.EmitDebug = !MapInfo->Names.empty(); + emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true); // Emit the number of elements in the offloading arrays. Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs); @@ -7057,6 +7056,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask( << "\n"); return Builder.saveIP(); } +void OpenMPIRBuilder::emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, + bool IsNonContiguous, bool ForEndCall, + function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous, + DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); + } static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, @@ -7069,13 +7078,11 @@ static void emitTargetCall( /*RequiresDevicePointerInfo=*/false, /*SeparateBeginEndCalls=*/true); - OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); - OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info, - /*IsNonContiguous=*/true); - OpenMPIRBuilder::TargetDataRTArgs RTArgs; - OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, - !MapInfo.Names.empty()); + OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, + RTArgs, GenMapInfoCB, + /*IsNonContiguous=*/true, + /*ForEndCall=*/false); // emitKernelLaunch auto &&EmitTargetCallFallbackCB = @@ -7085,7 +7092,7 @@ static void emitTargetCall( return Builder.saveIP(); }; - unsigned NumTargetItems = MapInfo.BasePointers.size(); + unsigned NumTargetItems = Info.NumberOfPtrs; // TODO: Use correct device ID Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF); Value *NumTeamsVal = Builder.getInt32(NumTeams); @@ -7279,7 +7286,6 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc, void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder, TargetDataRTArgs &RTArgs, TargetDataInfo &Info, - bool EmitDebug, bool ForEndCall) { assert((!ForEndCall || Info.separateBeginEndCalls()) && "expected region end call to runtime only when end call is separate"); @@ -7319,7 +7325,7 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder, // Only emit the mapper information arrays if debug information is // requested. - if (!EmitDebug) + if (!Info.EmitDebug) RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy); else RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32( @@ -7404,8 +7410,9 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } void OpenMPIRBuilder::emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - GenMapInfoCallbackTy GenMapInfoCB, bool IsNonContiguous, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, + bool IsNonContiguous, function_ref<void(unsigned int, Value *)> DeviceAddrCB, function_ref<Value *(unsigned int)> CustomMapperCB) { @@ -7518,9 +7525,11 @@ void OpenMPIRBuilder::emitOffloadingArrays( auto *MapNamesArrayGbl = createOffloadMapnames(CombinedInfo.Names, MapnamesName); Info.RTArgs.MapNamesArray = MapNamesArrayGbl; + Info.EmitDebug = true; } else { Info.RTArgs.MapNamesArray = Constant::getNullValue(PointerType::getUnqual(Builder.getContext())); + Info.EmitDebug = false; } // If there's a present map type modifier, it must not be applied to the end diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 8653bbd3d38fd..cb4c289f409a1 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -6902,8 +6902,8 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) { Info.RTArgs.MappersArray = ConstantPointerNull::get(Array4VoidPtrTy->getPointerTo()); Info.NumberOfPtrs = 4; - - OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false, false); + Info.EmitDebug = false; + OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info, false); EXPECT_NE(RTArgs.BasePointersArray, nullptr); EXPECT_NE(RTArgs.PointersArray, nullptr); >From 5fac34338cda635a915fe2489cb30fc54c36fa4f Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 11:33:53 -0500 Subject: [PATCH 03/13] clean up, clean up, everybody clean up --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 206 +------------------------- clang/lib/CodeGen/CGOpenMPRuntime.h | 39 ----- clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 - clang/lib/CodeGen/CodeGenFunction.h | 1 - 4 files changed, 5 insertions(+), 245 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 9495a122cbe34..e3b7cdbe0dfa8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -40,7 +40,6 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> @@ -48,8 +47,6 @@ #include <numeric> #include <optional> -#define DEBUG_TYPE "clang-openmp-codegen" - using namespace clang; using namespace CodeGen; using namespace llvm::omp; @@ -3013,10 +3010,6 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); llvm::FunctionType *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo); - LLVM_DEBUG(llvm::dbgs() << "TaskEntryTy=" << *TaskEntryTy << "\n"); - LLVM_DEBUG(llvm::dbgs() << "KmpTaskTWithPrivatesPtrQTy = " - << KmpTaskTWithPrivatesPtrQTy << "\n"); - LLVM_DEBUG(llvm::dbgs() << "TaskTypeArg = " << TaskTypeArg << "\n"); std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); auto *TaskEntry = llvm::Function::Create( TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -3721,7 +3714,6 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); - LLVM_DEBUG(llvm::dbgs() << "ProxyTaskFunction is " << *TaskEntry); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); @@ -8876,11 +8868,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, } PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - auto *Str = OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); - LLVM_DEBUG(llvm::dbgs() << "Output of emitMappingInfo: " << *Str << "\n"); - return Str; } /// Emit the arrays used to pass the captures and map information to the @@ -9583,7 +9573,7 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, FillInfoMap); } } -static void emitTargetCallKernelLaunchNew( +static void emitTargetCallKernelLaunch( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, @@ -9622,9 +9612,8 @@ static void emitTargetCallKernelLaunchNew( } return MFunc; }; - // Fill up the arrays and create the arguments. - LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - + // Fill up the basepointers, pointers and mapper arrays and create the + // arguments. llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); @@ -9633,184 +9622,6 @@ static void emitTargetCallKernelLaunchNew( GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, DeviceAddrCB, CustomMapperCB); - LLVM_DEBUG(llvm::dbgs() << "emitTargetCallKernelLaunchNew:InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, - CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - InputInfo.SizesArray = - Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); - InputInfo.MappersArray = - Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); - MapTypesArray = Info.RTArgs.MapTypesArray; - MapNamesArray = Info.RTArgs.MapNamesArray; - - auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, - RequiresOuterTask, &CS, OffloadingMandatory, Device, - OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, - SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; - - if (IsReverseOffloading) { - // Reverse offloading is not supported, so just execute on the host. - // FIXME: This fallback solution is incorrect since it ignores the - // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to - // assert here and ensure SEMA emits an error. - emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, CGF); - return; - } - - bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); - unsigned NumTargetItems = InputInfo.NumberOfTargetItems; - - llvm::Value *BasePointersArray = - InputInfo.BasePointersArray.emitRawPointer(CGF); - llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); - llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); - llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); - - auto &&EmitTargetCallFallbackCB = - [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, - OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) - -> llvm::OpenMPIRBuilder::InsertPointTy { - CGF.Builder.restoreIP(IP); - emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, - RequiresOuterTask, CS, OffloadingMandatory, CGF); - return CGF.Builder.saveIP(); - }; - - llvm::Value *DeviceID = emitDeviceID(Device, CGF); - llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); - llvm::Value *NumThreads = - OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); - llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *NumIterations = - OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); - llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); - llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( - CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - - llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( - BasePointersArray, PointersArray, SizesArray, MapTypesArray, - nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); - - llvm::OpenMPIRBuilder::TargetKernelArgs Args( - NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, - DynCGGroupMem, HasNoWait); - - CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( - CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, - DeviceID, RTLoc, AllocaIP)); - }; - - if (RequiresOuterTask) - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - else - OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); -} -static void emitTargetCallKernelLaunch( - CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, - const OMPExecutableDirective &D, - llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, - const CapturedStmt &CS, bool OffloadingMandatory, - llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, - llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, - llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, - llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, - const OMPLoopDirective &D)> - SizeEmitter, - CodeGenFunction &CGF, CodeGenModule &CGM) { - llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); - - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; -// Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; - - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto *CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - MappableExprsHandler::MapCombinedInfoTy CurInfo; - MappableExprsHandler::StructRangeInfoTy PartialStruct; - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. - if (CI->capturesVariableArrayType()) { - CurInfo.Exprs.push_back(nullptr); - CurInfo.BasePointers.push_back(*CV); - CurInfo.DevicePtrDecls.push_back(nullptr); - CurInfo.DevicePointers.push_back( - MappableExprsHandler::DeviceInfoTy::None); - CurInfo.Pointers.push_back(*CV); - CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); - // Copy to the device as an argument. No need to retrieve it. - CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | - OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | - OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); - CurInfo.Mappers.push_back(nullptr); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); - if (!CI->capturesThis()) - MappedVarSet.insert(CI->getCapturedVar()); - else - MappedVarSet.insert(nullptr); - if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); - // Generate correct mapping for variables captured by reference in - // lambdas. - if (CI->capturesVariable()) - MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, - CurInfo, LambdaPointers); - } - // We expect to have at least an element of information for this capture. - assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && - "Non-existing map pointer for capture!"); - assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && - CurInfo.BasePointers.size() == CurInfo.Sizes.size() && - CurInfo.BasePointers.size() == CurInfo.Types.size() && - CurInfo.BasePointers.size() == CurInfo.Mappers.size() && - "Inconsistent map information sizes!"); - - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) { - CombinedInfo.append(PartialStruct.PreliminaryMapData); - MEHandler.emitCombinedEntry( - CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), - OMPBuilder, nullptr, - !PartialStruct.PreliminaryMapData.BasePointers.empty()); - } - - // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); - } - // Adjust MEMBER_OF flags for the lambdas captures. - MEHandler.adjustMemberOfForLambdaCaptures( - OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, - CombinedInfo.Pointers, CombinedInfo.Types); - // Map any list items in a map clause that were not captures because they - // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); - - CGOpenMPRuntime::TargetDataInfo Info; - // Fill up the arrays and create the arguments. - LLVM_DEBUG(llvm::dbgs() << "InsertBlock before emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); - emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - /*ForEndCall=*/false); - - LLVM_DEBUG(llvm::dbgs() << "InsertBlock after emitting offload arrays: " << *CGF.Builder.GetInsertBlock() << "\n"); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); @@ -9950,16 +9761,10 @@ void CGOpenMPRuntime::emitTargetCall( OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - // if (OpenMPClangTargetCodegen) - emitTargetCallKernelLaunchNew(this, OutlinedFn, D, CapturedVars, + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, CS, OffloadingMandatory, Device, OutlinedFnID, InputInfo, MapTypesArray, MapNamesArray, SizeEmitter, CGF, CGM); - // else - // emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, - // RequiresOuterTask, CS, OffloadingMandatory, - // Device, OutlinedFnID, InputInfo, MapTypesArray, - // MapNamesArray, SizeEmitter, CGF, CGM); }; auto &&TargetElseGen = @@ -9979,7 +9784,6 @@ void CGOpenMPRuntime::emitTargetCall( } else { RegionCodeGenTy ThenRCG(TargetThenGen); ThenRCG(CGF); - LLVM_DEBUG(llvm::dbgs() << "Generated code after emitTargetCall:\n" << *(OutlinedFn->getParent()) << "\n"); } } else { RegionCodeGenTy ElseRCG(TargetElseGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index f6e3677232f07..f65314d014c08 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -122,45 +122,6 @@ struct OMPTaskDataTy final { bool IsReductionWithTaskMod = false; bool IsWorksharingReduction = false; bool HasNowaitClause = false; - void printTo(llvm::raw_ostream &os, const ASTContext &Ctx) const { - auto &&printSVHelper = - [&os, &Ctx](const SmallVector<const Expr *, 4> &V) -> void { - for (auto &v : V) { - v->dump(os, Ctx); - } - }; - auto &&printSV = - [&os, printSVHelper](std::string s, - const SmallVector<const Expr *, 4> &V) -> void { - os << s << ":[\n"; - printSVHelper(V); - os << "]\n"; - }; - // SmallVector<const Expr *, 4> PrivateVars; - // SmallVector<const Expr *, 4> PrivateCopies; - // SmallVector<const Expr *, 4> FirstprivateVars; - // SmallVector<const Expr *, 4> FirstprivateCopies; - // SmallVector<const Expr *, 4> FirstprivateInits; - // SmallVector<const Expr *, 4> LastprivateVars; - // SmallVector<const Expr *, 4> LastprivateCopies; - // SmallVector<const Expr *, 4> ReductionVars; - // SmallVector<const Expr *, 4> ReductionOrigs; - // SmallVector<const Expr *, 4> ReductionCopies; - // SmallVector<const Expr *, 4> ReductionOps; - // SmallVector<CanonicalDeclPtr<const VarDecl>, 4> PrivateLocals; - - printSV("PrivateVars", PrivateVars); - printSV("PrivateCopies", PrivateCopies); - printSV("FirstprivateVars", FirstprivateVars); - printSV("FirstprivateCopies", FirstprivateCopies); - printSV("FirstprivateInits", FirstprivateInits); - printSV("LastprivateVars", LastprivateVars); - printSV("LastprivateCopies", LastprivateCopies); - printSV("ReductionVars", ReductionVars); - printSV("ReductionOrigs", ReductionOrigs); - printSV("ReductionCopies", ReductionCopies); - printSV("ReductionOps", ReductionOps); - } }; /// Class intended to support codegen of all kind of the reduction clauses. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 0a060324c60a7..74d99d9812bda 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -35,12 +35,10 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" -#include "llvm/Support/Debug.h" #include <optional> using namespace clang; using namespace CodeGen; using namespace llvm::omp; -#define DEBUG_TYPE "clang-openmp-codegen" #define TTL_CODEGEN_TYPE "target-teams-loop-codegen" @@ -5264,8 +5262,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } BodyGen(CGF); }; - LLVM_DEBUG(llvm::dbgs() << "Inputs to emitTaskOutineFunction\n"); - LLVM_DEBUG(llvm::dbgs() << "PartID = " << **PartId << "\n"); llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, Data.NumberOfParts); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 09ffe7a68a64f..67e3019565cd0 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -440,7 +440,6 @@ class CodeGenFunction : public CodeGenTypeCache { llvm::AssertingVH<llvm::Instruction> PostAllocaInsertPt = nullptr; public: - void printLocalDeclMap(); /// Return PostAllocaInsertPt. If it is not yet created, then insert it /// immediately after AllocaInsertPt. llvm::Instruction *getPostAllocaInsertPoint() { >From f761f4c33afb873074782d51222ed87540627d49 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 11:39:07 -0500 Subject: [PATCH 04/13] Add Debug.h include in CGStmtOpenMP.cpp because removal is not related to my change --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 74d99d9812bda..853046bf43495 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Debug.h" #include <optional> using namespace clang; using namespace CodeGen; >From 5c34a68ada279b5ba9b5bc8116aa08c4af2d197c Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 14:13:36 -0500 Subject: [PATCH 05/13] Document emitOffladingArrays and emitOffloadingArraysAndArgs in OMPIRBuilder.h --- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 7782ad5998917..50786ac3d5261 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2367,24 +2367,36 @@ class OpenMPIRBuilder { /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. + /// return nullptr by reference. This is the first of two overloads - this + /// one accepts a reference to a MapInfosTy object that contains combined + /// information generated for mappable clauses, including base pointers, + /// pointers, sizes, map types, user-defined mappers, and non-contiguous + /// information. void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous = false, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, + TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. + /// return nullptr by reference. This is the second of two overloads - Instead + /// of accepting a reference to a MapInfosTy object, this overload accepts + /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object + /// with mapping information. void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, - TargetDataInfo &Info, bool IsNonContiguous = false, + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, + bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - + /// Allocates memory for and populates the arrays required for offloading + /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it + /// emits their base addresses as arguments to be passed to the runtime + /// library. In essence, this function is a combination of + /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably + /// be preferred by clients of OpenMPIRBuilder. void emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, >From 09ee31148d9d0297e893a49cdb546427c88258cc Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Wed, 26 Jun 2024 15:29:44 -0500 Subject: [PATCH 06/13] refactor genMapInfo --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 42 +++++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e3b7cdbe0dfa8..0122f33d201d7 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9484,14 +9484,14 @@ llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, } return DynCGroupMem; } -static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, - const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, - llvm::OpenMPIRBuilder &OMPBuilder, - MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { - // Get mappable expression information. - MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; +static void genMapInfoForCaptures( + MappableExprsHandler &MEHandler, CodeGenFunction &CGF, + const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, + llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + CodeGenModule &CGM = CGF.CGM; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); @@ -9559,9 +9559,18 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, MEHandler.adjustMemberOfForLambdaCaptures( OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, CombinedInfo.Types); +} +static void +genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder, + const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet = + llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) { + + CodeGenModule &CGM = CGF.CGM; // Map any list items in a map clause that were not captures because they // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet); auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { return emitMappingInformation(CGF, OMPBuilder, MapExpr); @@ -9573,6 +9582,21 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, FillInfoMap); } } + +static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, + const CapturedStmt &CS, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + llvm::OpenMPIRBuilder &OMPBuilder, + MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + // Get mappable expression information. + MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; + + genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder, + LambdaPointers, MappedVarSet, CombinedInfo); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); +} static void emitTargetCallKernelLaunch( CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, const OMPExecutableDirective &D, >From 4872692f6ac421e86924f2d524fe10b45fe69dad Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Thu, 27 Jun 2024 13:51:34 -0500 Subject: [PATCH 07/13] Use CGOpenMPRuntime::emitTargetDataStandAloneCall --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 44 +++++++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0122f33d201d7..53f7d4d807f21 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10539,21 +10539,45 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( PrePostActionTy &) { // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + CGOpenMPRuntime::TargetDataInfo Info; // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); + auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) + -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); + MappableExprsHandler MEHandler(D, CGF); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); + return CombinedInfo; + }; - CGOpenMPRuntime::TargetDataInfo Info; - // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, - /*IsNonContiguous=*/true); + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + + // Fill up the basepointers, pointers and mapper arrays and create the + // arguments. + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + + OMPBuilder.emitOffloadingArraysAndArgs( + OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, + GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, + DeviceAddrCB, CustomMapperCB); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - Info.EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo; - OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, - /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); >From 0e1f43f7615bede8b2d9d124bd2cc2a2a36e061e Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 10:49:35 -0500 Subject: [PATCH 08/13] Use static function emitOffloadingArraysAndArgs in emitTargetCallKernelLaunch in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 67 ++++++++++--------- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 13 ++++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 13 ++++ 3 files changed, 63 insertions(+), 30 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 53f7d4d807f21..0b41f80706f2e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8921,6 +8921,40 @@ static void emitOffloadingArrays( /*IsNonContiguous=*/true, DeviceAddrCB, CustomMapperCB); } +/// Emit the arrays used to pass the captures and map information to the +/// offloading runtime library. If there is no map or capture information, +/// return nullptr by reference. +static void emitOffloadingArraysAndArgs( + CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, + CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, + bool IsNonContiguous = false, bool ForEndCall = false) { + CodeGenModule &CGM = CGF.CGM; + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); + + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); + } + }; + + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; + OMPBuilder.emitOffloadingArraysAndArgs( + AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous, + ForEndCall, DeviceAddrCB, CustomMapperCB); +} /// Check for inner distribute directive. static const OMPExecutableDirective * @@ -9614,37 +9648,10 @@ static void emitTargetCallKernelLaunch( // Fill up the arrays with all the captured variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; CGOpenMPRuntime::TargetDataInfo Info; + genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); - auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) - -> llvm::OpenMPIRBuilder::MapInfosTy & { - CGF.Builder.restoreIP(CodeGenIP); - genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo); - return CombinedInfo; - }; - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) { - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - // Fill up the basepointers, pointers and mapper arrays and create the - // arguments. - llvm::OpenMPIRBuilder::InsertPointTy OffloadingArraysAllocaIP( - CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); - - OMPBuilder.emitOffloadingArraysAndArgs( - OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, - GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, - DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true, /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 50786ac3d5261..1377ca8ce45b2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2404,6 +2404,19 @@ class OpenMPIRBuilder { function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Allocates memory for and populates the arrays required for offloading + /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it + /// emits their base addresses as arguments to be passed to the runtime + /// library. In essence, this function is a combination of + /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably + /// be preferred by clients of OpenMPIRBuilder. + void emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, + bool IsNonContiguous = false, bool ForEndCall = false, + function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, + function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); + /// Creates offloading entry for the provided entry ID \a ID, address \a /// Addr, size \a Size, and flags \a Flags. void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 88c04c3803e21..3b71b2be3086f 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -7066,6 +7066,17 @@ void OpenMPIRBuilder::emitOffloadingArraysAndArgs( DeviceAddrCB, CustomMapperCB); emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); } + +void OpenMPIRBuilder::emitOffloadingArraysAndArgs( + InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, + TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous, + bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB, + function_ref<Value *(unsigned int)> CustomMapperCB) { + emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, + IsNonContiguous, DeviceAddrCB, CustomMapperCB); + emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); + } + static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, @@ -7074,6 +7085,8 @@ static void emitTargetCall( OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, SmallVector<llvm::OpenMPIRBuilder::DependData> Dependencies = {}) { + + OpenMPIRBuilder::TargetDataInfo Info( /*RequiresDevicePointerInfo=*/false, /*SeparateBeginEndCalls=*/true); >From 32edf70615a2a049cca7bd275c9d7436e749e725 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 11:27:58 -0500 Subject: [PATCH 09/13] Use static function emitOffloadingArraysAndArgs in emitTargetDataStandaloneCall in CGOpenMPRuntime.cpp --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 40 ++++----------------------- 1 file changed, 5 insertions(+), 35 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0b41f80706f2e..26976b1565209 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10547,44 +10547,14 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Fill up the arrays with all the mapped variables. MappableExprsHandler::MapCombinedInfoTy CombinedInfo; CGOpenMPRuntime::TargetDataInfo Info; + MappableExprsHandler MEHandler(D, CGF); + genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); + emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder, + /*IsNonContiguous=*/true, /*ForEndCall=*/false); - // Get map clause information. - auto GenMapInfoCB = [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP) - -> llvm::OpenMPIRBuilder::MapInfosTy & { - CGF.Builder.restoreIP(CodeGenIP); - MappableExprsHandler MEHandler(D, CGF); - genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder); - return CombinedInfo; - }; - - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) { - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - - // Fill up the basepointers, pointers and mapper arrays and create the - // arguments. - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - InsertPointTy OffloadingArraysAllocaIP(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()); - - OMPBuilder.emitOffloadingArraysAndArgs( - OffloadingArraysAllocaIP, CGF.Builder.saveIP(), Info, Info.RTArgs, - GenMapInfoCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false, - DeviceAddrCB, CustomMapperCB); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); >From 6ce0c84f93d1066c1a4f4bb7b1530b5c2b9d4144 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 11:30:10 -0500 Subject: [PATCH 10/13] Remove emitOffloadingArrays from CGOpenMPRuntime.cpp because it is not used anymore. Use emitOffloadingArraysAndArgs --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 49 --------------------------- 1 file changed, 49 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 26976b1565209..57e958f644b18 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8872,55 +8872,6 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, PLoc.getLine(), PLoc.getColumn(), SrcLocStrSize); } - -/// Emit the arrays used to pass the captures and map information to the -/// offloading runtime library. If there is no map or capture information, -/// return nullptr by reference. -static void emitOffloadingArrays( - CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, - CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, - bool IsNonContiguous = false) { - CodeGenModule &CGM = CGF.CGM; - - // Reset the array information. - Info.clearArrayInfo(); - Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); - - using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), - CGF.AllocaInsertPt->getIterator()); - InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), - CGF.Builder.GetInsertPoint()); - - auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { - return emitMappingInformation(CGF, OMPBuilder, MapExpr); - }; - if (CGM.getCodeGenOpts().getDebugInfo() != - llvm::codegenoptions::NoDebugInfo) { - CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); - llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), - FillInfoMap); - } - - auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { - if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { - Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); - } - }; - - auto CustomMapperCB = [&](unsigned int I) { - llvm::Value *MFunc = nullptr; - if (CombinedInfo.Mappers[I]) { - Info.HasMapper = true; - MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - } - return MFunc; - }; - OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, - /*IsNonContiguous=*/true, DeviceAddrCB, - CustomMapperCB); -} /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. >From a7ce3ce2ad355427c0aa58b996df0f304edc2185 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 12:05:02 -0500 Subject: [PATCH 11/13] Remove overloads of emitOffloadingArrays and emitOffloadingArraysAndArgs that accept GenMapInfoCallBackTy --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 1 - .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 34 ++----------------- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 31 +++-------------- 3 files changed, 8 insertions(+), 58 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 57e958f644b18..8a39dbdbeec53 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9477,7 +9477,6 @@ static void genMapInfoForCaptures( llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { - CodeGenModule &CGM = CGF.CGM; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 1377ca8ce45b2..60e79ec3726ce 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2367,43 +2367,15 @@ class OpenMPIRBuilder { /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. This is the first of two overloads - this - /// one accepts a reference to a MapInfosTy object that contains combined - /// information generated for mappable clauses, including base pointers, - /// pointers, sizes, map types, user-defined mappers, and non-contiguous - /// information. + /// return nullptr by reference. Accepts a reference to a MapInfosTy object + /// that contains information generated for mappable clauses, + /// including base pointers, pointers, sizes, map types, user-defined mappers. void emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous = false, function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - /// Emit the arrays used to pass the captures and map information to the - /// offloading runtime library. If there is no map or capture information, - /// return nullptr by reference. This is the second of two overloads - Instead - /// of accepting a reference to a MapInfosTy object, this overload accepts - /// a call back of type GenMapInfoCallbackTy to populate a MapInfosTy object - /// with mapping information. - void emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous = false, - function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, - function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - - /// Allocates memory for and populates the arrays required for offloading - /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it - /// emits their base addresses as arguments to be passed to the runtime - /// library. In essence, this function is a combination of - /// emitOffloadingArrays and emitOffloadingArraysArgument and should arguably - /// be preferred by clients of OpenMPIRBuilder. - void emitOffloadingArraysAndArgs( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, - bool IsNonContiguous = false, bool ForEndCall = false, - function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr, - function_ref<Value *(unsigned int)> CustomMapperCB = nullptr); - /// Allocates memory for and populates the arrays required for offloading /// (offload_{baseptrs|ptrs|mappers|sizes|maptypes|mapnames}). Then, it /// emits their base addresses as arguments to be passed to the runtime diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 3b71b2be3086f..e97029ceca33f 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -7056,26 +7056,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask( << "\n"); return Builder.saveIP(); } -void OpenMPIRBuilder::emitOffloadingArraysAndArgs( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetDataRTArgs &RTArgs, GenMapInfoCallbackTy GenMapInfoCB, - bool IsNonContiguous, bool ForEndCall, - function_ref<void(unsigned int, Value *)> DeviceAddrCB, - function_ref<Value *(unsigned int)> CustomMapperCB) { - emitOffloadingArrays(AllocaIP, CodeGenIP, GenMapInfoCB, Info, IsNonContiguous, - DeviceAddrCB, CustomMapperCB); - emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); - } - void OpenMPIRBuilder::emitOffloadingArraysAndArgs( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, bool IsNonContiguous, bool ForEndCall, function_ref<void(unsigned int, Value *)> DeviceAddrCB, function_ref<Value *(unsigned int)> CustomMapperCB) { - emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, - IsNonContiguous, DeviceAddrCB, CustomMapperCB); + emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, IsNonContiguous, + DeviceAddrCB, CustomMapperCB); emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall); - } +} static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, @@ -7091,9 +7080,10 @@ static void emitTargetCall( /*RequiresDevicePointerInfo=*/false, /*SeparateBeginEndCalls=*/true); + OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); OpenMPIRBuilder::TargetDataRTArgs RTArgs; OMPBuilder.emitOffloadingArraysAndArgs(AllocaIP, Builder.saveIP(), Info, - RTArgs, GenMapInfoCB, + RTArgs, MapInfo, /*IsNonContiguous=*/true, /*ForEndCall=*/false); @@ -7422,17 +7412,6 @@ void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP, } } -void OpenMPIRBuilder::emitOffloadingArrays( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP, - GenMapInfoCallbackTy GenMapInfoCB, TargetDataInfo &Info, - bool IsNonContiguous, - function_ref<void(unsigned int, Value *)> DeviceAddrCB, - function_ref<Value *(unsigned int)> CustomMapperCB) { - - OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(CodeGenIP); - emitOffloadingArrays(AllocaIP, CodeGenIP, MapInfo, - Info, IsNonContiguous, DeviceAddrCB, CustomMapperCB); -} void OpenMPIRBuilder::emitOffloadingArrays( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous, >From b9b687eb564dfda81afd3007a80b7ce5f3c0174b Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Fri, 28 Jun 2024 12:16:41 -0500 Subject: [PATCH 12/13] Undo an unnecessary change in the location of the declaration of GenMapInfoCallBackTy in OMPIRBuilder.h --- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 60e79ec3726ce..1614d5716d28c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2359,12 +2359,6 @@ class OpenMPIRBuilder { MapInfosTy &CombinedInfo, TargetDataInfo &Info); - /// Callback type for creating the map infos for the kernel parameters. - /// \param CodeGenIP is the insertion point where code should be generated, - /// if any. - using GenMapInfoCallbackTy = - function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; - /// Emit the arrays used to pass the captures and map information to the /// offloading runtime library. If there is no map or capture information, /// return nullptr by reference. Accepts a reference to a MapInfosTy object @@ -2792,6 +2786,11 @@ class OpenMPIRBuilder { /// duplicating the body code. enum BodyGenTy { Priv, DupNoPriv, NoPriv }; + /// Callback type for creating the map infos for the kernel parameters. + /// \param CodeGenIP is the insertion point where code should be generated, + /// if any. + using GenMapInfoCallbackTy = + function_ref<MapInfosTy &(InsertPointTy CodeGenIP)>; /// Generator for '#omp target data' /// >From e74f34bbec42f634e030a08259f6fdd64a7ce7c7 Mon Sep 17 00:00:00 2001 From: Pranav Bhandarkar <pranav.bhandar...@amd.com> Date: Mon, 22 Jul 2024 13:23:00 -0500 Subject: [PATCH 13/13] Address review comments --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8a39dbdbeec53..3210bd414f8ba 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -9473,10 +9473,10 @@ static void genMapInfoForCaptures( MappableExprsHandler &MEHandler, CodeGenFunction &CGF, const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, llvm::OpenMPIRBuilder &OMPBuilder, - llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto *CV = CapturedVars.begin(); for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), @@ -9574,11 +9574,10 @@ static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) { // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder, - LambdaPointers, MappedVarSet, CombinedInfo); + MappedVarSet, CombinedInfo); genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet); } static void emitTargetCallKernelLaunch( _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits