jdoerfert updated this revision to Diff 190840.
jdoerfert added a comment.
Remove code extracted into separate commits, see D59418
<https://reviews.llvm.org/D59418>, D59420 <https://reviews.llvm.org/D59420>,
and D59421 <https://reviews.llvm.org/D59421>.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D59328/new/
https://reviews.llvm.org/D59328
Files:
clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
clang/lib/CodeGen/CMakeLists.txt
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/OpenMP/target_tregion_no_SPMD_mode.c
Index: clang/test/OpenMP/target_tregion_no_SPMD_mode.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/target_tregion_no_SPMD_mode.c
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -mllvm -openmp-tregion-runtime -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+
+// CHECK: loop_in_loop_in_tregion
+// CHECK: %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK: call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void loop_in_loop_in_tregion(int *A, int *B) {
+#pragma omp target
+ for (int i = 0; i < 512; i++) {
+ for (int j = 0; j < 1024; j++)
+ A[j] += B[i + j];
+ }
+}
+
+// CHECK: parallel_loops_and_accesses_in_tregion
+// CHECK: %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK: call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion._wrapper, i8* undef, i16 0, i8* %2, i16 16, i1 false)
+// CHECK: call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.1_wrapper, i8* undef, i16 0, i8* %5, i16 16, i1 false)
+// CHECK: call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.2_wrapper, i8* undef, i16 0, i8* %8, i16 16, i1 false)
+// CHECK: call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_and_accesses_in_tregion(int *A, int *B) {
+#pragma omp target
+ {
+#pragma omp parallel for
+ for (int j = 0; j < 1024; j++)
+ A[j] += B[0 + j];
+#pragma omp parallel for
+ for (int j = 0; j < 1024; j++)
+ A[j] += B[1 + j];
+#pragma omp parallel for
+ for (int j = 0; j < 1024; j++)
+ A[j] += B[2 + j];
+
+ // This needs a guard in SPMD mode
+ A[0] = B[0];
+ }
+}
+
+void extern_func();
+static void parallel_loop(int *A, int *B, int i) {
+#pragma omp parallel for
+ for (int j = 0; j < 1024; j++)
+ A[j] += B[i + j];
+}
+
+// CHECK: parallel_loop_in_function_in_loop_with_global_acc_in_tregion
+// CHECK: %1 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK: call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+int Global[512];
+void parallel_loop_in_function_in_loop_with_global_acc_in_tregion(int *A, int *B) {
+#pragma omp target
+ for (int i = 0; i < 512; i++) {
+ parallel_loop(A, B, i);
+ Global[i]++;
+ }
+}
+
+// CHECK: parallel_loop
+// CHECK: call void @__kmpc_target_region_kernel_parallel(i1 false, i1 true, void (i8*, i8*)* @.omp_TRegion.3_wrapper, i8* undef, i16 0, i8* %0, i16 24, i1 false)
+
+// CHECK: parallel_loops_in_functions_and_extern_func_in_tregion
+// CHECK: %0 = call i8 @__kmpc_target_region_kernel_init(i1 false, i1 true, i1 true, i1 true)
+// CHECK: call void @__kmpc_target_region_kernel_deinit(i1 false, i1 true)
+void parallel_loops_in_functions_and_extern_func_in_tregion(int *A, int *B) {
+#pragma omp target
+ {
+ parallel_loop(A, B, 1);
+ parallel_loop(A, B, 2);
+ extern_func();
+ parallel_loop(A, B, 3);
+ }
+}
Index: clang/lib/CodeGen/CodeGenModule.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenModule.cpp
+++ clang/lib/CodeGen/CodeGenModule.cpp
@@ -20,6 +20,7 @@
#include "CGOpenCLRuntime.h"
#include "CGOpenMPRuntime.h"
#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeTRegion.h"
#include "CodeGenFunction.h"
#include "CodeGenPGO.h"
#include "ConstantEmitter.h"
@@ -67,6 +68,11 @@
llvm::cl::desc("Emit limited coverage mapping information (experimental)"),
llvm::cl::init(false));
+static llvm::cl::opt<bool> UseGenericTRegionInterface(
+ "openmp-tregion-runtime", llvm::cl::ZeroOrMore, llvm::cl::Hidden,
+ llvm::cl::desc("Use the generic target region OpenMP runtime interface"),
+ llvm::cl::init(false));
+
static const char AnnotationSection[] = "llvm.metadata";
static CGCXXABI *createCXXABI(CodeGenModule &CGM) {
@@ -206,7 +212,10 @@
case llvm::Triple::nvptx64:
assert(getLangOpts().OpenMPIsDevice &&
"OpenMP NVPTX is only prepared to deal with device code.");
- OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
+ if (UseGenericTRegionInterface)
+ OpenMPRuntime.reset(new CGOpenMPRuntimeTRegion(*this));
+ else
+ OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
break;
default:
if (LangOpts.OpenMPSimd)
Index: clang/lib/CodeGen/CMakeLists.txt
===================================================================
--- clang/lib/CodeGen/CMakeLists.txt
+++ clang/lib/CodeGen/CMakeLists.txt
@@ -70,6 +70,7 @@
CGOpenMPRuntime.cpp
CGOpenMPRuntimeNVPTX.cpp
CGOpenMPRuntimeTarget.cpp
+ CGOpenMPRuntimeTRegion.cpp
CGRecordLayoutBuilder.cpp
CGStmt.cpp
CGStmtOpenMP.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
===================================================================
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTRegion.h
@@ -0,0 +1,181 @@
+//===-- CGOpenMPRuntimeTRegion.h --- OpenMP RT TRegion interface codegen --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Code generation interface for OpenMP target offloading though the generic
+// Target Region (TRegion) interface.
+//
+// See openmp/libomptarget/deviceRTLs/common/target_Region.h for further
+// information on the interface functions and their intended use.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETREGION_H
+#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMETREGION_H
+
+#include "CGOpenMPRuntimeTarget.h"
+#include "llvm/ADT/SmallBitVector.h"
+
+namespace clang {
+namespace CodeGen {
+
+class CGOpenMPRuntimeTRegion : public CGOpenMPRuntimeTarget {
+ // TODO: The target region interface only covers kernel codes for now. This
+ // therefore codegen implicitly assumes the target region kernel
+ // interface is targeted. Once a second target region interface is put
+ // in place, e.g., specialized to many-core offloading, we might need
+ // to make the target interface explicit.
+
+ /// Create an outlined function for a target kernel.
+ ///
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param CodeGen Object containing the target statements.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ void emitKernel(const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ const RegionCodeGenTy &CodeGen);
+
+ /// Helper for generic kernel mode, target directive's entry function.
+ void emitKernelHeader(CodeGenFunction &CGF, llvm::BasicBlock *&ExitBB);
+
+ /// Signal termination of generic mode execution.
+ void emitKernelFooter(CodeGenFunction &CGF, llvm::BasicBlock *ExitBB);
+
+ //
+ // Base class overrides.
+ //
+
+ /// Emit outlined function for 'target' directive.
+ ///
+ /// \param D Directive to emit.
+ /// \param ParentName Name of the function that encloses the target region.
+ /// \param OutlinedFn Outlined function value to be defined by this call.
+ /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param IsOffloadEntry True if the outlined function is an offload entry.
+ /// An outlined function may not be an entry if, e.g. the if clause always
+ /// evaluates to false.
+ void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry,
+ const RegionCodeGenTy &CodeGen) override;
+
+protected:
+ /// Get the function name of an outlined region, customized to the target.
+ StringRef getOutlinedHelperName() const override { return ".omp_TRegion."; }
+
+public:
+ explicit CGOpenMPRuntimeTRegion(CodeGenModule &CGM);
+
+ /// Emits inlined function for the specified OpenMP parallel directive.
+ ///
+ /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Function *
+ emitParallelOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// Emits code for parallel or serial call of the \a OutlinedFn with
+ /// variables captured in a record which address is stored in \a
+ /// CapturedStruct.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars A pointer to the record with the references to
+ /// variables used in \a OutlinedFn function.
+ /// \param IfCond Condition in the associated 'if' clause, if it was
+ /// specified, nullptr otherwise.
+ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Function *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars,
+ const Expr *IfCond) override;
+
+ /// Emits a critical region.
+ /// \param CriticalName Name of the critical region.
+ /// \param CriticalOpGen Generator for the statement associated with the given
+ /// critical region.
+ /// \param Hint Value of the 'hint' clause (optional).
+ void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen,
+ SourceLocation Loc,
+ const Expr *Hint = nullptr) override;
+
+ /// Emit a code for reduction clause.
+ ///
+ /// \param Privates List of private copies for original reduction arguments.
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ /// \param Options List of options for reduction codegen:
+ /// WithNowait true if parent directive has also nowait clause, false
+ /// otherwise.
+ /// SimpleReduction Emit reduction operation only. Used for omp simd
+ /// directive on the host.
+ /// ReductionKind The kind of reduction to perform.
+ virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ ReductionOptionsTy Options) override;
+
+protected:
+
+ /// Hook to allow derived classes to perform checks on the AST that justify
+ /// execution without runtime support.
+ virtual bool mayNeedRuntimeSupport() const { return true; }
+
+ /// Hook to allow derived classes to perform checks on the AST that justify
+ /// execution without data sharing support.
+ virtual bool mayPerformDataSharing() const { return true; }
+
+private:
+
+ /// Helper to check if SPMD mode is enabled. Derived classes that perform
+ /// checks on the AST to justify SPMD mode can overload the
+ /// CGOpenMPRuntimeTarget::getExecutionMode().
+ bool isKnownSPMDMode() const { return getExecutionMode() == EM_SPMD; }
+
+ /// Simple container for a wrapper of an outlined parallel function and the
+ /// layout of the passed variables (= captured variables, both shared and
+ /// firstprivate).
+ struct WrapperInfo {
+ llvm::Function *WrapperFn = nullptr;
+ llvm::StructType *SharedVarsStructTy = nullptr;
+ llvm::StructType *PrivateVarsStructTy = nullptr;
+ llvm::SmallBitVector CaptureIsPrivate;
+ };
+
+ /// Map an outlined function to its wrapper and shared struct type. The latter
+ /// defines the layout of the payload and the wrapper will unpack that payload
+ /// and pass the values to the outlined function.
+ llvm::DenseMap<llvm::Function *, WrapperInfo> WrapperInfoMap;
+
+ /// Emit function which wraps the outline parallel region
+ /// and controls the parameters which are passed to this function.
+ /// The wrapper ensures that the outlined function is called
+ /// with the correct arguments when data is shared.
+ void createParallelDataSharingWrapper(llvm::Function *OutlinedParallelFn,
+ const OMPExecutableDirective &D);
+};
+
+} // namespace CodeGen
+} // namespace clang
+
+#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEKERNEL_H
Index: clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
===================================================================
--- /dev/null
+++ clang/lib/CodeGen/CGOpenMPRuntimeTRegion.cpp
@@ -0,0 +1,421 @@
+//===-- CGOpenMPRuntimeTRegion.cpp - OpenMP RT TRegion interface codegen --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the code generation interface for OpenMP target offloading
+// though the Target Region (TRegion) interface.
+//
+// See the file comment in CGOpenMPRuntimeTRegion.h for more information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CGOpenMPRuntimeTRegion.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtVisitor.h"
+
+using namespace clang;
+using namespace CodeGen;
+
+void CGOpenMPRuntimeTRegion::emitKernel(const OMPExecutableDirective &D,
+ StringRef ParentName,
+ llvm::Function *&OutlinedFn,
+ llvm::Constant *&OutlinedFnID,
+ const RegionCodeGenTy &CodeGen) {
+ WrapperInfoMap.clear();
+
+ // Emit target region as a standalone region.
+ class KernelPrePostActionTy : public PrePostActionTy {
+ CGOpenMPRuntimeTRegion &RT;
+ llvm::BasicBlock *ExitBB;
+
+ public:
+ KernelPrePostActionTy(CGOpenMPRuntimeTRegion &RT)
+ : RT(RT), ExitBB(nullptr) {}
+
+ void Enter(CodeGenFunction &CGF) override {
+ RT.emitKernelHeader(CGF, ExitBB);
+ // Skip target region initialization.
+ RT.setLocThreadIdInsertPt(CGF, /* AtCurrentPoint */ true);
+ }
+
+ void Exit(CodeGenFunction &CGF) override {
+ RT.clearLocThreadIdInsertPt(CGF);
+ RT.emitKernelFooter(CGF, ExitBB);
+ }
+
+ } Action(*this);
+ CodeGen.setAction(Action);
+
+ emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
+ /* IsOffloadEntry */ true, CodeGen);
+}
+
+void CGOpenMPRuntimeTRegion::emitKernelHeader(CodeGenFunction &CGF,
+ llvm::BasicBlock *&ExitBB) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ // Setup BBs in entry function.
+ llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
+ ExitBB = CGF.createBasicBlock(".exit");
+
+ llvm::Value *Args[] = {
+ /* UseSPMDMode */ Bld.getInt1(isKnownSPMDMode()),
+ /* UseStateMachine */ Bld.getInt1(1),
+ /* RequiresOMPRuntime */
+ Bld.getInt1(mayNeedRuntimeSupport()),
+ /* RequiresDataSharing */ Bld.getInt1(mayPerformDataSharing())};
+ llvm::CallInst *InitCI = CGF.EmitRuntimeCall(
+ createTargetRuntimeFunction(OMPRTL__kmpc_target_region_kernel_init),
+ Args);
+
+ llvm::Value *ExecuteCnd = Bld.CreateICmpEQ(InitCI, Bld.getInt8(1));
+
+ Bld.CreateCondBr(ExecuteCnd, ExecuteBB, ExitBB);
+ CGF.EmitBlock(ExecuteBB);
+}
+
+void CGOpenMPRuntimeTRegion::emitKernelFooter(CodeGenFunction &CGF,
+ llvm::BasicBlock *ExitBB) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
+ CGF.EmitBranch(OMPDeInitBB);
+
+ CGF.EmitBlock(OMPDeInitBB);
+
+ CGBuilderTy &Bld = CGF.Builder;
+ // DeInitialize the OMP state in the runtime; called by all active threads.
+ llvm::Value *Args[] = {/* UseSPMDMode */ Bld.getInt1(isKnownSPMDMode()),
+ /* RequiredOMPRuntime */
+ Bld.getInt1(mayNeedRuntimeSupport())};
+
+ CGF.EmitRuntimeCall(
+ createTargetRuntimeFunction(OMPRTL__kmpc_target_region_kernel_deinit),
+ Args);
+
+ CGF.EmitBranch(ExitBB);
+ CGF.EmitBlock(ExitBB);
+}
+
+void CGOpenMPRuntimeTRegion::emitTargetOutlinedFunction(
+ const OMPExecutableDirective &D, StringRef ParentName,
+ llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
+ if (!IsOffloadEntry) // Nothing to do.
+ return;
+
+ assert(!ParentName.empty() && "Invalid target region parent name!");
+
+ emitKernel(D, ParentName, OutlinedFn, OutlinedFnID, CodeGen);
+
+ // Create a unique global variable to indicate the execution mode of this
+ // target region. The execution mode is either 'non-SPMD' or 'SPMD'. Initially
+ // all regions are executed in non-SPMD mode. This variable is picked up by
+ // the offload library to setup the device appropriately before kernel launch.
+ auto *GVMode = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty, /* isConstant */ true,
+ llvm::GlobalValue::WeakAnyLinkage, llvm::ConstantInt::get(CGM.Int8Ty, 1),
+ Twine(OutlinedFn->getName(), "_exec_mode"));
+ CGM.addCompilerUsedGlobal(GVMode);
+}
+
+CGOpenMPRuntimeTRegion::CGOpenMPRuntimeTRegion(CodeGenModule &CGM)
+ : CGOpenMPRuntimeTarget(CGM) {
+ if (!CGM.getLangOpts().OpenMPIsDevice)
+ llvm_unreachable("TRegion code generation does only handle device code!");
+}
+
+llvm::Function *CGOpenMPRuntimeTRegion::emitParallelOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+
+ // Emit target region as a standalone region.
+ llvm::Function *OutlinedFun =
+ cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
+ D, ThreadIDVar, InnermostKind, CodeGen));
+
+ createParallelDataSharingWrapper(OutlinedFun, D);
+
+ return OutlinedFun;
+}
+
+void CGOpenMPRuntimeTRegion::createParallelDataSharingWrapper(
+ llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D) {
+ ASTContext &Ctx = CGM.getContext();
+ const auto &CS = *D.getCapturedStmt(OMPD_parallel);
+
+ // Create a function that takes as argument the source thread.
+ FunctionArgList WrapperArgs;
+ ImplicitParamDecl SharedVarsArgDecl(Ctx, /* DC */ nullptr, D.getBeginLoc(),
+ /* Id */ nullptr, Ctx.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ ImplicitParamDecl PrivateVarsArgDecl(Ctx, /* DC */ nullptr, D.getBeginLoc(),
+ /* Id */ nullptr, Ctx.VoidPtrTy,
+ ImplicitParamDecl::Other);
+ WrapperArgs.emplace_back(&SharedVarsArgDecl);
+ WrapperArgs.emplace_back(&PrivateVarsArgDecl);
+
+ const CGFunctionInfo &CGFI =
+ CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, WrapperArgs);
+
+ auto *WrapperFn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ Twine(OutlinedParallelFn->getName(), "_wrapper"), &CGM.getModule());
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), WrapperFn, CGFI);
+
+ OutlinedParallelFn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ OutlinedParallelFn->setDoesNotRecurse();
+ WrapperFn->setLinkage(llvm::GlobalValue::InternalLinkage);
+ WrapperFn->setDoesNotRecurse();
+
+ CodeGenFunction CGF(CGM, /* suppressNewContext */ true);
+ CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WrapperFn, CGFI, WrapperArgs,
+ D.getBeginLoc(), D.getBeginLoc());
+
+ auto AI = WrapperFn->arg_begin();
+ llvm::Argument &SharedVarsArg = *(AI++);
+ llvm::Argument &PrivateVarsArg = *(AI);
+ SharedVarsArg.setName("shared_vars");
+ PrivateVarsArg.setName("private_vars");
+
+ Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /* DestWidth */ 32, /* Signed */ 1),
+ /* Name */ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/* C */ 0));
+
+ setLocThreadIdInsertPt(CGF, /* AtCurrentPoint */ true);
+
+ // Create the array of arguments and fill it with boilerplate values.
+ SmallVector<llvm::Value *, 8> Args;
+ Args.emplace_back(emitThreadIDAddress(CGF, D.getBeginLoc()).getPointer());
+ Args.emplace_back(ZeroAddr.getPointer());
+
+ CGBuilderTy &Bld = CGF.Builder;
+
+ // Collect all variables marked as shared.
+ llvm::SmallPtrSet<const ValueDecl *, 16> SharedVars;
+ for (const auto *C : D.getClausesOfKind<OMPSharedClause>())
+ for (const Expr *E : C->getVarRefs())
+ SharedVars.insert(CGOpenMPRuntimeTarget::getUnderlyingVar(E));
+
+ // Retrieve the shared and private variables from argument pointers and pass
+ // them to the outlined function.
+ llvm::SmallVector<llvm::Type *, 8> SharedStructMemberTypes;
+ llvm::SmallVector<llvm::Type *, 8> PrivateStructMemberTypes;
+
+ WrapperInfo &WI = WrapperInfoMap[OutlinedParallelFn];
+ WI.WrapperFn = WrapperFn;
+
+ auto ArgIt = OutlinedParallelFn->arg_begin() + /* global_tid & bound_tid */ 2;
+
+ // If we require loop bounds they are already part of the outlined function
+ // encoding, just after global_tid and bound_tid.
+ bool RequiresLoopBounds =
+ isOpenMPLoopBoundSharingDirective(D.getDirectiveKind());
+ if (RequiresLoopBounds) {
+ // Register the lower bound in the wrapper info.
+ WI.CaptureIsPrivate.push_back(true);
+ PrivateStructMemberTypes.push_back((ArgIt++)->getType());
+ // Register the upper bound in the wrapper info.
+ WI.CaptureIsPrivate.push_back(true);
+ PrivateStructMemberTypes.push_back((ArgIt++)->getType());
+ }
+
+ auto CIt = CS.capture_begin();
+ for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CIt) {
+ bool IsPrivate = CIt->capturesVariableArrayType() ||
+ CIt->capturesVariableByCopy() ||
+ !SharedVars.count(CIt->getCapturedVar());
+ WI.CaptureIsPrivate.push_back(IsPrivate);
+
+ auto &StructMemberTypes =
+ IsPrivate ? PrivateStructMemberTypes : SharedStructMemberTypes;
+ llvm::Type *ArgTy = (ArgIt++)->getType();
+ if (!IsPrivate) {
+ assert(ArgTy->isPointerTy());
+ ArgTy = ArgTy->getPointerElementType();
+ }
+ StructMemberTypes.push_back(ArgTy);
+ }
+
+ // Verify the position of the outlined function argument iterator as a sanity
+ // check.
+ assert(ArgIt == OutlinedParallelFn->arg_end() &&
+ "Not all arguments have been processed!");
+
+ llvm::Value *SharedVarsStructPtr = nullptr;
+ llvm::Value *PrivateVarsStructPtr = nullptr;
+ llvm::LLVMContext &LLVMCtx = OutlinedParallelFn->getContext();
+ if (!PrivateStructMemberTypes.empty()) {
+ WI.PrivateVarsStructTy = llvm::StructType::create(
+ LLVMCtx, PrivateStructMemberTypes, "omp.private.struct");
+ PrivateVarsStructPtr = Bld.CreateBitCast(
+ &PrivateVarsArg, WI.PrivateVarsStructTy->getPointerTo());
+ }
+ if (!SharedStructMemberTypes.empty()) {
+ WI.SharedVarsStructTy = llvm::StructType::create(
+ LLVMCtx, SharedStructMemberTypes, "omp.shared.struct");
+ SharedVarsStructPtr = Bld.CreateBitCast(
+ &SharedVarsArg, WI.SharedVarsStructTy->getPointerTo());
+ }
+
+ assert(WI.CaptureIsPrivate.size() + /* global_tid & bound_tid */ 2 ==
+ OutlinedParallelFn->arg_size() &&
+ "Not all arguments have been processed!");
+
+ unsigned PrivateIdx = 0, SharedIdx = 0;
+ for (int i = 0, e = WI.CaptureIsPrivate.size(); i < e; i++) {
+ bool IsPrivate = WI.CaptureIsPrivate[i];
+
+ llvm::Value *StructPtr =
+ IsPrivate ? PrivateVarsStructPtr : SharedVarsStructPtr;
+ unsigned &Idx = IsPrivate ? PrivateIdx : SharedIdx;
+
+ // TODO: Figure out the real alignment
+ if (IsPrivate) {
+ Args.emplace_back(
+ Bld.CreateAlignedLoad(Bld.CreateStructGEP(StructPtr, Idx++), 1));
+ } else {
+ llvm::Value *GEP = Bld.CreateStructGEP(StructPtr, Idx++);
+ Args.emplace_back(GEP);
+ }
+ }
+
+ assert(Args.size() == OutlinedParallelFn->arg_size());
+ emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args);
+
+ CGF.FinishFunction();
+
+ clearLocThreadIdInsertPt(CGF);
+}
+
+void CGOpenMPRuntimeTRegion::emitParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *Fn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ const WrapperInfo &WI = WrapperInfoMap[Fn];
+
+ auto &&ParGen = [this, CapturedVars, WI](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGBuilderTy &Bld = CGF.Builder;
+ assert(WI.WrapperFn && "Wrapper function does not exist!");
+
+ llvm::Value *SharedVarsSize = llvm::Constant::getNullValue(CGM.Int16Ty);
+ llvm::Value *PrivateVarsSize = SharedVarsSize;
+ llvm::Value *SharedStructAlloca = llvm::UndefValue::get(CGM.VoidPtrTy);
+ llvm::Value *PrivateStructAlloca = SharedStructAlloca;
+
+ if (WI.SharedVarsStructTy) {
+ SharedStructAlloca = CGF.CreateDefaultAlignTempAlloca(
+ WI.SharedVarsStructTy, ".shared.vars")
+ .getPointer();
+ const llvm::DataLayout &DL = WI.WrapperFn->getParent()->getDataLayout();
+ SharedVarsSize = Bld.getInt16(DL.getTypeAllocSize(WI.SharedVarsStructTy));
+ }
+ if (WI.PrivateVarsStructTy) {
+ PrivateStructAlloca = CGF.CreateDefaultAlignTempAlloca(
+ WI.PrivateVarsStructTy, ".private.vars")
+ .getPointer();
+ const llvm::DataLayout &DL = WI.WrapperFn->getParent()->getDataLayout();
+ PrivateVarsSize =
+ Bld.getInt16(DL.getTypeAllocSize(WI.PrivateVarsStructTy));
+ }
+
+ llvm::SmallVector<llvm::Value *, 4> Args;
+ Args.push_back(
+ /* UseSPMDMode */ Bld.getInt1(isKnownSPMDMode()));
+ Args.push_back(
+ /* RequiredOMPRuntime */ Bld.getInt1(mayNeedRuntimeSupport()));
+ Args.push_back(WI.WrapperFn);
+ Args.push_back(CGF.EmitCastToVoidPtr(SharedStructAlloca));
+ Args.push_back(SharedVarsSize);
+ Args.push_back(CGF.EmitCastToVoidPtr(PrivateStructAlloca));
+ Args.push_back(PrivateVarsSize);
+ Args.push_back(
+ /* SharedPointers */ Bld.getInt1(0));
+
+ assert((CapturedVars.empty() ||
+ (WI.SharedVarsStructTy || WI.PrivateVarsStructTy)) &&
+ "Expected the shared or private struct type to be set if variables "
+ "are captured!");
+ assert((CapturedVars.empty() ||
+ CapturedVars.size() ==
+ (WI.SharedVarsStructTy ? WI.SharedVarsStructTy->getNumElements()
+ : 0) +
+ (WI.PrivateVarsStructTy
+ ? WI.PrivateVarsStructTy->getNumElements()
+ : 0)) &&
+ "# elements in shared struct types should be number of captured "
+ "variables!");
+
+ // Store all captured variables into a single local structure that is then
+ // passed to the runtime library.
+ unsigned PrivateIdx = 0, SharedIdx = 0;
+ for (int i = 0, e = WI.CaptureIsPrivate.size(); i < e; i++) {
+ bool IsPrivate = WI.CaptureIsPrivate[i];
+
+ llvm::Value *StructPtr =
+ IsPrivate ? PrivateStructAlloca : SharedStructAlloca;
+ unsigned &Idx = IsPrivate ? PrivateIdx : SharedIdx;
+ llvm::Value *GEP = Bld.CreateStructGEP(StructPtr, Idx++);
+ llvm::Value *Var = IsPrivate ? CapturedVars[i]
+ : Bld.CreateAlignedLoad(CapturedVars[i], 1);
+ Bld.CreateDefaultAlignedStore(Var, GEP);
+ }
+
+ CGF.EmitRuntimeCall(
+ createTargetRuntimeFunction(OMPRTL__kmpc_target_region_kernel_parallel),
+ Args);
+
+ SharedIdx = 0;
+ for (int i = 0, e = WI.CaptureIsPrivate.size(); i < e; i++) {
+ bool IsPrivate = WI.CaptureIsPrivate[i];
+ if (IsPrivate)
+ continue;
+
+ llvm::Value *GEP = Bld.CreateStructGEP(SharedStructAlloca, SharedIdx++);
+ llvm::Value *Var = Bld.CreateAlignedLoad(GEP, 1);
+ Bld.CreateDefaultAlignedStore(Var, CapturedVars[i]);
+ }
+ };
+
+ auto &&SeqGen = [this, &ParGen, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ // Use an artifical "num_threads(1)" clause to force sequential execution if
+ // the expression in the 'if clause' evaluated to false. We expect the
+ // middle-end to clean this up.
+ emitNumThreadsClause(CGF, CGF.Builder.getInt32(/* C */ 1), Loc);
+ ParGen(CGF, Action);
+ };
+
+ if (IfCond) {
+ emitOMPIfClause(CGF, IfCond, ParGen, SeqGen);
+ } else {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ RegionCodeGenTy ThenRCG(ParGen);
+ ThenRCG(CGF);
+ }
+}
+
+void CGOpenMPRuntimeTRegion::emitCriticalRegion(
+ CodeGenFunction &CGF, StringRef CriticalName,
+ const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
+ const Expr *Hint) {
+ llvm_unreachable(
+ "TODO: TRegion code generation does not support critical regions yet!");
+}
+
+void CGOpenMPRuntimeTRegion::emitReduction(
+ CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
+ ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
+ llvm_unreachable(
+ "TODO: TRegion code generation does not support reductions yet!");
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits