sfantao created this revision.
sfantao added reviewers: ABataev, hfinkel, carlo.bertolli, arpith-jacob, kkwli0.
sfantao added subscribers: fraggamuffin, caomhin, cfe-commits.
This patch implements the launching of a target region in the presence of a
nested teams region, i.e calls tgt_target_teams with the required arguments
gathered from the enclosed teams directive.
The actual codegen of the region enclosed by the teams construct will be
contributed in a separate patch.
http://reviews.llvm.org/D17019
Files:
lib/CodeGen/CGOpenMPRuntime.cpp
lib/CodeGen/CGOpenMPRuntime.h
lib/CodeGen/CGStmtOpenMP.cpp
lib/CodeGen/CodeGenFunction.h
test/OpenMP/teams_codegen.cpp
Index: test/OpenMP/teams_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+ int comp = 1;
+
+ int la = 23;
+ float lc = 25.0;
+
+ // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+ // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+ // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+ // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+ // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+ // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+ // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+ // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(la)
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+ // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+ // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+ // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+ // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+ // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+ // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+ // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+ // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+ #pragma omp target
+ #pragma omp teams thread_limit(la)
+ {
+ ++comp;
+ }
+
+ // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+ // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+ // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+ // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+ // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+ // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+ // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+ // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+ // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+ {
+ ++comp;
+ }
+
+ return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SSI:%.+]] = type { i32, float }
+// CK2-DAG: [[SSL:%.+]] = type { i64, float }
+template <typename T>
+struct SS{
+ T a;
+ float b;
+};
+
+SS<int> Gbla;
+SS<long long> Gblb;
+
+// CK2-LABEL: teams_template_arg
+int teams_template_arg(void) {
+ int comp = 1;
+
+ SS<int> la;
+ SS<long long> lb;
+
+ // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK2-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+ // CK2-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+ // CK2-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+ // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+ // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
+ // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+ // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
+
+ // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
+ {
+ ++comp;
+ }
+
+ // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+ // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
+ // CK2-DAG: [[TLD]] = load i64, i64* [[TLA:%[^,]+]],
+ // CK2-DAG: [[TLA]] = getelementptr inbounds [[SSL]], [[SSL]]* [[TLB:%[^,]+]], i32 0, i32 0
+ // CK2-DAG: [[TLB]] = load [[SSL]]*, [[SSL]]** %{{.+}},
+
+ // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
+ // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
+ // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
+ // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
+
+ // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
+ {
+ ++comp;
+ }
+ return comp;
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { i32, float }
+// CK3-LABEL: teams_template_struct
+
+template <typename T, int X, long long Y>
+struct SS{
+ T a;
+ float b;
+
+ int foo(void) {
+ int comp = 1;
+
+ // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+
+ // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+ // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+ // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+ // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(a) thread_limit(X)
+ {
+ ++comp;
+ }
+
+ // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+
+ // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
+ // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
+ // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+ // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[TLD:%[^,]+]], i32 0, i32 1
+ // CK3-DAG: [[TLD]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+ // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+ #pragma omp target
+ #pragma omp teams num_teams(Y) thread_limit((int)b+X)
+ {
+ ++comp;
+ }
+ return comp;
+ }
+};
+
+int teams_template_struct(void) {
+ SS<int, 123, 456> V;
+ return V.foo();
+
+}
+#endif // CK3
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2216,6 +2216,8 @@
llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
Address GenerateCapturedStmtArgument(const CapturedStmt &S);
+ llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ QualType ReturnQTy);
llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
void GenerateOpenMPCapturedVars(const CapturedStmt &S,
SmallVectorImpl<llvm::Value *> &CapturedVars);
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -91,6 +91,12 @@
llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+ return GenerateOpenMPCapturedStmtFunction(S, getContext().VoidTy);
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ QualType ReturnQTy) {
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
@@ -140,7 +146,7 @@
// Create the function declaration.
FunctionType::ExtInfo ExtInfo;
const CGFunctionInfo &FuncInfo =
- CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
+ CGM.getTypes().arrangeFreeFunctionDeclaration(ReturnQTy, Args, ExtInfo,
/*IsVariadic=*/false);
llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
@@ -152,7 +158,7 @@
F->addFnAttr(llvm::Attribute::NoUnwind);
// Generate the function.
- StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+ StartFunction(CD, ReturnQTy, F, FuncInfo, Args, CD->getLocation(),
CD->getBody()->getLocStart());
unsigned Cnt = CD->getContextParamPosition();
I = S.captures().begin();
@@ -2635,6 +2641,8 @@
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
+ llvm::Function *NestedNumTeamsFn = nullptr;
+ llvm::Function *NestedThreadLimitFn = nullptr;
// Check if we have any if clause associated with the directive.
const Expr *IfCond = nullptr;
@@ -2673,15 +2681,21 @@
ParentName =
CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
- CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
- IsOffloadEntry);
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, FnID, NestedNumTeamsFn, NestedThreadLimitFn,
+ IsOffloadEntry);
- CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
+ CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, NestedNumTeamsFn,
+ NestedThreadLimitFn, IfCond, Device,
CapturedVars);
}
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
- llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+ LexicalScope Scope(*this, S.getSourceRange());
+ const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+
+ // FIXME: We should fork teams here instead of just emit the statement.
+ EmitStmt(CS.getCapturedStmt());
}
void CodeGenFunction::EmitOMPCancellationPointDirective(
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -166,6 +166,10 @@
// arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
// *arg_types);
OMPRTL__tgt_target,
+ // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ OMPRTL__tgt_target_teams,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -925,32 +929,42 @@
/// \param ParentName Name of the function that encloses the target region.
/// \param OutlinedFn Outlined function value to be defined by this call.
/// \param OutlinedFnID Outlined function ID value to be defined by this call.
+ /// \param NestedNumTeamsFn Function that returns the number of teams
+ /// associated with a nested temas directive.
+ /// \param NestedThreadLimitFn Function that returns the thread
+ /// limitassociated with a nested temas directive.
/// \param IsOffloadEntry True if the outlined function is an offload entry.
/// An oulined function may not be an entry if, e.g. the if clause always
/// evaluates to false.
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
StringRef ParentName,
llvm::Function *&OutlinedFn,
llvm::Constant *&OutlinedFnID,
+ llvm::Function *&NestedNumTeamsFn,
+ llvm::Function *&NestedThreadLimitFn,
bool IsOffloadEntry);
/// \brief Emit the target offloading code associated with \a D. The emitted
/// code attempts offloading the execution to the device, an the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
/// \param D Directive to emit.
/// \param OutlinedFn Host version of the code to be offloaded.
/// \param OutlinedFnID ID of host version of the code to be offloaded.
+ /// \param NestedNumTeamsFn Function that return the number of teams of a
+ /// nested teams region, if any.
+ /// \param NestedThreadLimitFn Function that return the thread limit of a
+ /// nested teams region, if any.
/// \param IfCond Expression evaluated in if clause associated with the target
/// directive, or null if no if clause is used.
/// \param Device Expression evaluated in device clause associated with the
/// target directive, or null if no device clause is used.
/// \param CapturedVars Values captured in the current region.
- virtual void emitTargetCall(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- llvm::Value *OutlinedFn,
- llvm::Value *OutlinedFnID, const Expr *IfCond,
- const Expr *Device,
- ArrayRef<llvm::Value *> CapturedVars);
+ virtual void
+ emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
+ llvm::Function *NestedNumTeamsFn,
+ llvm::Function *NestedThreadLimitFn, const Expr *IfCond,
+ const Expr *Device, ArrayRef<llvm::Value *> CapturedVars);
/// \brief Emit the target regions enclosed in \a GD function definition or
/// the function itself in case it is a valid device function. Returns true if
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -935,6 +935,24 @@
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
+ case OMPRTL__tgt_target_teams: {
+ // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int32Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int32Ty->getPointerTo(),
+ CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
@@ -3717,6 +3735,7 @@
void CGOpenMPRuntime::emitTargetOutlinedFunction(
const OMPExecutableDirective &D, StringRef ParentName,
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+ llvm::Function *&NestedNumTeamsFn, llvm::Function *&NestedThreadLimitFn,
bool IsOffloadEntry) {
assert(!ParentName.empty() && "Invalid target region parent name!");
@@ -3753,11 +3772,13 @@
<< Column;
}
- CodeGenFunction CGF(CGM, true);
- CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
- CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+ {
+ CodeGenFunction CGF(CGM, true);
+ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
+ OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
+ }
// If this target outline function is not an offload entry, we don't need to
// register it.
@@ -3787,13 +3808,62 @@
// Register the information for the entry associated with this target region.
OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+
+ // If the current target region has a teams region enclosed, we need to get
+ // the number of teams and thread limit to pass to the runtime function call
+ // later on. This is done through a function that returns the value. This is
+ // required because the expression is captured in the enclosing target
+ // environment when the teams directive is not combined with target. This only
+ // has to be done for the host.
+ //
+ // FIXME: Accommodate other combined directives with teams when they become
+ // available.
+ if (!CGM.getLangOpts().OpenMPIsDevice)
+ if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+ if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+ auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+ auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+ CGF.Builder.CreateRet(
+ CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+ CGF.EmitBlock(CGF.createBasicBlock());
+ };
+
+ CodeGenFunction CGF(CGM, true);
+ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+ ".omp_offload.get_num_teams");
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+ NestedNumTeamsFn =
+ CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+ NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+ if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+ auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+ auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+ CGF.Builder.CreateRet(
+ CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+ CGF.EmitBlock(CGF.createBasicBlock());
+ };
+
+ CodeGenFunction CGF(CGM, true);
+ CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+ ".omp_offload.get_thread_limit");
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+ NestedThreadLimitFn =
+ CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+ NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
+ }
+ }
return;
}
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
llvm::Value *OutlinedFnID,
+ llvm::Function *NestedNumTeamsFn,
+ llvm::Function *NestedThreadLimitFn,
const Expr *IfCond, const Expr *Device,
ArrayRef<llvm::Value *> CapturedVars) {
if (!CGF.HaveInsertPoint())
@@ -3917,8 +3987,9 @@
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
- hasVLACaptures, Device, OutlinedFnID, OffloadError,
- OffloadErrorQType](CodeGenFunction &CGF) {
+ &CS, hasVLACaptures, Device, OutlinedFnID, OffloadError,
+ OffloadErrorQType, NestedNumTeamsFn,
+ NestedThreadLimitFn](CodeGenFunction &CGF) {
unsigned PointerNumVal = BasePointers.size();
llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
llvm::Value *BasePointersArray;
@@ -4059,11 +4130,53 @@
else
DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
- llvm::Value *OffloadingArgs[] = {
- DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
- PointersArray, SizesArray, MapTypesArray};
- auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
- OffloadingArgs);
+ // Return value of the runtime offloading call.
+ llvm::Value *Return;
+
+ // If the target region is associated with a teams region (not necessarily
+ // combined) we need to get the value with the number of teams and thread
+ // limit. These expressions can be nested in the target region - in that
+ // case we rely on NestedNumTeamsFn and NestedThreadLimitFn to retrieve the
+ // right values. If teams is not nested (is in a combined directive with
+ // target) we can emit the expressions directly.
+
+ // FIXME: We need to accommodate combined directives here when they become
+ // supported.
+ if (auto *TD = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+ llvm::Value *NumTeams = nullptr;
+ llvm::Value *ThreadLimit = nullptr;
+ if (TD->getSingleClause<OMPNumTeamsClause>()) {
+ assert(NestedNumTeamsFn && "Helper function is required to get the "
+ "number of teams of an enclosed teams "
+ "directive.");
+ NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers);
+ } else
+ NumTeams = CGF.Builder.getInt32(0);
+ if (TD->getSingleClause<OMPThreadLimitClause>()) {
+ assert(NestedThreadLimitFn && "Helper function is required to get the "
+ "thread limit of an enclosed teams "
+ "directive.");
+ ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, BasePointers);
+ } else
+ ThreadLimit = CGF.Builder.getInt32(0);
+
+ assert(NumTeams && ThreadLimit && "Thread limit and number of teams "
+ "should be defined for a teams "
+ "region.");
+
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum,
+ BasePointersArray, PointersArray, SizesArray,
+ MapTypesArray, NumTeams, ThreadLimit};
+ Return = CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+ } else {
+ llvm::Value *OffloadingArgs[] = {
+ DeviceID, OutlinedFnID, PointerNum, BasePointersArray,
+ PointersArray, SizesArray, MapTypesArray};
+ Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+ OffloadingArgs);
+ }
CGF.EmitStoreOfScalar(Return, OffloadError);
};
@@ -4133,7 +4246,9 @@
llvm::Function *Fn;
llvm::Constant *Addr;
- emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
+ llvm::Function *NTFn;
+ llvm::Function *TLFn;
+ emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, NTFn, TLFn,
/*isOffloadEntry=*/true);
assert(Fn && Addr && "Target region emission failed.");
return;
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits