sfantao created this revision.
sfantao added reviewers: ABataev, hfinkel, carlo.bertolli, arpith-jacob, kkwli0.
sfantao added subscribers: fraggamuffin, caomhin, cfe-commits.

This patch implements the launching of a target region in the presence of a 
nested teams region, i.e calls tgt_target_teams with the required arguments 
gathered from the enclosed teams directive.

The actual codegen of the region enclosed by the teams construct will be 
contributed in a separate patch. 

http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,211 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK1-64-DAG: [[NTA]] = bitcast i64* [[NTB:%[^,]+]] to i32*
+  // CK1-64-DAG: store i64 [[NTC:%[^,]+]], i64* [[NTB]],
+  // CK1-64-DAG: [[NTC]] = load i64, i64* [[NTD:%[^,]+]],
+  // CK1-64-DAG: [[NTE:%[^,]+]] = bitcast i64* [[NTD]] to i32*
+  // CK1-64-DAG: store i32 [[NTF:%[^,]+]], i32* [[NTE]],
+  // CK1-64-DAG: [[NTF]] = load i32, i32* {{%[^,]+}},
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-64-DAG: [[NTB]] = load i32, i32* %c{{.+}},
+  // CK1-64-DAG: [[NTA]] = load i32, i32* %c{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* %{{.+}},
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+    ++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SSI:%.+]] = type { i32, float }
+// CK2-DAG: [[SSL:%.+]] = type { i64, float }
+template <typename T>
+struct SS{
+  T a;
+  float b;
+};
+
+SS<int> Gbla;
+SS<long long> Gblb;
+
+// CK2-LABEL: teams_template_arg
+int teams_template_arg(void) {
+  int comp = 1;
+
+  SS<int> la;
+  SS<long long> lb;
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+  // CK2-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
+  // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+  // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
+  {
+    ++comp;
+  }
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
+  // CK2-DAG: [[TLD]] = load i64, i64* [[TLA:%[^,]+]],
+  // CK2-DAG: [[TLA]] = getelementptr inbounds [[SSL]], [[SSL]]* [[TLB:%[^,]+]], i32 0, i32 0
+  // CK2-DAG: [[TLB]] = load [[SSL]]*, [[SSL]]** %{{.+}},
+
+  // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
+  // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
+  // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
+  // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
+  {
+    ++comp;
+  }
+  return comp;
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { i32, float }
+// CK3-LABEL: teams_template_struct
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a;
+  float b;
+
+  int foo(void) {
+    int comp = 1;
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+
+    // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+    // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+    // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(a) thread_limit(X)
+    {
+      ++comp;
+    }
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+
+    // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
+    // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
+    // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+    // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[TLD:%[^,]+]], i32 0, i32 1
+    // CK3-DAG: [[TLD]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(Y) thread_limit((int)b+X)
+    {
+      ++comp;
+    }
+    return comp;
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK3
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2216,6 +2216,8 @@
   llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K);
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
+  llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+                                                     QualType ReturnQTy);
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl<llvm::Value *> &CapturedVars);
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -91,6 +91,12 @@
 
 llvm::Function *
 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+  return GenerateOpenMPCapturedStmtFunction(S, getContext().VoidTy);
+}
+
+llvm::Function *
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+                                                    QualType ReturnQTy) {
   assert(
       CapturedStmtInfo &&
       "CapturedStmtInfo should be set when generating the captured function");
@@ -140,7 +146,7 @@
   // Create the function declaration.
   FunctionType::ExtInfo ExtInfo;
   const CGFunctionInfo &FuncInfo =
-      CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo,
+      CGM.getTypes().arrangeFreeFunctionDeclaration(ReturnQTy, Args, ExtInfo,
                                                     /*IsVariadic=*/false);
   llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
 
@@ -152,7 +158,7 @@
     F->addFnAttr(llvm::Attribute::NoUnwind);
 
   // Generate the function.
-  StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(),
+  StartFunction(CD, ReturnQTy, F, FuncInfo, Args, CD->getLocation(),
                 CD->getBody()->getLocStart());
   unsigned Cnt = CD->getContextParamPosition();
   I = S.captures().begin();
@@ -2635,6 +2641,8 @@
 
   llvm::Function *Fn = nullptr;
   llvm::Constant *FnID = nullptr;
+  llvm::Function *NestedNumTeamsFn = nullptr;
+  llvm::Function *NestedThreadLimitFn = nullptr;
 
   // Check if we have any if clause associated with the directive.
   const Expr *IfCond = nullptr;
@@ -2673,15 +2681,21 @@
     ParentName =
         CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl)));
 
-  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
-                                                    IsOffloadEntry);
+  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+      S, ParentName, Fn, FnID, NestedNumTeamsFn, NestedThreadLimitFn,
+      IsOffloadEntry);
 
-  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device,
+  CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, NestedNumTeamsFn,
+                                        NestedThreadLimitFn, IfCond, Device,
                                         CapturedVars);
 }
 
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
-  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+
+  // FIXME: We should fork teams here instead of just emit the statement.
+  EmitStmt(CS.getCapturedStmt());
 }
 
 void CodeGenFunction::EmitOMPCancellationPointDirective(
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -166,6 +166,10 @@
     // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
     // *arg_types);
     OMPRTL__tgt_target,
+    // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    OMPRTL__tgt_target_teams,
     // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
     OMPRTL__tgt_register_lib,
     // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -925,32 +929,42 @@
   /// \param ParentName Name of the function that encloses the target region.
   /// \param OutlinedFn Outlined function value to be defined by this call.
   /// \param OutlinedFnID Outlined function ID value to be defined by this call.
+  /// \param NestedNumTeamsFn Function that returns the number of teams
+  /// associated with a nested temas directive.
+  /// \param NestedThreadLimitFn Function that returns the thread
+  /// limitassociated with a nested temas directive.
   /// \param IsOffloadEntry True if the outlined function is an offload entry.
   /// An oulined function may not be an entry if, e.g. the if clause always
   /// evaluates to false.
   virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D,
                                           StringRef ParentName,
                                           llvm::Function *&OutlinedFn,
                                           llvm::Constant *&OutlinedFnID,
+                                          llvm::Function *&NestedNumTeamsFn,
+                                          llvm::Function *&NestedThreadLimitFn,
                                           bool IsOffloadEntry);
 
   /// \brief Emit the target offloading code associated with \a D. The emitted
   /// code attempts offloading the execution to the device, an the event of
   /// a failure it executes the host version outlined in \a OutlinedFn.
   /// \param D Directive to emit.
   /// \param OutlinedFn Host version of the code to be offloaded.
   /// \param OutlinedFnID ID of host version of the code to be offloaded.
+  /// \param NestedNumTeamsFn Function that return the number of teams of a
+  /// nested teams region, if any.
+  /// \param NestedThreadLimitFn Function that return the thread limit of a
+  /// nested teams region, if any.
   /// \param IfCond Expression evaluated in if clause associated with the target
   /// directive, or null if no if clause is used.
   /// \param Device Expression evaluated in device clause associated with the
   /// target directive, or null if no device clause is used.
   /// \param CapturedVars Values captured in the current region.
-  virtual void emitTargetCall(CodeGenFunction &CGF,
-                              const OMPExecutableDirective &D,
-                              llvm::Value *OutlinedFn,
-                              llvm::Value *OutlinedFnID, const Expr *IfCond,
-                              const Expr *Device,
-                              ArrayRef<llvm::Value *> CapturedVars);
+  virtual void
+  emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+                 llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID,
+                 llvm::Function *NestedNumTeamsFn,
+                 llvm::Function *NestedThreadLimitFn, const Expr *IfCond,
+                 const Expr *Device, ArrayRef<llvm::Value *> CapturedVars);
 
   /// \brief Emit the target regions enclosed in \a GD function definition or
   /// the function itself in case it is a valid device function. Returns true if
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -935,6 +935,24 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
+  case OMPRTL__tgt_target_teams: {
+    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int32Ty->getPointerTo(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+    break;
+  }
   case OMPRTL__tgt_register_lib: {
     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
     QualType ParamTy =
@@ -3717,6 +3735,7 @@
 void CGOpenMPRuntime::emitTargetOutlinedFunction(
     const OMPExecutableDirective &D, StringRef ParentName,
     llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
+    llvm::Function *&NestedNumTeamsFn, llvm::Function *&NestedThreadLimitFn,
     bool IsOffloadEntry) {
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
@@ -3753,11 +3772,13 @@
        << Column;
   }
 
-  CodeGenFunction CGF(CGM, true);
-  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
-  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+  {
+    CodeGenFunction CGF(CGM, true);
+    CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
+    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
 
-  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
+    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
+  }
 
   // If this target outline function is not an offload entry, we don't need to
   // register it.
@@ -3787,13 +3808,62 @@
   // Register the information for the entry associated with this target region.
   OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
       DeviceID, FileID, ParentName, Line, Column, OutlinedFn, OutlinedFnID);
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams and thread limit to pass to the runtime function call
+  // later on. This is done through a function that returns the value. This is
+  // required because the expression is captured in the enclosing target
+  // environment when the teams directive is not combined with target. This only
+  // has to be done for the host.
+  //
+  // FIXME: Accommodate other combined directives with teams when they become
+  // available.
+  if (!CGM.getLangOpts().OpenMPIsDevice)
+    if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+      if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+        auto &&CodeGen = [NTE](CodeGenFunction &CGF) {
+          auto *V = CGF.EmitScalarExpr(NTE->getNumTeams());
+          CGF.Builder.CreateRet(
+              CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+          CGF.EmitBlock(CGF.createBasicBlock());
+        };
+
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_num_teams");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        NestedNumTeamsFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedNumTeamsFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+      if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+        auto &&CodeGen = [TLE](CodeGenFunction &CGF) {
+          auto *V = CGF.EmitScalarExpr(TLE->getThreadLimit());
+          CGF.Builder.CreateRet(
+              CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
+          CGF.EmitBlock(CGF.createBasicBlock());
+        };
+
+        CodeGenFunction CGF(CGM, true);
+        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen,
+                                        ".omp_offload.get_thread_limit");
+        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
+
+        NestedThreadLimitFn =
+            CGF.GenerateOpenMPCapturedStmtFunction(CS, CGM.getContext().IntTy);
+        NestedThreadLimitFn->addFnAttr(llvm::Attribute::AlwaysInline);
+      }
+    }
   return;
 }
 
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
                                      llvm::Value *OutlinedFnID,
+                                     llvm::Function *NestedNumTeamsFn,
+                                     llvm::Function *NestedThreadLimitFn,
                                      const Expr *IfCond, const Expr *Device,
                                      ArrayRef<llvm::Value *> CapturedVars) {
   if (!CGF.HaveInsertPoint())
@@ -3917,8 +3987,9 @@
 
   // Fill up the pointer arrays and transfer execution to the device.
   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
-                    hasVLACaptures, Device, OutlinedFnID, OffloadError,
-                    OffloadErrorQType](CodeGenFunction &CGF) {
+                    &CS, hasVLACaptures, Device, OutlinedFnID, OffloadError,
+                    OffloadErrorQType, NestedNumTeamsFn,
+                    NestedThreadLimitFn](CodeGenFunction &CGF) {
     unsigned PointerNumVal = BasePointers.size();
     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
     llvm::Value *BasePointersArray;
@@ -4059,11 +4130,53 @@
     else
       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
 
-    llvm::Value *OffloadingArgs[] = {
-        DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
-        PointersArray, SizesArray,   MapTypesArray};
-    auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
-                                      OffloadingArgs);
+    // Return value of the runtime offloading call.
+    llvm::Value *Return;
+
+    // If the target region is associated with a teams region (not necessarily
+    // combined) we need to get the value with the number of teams and thread
+    // limit. These expressions can be nested in the target region - in that
+    // case we rely on NestedNumTeamsFn and NestedThreadLimitFn to retrieve the
+    // right values. If teams is not nested (is in a combined directive with
+    // target) we can emit the expressions directly.
+
+    // FIXME: We need to accommodate combined directives here when they become
+    // supported.
+    if (auto *TD = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+      llvm::Value *NumTeams = nullptr;
+      llvm::Value *ThreadLimit = nullptr;
+      if (TD->getSingleClause<OMPNumTeamsClause>()) {
+        assert(NestedNumTeamsFn && "Helper function is required to get the "
+                                   "number of teams of an enclosed teams "
+                                   "directive.");
+        NumTeams = CGF.Builder.CreateCall(NestedNumTeamsFn, BasePointers);
+      } else
+        NumTeams = CGF.Builder.getInt32(0);
+      if (TD->getSingleClause<OMPThreadLimitClause>()) {
+        assert(NestedThreadLimitFn && "Helper function is required to get the "
+                                      "thread limit of an enclosed teams "
+                                      "directive.");
+        ThreadLimit = CGF.Builder.CreateCall(NestedThreadLimitFn, BasePointers);
+      } else
+        ThreadLimit = CGF.Builder.getInt32(0);
+
+      assert(NumTeams && ThreadLimit && "Thread limit and number of teams "
+                                        "should be defined for a teams "
+                                        "region.");
+
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,          OutlinedFnID,  PointerNum,
+          BasePointersArray, PointersArray, SizesArray,
+          MapTypesArray,     NumTeams,      ThreadLimit};
+      Return = CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+    } else {
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
+          PointersArray, SizesArray,   MapTypesArray};
+      Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+                                   OffloadingArgs);
+    }
 
     CGF.EmitStoreOfScalar(Return, OffloadError);
   };
@@ -4133,7 +4246,9 @@
 
     llvm::Function *Fn;
     llvm::Constant *Addr;
-    emitTargetOutlinedFunction(*E, ParentName, Fn, Addr,
+    llvm::Function *NTFn;
+    llvm::Function *TLFn;
+    emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, NTFn, TLFn,
                                /*isOffloadEntry=*/true);
     assert(Fn && Addr && "Target region emission failed.");
     return;
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to