sfantao updated this revision to Diff 49331.
sfantao marked an inline comment as done.
sfantao added a comment.

Emit num teams and thread limit using the inlined directives machinery.


http://reviews.llvm.org/D17019

Files:
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/CodeGen/CodeGenFunction.h
  test/OpenMP/teams_codegen.cpp

Index: test/OpenMP/teams_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/teams_codegen.cpp
@@ -0,0 +1,194 @@
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+// Test host codegen.
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32
+#ifdef CK1
+
+int Gbla;
+long long Gblb;
+
+// CK1-LABEL: teams_argument_global_local
+int teams_argument_global_local(int a){
+  int comp = 1;
+
+  int la = 23;
+  float lc = 25.0;
+
+  // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0)
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0)
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]])
+  // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+
+  // CK1: call void @{{.+}}(i{{64|32}} %{{.+}})
+  #pragma omp target
+  #pragma omp teams thread_limit(la)
+  {
+    ++comp;
+  }
+
+  // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]]
+  // CK1-DAG: [[NTA]] = load i32, i32* @Gbla,
+  // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}},
+
+  // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]]
+  // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64
+  // CK1-DAG: [[TLD]] = load float, float* %{{.+}},
+  // CK1-DAG: [[TLB]] = load i64, i64* @Gblb,
+
+  // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc)
+  {
+    ++comp;
+  }
+
+  return comp;
+}
+
+#endif // CK1
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+#ifdef CK2
+
+// CK2-DAG: [[SSI:%.+]] = type { i32, float }
+// CK2-DAG: [[SSL:%.+]] = type { i64, float }
+template <typename T>
+struct SS{
+  T a;
+  float b;
+};
+
+SS<int> Gbla;
+SS<long long> Gblb;
+
+// CK2-LABEL: teams_template_arg
+int teams_template_arg(void) {
+  int comp = 1;
+
+  SS<int> la;
+  SS<long long> lb;
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0)
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32
+  // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64
+  // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+  // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b)
+  {
+    ++comp;
+  }
+
+  // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]])
+
+  // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32
+  // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0),
+
+  // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32
+  // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64
+  // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]],
+  // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1
+
+  // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}})
+  #pragma omp target
+  #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a)
+  {
+    ++comp;
+  }
+  return comp;
+}
+#endif // CK2
+
+// Test host codegen.
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+#ifdef CK3
+
+// CK3: [[SSI:%.+]] = type { i32, float }
+// CK3-LABEL: teams_template_struct
+
+template <typename T, int X, long long Y>
+struct SS{
+  T a;
+  float b;
+
+  int foo(void) {
+    int comp = 1;
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123)
+
+    // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]],
+    // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0
+    // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}},
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(a) thread_limit(X)
+    {
+      ++comp;
+    }
+
+    // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]])
+
+    // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123
+    // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32
+    // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]],
+    // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1
+
+    // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}})
+    #pragma omp target
+    #pragma omp teams num_teams(Y) thread_limit((int)b+X)
+    {
+      ++comp;
+    }
+    return comp;
+  }
+};
+
+int teams_template_struct(void) {
+  SS<int, 123, 456> V;
+  return V.foo();
+
+}
+#endif // CK3
+#endif
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2223,6 +2223,16 @@
   llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S);
   Address GenerateCapturedStmtArgument(const CapturedStmt &S);
   llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S);
+  /// \brief Start an OpenMP inlined region by forcing all the variables
+  /// captured to be defined as local variables at the current scope. This is
+  /// required for regions that capture global variables that have to be emitted
+  /// as inlined regions.
+  void StartOpenMPInlinedCapturedRegion(
+      const CapturedStmt &S, SmallVector<const VarDecl *, 4> &ForcedLocalVars);
+  /// \brief Close an OpenMP inlined region by restoring the local variable
+  /// information that existed when the region was started.
+  void
+  CloseOpenMPInlinedCapturedRegion(ArrayRef<const VarDecl *> ForcedLocalVars);
   void GenerateOpenMPCapturedVars(const CapturedStmt &S,
                                   SmallVectorImpl<llvm::Value *> &CapturedVars);
   void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy,
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -239,6 +239,39 @@
   return F;
 }
 
+/// \brief Start an OpenMP inlined region by forcing all the variables captured
+/// to be defined as local variables at the current scope. This is required for
+/// regions that capture global variables that have to be emitted as inlined
+/// regions.
+void CodeGenFunction::StartOpenMPInlinedCapturedRegion(
+    const CapturedStmt &S, SmallVector<const VarDecl *, 4> &ForcedLocalVars) {
+  auto I = S.capture_init_begin();
+  auto CI = S.capture_begin();
+  auto CE = S.capture_end();
+  for (; CI != CE; ++CI, ++I) {
+    if (!CI->capturesVariable() && !CI->capturesVariableByCopy())
+      continue;
+
+    auto FoundDecl = LocalDeclMap.find(CI->getCapturedVar());
+    // If it already exist, we do not need to force it.
+    if (FoundDecl != LocalDeclMap.end())
+      continue;
+
+    setAddrOfLocalVar(CI->getCapturedVar(), EmitLValue(*I).getAddress());
+  }
+}
+
+/// \brief Close an OpenMP inlined region by restoring the local variable
+/// information that existed when the region was started.
+void CodeGenFunction::CloseOpenMPInlinedCapturedRegion(
+    ArrayRef<const VarDecl *> ForcedLocalVars) {
+  for (auto *VD : ForcedLocalVars) {
+    assert(LocalDeclMap.find(VD) != LocalDeclMap.end() &&
+           "Variable expected to exist in the local declarations map.");
+    LocalDeclMap.erase(VD);
+  }
+}
+
 //===----------------------------------------------------------------------===//
 //                              OpenMP Directive Emission
 //===----------------------------------------------------------------------===//
@@ -2667,8 +2700,12 @@
                                         CapturedVars);
 }
 
-void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) {
-  llvm_unreachable("CodeGen for 'omp teams' is not supported yet.");
+void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
+  LexicalScope Scope(*this, S.getSourceRange());
+  const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt());
+
+  // FIXME: We should fork teams here instead of just emit the statement.
+  EmitStmt(CS.getCapturedStmt());
 }
 
 void CodeGenFunction::EmitOMPCancellationPointDirective(
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -34,6 +34,7 @@
 } // namespace llvm
 
 namespace clang {
+class CapturedStmt;
 class Expr;
 class GlobalDecl;
 class OMPExecutableDirective;
@@ -661,10 +662,14 @@
   /// \param CodeGen Code generation sequence for the \a D directive.
   /// \param HasCancel true if region has inner cancel directive, false
   /// otherwise.
+  /// \param CS the captured statement associated with the region. It is only
+  /// required if the local variables information of the enclosing function may
+  /// not match with the captured variables.
   virtual void emitInlinedDirective(CodeGenFunction &CGF,
                                     OpenMPDirectiveKind InnermostKind,
                                     const RegionCodeGenTy &CodeGen,
-                                    bool HasCancel = false);
+                                    bool HasCancel = false,
+                                    const CapturedStmt *CS = nullptr);
   /// \brief Emit a code for reduction clause. Next code should be emitted for
   /// reduction:
   /// \code
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -255,21 +255,38 @@
 /// \brief RAII for emitting code of OpenMP constructs.
 class InlinedOpenMPRegionRAII {
   CodeGenFunction &CGF;
+  /// \brief Saves the varaibles that were forced to be local in the current
+  /// inlined region.
+  SmallVector<const VarDecl *, 4> ForcedLocalVars;
 
 public:
-  /// \brief Constructs region for combined constructs.
+  /// \brief Constructs inlined region. Mostly used for combined constructs. If
+  /// a captured statement is provided it also ensures the captured variables
+  /// are all defined in the scope of the enclosing function. This is typical
+  /// used for regions that make local instances of global variables, e.g.
+  /// target regions.
   /// \param CodeGen Code generation sequence for combined directives. Includes
   /// a list of functions used for code generation of implicitly inlined
   /// regions.
   InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
-                          OpenMPDirectiveKind Kind, bool HasCancel)
+                          OpenMPDirectiveKind Kind, bool HasCancel,
+                          const CapturedStmt *CS)
       : CGF(CGF) {
     // Start emission for the construct.
     CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
         CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
+
+    // Ensures that all the captures are local in the current inlined region.
+    if (CS)
+      CGF.StartOpenMPInlinedCapturedRegion(*CS, ForcedLocalVars);
   }
 
   ~InlinedOpenMPRegionRAII() {
+    // Restore the local variable information if we have anything forced in this
+    // inlined region.
+    if (!ForcedLocalVars.empty())
+      CGF.CloseOpenMPInlinedCapturedRegion(ForcedLocalVars);
+
     // Restore original CapturedStmtInfo only if we're done with code emission.
     auto *OldCSI =
         cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
@@ -481,6 +498,10 @@
   // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
   // *arg_types);
   OMPRTL__tgt_target,
+  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+  OMPRTL__tgt_target_teams,
   // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
   OMPRTL__tgt_register_lib,
   // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -1153,6 +1174,24 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
     break;
   }
+  case OMPRTL__tgt_target_teams: {
+    // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
+    // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+    // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
+    llvm::Type *TypeParams[] = {CGM.Int32Ty,
+                                CGM.VoidPtrTy,
+                                CGM.Int32Ty,
+                                CGM.VoidPtrPtrTy,
+                                CGM.VoidPtrPtrTy,
+                                CGM.SizeTy->getPointerTo(),
+                                CGM.Int32Ty->getPointerTo(),
+                                CGM.Int32Ty,
+                                CGM.Int32Ty};
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
+    break;
+  }
   case OMPRTL__tgt_register_lib: {
     // Build void __tgt_register_lib(__tgt_bin_desc *desc);
     QualType ParamTy =
@@ -3766,10 +3805,11 @@
 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                            OpenMPDirectiveKind InnerKind,
                                            const RegionCodeGenTy &CodeGen,
-                                           bool HasCancel) {
+                                           bool HasCancel,
+                                           const CapturedStmt *CS) {
   if (!CGF.HaveInsertPoint())
     return;
-  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
+  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, CS);
   CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
 }
 
@@ -3972,6 +4012,114 @@
       DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID);
 }
 
+/// \brief Emit the num_teams clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no num_teams clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+                                     CodeGenFunction &CGF,
+                                     const OMPExecutableDirective &D) {
+
+  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+                                              "teams directive expected to be "
+                                              "emitted only for the host!");
+
+  // FIXME: For the moment we do not support combined directives with target and
+  // teams, so we do not expect to get any num_teams clause in the provided
+  // directive. Once we support that, this assertion can be replaced by the
+  // actual emission of the clause expression.
+  assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
+         "Not expecting clause in directive.");
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the number of teams to pass to the runtime function call. This is done
+  // by generating the expression in a inlined region. This is required because
+  // the expression is captured in the enclosing target environment when the
+  // teams directive is not combined with target.
+
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+  // FIXME: Accommodate other combined directives with teams when they become
+  // available.
+  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+    if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
+      llvm::Value *NumTeams = nullptr;
+      auto &&CodeGen = [NTE, &NumTeams](CodeGenFunction &CGF) {
+        NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
+        NumTeams =
+            CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, /*IsSigned=*/true);
+      };
+
+      OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen,
+                                      /*HasCancel=*/false, &CS);
+      assert(NumTeams && "Invalid number of teams value.");
+      return NumTeams;
+    }
+
+    // If we have an enclosed teams directive but no num_teams clause we use
+    // the default value 0.
+    return CGF.Builder.getInt32(0);
+  }
+
+  // No teams associated with the directive.
+  return nullptr;
+}
+
+/// \brief Emit the thread_limit clause of an enclosed teams directive at the
+/// target region scope. If there is no teams directive associated with the
+/// target directive, or if there is no thread_limit clause associated with the
+/// enclosed teams directive, return nullptr.
+static llvm::Value *
+emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+                                        CodeGenFunction &CGF,
+                                        const OMPExecutableDirective &D) {
+
+  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
+                                              "teams directive expected to be "
+                                              "emitted only for the host!");
+
+  // FIXME: For the moment we do not support combined directives with target and
+  // teams, so we do not expect to get any thread_limit clause in the provided
+  // directive. Once we support that, this assertion can be replaced by the
+  // actual emission of the clause expression.
+  assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
+         "Not expecting clause in directive.");
+
+  // If the current target region has a teams region enclosed, we need to get
+  // the thread limit to pass to the runtime function call. This is done
+  // by generating the expression in a inlined region. This is required because
+  // the expression is captured in the enclosing target environment when the
+  // teams directive is not combined with target.
+
+  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
+
+  // FIXME: Accommodate other combined directives with teams when they become
+  // available.
+  if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) {
+    if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
+      llvm::Value *ThreadLimit = nullptr;
+      auto &&CodeGen = [TLE, &ThreadLimit](CodeGenFunction &CGF) {
+        ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
+        ThreadLimit = CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+                                                /*IsSigned=*/true);
+      };
+
+      OMPRuntime.emitInlinedDirective(CGF, OMPD_target, CodeGen,
+                                      /*HasCancel=*/false, &CS);
+      assert(ThreadLimit && "Invalid number of teams value.");
+      return ThreadLimit;
+    }
+
+    // If we have an enclosed teams directive but no thread_limit clause we use
+    // the default value 0.
+    return CGF.Builder.getInt32(0);
+  }
+
+  // No teams associated with the directive.
+  return nullptr;
+}
+
 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
                                      const OMPExecutableDirective &D,
                                      llvm::Value *OutlinedFn,
@@ -4100,7 +4248,7 @@
   // Fill up the pointer arrays and transfer execution to the device.
   auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes,
                     hasVLACaptures, Device, OutlinedFnID, OffloadError,
-                    OffloadErrorQType](CodeGenFunction &CGF) {
+                    OffloadErrorQType, &D](CodeGenFunction &CGF) {
     unsigned PointerNumVal = BasePointers.size();
     llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal);
     llvm::Value *BasePointersArray;
@@ -4240,11 +4388,28 @@
     else
       DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
 
-    llvm::Value *OffloadingArgs[] = {
-        DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
-        PointersArray, SizesArray,   MapTypesArray};
-    auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
-                                      OffloadingArgs);
+    // Return value of the runtime offloading call.
+    llvm::Value *Return;
+
+    auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D);
+    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D);
+
+    if (NumTeams) {
+      assert(ThreadLimit && "Thread limit expression should be available along "
+                            "with number of teams.");
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,          OutlinedFnID,  PointerNum,
+          BasePointersArray, PointersArray, SizesArray,
+          MapTypesArray,     NumTeams,      ThreadLimit};
+      Return = CGF.EmitRuntimeCall(
+          createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+    } else {
+      llvm::Value *OffloadingArgs[] = {
+          DeviceID,      OutlinedFnID, PointerNum,   BasePointersArray,
+          PointersArray, SizesArray,   MapTypesArray};
+      Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target),
+                                   OffloadingArgs);
+    }
 
     CGF.EmitStoreOfScalar(Return, OffloadError);
   };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to