[PATCH] D29910: [OpenMP] Specialize default schedule on a worksharing loop on the NVPTX device.

Arpith Jacob via Phabricator via cfe-commits Tue, 14 Feb 2017 13:23:33 -0800

arpith-jacob updated this revision to Diff 88423.
arpith-jacob added a comment.


Hi Alexey,

Thank you for reviewing this patch.

> I don't like the idea of adding some kind of default scheduling, that is not 
> defined in standard in Sema

Actually, "default scheduling" is defined in the OpenMP spec.  It is called 
"def-sched-var" and controls the scheduling of loops.  It's value is 
implementation (compiler) defined.  So why not allow the target device to 
choose this value in the compiler?

  http://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
  
  Section 2.3.1: ICV Descriptions, pg 46:
  def-sched-var - controls the implementation defined default scheduling of 
loop regions. There is one copy of this ICV per device.
  
  Section 2.3.2: ICV Initialization, pg 47:
  Table 2.1:
  def-sched-var   No environment variable      Initial value is implementation 
defined
  
  Section 2.7.1.1: Determining the Schedule of a Worksharing Loop
  When execution encounters a loop directive, the schedule clause (if any) on 
the directive, and the run-sched-var and def-sched-var ICVs are used to 
determine how loop iterations are assigned to threads. See Section 2.3 on page 
36 for details of how the values of the ICVs are determined. If the loop 
directive does not have a schedule clause then the current value of the 
def-sched-var ICV determines the schedule.

I've reworked the patch to handle the default scheduling in Sema and removed 
the function from OpenMPKind.cpp.  Please let me know if this looks good.

I can rewrite the patch as you suggested, involving NVPTX specific RT, but I 
think the code will look quite ugly.


https://reviews.llvm.org/D29910

Files:
  include/clang/AST/StmtOpenMP.h
  include/clang/Basic/OpenMPKinds.h
  lib/AST/StmtOpenMP.cpp
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/Sema/SemaOpenMP.cpp
  test/OpenMP/nvptx_coalesced_scheduling_codegen.cpp

Index: test/OpenMP/nvptx_coalesced_scheduling_codegen.cpp
===================================================================
--- /dev/null
+++ test/OpenMP/nvptx_coalesced_scheduling_codegen.cpp
@@ -0,0 +1,322 @@
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// Check that the execution mode of the target regions on the gpu is set to the right mode.
+// CHECK-DAG: {{@__omp_offloading_.+l19}}_exec_mode = weak constant i8 0
+
+template<typename tx>
+tx ftemplate() {
+  tx a[100];
+  tx b[10][10];
+
+  #pragma omp target parallel
+  {
+    #pragma omp for
+    for (int i = 0; i < 99; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(auto)
+    for (int i = 0; i < 98; i++) {
+      a[i] = 2;
+    }
+
+    #pragma omp for schedule(static,1)
+    for (int i = 0; i < 97; i++) {
+      a[i] = 3;
+    }
+
+    #pragma omp for schedule(static,2)
+    for (int i = 0; i < 96; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(static)
+    for (int i = 0; i < 95; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(auto) ordered
+    for (int i = 0; i < 94; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(runtime)
+    for (int i = 0; i < 93; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(dynamic)
+    for (int i = 0; i < 92; i++) {
+      a[i] = 1;
+    }
+
+    #pragma omp for schedule(guided)
+    for (int i = 0; i < 91; i++) {
+      a[i] = 1;
+    }
+  }
+
+  return a[0] + b[9][9];
+}
+
+int bar(){
+  int a = 0;
+
+  a += ftemplate<int>();
+
+  return a;
+}
+
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l19}}(
+  // CHECK: call void @__kmpc_spmd_kernel_init(
+  // CHECK: br label {{%?}}[[EXEC:.+]]
+  //
+  // CHECK: [[EXEC]]
+  // CHECK: {{call|invoke}} void [[OP1:@.+]](i32*
+  // CHECK: br label {{%?}}[[DONE:.+]]
+  //
+  // CHECK: [[DONE]]
+  // CHECK: call void @__kmpc_spmd_kernel_deinit()
+  // CHECK: br label {{%?}}[[EXIT:.+]]
+  //
+  // CHECK: [[EXIT]]
+  // CHECK: ret void
+  // CHECK: }
+
+  // CHECK: define internal void [[OP1]](
+
+  // No schedule clause.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 98, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 33, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]], i32 1, i32 1)
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  // CHECK: br label {{%?}}[[FOR_COND:.+]]
+  //
+  // CHECK: [[FOR_COND]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[IV]], 99
+  // CHECK: br i1 [[CMP]], label {{%?}}[[FOR_BODY:.+]], label {{%?}}[[FOR_END:.+]]
+  //
+  // [[FOR_BODY]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+  // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
+  // CHECK: store i32 [[ADD]], i32* [[I_PTR:%.+]], align
+  // CHECK: [[I:%.+]] = load i32, i32* [[I_PTR]], align
+  // CHECK-32: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i32 0, i32 [[I]]
+  // CHECK-64: [[IDX:%.+]] = sext i32 [[I]] to i64
+  // CHECK-64: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i64 0, i64 [[IDX]]
+  // CHECK: store i32 1, i32* [[ELEM_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_CONT:.+]]
+  //
+  // CHECK: [[FOR_CONT]]
+  // CHECK: br label {{%?}}[[FOR_INC:.+]]
+  //
+  // CHECK: [[FOR_INC]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ST:%.+]] = load i32, i32* [[ST_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], [[ST]]
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_COND]]
+  //
+  // CHECK: [[FOR_END]]
+
+
+
+  // schedule(auto) clause.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 97, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 33, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]], i32 1, i32 1)
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  // CHECK: br label {{%?}}[[FOR_COND:.+]]
+  //
+  // CHECK: [[FOR_COND]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[IV]], 98
+  // CHECK: br i1 [[CMP]], label {{%?}}[[FOR_BODY:.+]], label {{%?}}[[FOR_END:.+]]
+  //
+  // [[FOR_BODY]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+  // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
+  // CHECK: store i32 [[ADD]], i32* [[I_PTR:%.+]], align
+  // CHECK: [[I:%.+]] = load i32, i32* [[I_PTR]], align
+  // CHECK-32: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i32 0, i32 [[I]]
+  // CHECK-64: [[IDX:%.+]] = sext i32 [[I]] to i64
+  // CHECK-64: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i64 0, i64 [[IDX]]
+  // CHECK: store i32 2, i32* [[ELEM_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_CONT:.+]]
+  //
+  // CHECK: [[FOR_CONT]]
+  // CHECK: br label {{%?}}[[FOR_INC:.+]]
+  //
+  // CHECK: [[FOR_INC]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ST:%.+]] = load i32, i32* [[ST_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], [[ST]]
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_COND]]
+  //
+  // CHECK: [[FOR_END]]
+
+
+
+  // schedule(static,1) clause.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 96, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 33, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]], i32 1, i32 1)
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  // CHECK: br label {{%?}}[[FOR_COND:.+]]
+  //
+  // CHECK: [[FOR_COND]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[IV]], 97
+  // CHECK: br i1 [[CMP]], label {{%?}}[[FOR_BODY:.+]], label {{%?}}[[FOR_END:.+]]
+  //
+  // [[FOR_BODY]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+  // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]]
+  // CHECK: store i32 [[ADD]], i32* [[I_PTR:%.+]], align
+  // CHECK: [[I:%.+]] = load i32, i32* [[I_PTR]], align
+  // CHECK-32: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i32 0, i32 [[I]]
+  // CHECK-64: [[IDX:%.+]] = sext i32 [[I]] to i64
+  // CHECK-64: [[ELEM_PTR:%.+]] = getelementptr inbounds [100 x i32], [100 x i32]* {{%.+}}, i64 0, i64 [[IDX]]
+  // CHECK: store i32 3, i32* [[ELEM_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_CONT:.+]]
+  //
+  // CHECK: [[FOR_CONT]]
+  // CHECK: br label {{%?}}[[FOR_INC:.+]]
+  //
+  // CHECK: [[FOR_INC]]
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ST:%.+]] = load i32, i32* [[ST_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], [[ST]]
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+  // CHECK: br label {{%?}}[[FOR_COND]]
+  //
+  // CHECK: [[FOR_END]]
+
+
+
+  // schedule(static,2) clause.  Non-coalesced codegen.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 95, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 33, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]], i32 1, i32 2)
+  // CHECK: br label {{%?}}[[DISPATCH_COND:.+]]
+  //
+  // CHECK: [[DISPATCH_COND]]
+  // CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]], align
+  // CHECK: = icmp sgt i32 [[UB]], 95
+  //
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  //
+  // CHECK:  = getelementptr
+  //
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], 1
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+
+
+
+  // schedule(static) clause.  Non-coalesced codegen.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 94, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_for_static_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 34, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]], i32 1, i32 1)
+  // CHECK: [[UB:%.+]] = load i32, i32* [[UB_PTR]], align
+  // CHECK: = icmp sgt i32 [[UB]], 94
+  //
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  //
+  // CHECK:  = getelementptr
+  //
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], 1
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+
+
+
+  // schedule(auto) ordered clause.  Non-coalesced codegen.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 93, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_dispatch_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 70
+  // CHECK: call i32 @__kmpc_dispatch_next_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]])
+  //
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  //
+  // CHECK:  = getelementptr
+  //
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], 1
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+
+
+
+  // schedule(runtime) clause.  Non-coalesced codegen.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 92, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_dispatch_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 37
+  // CHECK: call i32 @__kmpc_dispatch_next_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]])
+  //
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  //
+  // CHECK:  = getelementptr
+  //
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], 1
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+
+
+
+  // schedule(dynamic) clause.  Non-coalesced codegen.
+  //
+  // CHECK: store i32 0, i32* [[LB_PTR:%.+]], align
+  // CHECK: store i32 91, i32* [[UB_PTR:%.+]], align
+  // CHECK: store i32 1, i32* [[ST_PTR:%.+]], align
+  // CHECK: call void @__kmpc_dispatch_init_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32 35
+  // CHECK: call i32 @__kmpc_dispatch_next_4(%ident_t* {{@.+}}, i32 {{%.+}}, i32* {{%.+}}, i32* [[LB_PTR]], i32* [[UB_PTR]], i32* [[ST_PTR]])
+  //
+  // CHECK: [[LB:%.+]] = load i32, i32* [[LB_PTR]], align
+  // CHECK: store i32 [[LB]], i32* [[IV_PTR:%.+]], align
+  //
+  // CHECK:  = getelementptr
+  //
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_PTR]], align
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[IV]], 1
+  // CHECK: store i32 [[ADD]], i32* [[IV_PTR]], align
+
+
+
+  // CHECK: ret void
+  // CHECK: }
+
+#endif
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -3855,15 +3855,71 @@
   return PostUpdate;
 }
 
+/// Get the default schedule type for any loop-based OpenMP directive,
+/// specialized for a particular target.  This is used to guide codegen
+/// if a) no 'schedule' clause is specified, or b) a 'schedule' type of
+/// 'auto' is specified by the user.
+static OpenMPDefaultScheduleKind
+getDefaultSchedule(Sema &S, OpenMPDirectiveKind Kind,
+                   ArrayRef<OMPClause *> Clauses) {
+  OpenMPDefaultScheduleKind DefaultSchedule = OMPDSK_unknown;
+
+  if (S.getLangOpts().OpenMPIsDevice &&
+      S.Context.getTargetInfo().getTriple().isNVPTX()) {
+    // Force a schedule type of (static,1) if there is no schedule clause, or
+    // the user specifies schedule(auto) or schedule(static,1).
+    bool ChunkSizeOne = false;
+    auto ScheduleKind = OMPC_SCHEDULE_unknown;
+    auto ScheduleClause =
+        OMPExecutableDirective::getClausesOfKind<OMPScheduleClause>(Clauses);
+    if (ScheduleClause.begin() != ScheduleClause.end()) {
+      ScheduleKind = (*ScheduleClause.begin())->getScheduleKind();
+      if (const auto *Ch = (*ScheduleClause.begin())->getChunkSize()) {
+        if (!Ch->isValueDependent() && !Ch->isTypeDependent() &&
+            !Ch->isInstantiationDependent() &&
+            !Ch->containsUnexpandedParameterPack()) {
+          SourceLocation ChLoc = Ch->getLocStart();
+          ExprResult Val = S.PerformOpenMPImplicitIntegerConversion(
+              ChLoc, const_cast<Expr *>(Ch));
+          if (!Val.isInvalid()) {
+            Expr *ValExpr = Val.get();
+            llvm::APSInt Result;
+            ChunkSizeOne = ValExpr->isIntegerConstantExpr(Result, S.Context) &&
+                           Result == 1;
+          }
+        }
+      }
+    }
+
+    // Ordered clause requires dynamic dispatch.
+    auto OrderedClause =
+        OMPExecutableDirective::getClausesOfKind<OMPOrderedClause>(Clauses);
+    bool Ordered = OrderedClause.begin() != OrderedClause.end();
+
+    bool StaticOneSchedule =
+        (!Ordered && (ScheduleKind == OMPC_SCHEDULE_unknown ||
+                      ScheduleKind == OMPC_SCHEDULE_auto ||
+                      (ScheduleKind == OMPC_SCHEDULE_static && ChunkSizeOne)));
+
+    if (StaticOneSchedule)
+      DefaultSchedule = OMPDSK_static_chunkone;
+  }
+
+  return DefaultSchedule;
+}
+
 /// \brief Called on a for stmt to check itself and nested loops (if any).
 /// \return Returns 0 if one of the collapsed stmts is not canonical for loop,
 /// number of collapsed loops otherwise.
 static unsigned
-CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
-                Expr *OrderedLoopCountExpr, Stmt *AStmt, Sema &SemaRef,
-                DSAStackTy &DSA,
+CheckOpenMPLoop(OpenMPDirectiveKind DKind, ArrayRef<OMPClause *> Clauses,
+                Expr *CollapseLoopCountExpr, Expr *OrderedLoopCountExpr,
+                Stmt *AStmt, Sema &SemaRef, DSAStackTy &DSA,
                 llvm::DenseMap<ValueDecl *, Expr *> &VarsWithImplicitDSA,
                 OMPLoopDirective::HelperExprs &Built) {
+  OpenMPDefaultScheduleKind DefaultScheduleKind =
+      getDefaultSchedule(SemaRef, DKind, Clauses);
+
   unsigned NestedLoopCount = 1;
   if (CollapseLoopCountExpr) {
     // Found 'collapse' clause - calculate collapse number.
@@ -4136,17 +4192,20 @@
   // Loop condition (IV < NumIterations) or (IV <= UB) for worksharing loops.
   SourceLocation CondLoc;
   ExprResult Cond =
-      (isOpenMPWorksharingDirective(DKind) ||
-       isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind))
+      (DefaultScheduleKind != OMPDSK_static_chunkone &&
+       (isOpenMPWorksharingDirective(DKind) ||
+        isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)))
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
 
-  // Loop increment (IV = IV + 1)
+  // Loop increment (IV = IV + 1) or (IV = IV + ST) if (static,1) scheduling.
   SourceLocation IncLoc;
   ExprResult Inc =
-      SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, IV.get(),
-                         SemaRef.ActOnIntegerConstant(IncLoc, 1).get());
+      DefaultScheduleKind == OMPDSK_static_chunkone
+          ? SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, IV.get(), ST.get())
+          : SemaRef.BuildBinOp(CurScope, IncLoc, BO_Add, IV.get(),
+                               SemaRef.ActOnIntegerConstant(IncLoc, 1).get());
   if (!Inc.isUsable())
     return 0;
   Inc = SemaRef.BuildBinOp(CurScope, IncLoc, BO_Assign, IV.get(), Inc.get());
@@ -4295,6 +4354,7 @@
   Built.NUB = NextUB.get();
   Built.PrevLB = PrevLB.get();
   Built.PrevUB = PrevUB.get();
+  Built.DefaultScheduleKind = DefaultScheduleKind;
 
   Expr *CounterVal = SemaRef.DefaultLvalueConversion(IV.get()).get();
   // Fill data for doacross depend clauses.
@@ -4417,9 +4477,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_simd, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses),
-      AStmt, *this, *DSAStack, VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount =
+      CheckOpenMPLoop(OMPD_simd, Clauses, getCollapseNumberExpr(Clauses),
+                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+                      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -4456,9 +4517,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_for, getCollapseNumberExpr(Clauses), getOrderedNumberExpr(Clauses),
-      AStmt, *this, *DSAStack, VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount =
+      CheckOpenMPLoop(OMPD_for, Clauses, getCollapseNumberExpr(Clauses),
+                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+                      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -4493,7 +4555,7 @@
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_for_simd, getCollapseNumberExpr(Clauses),
+      CheckOpenMPLoop(OMPD_for_simd, Clauses, getCollapseNumberExpr(Clauses),
                       getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
                       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -4694,10 +4756,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_parallel_for, getCollapseNumberExpr(Clauses),
-                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
-                      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_parallel_for, Clauses, getCollapseNumberExpr(Clauses),
+      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -4739,10 +4801,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_parallel_for_simd, getCollapseNumberExpr(Clauses),
-                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
-                      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_parallel_for_simd, Clauses, getCollapseNumberExpr(Clauses),
+      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -5697,10 +5759,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_target_parallel_for, getCollapseNumberExpr(Clauses),
-                      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
-                      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_target_parallel_for, Clauses, getCollapseNumberExpr(Clauses),
+      getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -5905,7 +5967,7 @@
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_taskloop, getCollapseNumberExpr(Clauses),
+      CheckOpenMPLoop(OMPD_taskloop, Clauses, getCollapseNumberExpr(Clauses),
                       /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack,
                       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -5936,10 +5998,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_taskloop_simd, getCollapseNumberExpr(Clauses),
-                      /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack,
-                      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_taskloop_simd, Clauses, getCollapseNumberExpr(Clauses),
+      /*OrderedLoopCountExpr=*/nullptr, AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -5980,7 +6042,7 @@
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_distribute, getCollapseNumberExpr(Clauses),
+      CheckOpenMPLoop(OMPD_distribute, Clauses, getCollapseNumberExpr(Clauses),
                       nullptr /*ordered not a clause on distribute*/, AStmt,
                       *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -6013,7 +6075,7 @@
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_distribute_parallel_for, getCollapseNumberExpr(Clauses),
+      OMPD_distribute_parallel_for, Clauses, getCollapseNumberExpr(Clauses),
       nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -6045,10 +6107,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount =
+      CheckOpenMPLoop(OMPD_distribute_parallel_for_simd, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6081,10 +6144,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_distribute_simd, getCollapseNumberExpr(Clauses),
-                      nullptr /*ordered not a clause on distribute*/, AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_distribute_simd, Clauses, getCollapseNumberExpr(Clauses),
+      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6118,7 +6181,7 @@
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_parallel_for_simd, getCollapseNumberExpr(Clauses),
+      OMPD_target_parallel_for_simd, Clauses, getCollapseNumberExpr(Clauses),
       getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -6164,7 +6227,7 @@
   // In presence of clause 'collapse' with number of loops, it will define the
   // nested loops number.
   unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_target_simd, getCollapseNumberExpr(Clauses),
+      CheckOpenMPLoop(OMPD_target_simd, Clauses, getCollapseNumberExpr(Clauses),
                       getOrderedNumberExpr(Clauses), AStmt, *this, *DSAStack,
                       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -6210,10 +6273,10 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount =
-      CheckOpenMPLoop(OMPD_teams_distribute, getCollapseNumberExpr(Clauses),
-                      nullptr /*ordered not a clause on distribute*/, AStmt,
-                      *this, *DSAStack, VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount = CheckOpenMPLoop(
+      OMPD_teams_distribute, Clauses, getCollapseNumberExpr(Clauses),
+      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6244,7 +6307,7 @@
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_teams_distribute_simd, getCollapseNumberExpr(Clauses),
+      OMPD_teams_distribute_simd, Clauses, getCollapseNumberExpr(Clauses),
       nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
       VarsWithImplicitDSA, B);
 
@@ -6291,10 +6354,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_teams_distribute_parallel_for_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  auto NestedLoopCount =
+      CheckOpenMPLoop(OMPD_teams_distribute_parallel_for_simd, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
 
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6339,10 +6403,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  unsigned NestedLoopCount = CheckOpenMPLoop(
-      OMPD_teams_distribute_parallel_for, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  unsigned NestedLoopCount =
+      CheckOpenMPLoop(OMPD_teams_distribute_parallel_for, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
 
   if (NestedLoopCount == 0)
     return StmtError();
@@ -6406,8 +6471,7 @@
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
   auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute,
-      getCollapseNumberExpr(Clauses),
+      OMPD_target_teams_distribute, Clauses, getCollapseNumberExpr(Clauses),
       nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
       VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
@@ -6439,11 +6503,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute_parallel_for,
-      getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  auto NestedLoopCount =
+      CheckOpenMPLoop(OMPD_target_teams_distribute_parallel_for, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6484,11 +6548,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute_parallel_for_simd,
-      getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  auto NestedLoopCount =
+      CheckOpenMPLoop(OMPD_target_teams_distribute_parallel_for_simd, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
@@ -6530,10 +6594,11 @@
   OMPLoopDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
   // define the nested loops number.
-  auto NestedLoopCount = CheckOpenMPLoop(
-      OMPD_target_teams_distribute_simd, getCollapseNumberExpr(Clauses),
-      nullptr /*ordered not a clause on distribute*/, AStmt, *this, *DSAStack,
-      VarsWithImplicitDSA, B);
+  auto NestedLoopCount =
+      CheckOpenMPLoop(OMPD_target_teams_distribute_simd, Clauses,
+                      getCollapseNumberExpr(Clauses),
+                      nullptr /*ordered not a clause on distribute*/, AStmt,
+                      *this, *DSAStack, VarsWithImplicitDSA, B);
   if (NestedLoopCount == 0)
     return StmtError();
 
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2187,31 +2187,57 @@
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+      // For NVPTX and other GPU targets high performance is often achieved
+      // if adjacent threads access memory in a coalesced manner.  This is
+      // true for loops that access memory with stride one if a static
+      // schedule with chunk size of 1 is used.  We generate such code
+      // whenever the OpenMP standard gives us freedom to do so.
+      //
+      // This case is called if there is no schedule clause, with a
+      // schedule(auto), or with a schedule(static,1).
+      //
+      // Codegen is optimized for this case.  Since chunk size is 1 we do not
+      // need to generate the inner loop, i.e., the chunk iterator can be
+      // removed.
+      // while(idx < GlobalUB) {
+      //   BODY;
+      //   idx += ST;
+      // }
+      if (S.getDefaultSchedule() == OMPDSK_static_chunkone) {
+        ScheduleKind.Schedule = OMPC_SCHEDULE_static;
+        if (!Chunk) // Force use of chunk=1
+          Chunk = Builder.getIntN(IVSize, 1);
+      }
       // OpenMP 4.5, 2.7.1 Loop Construct, Description.
       // If the static schedule kind is specified or if the ordered clause is
       // specified, and if no monotonic modifier is specified, the effect will
       // be as if the monotonic modifier was specified.
-      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
-                                /* Chunked */ Chunk != nullptr) &&
-          !Ordered) {
+      if (S.getDefaultSchedule() == OMPDSK_static_chunkone ||
+          (RT.isStaticNonchunked(ScheduleKind.Schedule,
+                                 /* Chunked */ Chunk != nullptr) &&
+           !Ordered)) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
         // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
         // When no chunk_size is specified, the iteration space is divided into
         // chunks that are approximately equal in size, and at most one chunk is
         // distributed to each thread. Note that the size of the chunks is
         // unspecified in this case.
-        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind,
-                             IVSize, IVSigned, Ordered,
-                             IL.getAddress(), LB.getAddress(),
-                             UB.getAddress(), ST.getAddress());
+        RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+                             IVSigned, Ordered, IL.getAddress(),
+                             LB.getAddress(), UB.getAddress(), ST.getAddress(),
+                             Chunk);
         auto LoopExit =
             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
-        // UB = min(UB, GlobalUB);
-        EmitIgnoredExpr(S.getEnsureUpperBound());
+        if (S.getDefaultSchedule() != OMPDSK_static_chunkone) {
+          // UB = min(UB, GlobalUB);
+          EmitIgnoredExpr(S.getEnsureUpperBound());
+        }
         // IV = LB;
         EmitIgnoredExpr(S.getInit());
         // while (idx <= UB) { BODY; ++idx; }
+        // if OMPDSK_static_chunkone:
+        //   while (idx <= GlobalUB) { BODY; idx += ST; }
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
                          S.getInc(),
                          [&S, LoopExit](CodeGenFunction &CGF) {
Index: lib/AST/StmtOpenMP.cpp
===================================================================
--- lib/AST/StmtOpenMP.cpp
+++ lib/AST/StmtOpenMP.cpp
@@ -105,6 +105,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -156,6 +157,7 @@
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
   Dir->setHasCancel(HasCancel);
+  Dir->setDefaultSchedule(Exprs.DefaultScheduleKind);
   return Dir;
 }
 
@@ -207,6 +209,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -373,6 +376,7 @@
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
   Dir->setHasCancel(HasCancel);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -423,6 +427,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -760,6 +765,7 @@
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
   Dir->setHasCancel(HasCancel);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1003,6 +1009,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1077,6 +1084,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1133,6 +1141,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1188,6 +1197,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1242,6 +1252,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1336,6 +1347,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1389,6 +1401,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1444,6 +1457,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1502,6 +1516,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1582,6 +1597,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1640,6 +1656,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1701,6 +1718,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
@@ -1759,6 +1777,7 @@
   Dir->setUpdates(Exprs.Updates);
   Dir->setFinals(Exprs.Finals);
   Dir->setPreInits(Exprs.PreInits);
+  // TODO: Set default schedule.
   return Dir;
 }
 
Index: include/clang/Basic/OpenMPKinds.h
===================================================================
--- include/clang/Basic/OpenMPKinds.h
+++ include/clang/Basic/OpenMPKinds.h
@@ -127,6 +127,9 @@
   OpenMPScheduleClauseModifier M2 = OMPC_SCHEDULE_MODIFIER_unknown;
 };
 
+/// Default schedule type for any loop-based (#for) OpenMP directive.
+enum OpenMPDefaultScheduleKind { OMPDSK_static_chunkone, OMPDSK_unknown };
+
 OpenMPDirectiveKind getOpenMPDirectiveKind(llvm::StringRef Str);
 const char *getOpenMPDirectiveName(OpenMPDirectiveKind Kind);
 
Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -314,6 +314,9 @@
   friend class ASTStmtReader;
   /// \brief Number of collapsed loops as specified by 'collapse' clause.
   unsigned CollapsedNum;
+  /// \brief DefaultScheduleKind - Schedule type to use for a given target
+  /// if no 'schedule' clause or a 'schedule' type 'auto' is specified.
+  OpenMPDefaultScheduleKind DefaultScheduleKind;
 
   /// \brief Offsets to the stored exprs.
   /// This enumeration contains offsets to all the pointers to children
@@ -412,7 +415,7 @@
       : OMPExecutableDirective(That, SC, Kind, StartLoc, EndLoc, NumClauses,
                                numLoopChildren(CollapsedNum, Kind) +
                                    NumSpecialChildren),
-        CollapsedNum(CollapsedNum) {}
+        CollapsedNum(CollapsedNum), DefaultScheduleKind(OMPDSK_unknown) {}
 
   /// \brief Offset to the start of children expression arrays.
   static unsigned getArraysOffset(OpenMPDirectiveKind Kind) {
@@ -521,6 +524,9 @@
            "expected worksharing loop directive");
     *std::next(child_begin(), PrevUpperBoundVariableOffset) = PrevUB;
   }
+  void setDefaultSchedule(OpenMPDefaultScheduleKind SK) {
+    DefaultScheduleKind = SK;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -567,6 +573,9 @@
     /// \brief PreviousUpperBound - local variable passed to runtime in the
     /// enclosing schedule or null if that does not apply.
     Expr *PrevUB;
+    /// \brief DefaultScheduleKind - Schedule type to use for the given target
+    /// if no 'schedule' clause or a 'schedule' type 'auto' is specified.
+    OpenMPDefaultScheduleKind DefaultScheduleKind;
     /// \brief Counters Loop counters.
     SmallVector<Expr *, 4> Counters;
     /// \brief PrivateCounters Loop counters.
@@ -608,6 +617,7 @@
       NumIterations = nullptr;
       PrevLB = nullptr;
       PrevUB = nullptr;
+      DefaultScheduleKind = OMPDSK_unknown;
       Counters.resize(Size);
       PrivateCounters.resize(Size);
       Inits.resize(Size);
@@ -739,6 +749,9 @@
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), PrevUpperBoundVariableOffset)));
   }
+  OpenMPDefaultScheduleKind getDefaultSchedule() const {
+    return DefaultScheduleKind;
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     Stmt *Body = getAssociatedStmt()->IgnoreContainers(true);

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D29910: [OpenMP] Specialize default schedule on a worksharing loop on the NVPTX device.

Reply via email to