gtbercea updated this revision to Diff 171081.
gtbercea added a comment.

  Refactor the chunk-size-one check.
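
  For context, the construct this change targets is a combined 'distribute
  parallel for' whose schedule is chunked with a chunk size of 1 (written
  explicitly or chosen as the NVPTX default). A minimal sketch of such a
  kernel is below; the function and variable names are illustrative only and
  not taken from the patch:

    // Hypothetical kernel: with schedule(static, 1) the distribute chunks and
    // the 'for' chunks coincide, so a single combined loop can be emitted.
    void vec_add(int *a, const int *b, const int *c, int N) {
    #pragma omp target teams
    #pragma omp distribute parallel for schedule(static, 1)
      for (int i = 0; i < N; ++i)
        a[i] = b[i] + c[i];
    }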


Repository:
  rC Clang

https://reviews.llvm.org/D53448

Files:
  include/clang/AST/StmtOpenMP.h
  lib/AST/StmtOpenMP.cpp
  lib/CodeGen/CGOpenMPRuntime.cpp
  lib/CodeGen/CGOpenMPRuntime.h
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  lib/CodeGen/CGOpenMPRuntimeNVPTX.h
  lib/CodeGen/CGStmtOpenMP.cpp
  lib/Sema/SemaOpenMP.cpp
  lib/Serialization/ASTReaderStmt.cpp
  lib/Serialization/ASTWriterStmt.cpp
  test/OpenMP/distribute_parallel_for_codegen.cpp
  test/OpenMP/distribute_parallel_for_simd_codegen.cpp
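
  The updated distribute_parallel_for codegen tests below now check for a
  single combined loop instead of the previous outer 'distribute' loop with an
  inner chunk loop. A rough C++ sketch of the loop shape the tests expect,
  paraphrasing the comment added in CGStmtOpenMP.cpp (the helper name and
  parameters here are made up for illustration):

    #include <algorithm>
    #include <functional>

    // Combined loop for 'distribute parallel for' with a static chunked
    // schedule: re-clamp UB and reset IV on every step instead of running a
    // separate outer distribute loop around an inner chunk loop.
    void combinedDistParForLoop(int LB, int UB, int ST, int GlobalUB,
                                const std::function<void(int, int)> &Body) {
      UB = std::min(UB, GlobalUB);   // UB = min(UB, GlobalUB)
      int IV = LB;                   // IV = LB
      while (IV <= GlobalUB) {       // combined distribute condition
        Body(LB, UB);                // <rest of the pragma> over [LB, UB]
        LB += ST;                    // NextLB
        UB += ST;                    // NextUB
        UB = std::min(UB, GlobalUB); // re-clamp UB
        IV = LB;                     // IV = LB
      }
    }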

Index: test/OpenMP/distribute_parallel_for_simd_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_simd_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_simd_codegen.cpp
@@ -406,18 +406,16 @@
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -436,18 +434,9 @@
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -466,25 +455,39 @@
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1154,18 +1157,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1184,18 +1186,9 @@
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1214,25 +1207,39 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1867,18 +1874,17 @@
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1897,18 +1903,9 @@
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1927,25 +1924,39 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 
Index: test/OpenMP/distribute_parallel_for_codegen.cpp
===================================================================
--- test/OpenMP/distribute_parallel_for_codegen.cpp
+++ test/OpenMP/distribute_parallel_for_codegen.cpp
@@ -407,18 +407,16 @@
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
       // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
+      // LAMBDA-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
       // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
       // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
       // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -437,18 +435,9 @@
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
       // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -467,25 +456,39 @@
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
       // LAMBDA: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1155,18 +1158,17 @@
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
     // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+    // CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
     // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
     // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
     // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1185,18 +1187,9 @@
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
     // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1215,25 +1208,39 @@
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
     // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
     // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1868,18 +1875,17 @@
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
 // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
+// CHECK-DAG: [[OMP_CAPT_EXPR:%.capture_expr.1]] = alloca
 // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
 // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
 // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1898,18 +1904,9 @@
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_CAPT_EXPR]],
 // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1928,25 +1925,39 @@
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
 // CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
 // CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}} [[OMP_CAPT_EXPR]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}} [[OMP_CAPT_EXPR]],
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 
Index: lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -1854,6 +1854,8 @@
     Record.AddStmt(D->getCombinedCond());
     Record.AddStmt(D->getCombinedNextLowerBound());
     Record.AddStmt(D->getCombinedNextUpperBound());
+    Record.AddStmt(D->getCombinedDistCond());
+    Record.AddStmt(D->getCombinedParForInDistCond());
   }
   for (auto I : D->counters()) {
     Record.AddStmt(I);
Index: lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -1856,6 +1856,8 @@
     D->setCombinedCond(Record.readSubExpr());
     D->setCombinedNextLowerBound(Record.readSubExpr());
     D->setCombinedNextUpperBound(Record.readSubExpr());
+    D->setCombinedDistCond(Record.readSubExpr());
+    D->setCombinedParForInDistCond(Record.readSubExpr());
   }
   SmallVector<Expr *, 4> Sub;
   unsigned CollapsedNum = D->getCollapsedNumber();
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -354,7 +354,7 @@
       return OMPD_unknown;
     return std::next(Stack.back().first.rbegin())->Directive;
   }
-  
+
   /// Add requires decl to internal vector
   void addRequiresDecl(OMPRequiresDecl *RD) {
     RequiresDecls.push_back(RD);
@@ -381,7 +381,7 @@
     }
     return IsDuplicate;
   }
-  
+
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
     assert(!isStackEmpty());
@@ -5201,6 +5201,12 @@
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
+  ExprResult CombDistCond;
+  if (isOpenMPLoopBoundSharingDirective(DKind)) {
+    CombDistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(),
+                                      LastIteration.get());
+  }
+
   ExprResult CombCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     CombCond =
@@ -5275,7 +5281,7 @@
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
   SourceLocation DistIncLoc = AStmt->getBeginLoc();
-  ExprResult DistCond, DistInc, PrevEUB;
+  ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
     assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5298,6 +5304,11 @@
     PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
                                  CondOp.get());
     PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+    // Build IV <= PrevEUB to be used when 'parallel for' is combined with
+    // a 'distribute' directive with schedule(static, 1).
+    ParForInDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
   }
 
   // Build updates and final values of the loop counters.
@@ -5421,6 +5432,8 @@
   Built.DistCombinedFields.Cond = CombCond.get();
   Built.DistCombinedFields.NLB = CombNextLB.get();
   Built.DistCombinedFields.NUB = CombNextUB.get();
+  Built.DistCombinedFields.DistCond = CombDistCond.get();
+  Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();
 
   return NestedLoopCount;
 }
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -2006,7 +2006,7 @@
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
 
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@
       (void)LoopScope.Privatize();
 
       // Detect the loop schedule kind and chunk.
-      llvm::Value *Chunk = nullptr;
+      const Expr *ChunkExpr = nullptr;
       OpenMPScheduleTy ScheduleKind;
       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
         ScheduleKind.Schedule = C->getScheduleKind();
         ScheduleKind.M1 = C->getFirstScheduleModifier();
         ScheduleKind.M2 = C->getSecondScheduleModifier();
-        if (const Expr *Ch = C->getChunkSize()) {
-          Chunk = EmitScalarExpr(Ch);
-          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                       S.getIterationVariable()->getType(),
-                                       S.getBeginLoc());
-        }
+        ChunkExpr = C->getChunkSize();
       } else {
         // Default behaviour for schedule clause.
         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
-            *this, S, ScheduleKind.Schedule, Chunk);
+            *this, S, ScheduleKind.Schedule, ChunkExpr);
+      }
+      bool HasChunkSizeOne = false;
+      llvm::Value *Chunk = nullptr;
+      if (ChunkExpr) {
+        Chunk = EmitScalarExpr(ChunkExpr);
+        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+                                     S.getIterationVariable()->getType(),
+                                     S.getBeginLoc());
+        llvm::APSInt EvaluatedChunk;
+        if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2357,6 +2363,42 @@
                                                          S.getDirectiveKind());
         };
         OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+      } else if (RT.isStaticChunked(ScheduleKind.Schedule,
+                                    /* Chunked */ Chunk != nullptr) &&
+                 HasChunkSizeOne &&
+                 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        if (isOpenMPSimdDirective(S.getDirectiveKind()))
+          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+        CGOpenMPRuntime::StaticRTInput StaticInit(
+            IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
+            UB.getAddress(), ST.getAddress(), Chunk);
+        RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
+                             ScheduleKind, StaticInit);
+        JumpDest LoopExit =
+            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+        // IV = LB;
+        EmitIgnoredExpr(S.getInit());
+
+        // Generate the following loop:
+        //
+        // while (IV <= PrevUB) {
+        //   BODY;
+        //   IV += ST;
+        // }
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                         S.getCombinedParForInDistCond(), S.getDistInc(),
+                         [&S, LoopExit](CodeGenFunction &CGF) {
+                           CGF.EmitOMPLoopBody(S, LoopExit);
+                           CGF.EmitStopPoint(&S);
+                         },
+                         [&](CodeGenFunction &) {});
+        EmitBlock(LoopExit.getBlock());
+        // Tell the runtime we are done.
+        auto &&CodeGen = [&S](CodeGenFunction &CGF) {
+          CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
+                                                         S.getDirectiveKind());
+        };
+        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
       } else {
         const bool IsMonotonic =
             Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
@@ -3370,7 +3412,7 @@
                 ? S.getCombinedCond()
                 : S.getCond();
 
-        // for distribute alone,  codegen
+        // for distribute alone, codegen
         // while (idx <= UB) { BODY; ++idx; }
         // when combined with 'for' (e.g. as in 'distribute parallel for')
         // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
@@ -3382,6 +3424,53 @@
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
+      } else if (RT.isStaticChunked(ScheduleKind,
+                                    /* Chunked */ Chunk != nullptr) &&
+                 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) {
+        // Generate a single loop when distribute is in combination with
+        // other worksharing pragmas like parallel for.
+        if (isOpenMPSimdDirective(S.getDirectiveKind()))
+          EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+        CGOpenMPRuntime::StaticRTInput StaticInit(
+            IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
+            LB.getAddress(), UB.getAddress(), ST.getAddress(), Chunk);
+        RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
+                                    StaticInit);
+
+        JumpDest LoopExit =
+            getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
+        // UB = min(UB, GlobalUB);
+        EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+
+        // IV = LB;
+        EmitIgnoredExpr(S.getCombinedInit());
+
+        // IV <= GlobalUB;
+        const Expr *Cond = S.getCombinedDistCond();
+
+        // Generate the following loop:
+        //
+        // while (IV <= GlobalUB) {
+        //   <CodeGen rest of pragma>(LB, UB);
+        //   LB += ST;
+        //   UB += ST;
+        //   UB = min(UB, GlobalUB);
+        //   IV = LB;
+        // }
+        //
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+                         [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+                           CodeGenLoop(CGF, S, LoopExit);
+                         },
+                         [&S](CodeGenFunction &CGF) {
+                           CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                           CGF.EmitIgnoredExpr(S.getCombinedInit());
+                         });
+        EmitBlock(LoopExit.getBlock());
+        // Tell the runtime we are done.
+        RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -348,7 +348,7 @@
   /// Choose a default value for the schedule clause.
   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const override;
+      const Expr *&ChunkExpr) const override;
 
 private:
   /// Track the execution mode when codegening directives within a target
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4247,8 +4247,11 @@
 void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
     CodeGenFunction &CGF, const OMPLoopDirective &S,
     OpenMPScheduleClauseKind &ScheduleKind,
-    llvm::Value *&Chunk) const {
+    const Expr *&ChunkExpr) const {
   ScheduleKind = OMPC_SCHEDULE_static;
-  Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
-      S.getIterationVariable()->getType()), 1);
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+      SourceLocation());
 }
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -890,6 +890,20 @@
   virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
                                   bool Chunked) const;
 
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
   /// Check if the specified \a ScheduleKind is dynamic.
   /// This kind of worksharing directive is emitted without outer loop.
   /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1506,7 +1520,7 @@
   /// schedule clause.
   virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const {}
+      const Expr *&ChunkExpr) const {}
 
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3292,6 +3292,18 @@
   return Schedule == OMP_dist_sch_static;
 }
 
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                                      bool Chunked) const {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+  return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_dist_sch_static_chunked;
+}
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   OpenMPSchedType Schedule =
Index: lib/AST/StmtOpenMP.cpp
===================================================================
--- lib/AST/StmtOpenMP.cpp
+++ lib/AST/StmtOpenMP.cpp
@@ -1079,6 +1079,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1145,6 +1147,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1457,6 +1461,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1524,6 +1530,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1670,6 +1678,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1741,6 +1751,8 @@
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
Index: include/clang/AST/StmtOpenMP.h
===================================================================
--- include/clang/AST/StmtOpenMP.h
+++ include/clang/AST/StmtOpenMP.h
@@ -392,9 +392,11 @@
     CombinedConditionOffset = 25,
     CombinedNextLowerBoundOffset = 26,
     CombinedNextUpperBoundOffset = 27,
+    CombinedDistConditionOffset = 28,
+    CombinedParForInDistConditionOffset = 29,
     // Offset to the end (and start of the following counters/updates/finals
     // arrays) for combined distribute loop directives.
-    CombinedDistributeEnd = 28,
+    CombinedDistributeEnd = 30,
   };
 
   /// Get the counters storage.
@@ -605,6 +607,17 @@
            "expected loop bound sharing directive");
     *std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
   }
+  void setCombinedDistCond(Expr *CombDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+  }
+  void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(),
+               CombinedParForInDistConditionOffset) = CombParForInDistCond;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@
     /// Update of UpperBound for statically scheduled omp loops for
     /// outer loop in combined constructs (e.g. 'distribute parallel for')
     Expr *NUB;
+    /// Distribute loop condition used when composing 'omp distribute'
+    /// with 'omp for' in the same construct when the schedule is chunked.
+    Expr *DistCond;
+    /// 'omp parallel for' loop condition used when composed with
+    /// 'omp distribute' in the same construct and when the schedule is
+    /// chunked and the chunk size is 1.
+    Expr *ParForInDistCond;
   };
 
   /// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@
       DistCombinedFields.Cond = nullptr;
       DistCombinedFields.NLB = nullptr;
       DistCombinedFields.NUB = nullptr;
+      DistCombinedFields.DistCond = nullptr;
+      DistCombinedFields.ParForInDistCond = nullptr;
     }
   };
 
@@ -922,6 +944,18 @@
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), CombinedNextUpperBoundOffset)));
   }
+  Expr *getCombinedDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedDistConditionOffset)));
+  }
+  Expr *getCombinedParForInDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     const Stmt *Body =