Author: Tom Eccles
Date: 2025-11-27T16:19:01Z
New Revision: a11d5ea0ad65b3bb5a6846310b2b2cc91136d8b3

URL: 
https://github.com/llvm/llvm-project/commit/a11d5ea0ad65b3bb5a6846310b2b2cc91136d8b3
DIFF: 
https://github.com/llvm/llvm-project/commit/a11d5ea0ad65b3bb5a6846310b2b2cc91136d8b3.diff

LOG: Revert "[OMPIRBuilder] always leave PARALLEL via the same barrier 
(#164586)"

This reverts commit 0e5633fcd984b54acc071c2c982c1ff4691aa10f.

Added: 
    

Modified: 
    clang/test/OpenMP/cancel_codegen.cpp
    clang/test/OpenMP/critical_codegen.cpp
    clang/test/OpenMP/critical_codegen_attr.cpp
    clang/test/OpenMP/irbuilder_nested_parallel_for.c
    clang/test/OpenMP/masked_codegen.cpp
    clang/test/OpenMP/master_codegen.cpp
    clang/test/OpenMP/nested_loop_codegen.cpp
    clang/test/OpenMP/ordered_codegen.cpp
    clang/test/OpenMP/parallel_codegen.cpp
    flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll
    llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
    mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
    mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir
    mlir/test/Target/LLVMIR/openmp-cancel.mlir
    mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir
    mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
    mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
    mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
    mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
    mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir

Removed: 
    


################################################################################
diff  --git a/clang/test/OpenMP/cancel_codegen.cpp 
b/clang/test/OpenMP/cancel_codegen.cpp
index 6090a91b6a3d9..16e7542a8e826 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -774,6 +774,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM12]])
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER:%.*]]
 // CHECK3:       omp_section_loop.after:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTERSECTIONS_FINI:%.*]]
+// CHECK3:       omp_section_loop.aftersections.fini:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_PREHEADER13:%.*]]
 // CHECK3:       omp_section_loop.preheader13:
 // CHECK3-NEXT:    store i32 0, ptr [[P_LOWERBOUND29]], align 4
@@ -809,16 +811,16 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_BODY_CASE23_SECTION_AFTER:%.*]]
 // CHECK3:       omp_section_loop.body.case23.section.after:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]]
-// CHECK3:       omp_section_loop.body.case26:
+// CHECK3:       omp_section_loop.body.case25:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM27:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK3-NEXT:    [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM27]], i32 3)
 // CHECK3-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 0
 // CHECK3-NEXT:    br i1 [[TMP19]], label 
[[OMP_SECTION_LOOP_BODY_CASE25_SPLIT:%.*]], label 
[[OMP_SECTION_LOOP_BODY_CASE25_CNCL:%.*]]
-// CHECK3:       omp_section_loop.body.case26.split:
+// CHECK3:       omp_section_loop.body.case25.split:
 // CHECK3-NEXT:    br label 
[[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER26:%.*]]
-// CHECK3:       omp_section_loop.body.case26.section.after27:
+// CHECK3:       omp_section_loop.body.case25.section.after26:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_BODY_CASE25_SECTION_AFTER:%.*]]
-// CHECK3:       omp_section_loop.body.case26.section.after:
+// CHECK3:       omp_section_loop.body.case25.section.after:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_BODY16_SECTIONS_AFTER]]
 // CHECK3:       omp_section_loop.body16.sections.after:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_INC17]]
@@ -831,6 +833,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM33]])
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19:%.*]]
 // CHECK3:       omp_section_loop.after19:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_AFTER19SECTIONS_FINI:%.*]]
+// CHECK3:       omp_section_loop.after19sections.fini:
 // CHECK3-NEXT:    [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
@@ -890,8 +894,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT]]
 // CHECK3:       omp_section_loop.body.case23.cncl:
 // CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
-// CHECK3:       omp_section_loop.body.case26.cncl:
-// CHECK3-NEXT:    br label [[OMP_REGION_FINALIZE:.*]]
+// CHECK3:       omp_section_loop.body.case25.cncl:
+// CHECK3-NEXT:    br label [[OMP_SECTION_LOOP_EXIT18]]
 // CHECK3:       .cancel.continue:
 // CHECK3-NEXT:    br label [[OMP_IF_END:%.*]]
 // CHECK3:       omp_if.else:
@@ -963,10 +967,8 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr 
@[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 // CHECK3-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
 // CHECK3-NEXT:    br i1 [[TMP9]], label [[DOTCONT:%.*]], label 
[[DOTCNCL5:%.*]]
-// CHECK3:       .cncl4:
-// CHECK3-NEXT:    br label [[FINI:%.*]]
-// CHECK3:       .fini
-// CHECK3-NEXT:    br label %[[EXIT_STUB:omp.par.exit.exitStub]]
+// CHECK3:       .cncl5:
+// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK3:       .cont:
 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[LOADGEP_ARGC_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[LOADGEP_ARGV_ADDR]], align 8
@@ -982,14 +984,16 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       omp.par.region.parallel.after:
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
-// CHECK3-NEXT:    br label [[FINI]]
+// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK3:       14:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM1]], i32 1)
 // CHECK3-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
 // CHECK3-NEXT:    br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label 
[[DOTCNCL:%.*]]
 // CHECK3:       .cncl:
-// CHECK3-NEXT:    br label [[FINI]]
+// CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(ptr 
@[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK3:       .split:
 // CHECK3-NEXT:    br label [[TMP4]]
 // CHECK3:       omp.par.exit.exitStub:
@@ -1085,7 +1089,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       .omp.sections.case.split:
 // CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
 // CHECK3:       .omp.sections.case.cncl:
-// CHECK3-NEXT:    br label [[FINI:%.*]]
+// CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK3:       .omp.sections.exit:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3:       omp.inner.for.inc:
@@ -1096,7 +1100,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       omp.inner.for.end:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB19:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 
[[OMP_GLOBAL_THREAD_NUM3]])
-// CHECK3-NEXT:    br label [[CANCEL_CONT:.*]]
+// CHECK3-NEXT:    br label [[CANCEL_CONT]]
 // CHECK3:       cancel.cont:
 // CHECK3-NEXT:    ret void
 // CHECK3:       cancel.exit:
@@ -1149,8 +1153,6 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       .omp.sections.case.split:
 // CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
 // CHECK3:       .omp.sections.case.cncl:
-// CHECK3-NEXT:    br label [[DOTFINI:.%*]]
-// CHECK3:       .fini:
 // CHECK3-NEXT:    br label [[CANCEL_CONT:%.*]]
 // CHECK3:       .omp.sections.case2:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
@@ -1160,11 +1162,9 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3:       .omp.sections.case2.split:
 // CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]]
 // CHECK3:       .omp.sections.case2.section.after:
-// CHECK3-NEXT:    br label [[OMP_REGION_FINALIZE]]
-// CHECK3:       omp_region.finalize:
-// CHECK3-NEXT:    br label [[OMP_SECTIONS_EXIT:.*]]
+// CHECK3-NEXT:    br label [[DOTOMP_SECTIONS_EXIT]]
 // CHECK3:       .omp.sections.case2.cncl:
-// CHECK3-NEXT:    br label [[FINI:.*]]
+// CHECK3-NEXT:    br label [[OMP_INNER_FOR_END]]
 // CHECK3:       .omp.sections.exit:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3:       omp.inner.for.inc:

diff  --git a/clang/test/OpenMP/critical_codegen.cpp 
b/clang/test/OpenMP/critical_codegen.cpp
index 9620613dfdb87..5c752d354804b 100644
--- a/clang/test/OpenMP/critical_codegen.cpp
+++ b/clang/test/OpenMP/critical_codegen.cpp
@@ -35,8 +35,6 @@ int main() {
 // ALL-NEXT:                   store i8 2, ptr [[A_ADDR]]
 // IRBUILDER-NEXT:             br label %[[AFTER:[^ ,]+]]
 // IRBUILDER:                  [[AFTER]]
-// IRBUILDER-NEXT:             br label %[[OMP_REGION_FINALIZE:[^ ,]+]]
-// IRBUILDER:                  [[OMP_REGION_FINALIZE]]
 // ALL-NEXT:                   call {{.*}}void @__kmpc_end_critical(ptr 
[[DEFAULT_LOC]], i32 [[GTID]], ptr [[UNNAMED_LOCK]])
 #pragma omp critical
   a = 2;

diff  --git a/clang/test/OpenMP/critical_codegen_attr.cpp 
b/clang/test/OpenMP/critical_codegen_attr.cpp
index 50b0b04fcfd4a..32482a92e76b8 100644
--- a/clang/test/OpenMP/critical_codegen_attr.cpp
+++ b/clang/test/OpenMP/critical_codegen_attr.cpp
@@ -35,8 +35,6 @@ int main() {
 // ALL-NEXT:                   store i8 2, ptr [[A_ADDR]]
 // IRBUILDER-NEXT:             br label %[[AFTER:[^ ,]+]]
 // IRBUILDER:                  [[AFTER]]
-// IRBUILDER-NEXT:             br label %[[OMP_REGION_FINALIZE:[^ ,]+]]
-// IRBUILDER:                  [[OMP_REGION_FINALIZE]]
 // ALL-NEXT:                   call {{.*}}void @__kmpc_end_critical(ptr 
[[DEFAULT_LOC]], i32 [[GTID]], ptr [[UNNAMED_LOCK]])
   [[omp::directive(critical)]]
   a = 2;

diff  --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c 
b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
index 56cf9644de5ed..5cc5640a5173b 100644
--- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c
+++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -449,7 +449,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr 
[[AGG_CAPTURED186]])
 // CHECK-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], 
align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]]
-// CHECK:       omp_loop.preheader190:
+// CHECK:       omp_loop.preheader187:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4
@@ -461,13 +461,13 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]]
 // CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]]
-// CHECK:       omp_loop.header191:
+// CHECK:       omp_loop.header188:
 // CHECK-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], 
[[OMP_LOOP_INC194:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND192:%.*]]
-// CHECK:       omp_loop.cond192:
+// CHECK:       omp_loop.cond189:
 // CHECK-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], 
[[TMP7]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], 
label [[OMP_LOOP_EXIT195:%.*]]
-// CHECK:       omp_loop.body193:
+// CHECK:       omp_loop.body190:
 // CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]]
 // CHECK-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], 
ptr [[AGG_CAPTURED187]])
 // CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
@@ -478,15 +478,15 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC194]]
-// CHECK:       omp_loop.inc194:
+// CHECK:       omp_loop.inc191:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER191]]
-// CHECK:       omp_loop.exit195:
+// CHECK:       omp_loop.exit192:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM207]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM208]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]]
-// CHECK:       omp_loop.after196:
+// CHECK:       omp_loop.after193:
 // CHECK-NEXT:    ret void
 //
 //
@@ -576,7 +576,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr 
[[AGG_CAPTURED161]])
 // CHECK-NEXT:    [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], 
align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER165:%.*]]
-// CHECK:       omp_loop.preheader165:
+// CHECK:       omp_loop.preheader163:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND179]], align 4
 // CHECK-NEXT:    [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1
 // CHECK-NEXT:    store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4
@@ -588,24 +588,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]]
 // CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER166:%.*]]
-// CHECK:       omp_loop.header166:
+// CHECK:       omp_loop.header164:
 // CHECK-NEXT:    [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], 
[[OMP_LOOP_INC169:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND167:%.*]]
-// CHECK:       omp_loop.cond167:
+// CHECK:       omp_loop.cond165:
 // CHECK-NEXT:    [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], 
[[TMP17]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], 
label [[OMP_LOOP_EXIT170:%.*]]
-// CHECK:       omp_loop.exit170:
+// CHECK:       omp_loop.exit168:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM182]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM183]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER171:%.*]]
-// CHECK:       omp_loop.after171:
+// CHECK:       omp_loop.after169:
 // CHECK-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]]
 // CHECK:       omp.par.region.parallel.after:
 // CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK:       omp.par.pre_finalize:
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]]
-// CHECK:       omp_loop.body168:
+// CHECK:       omp_loop.body166:
 // CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]]
 // CHECK-NEXT:    call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], 
ptr [[AGG_CAPTURED162]])
 // CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
@@ -616,7 +616,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV177]], ptr [[TMP21]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC169]]
-// CHECK:       omp_loop.inc169:
+// CHECK:       omp_loop.inc167:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER166]]
 // CHECK:       omp_loop.body:
@@ -758,7 +758,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK:       omp_loop.after86:
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL213:%.*]]
-// CHECK:       omp_parallel213:
+// CHECK:       omp_parallel210:
 // CHECK-NEXT:    [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr 
[[STRUCTARG209]], i32 0, i32 0
 // CHECK-NEXT:    store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], align 8
 // CHECK-NEXT:    [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr }, ptr 
[[STRUCTARG209]], i32 0, i32 1
@@ -777,7 +777,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr 
[[AGG_CAPTURED136]])
 // CHECK-NEXT:    [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], 
align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_PREHEADER140:%.*]]
-// CHECK:       omp_loop.preheader140:
+// CHECK:       omp_loop.preheader139:
 // CHECK-NEXT:    store i32 0, ptr [[P_LOWERBOUND154]], align 4
 // CHECK-NEXT:    [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1
 // CHECK-NEXT:    store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4
@@ -789,26 +789,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]]
 // CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[TMP24]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER141:%.*]]
-// CHECK:       omp_loop.header141:
+// CHECK:       omp_loop.header140:
 // CHECK-NEXT:    [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], 
[[OMP_LOOP_INC144:%.*]] ]
 // CHECK-NEXT:    br label [[OMP_LOOP_COND142:%.*]]
-// CHECK:       omp_loop.cond142:
+// CHECK:       omp_loop.cond141:
 // CHECK-NEXT:    [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], 
[[TMP25]]
 // CHECK-NEXT:    br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], 
label [[OMP_LOOP_EXIT145:%.*]]
-// CHECK:       omp_loop.exit145:
+// CHECK:       omp_loop.exit144:
 // CHECK-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM157]])
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]])
 // CHECK-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM158]])
 // CHECK-NEXT:    br label [[OMP_LOOP_AFTER146:%.*]]
-// CHECK:       omp_loop.after146:
+// CHECK:       omp_loop.after145:
 // CHECK-NEXT:    br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]]
 // CHECK:       omp.par.region9.parallel.after:
 // CHECK-NEXT:    br label [[OMP_PAR_PRE_FINALIZE10:%.*]]
 // CHECK:       omp.par.pre_finalize10:
-// CHECK-NEXT:    br label [[FINI159:%.*]]
-// CHECK:       .fini159:
-// CHECK-NEXT:    br label [[OMP_PAR_EXIT11_EXITSTUB:%.*]]
-// CHECK:       omp_loop.body143:
+// CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]]
+// CHECK:       omp_loop.body142:
 // CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]]
 // CHECK-NEXT:    call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], 
ptr [[AGG_CAPTURED137]])
 // CHECK-NEXT:    [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4
@@ -819,7 +817,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8
 // CHECK-NEXT:    store float [[CONV152]], ptr [[TMP29]], align 4
 // CHECK-NEXT:    br label [[OMP_LOOP_INC144]]
-// CHECK:       omp_loop.inc144:
+// CHECK:       omp_loop.inc143:
 // CHECK-NEXT:    [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1
 // CHECK-NEXT:    br label [[OMP_LOOP_HEADER141]]
 // CHECK:       omp_loop.body83:
@@ -1559,8 +1557,6 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp.par.region.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize:
-// CHECK-DEBUG-NEXT:    br label [[FINI:.*]]
-// CHECK-DEBUG:       .fini:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG30]]
 // CHECK-DEBUG:       omp_loop.body:
 // CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP5]], !dbg 
[[DBG29]]
@@ -1704,8 +1700,6 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp.par.region.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize:
-// CHECK-DEBUG-NEXT:    br label [[FINI16:%.*]]
-// CHECK-DEBUG:       .fini16:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], 
!dbg [[DBG92]]
 // CHECK-DEBUG:       omp.par.exit.exitStub:
 // CHECK-DEBUG-NEXT:    ret void
@@ -1775,8 +1769,6 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp.par.region5.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE6:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize6:
-// CHECK-DEBUG-NEXT:    br label [[FINI:%.*]]
-// CHECK-DEBUG:       .fini:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG103]]
 // CHECK-DEBUG:       omp_loop.body:
 // CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], 
!dbg [[DBG102]]
@@ -1907,7 +1899,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(ptr 
[[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, ptr 
[[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.preheader190:
+// CHECK-DEBUG:       omp_loop.preheader187:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg 
[[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg 
[[DBG148]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, 
!dbg [[DBG148]]
@@ -1919,13 +1911,13 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg 
[[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.header191:
+// CHECK-DEBUG:       omp_loop.header188:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], 
[[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.cond192:
+// CHECK-DEBUG:       omp_loop.cond189:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 
[[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label 
[[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.body193:
+// CHECK-DEBUG:       omp_loop.body190:
 // CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], 
!dbg [[DBG150:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(ptr [[I185]], i32 
[[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg 
[[DBG151:![0-9]+]]
@@ -1936,15 +1928,15 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, 
!dbg [[DBG153:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV202]], ptr [[TMP11]], align 4, !dbg 
[[DBG154:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.inc194:
+// CHECK-DEBUG:       omp_loop.inc191:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 
1, !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.exit195:
+// CHECK-DEBUG:       omp_loop.exit192:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 
[[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
-// CHECK-DEBUG:       omp_loop.after196:
+// CHECK-DEBUG:       omp_loop.after193:
 // CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG155:![0-9]+]]
 //
 //
@@ -2039,7 +2031,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.17(ptr 
[[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT164:%.*]] = load i32, ptr 
[[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.preheader165:
+// CHECK-DEBUG:       omp_loop.preheader163:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg 
[[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg 
[[DBG174]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, 
!dbg [[DBG174]]
@@ -2051,26 +2043,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg 
[[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.header166:
+// CHECK-DEBUG:       omp_loop.header164:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], 
[[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.cond167:
+// CHECK-DEBUG:       omp_loop.cond165:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 
[[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP173]], label 
[[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.exit170:
+// CHECK-DEBUG:       omp_loop.exit168:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 
[[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.after171:
+// CHECK-DEBUG:       omp_loop.after169:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg 
[[DBG177:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize:
-// CHECK-DEBUG-NEXT:    br label [[FINI184:%.*]]
-// CHECK-DEBUG:       .fini184:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], 
!dbg [[DBG177]]
-// CHECK-DEBUG:       omp_loop.body168:
+// CHECK-DEBUG:       omp_loop.body166:
 // CHECK-DEBUG-NEXT:    [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], 
!dbg [[DBG176]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.18(ptr [[I160]], i32 
[[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], 
align 4, !dbg [[DBG178:![0-9]+]]
@@ -2081,7 +2071,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], 
align 8, !dbg [[DBG180:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV177]], ptr [[TMP21]], align 4, !dbg 
[[DBG181:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC169]], !dbg [[DBG174]]
-// CHECK-DEBUG:       omp_loop.inc169:
+// CHECK-DEBUG:       omp_loop.inc167:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 
1, !dbg [[DBG174]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]]
 // CHECK-DEBUG:       omp_loop.body:
@@ -2228,7 +2218,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.after86:
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL213:%.*]]
-// CHECK-DEBUG:       omp_parallel213:
+// CHECK-DEBUG:       omp_parallel210:
 // CHECK-DEBUG-NEXT:    [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr 
}, ptr [[STRUCTARG209]], i32 0, i32 0
 // CHECK-DEBUG-NEXT:    store ptr [[LOADGEP_A_ADDR]], ptr [[GEP_A_ADDR210]], 
align 8
 // CHECK-DEBUG-NEXT:    [[GEP_B_ADDR211:%.*]] = getelementptr { ptr, ptr, ptr 
}, ptr [[STRUCTARG209]], i32 0, i32 1
@@ -2248,7 +2238,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.15(ptr 
[[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[DOTCOUNT139:%.*]] = load i32, ptr 
[[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.preheader140:
+// CHECK-DEBUG:       omp_loop.preheader139:
 // CHECK-DEBUG-NEXT:    store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg 
[[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg 
[[DBG217]]
 // CHECK-DEBUG-NEXT:    store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, 
!dbg [[DBG217]]
@@ -2260,26 +2250,24 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg 
[[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.header141:
+// CHECK-DEBUG:       omp_loop.header140:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, 
[[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], 
[[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.cond142:
+// CHECK-DEBUG:       omp_loop.cond141:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 
[[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP148]], label 
[[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.exit145:
+// CHECK-DEBUG:       omp_loop.exit144:
 // CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 
[[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]]
 // CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.after146:
+// CHECK-DEBUG:       omp_loop.after145:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg 
[[DBG220:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region9.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE10:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize10:
-// CHECK-DEBUG-NEXT:    br label [[FINI159:%.*]]
-// CHECK-DEBUG:       .fini159:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], 
!dbg [[DBG220]]
-// CHECK-DEBUG:       omp_loop.body143:
+// CHECK-DEBUG:       omp_loop.body142:
 // CHECK-DEBUG-NEXT:    [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], 
!dbg [[DBG219]]
 // CHECK-DEBUG-NEXT:    call void @__captured_stmt.16(ptr [[I135]], i32 
[[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], 
align 4, !dbg [[DBG221:![0-9]+]]
@@ -2290,7 +2278,7 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], 
align 8, !dbg [[DBG223:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store float [[CONV152]], ptr [[TMP29]], align 4, !dbg 
[[DBG224:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC144]], !dbg [[DBG217]]
-// CHECK-DEBUG:       omp_loop.inc144:
+// CHECK-DEBUG:       omp_loop.inc143:
 // CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 
1, !dbg [[DBG217]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]]
 // CHECK-DEBUG:       omp_loop.body83:
@@ -2387,8 +2375,8 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp_loop.after121:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], 
!dbg [[DBG244:![0-9]+]]
 // CHECK-DEBUG:       omp.par.region103.parallel.after:
-// CHECK-DEBUG-NEXT:    br label [[FINI134:%.*]]
-// CHECK-DEBUG:       .fini134:
+// CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE104:%.*]]
+// CHECK-DEBUG:       omp.par.pre_finalize104:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], 
!dbg [[DBG244]]
 // CHECK-DEBUG:       omp_loop.body118:
 // CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], 
!dbg [[DBG243]]
@@ -2472,8 +2460,6 @@ void parallel_for_2(float *r, int a, double b) {
 // CHECK-DEBUG:       omp.par.region44.parallel.after:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_PRE_FINALIZE45:%.*]]
 // CHECK-DEBUG:       omp.par.pre_finalize45:
-// CHECK-DEBUG-NEXT:    br label [[FINI:%.*]]
-// CHECK-DEBUG:       .fini:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG260]]
 // CHECK-DEBUG:       omp_loop.body59:
 // CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], 
!dbg [[DBG259]]

diff  --git a/clang/test/OpenMP/masked_codegen.cpp 
b/clang/test/OpenMP/masked_codegen.cpp
index bc6f68de9b248..a39de12d69337 100644
--- a/clang/test/OpenMP/masked_codegen.cpp
+++ b/clang/test/OpenMP/masked_codegen.cpp
@@ -35,8 +35,6 @@ int main() {
 // ALL-NEXT:                   store i8 2, ptr [[A_ADDR]]
 // IRBUILDER-NEXT:             br label %[[AFTER:[^ ,]+]]
 // IRBUILDER:                  [[AFTER]]
-// IRBUILDER-NEXT:             br label %[[OMP_REGION_FINALIZE:[^ ,]+]]
-// IRBUILDER:                  [[OMP_REGION_FINALIZE]]
 // ALL-NEXT:                   call {{.*}}void @__kmpc_end_masked(ptr 
[[DEFAULT_LOC]], i32 [[GTID]])
 // ALL-NEXT:                   br label {{%?}}[[EXIT]]
 // ALL:                        [[EXIT]]

diff  --git a/clang/test/OpenMP/master_codegen.cpp 
b/clang/test/OpenMP/master_codegen.cpp
index 5a92444d9a927..a7af326caacfe 100644
--- a/clang/test/OpenMP/master_codegen.cpp
+++ b/clang/test/OpenMP/master_codegen.cpp
@@ -35,8 +35,6 @@ int main() {
 // ALL-NEXT:                   store i8 2, ptr [[A_ADDR]]
 // IRBUILDER-NEXT:             br label %[[AFTER:[^ ,]+]]
 // IRBUILDER:                  [[AFTER]]
-// IRBUILDER-NEXT:             br label %[[OMP_REGION_FINALIZE:[^ ,]+]]
-// IRBUILDER:                  [[OMP_REGION_FINALIZE]]
 // ALL-NEXT:                   call {{.*}}void @__kmpc_end_master(ptr 
[[DEFAULT_LOC]], i32 [[GTID]])
 // ALL-NEXT:                   br label {{%?}}[[EXIT]]
 // ALL:                        [[EXIT]]

diff  --git a/clang/test/OpenMP/nested_loop_codegen.cpp 
b/clang/test/OpenMP/nested_loop_codegen.cpp
index e01fd0da31ee8..9aefc6a739e51 100644
--- a/clang/test/OpenMP/nested_loop_codegen.cpp
+++ b/clang/test/OpenMP/nested_loop_codegen.cpp
@@ -904,8 +904,6 @@ int inline_decl() {
 // CHECK4:       omp.par.region.parallel.after:
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[FINI:%.*]]
-// CHECK4:       .fini:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG27]]
 // CHECK4:       for.body:
 // CHECK4-NEXT:    store i32 0, ptr [[LOADGEP_K]], align 4, !dbg 
[[DBG28:![0-9]+]]
@@ -1085,8 +1083,6 @@ int inline_decl() {
 // CHECK4:       omp.par.region.parallel.after:
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[FINI:%.*]]
-// CHECK4:       .fini:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG90]]
 // CHECK4:       for.body:
 // CHECK4-NEXT:      #dbg_declare(ptr [[K]], [[META91:![0-9]+]], 
!DIExpression(), [[META95:![0-9]+]])

diff  --git a/clang/test/OpenMP/ordered_codegen.cpp 
b/clang/test/OpenMP/ordered_codegen.cpp
index 3b29feac7caa2..5cd95f1927e5c 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -794,8 +794,6 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK1-IRBUILDER:       omp_region.finalize:
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
@@ -886,8 +884,6 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL7]], ptr [[ARRAYIDX8]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK1-IRBUILDER:       omp_region.finalize:
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM3]])
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
@@ -1026,8 +1022,6 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL29]], ptr [[ARRAYIDX31]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK1-IRBUILDER:       omp_region.finalize:
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM23]])
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
@@ -1137,8 +1131,6 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store float [[MUL14]], ptr [[ARRAYIDX16]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK1-IRBUILDER:       omp_region.finalize:
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM8]])
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
@@ -1304,19 +1296,17 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
 // CHECK1-IRBUILDER-NEXT:    br label 
[[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body33.ordered.after:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE38:%.*]]
-// CHECK1-IRBUILDER:       omp_region.finalize38:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE39:%.*]]
-// CHECK1-IRBUILDER:       omp.body.continue39:
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC40:%.*]]
-// CHECK1-IRBUILDER:       omp.inner.for.inc40:
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE38:%.*]]
+// CHECK1-IRBUILDER:       omp.body.continue38:
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC39:%.*]]
+// CHECK1-IRBUILDER:       omp.inner.for.inc39:
 // CHECK1-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], 
align 4
 // CHECK1-IRBUILDER-NEXT:    [[ADD40:%.*]] = add i32 [[TMP32]], 1
 // CHECK1-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12]])
 // CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop 
[[LOOP5:![0-9]+]]
-// CHECK1-IRBUILDER:       omp.inner.for.end43:
+// CHECK1-IRBUILDER:       omp.inner.for.end42:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1-IRBUILDER:       omp.dispatch.inc:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -2044,8 +2034,6 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL8]], ptr [[ARRAYIDX10]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK3-IRBUILDER:       omp_region.finalize:
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3-IRBUILDER:       omp.body.continue:
@@ -2136,8 +2124,6 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL7]], ptr [[ARRAYIDX8]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK3-IRBUILDER:       omp_region.finalize:
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM3]])
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3-IRBUILDER:       omp.body.continue:
@@ -2276,8 +2262,6 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL29]], ptr [[ARRAYIDX31]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK3-IRBUILDER:       omp_region.finalize:
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM23]])
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3-IRBUILDER:       omp.body.continue:
@@ -2387,8 +2371,6 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    store float [[MUL14]], ptr [[ARRAYIDX16]], align 4
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body.ordered.after:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-// CHECK3-IRBUILDER:       omp_region.finalize:
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM8]])
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3-IRBUILDER:       omp.body.continue:
@@ -2554,19 +2536,17 @@ void foo_simd(int low, int up) {
 // CHECK3-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
 // CHECK3-IRBUILDER-NEXT:    br label 
[[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
 // CHECK3-IRBUILDER:       omp.inner.for.body33.ordered.after:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_REGION_FINALIZE38:%.*]]
-// CHECK3-IRBUILDER:       omp_region.finalize38:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE39:%.*]]
-// CHECK3-IRBUILDER:       omp.body.continue39:
-// CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC40:%.*]]
-// CHECK3-IRBUILDER:       omp.inner.for.inc40:
+// CHECK3-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE38:%.*]]
+// CHECK3-IRBUILDER:       omp.body.continue38:
+// CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC39:%.*]]
+// CHECK3-IRBUILDER:       omp.inner.for.inc39:
 // CHECK3-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], 
align 4
 // CHECK3-IRBUILDER-NEXT:    [[ADD40:%.*]] = add i32 [[TMP32]], 1
 // CHECK3-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
 // CHECK3-IRBUILDER-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB12]])
 // CHECK3-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr 
@[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop 
[[LOOP5:![0-9]+]]
-// CHECK3-IRBUILDER:       omp.inner.for.end43:
+// CHECK3-IRBUILDER:       omp.inner.for.end42:
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK3-IRBUILDER:       omp.dispatch.inc:
 // CHECK3-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_COND]]

diff  --git a/clang/test/OpenMP/parallel_codegen.cpp 
b/clang/test/OpenMP/parallel_codegen.cpp
index 9f6004e37db9c..e8e57aedaa164 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -906,8 +906,6 @@ int main (int argc, char **argv) {
 // CHECK4:       omp.par.region.parallel.after:
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[FINI:%.*]]
-// CHECK4:       .fini:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG35]]
 // CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void
@@ -977,8 +975,6 @@ int main (int argc, char **argv) {
 // CHECK4:       omp.par.region.parallel.after:
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[FINI:%.*]]
-// CHECK4:       .fini:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg 
[[DBG66]]
 // CHECK4:       omp.par.exit.exitStub:
 // CHECK4-NEXT:    ret void

diff  --git 
a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90 
b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
index fd59d39b552da..cf77c46346b7f 100644
--- a/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
+++ b/flang/test/Integration/OpenMP/parallel-private-reduction-worstcase.f90
@@ -174,13 +174,10 @@ subroutine worst_case(a, b, c, d)
 ! CHECK-NEXT:    br label %omp.par.pre_finalize
 
 ! CHECK:       omp.par.pre_finalize:                             ; preds = 
%reduce.finalize
-! CHECK-NEXT:    br label %.fini
-
-! CHECK:       .fini:
 ! CHECK-NEXT:    %{{.*}} = load ptr, ptr
 ! CHECK-NEXT:    br label %omp.reduction.cleanup
 
-! CHECK:       omp.reduction.cleanup:                            ; preds = 
%.fini
+! CHECK:       omp.reduction.cleanup:                            ; preds = 
%omp.par.pre_finalize
 !                [null check]
 ! CHECK:         br i1 %{{.*}}, label %omp.reduction.cleanup43, label 
%omp.reduction.cleanup44
 

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h 
b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 3efbdc4fe17d6..b801e212ceced 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -576,33 +576,16 @@ class OpenMPIRBuilder {
   using FinalizeCallbackTy = std::function<Error(InsertPointTy CodeGenIP)>;
 
   struct FinalizationInfo {
-    FinalizationInfo(FinalizeCallbackTy FiniCB, omp::Directive DK,
-                     bool IsCancellable)
-        : DK(DK), IsCancellable(IsCancellable), FiniCB(std::move(FiniCB)) {}
+    /// The finalization callback provided by the last in-flight invocation of
+    /// createXXXX for the directive of kind DK.
+    FinalizeCallbackTy FiniCB;
+
     /// The directive kind of the innermost directive that has an associated
     /// region which might require finalization when it is left.
-    const omp::Directive DK;
+    omp::Directive DK;
 
     /// Flag to indicate if the directive is cancellable.
-    const bool IsCancellable;
-
-    /// The basic block to which control should be transferred to
-    /// implement the FiniCB. Memoized to avoid generating finalization
-    /// multiple times.
-    Expected<BasicBlock *> getFiniBB(IRBuilderBase &Builder);
-
-    /// For cases where there is an unavoidable existing finalization block
-    /// (e.g. loop finialization after omp sections). The existing finalization
-    /// block must not contain any non-finalization code.
-    Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB);
-
-  private:
-    /// Access via getFiniBB.
-    BasicBlock *FiniBB = nullptr;
-
-    /// The finalization callback provided by the last in-flight invocation of
-    /// createXXXX for the directive of kind DK.
-    FinalizeCallbackTy FiniCB;
+    bool IsCancellable;
   };
 
   /// Push a finalization callback on the finalization stack.
@@ -2263,7 +2246,8 @@ class OpenMPIRBuilder {
   ///
   /// \return an error, if any were triggered during execution.
   LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag,
-                                          omp::Directive CanceledDirective);
+                                          omp::Directive CanceledDirective,
+                                          FinalizeCallbackTy ExitCB = {});
 
   /// Generate a target region entry call.
   ///
@@ -3418,8 +3402,7 @@ class OpenMPIRBuilder {
   /// Common interface to finalize the region
   ///
   /// \param OMPD Directive to generate exiting code for
-  /// \param FinIP Insertion point for emitting Finalization code and exit 
call.
-  ///              This block must not contain any non-finalization code.
+  /// \param FinIP Insertion point for emitting Finalization code and exit call
   /// \param ExitCall Call to the ending OMP Runtime Function
   /// \param HasFinalize indicate if the directive will require finalization
   ///         and has a finalization callback in the stack that

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp 
b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index be3053c34bc4e..cf88c4309974f 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -682,47 +682,6 @@ OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, 
RuntimeFunction FnID) {
   return {FnTy, Fn};
 }
 
-Expected<BasicBlock *>
-OpenMPIRBuilder::FinalizationInfo::getFiniBB(IRBuilderBase &Builder) {
-  if (!FiniBB) {
-    Function *ParentFunc = Builder.GetInsertBlock()->getParent();
-    IRBuilderBase::InsertPointGuard Guard(Builder);
-    FiniBB = BasicBlock::Create(Builder.getContext(), ".fini", ParentFunc);
-    Builder.SetInsertPoint(FiniBB);
-    // FiniCB adds the branch to the exit stub.
-    if (Error Err = FiniCB(Builder.saveIP()))
-      return Err;
-  }
-  return FiniBB;
-}
-
-Error OpenMPIRBuilder::FinalizationInfo::mergeFiniBB(IRBuilderBase &Builder,
-                                                     BasicBlock *OtherFiniBB) {
-  // Simple case: FiniBB does not exist yet: re-use OtherFiniBB.
-  if (!FiniBB) {
-    FiniBB = OtherFiniBB;
-    IRBuilderBase::InsertPointGuard Guard(Builder);
-    Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
-    if (Error Err = FiniCB(Builder.saveIP()))
-      return Err;
-
-    return Error::success();
-  }
-
-  // Move instructions from FiniBB to the start of OtherFiniBB.
-  auto EndIt = FiniBB->end();
-  if (FiniBB->size() >= 1)
-    if (auto Prev = std::prev(EndIt); Prev->isTerminator())
-      EndIt = Prev;
-  OtherFiniBB->splice(OtherFiniBB->getFirstNonPHIIt(), FiniBB, FiniBB->begin(),
-                      EndIt);
-
-  FiniBB->replaceAllUsesWith(OtherFiniBB);
-  FiniBB->eraseFromParent();
-  FiniBB = OtherFiniBB;
-  return Error::success();
-}
-
 Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) 
{
   FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
   auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
@@ -1170,9 +1129,21 @@ OpenMPIRBuilder::createCancel(const LocationDescription 
&Loc,
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
   Value *Result = createRuntimeFunctionCall(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
+  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error {
+    if (CanceledDirective == OMPD_parallel) {
+      IRBuilder<>::InsertPointGuard IPG(Builder);
+      Builder.restoreIP(IP);
+      return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                           omp::Directive::OMPD_unknown,
+                           /* ForceSimpleCall */ false,
+                           /* CheckCancelFlag */ false)
+          .takeError();
+    }
+    return Error::success();
+  };
 
   // The actual cancel logic is shared with others, e.g., cancel_barriers.
-  if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
+  if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
     return Err;
 
   // Update the insertion point and remove the terminator we introduced.
@@ -1209,9 +1180,21 @@ OpenMPIRBuilder::createCancellationPoint(const 
LocationDescription &Loc,
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
   Value *Result = createRuntimeFunctionCall(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
+  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error {
+    if (CanceledDirective == OMPD_parallel) {
+      IRBuilder<>::InsertPointGuard IPG(Builder);
+      Builder.restoreIP(IP);
+      return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                           omp::Directive::OMPD_unknown,
+                           /* ForceSimpleCall */ false,
+                           /* CheckCancelFlag */ false)
+          .takeError();
+    }
+    return Error::success();
+  };
 
   // The actual cancel logic is shared with others, e.g., cancel_barriers.
-  if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
+  if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
     return Err;
 
   // Update the insertion point and remove the terminator we introduced.
@@ -1315,7 +1298,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::emitKernelLaunch(
 }
 
 Error OpenMPIRBuilder::emitCancelationCheckImpl(
-    Value *CancelFlag, omp::Directive CanceledDirective) {
+    Value *CancelFlag, omp::Directive CanceledDirective,
+    FinalizeCallbackTy ExitCB) {
   assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
          "Unexpected cancellation!");
 
@@ -1342,12 +1326,13 @@ Error OpenMPIRBuilder::emitCancelationCheckImpl(
 
   // From the cancellation block we finalize all variables and go to the
   // post finalization block that is known to the FiniCB callback.
-  auto &FI = FinalizationStack.back();
-  Expected<BasicBlock *> FiniBBOrErr = FI.getFiniBB(Builder);
-  if (!FiniBBOrErr)
-    return FiniBBOrErr.takeError();
   Builder.SetInsertPoint(CancellationBlock);
-  Builder.CreateBr(*FiniBBOrErr);
+  if (ExitCB)
+    if (Error Err = ExitCB(Builder.saveIP()))
+      return Err;
+  auto &FI = FinalizationStack.back();
+  if (Error Err = FI.FiniCB(Builder.saveIP()))
+    return Err;
 
   // The continuation block is where code generation continues.
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
@@ -1836,18 +1821,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createParallel(
   Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
 
   InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
-  Expected<BasicBlock *> FiniBBOrErr = FiniInfo.getFiniBB(Builder);
-  if (!FiniBBOrErr)
-    return FiniBBOrErr.takeError();
-  {
-    IRBuilderBase::InsertPointGuard Guard(Builder);
-    Builder.restoreIP(PreFiniIP);
-    Builder.CreateBr(*FiniBBOrErr);
-    // There's currently a branch to omp.par.exit. Delete it. We will get there
-    // via the fini block
-    if (Instruction *Term = Builder.GetInsertBlock()->getTerminator())
-      Term->eraseFromParent();
-  }
+  if (Error Err = FiniCB(PreFiniIP))
+    return Err;
 
   // Register the outlined info.
   addOutlineInfo(std::move(OI));
@@ -2283,7 +2258,23 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
   if (!updateToLocation(Loc))
     return Loc.IP;
 
-  FinalizationStack.push_back({FiniCB, OMPD_sections, IsCancellable});
+  // FiniCBWrapper needs to create a branch to the loop finalization block, but
+  // this has not been created yet at some times when this callback runs.
+  SmallVector<BranchInst *> CancellationBranches;
+  auto FiniCBWrapper = [&](InsertPointTy IP) {
+    if (IP.getBlock()->end() != IP.getPoint())
+      return FiniCB(IP);
+    // This must be done otherwise any nested constructs using 
FinalizeOMPRegion
+    // will fail because that function requires the Finalization Basic Block to
+    // have a terminator, which is already removed by EmitOMPRegionBody.
+    // IP is currently at cancelation block.
+    BranchInst *DummyBranch = Builder.CreateBr(IP.getBlock());
+    IP = InsertPointTy(DummyBranch->getParent(), DummyBranch->getIterator());
+    CancellationBranches.push_back(DummyBranch);
+    return FiniCB(IP);
+  };
+
+  FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
 
   // Each section is emitted as a switch case
   // Each finalization callback is handled from clang.EmitOMPSectionDirective()
@@ -2349,8 +2340,20 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::createSections(
   auto FiniInfo = FinalizationStack.pop_back_val();
   assert(FiniInfo.DK == OMPD_sections &&
          "Unexpected finalization stack state!");
-  if (Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))
-    return Err;
+  if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
+    Builder.restoreIP(AfterIP);
+    BasicBlock *FiniBB =
+        splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
+    if (Error Err = CB(Builder.saveIP()))
+      return Err;
+    AfterIP = {FiniBB, FiniBB->begin()};
+  }
+
+  // Now we can fix the dummy branch to point to the right place
+  for (BranchInst *DummyBranch : CancellationBranches) {
+    assert(DummyBranch->getNumSuccessors() == 1);
+    DummyBranch->setSuccessor(0, LoopFini);
+  }
 
   return AfterIP;
 }
@@ -6715,6 +6718,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::EmitOMPInlinedRegion(
       emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
   if (!AfterIP)
     return AfterIP.takeError();
+  assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
+         "Unexpected Control Flow State!");
+  MergeBlockIntoPredecessor(FiniBB);
 
   // If we are skipping the region of a non conditional, remove the exit
   // block, and clear the builder's insertion point.
@@ -6774,12 +6780,14 @@ OpenMPIRBuilder::InsertPointOrErrorTy 
OpenMPIRBuilder::emitCommonDirectiveExit(
     FinalizationInfo Fi = FinalizationStack.pop_back_val();
     assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
 
-    if (Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
-      return std::move(Err);
+    if (Error Err = Fi.FiniCB(FinIP))
+      return Err;
+
+    BasicBlock *FiniBB = FinIP.getBlock();
+    Instruction *FiniBBTI = FiniBB->getTerminator();
 
-    // Exit condition: insertion point is before the terminator of the new Fini
-    // block
-    Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
+    // set Builder IP for call creation
+    Builder.SetInsertPoint(FiniBBTI);
   }
 
   if (!ExitCall)

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll 
b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index 1bbac5cc3154b..83452e72b56b9 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -4880,8 +4880,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -4976,8 +4974,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -5074,8 +5070,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -5163,8 +5157,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -5262,8 +5254,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -5444,8 +5434,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:
@@ -5636,10 +5624,8 @@ entry:
 ; CHECK2:       omp.par.region.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 ; CHECK2:       omp.par.pre_finalize:
-; CHECK2-NEXT:    br label [[FINI:%.*]]
-; CHECK2:       .fini:
-; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_EXITSTUB:.*]]
-; CHECK2:       omp_region.body6:
+; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
+; CHECK2:       omp_region.body5:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK2:       seq.par.merged2:
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, ptr 
[[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
@@ -5648,9 +5634,7 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
-; CHECK2:       omp_region.body6.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE5:%.*]]
-; CHECK2:       omp_region.finalize{{.*}}:
+; CHECK2:       omp_region.body5.split:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK2:       omp_region.body:
@@ -5662,8 +5646,6 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    br label [[OMP_REGION_FINALIZE:%.*]]
-; CHECK2:       omp_region.finalize:
 ; CHECK2-NEXT:    call void @__kmpc_end_master(ptr @[[GLOB2]], i32 
[[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.exit.exitStub:

diff  --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp 
b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index dab0a46eeb3bc..1f35b7a5cfaa4 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -428,8 +428,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
                        OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel));
   Builder.restoreIP(NewIP);
   EXPECT_FALSE(M->global_empty());
-  EXPECT_EQ(M->size(), 3U);
-  EXPECT_EQ(F->size(), 5U);
+  EXPECT_EQ(M->size(), 4U);
+  EXPECT_EQ(F->size(), 4U);
   EXPECT_EQ(BB->size(), 4U);
 
   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
@@ -449,16 +449,23 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
+  CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
+  EXPECT_NE(GTID1, nullptr);
+  EXPECT_EQ(GTID1->arg_size(), 1U);
+  EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
+  CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
+  EXPECT_NE(Barrier, nullptr);
+  EXPECT_EQ(Barrier->arg_size(), 2U);
+  EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
+  EXPECT_TRUE(Barrier->use_empty());
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
-  // cancel branch instruction (1) -> .cncl -> .fini -> CBB
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)
-                ->getTerminator()
-                ->getSuccessor(0)
-                ->getTerminator()
-                ->getSuccessor(0),
-            CBB);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 
CBB);
 
   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
 
@@ -490,8 +497,8 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
       OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel));
   Builder.restoreIP(NewIP);
   EXPECT_FALSE(M->global_empty());
-  EXPECT_EQ(M->size(), 3U);
-  EXPECT_EQ(F->size(), 8U);
+  EXPECT_EQ(M->size(), 4U);
+  EXPECT_EQ(F->size(), 7U);
   EXPECT_EQ(BB->size(), 1U);
   ASSERT_TRUE(isa<BranchInst>(BB->getTerminator()));
   ASSERT_EQ(BB->getTerminator()->getNumSuccessors(), 2U);
@@ -517,15 +524,23 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
             NewIP.getBlock());
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 1U);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
+  CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
+  EXPECT_NE(GTID1, nullptr);
+  EXPECT_EQ(GTID1->arg_size(), 1U);
+  EXPECT_EQ(GTID1->getCalledFunction()->getName(), "__kmpc_global_thread_num");
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(GTID1->getCalledFunction()->doesNotFreeMemory());
+  CallInst *Barrier = dyn_cast<CallInst>(GTID1->getNextNode());
+  EXPECT_NE(Barrier, nullptr);
+  EXPECT_EQ(Barrier->arg_size(), 2U);
+  EXPECT_EQ(Barrier->getCalledFunction()->getName(), "__kmpc_cancel_barrier");
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotAccessMemory());
+  EXPECT_FALSE(Barrier->getCalledFunction()->doesNotFreeMemory());
+  EXPECT_TRUE(Barrier->use_empty());
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
-  EXPECT_EQ(CancelBBTI->getSuccessor(1)
-                ->getTerminator()
-                ->getSuccessor(0)
-                ->getTerminator()
-                ->getSuccessor(0),
-            CBB);
+  EXPECT_EQ(CancelBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0), 
CBB);
 
   EXPECT_EQ(cast<CallInst>(Cancel)->getArgOperand(1), GTID);
 
@@ -557,7 +572,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
   Builder.restoreIP(NewIP);
   EXPECT_FALSE(M->global_empty());
   EXPECT_EQ(M->size(), 3U);
-  EXPECT_EQ(F->size(), 5U);
+  EXPECT_EQ(F->size(), 4U);
   EXPECT_EQ(BB->size(), 4U);
 
   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
@@ -580,11 +595,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
-  EXPECT_EQ(BarrierBBTI->getSuccessor(1)
-                ->getTerminator()
-                ->getSuccessor(0)
-                ->getTerminator()
-                ->getSuccessor(0),
+  EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getSuccessor(0),
             CBB);
 
   EXPECT_EQ(cast<CallInst>(Barrier)->getArgOperand(1), GTID);
@@ -1280,8 +1291,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 0U);
-  EXPECT_EQ(NumFinalizationPoints, 1U);
-  EXPECT_TRUE(FakeDestructor->hasNUses(1));
+  EXPECT_EQ(NumFinalizationPoints, 2U);
+  EXPECT_TRUE(FakeDestructor->hasNUses(2));
 
   Builder.restoreIP(AfterIP);
   Builder.CreateRetVoid();
@@ -2905,8 +2916,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
   EXPECT_TRUE(EntryBr->isConditional());
   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
-  BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor();
-  BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor();
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
 
   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
@@ -2918,7 +2928,7 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) {
   EXPECT_TRUE(isa<GlobalVariable>(MasterEntryCI->getArgOperand(0)));
 
   CallInst *MasterEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *ThenBB) {
     Instruction *cur = &FI;
     if (isa<CallInst>(cur)) {
       MasterEndCI = cast<CallInst>(cur);
@@ -2988,8 +2998,7 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
   EXPECT_TRUE(EntryBr->isConditional());
   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
-  BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor();
-  BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor();
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
 
   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
@@ -3001,7 +3010,7 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
   EXPECT_TRUE(isa<GlobalVariable>(MaskedEntryCI->getArgOperand(0)));
 
   CallInst *MaskedEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *ThenBB) {
     Instruction *cur = &FI;
     if (isa<CallInst>(cur)) {
       MaskedEndCI = cast<CallInst>(cur);
@@ -3053,9 +3062,6 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
                                 FINICB_WRAPPER(FiniCB), "testCRT", nullptr));
   Builder.restoreIP(AfterIP);
 
-  BasicBlock *FinalizeBB = EntryBB->getUniqueSuccessor();
-  EXPECT_NE(FinalizeBB, nullptr);
-
   CallInst *CriticalEntryCI = nullptr;
   for (auto &EI : *EntryBB) {
     Instruction *cur = &EI;
@@ -3072,7 +3078,7 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
   EXPECT_TRUE(isa<GlobalVariable>(CriticalEntryCI->getArgOperand(0)));
 
   CallInst *CriticalEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *EntryBB) {
     Instruction *cur = &FI;
     if (isa<CallInst>(cur)) {
       CriticalEndCI = cast<CallInst>(cur);
@@ -3306,9 +3312,6 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
                                           FINICB_WRAPPER(FiniCB), true));
   Builder.restoreIP(AfterIP);
 
-  BasicBlock *FinalizeBB = EntryBB->getUniqueSuccessor();
-  EXPECT_NE(FinalizeBB, nullptr);
-
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -3331,7 +3334,7 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
   EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
 
   CallInst *OrderedEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *EntryBB) {
     Instruction *Cur = &FI;
     if (isa<CallInst>(Cur)) {
       OrderedEndCI = cast<CallInst>(Cur);
@@ -3505,8 +3508,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) {
   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
   EXPECT_TRUE(EntryBr->isConditional());
   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
-  BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor();
-  BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor();
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
 
   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
@@ -3518,7 +3520,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) {
   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
 
   CallInst *SingleEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *ThenBB) {
     Instruction *cur = &FI;
     if (isa<CallInst>(cur)) {
       SingleEndCI = cast<CallInst>(cur);
@@ -3599,8 +3601,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
   EXPECT_TRUE(EntryBr->isConditional());
   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
-  BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor();
-  BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor();
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
 
   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
@@ -3612,7 +3613,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
   EXPECT_TRUE(isa<GlobalVariable>(SingleEntryCI->getArgOperand(0)));
 
   CallInst *SingleEndCI = nullptr;
-  for (auto &FI : *FinalizeBB) {
+  for (auto &FI : *ThenBB) {
     Instruction *cur = &FI;
     if (isa<CallInst>(cur)) {
       SingleEndCI = cast<CallInst>(cur);
@@ -3723,8 +3724,7 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
   BranchInst *EntryBr = cast<BranchInst>(EntryBB->getTerminator());
   EXPECT_TRUE(EntryBr->isConditional());
   EXPECT_EQ(EntryBr->getSuccessor(0), ThenBB);
-  BasicBlock *FinalizeBB = ThenBB->getUniqueSuccessor();
-  BasicBlock *ExitBB = FinalizeBB->getUniqueSuccessor();
+  BasicBlock *ExitBB = ThenBB->getUniqueSuccessor();
   EXPECT_EQ(EntryBr->getSuccessor(1), ExitBB);
 
   CmpInst *CondInst = cast<CmpInst>(EntryBr->getCondition());
@@ -3743,28 +3743,25 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) 
{
   EXPECT_EQ(PrivLI->getPointerOperand(), PrivAI);
   // icmp
   EXPECT_TRUE(ThenBBI.next<ICmpInst>());
-
-  // check FinalizeBB
-  BBInstIter FinalizeBBI(FinalizeBB);
   // store 1, DidIt
-  auto *DidItSI = FinalizeBBI.next<StoreInst>();
+  auto *DidItSI = ThenBBI.next<StoreInst>();
   EXPECT_NE(DidItSI, nullptr);
   EXPECT_EQ(DidItSI->getValueOperand(),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
   Value *DidIt = DidItSI->getPointerOperand();
   // call __kmpc_end_single
-  auto *SingleEndCI = FinalizeBBI.next<CallInst>();
+  auto *SingleEndCI = ThenBBI.next<CallInst>();
   EXPECT_NE(SingleEndCI, nullptr);
   EXPECT_EQ(SingleEndCI->getCalledFunction()->getName(), "__kmpc_end_single");
   EXPECT_EQ(SingleEndCI->arg_size(), 2U);
   EXPECT_TRUE(isa<GlobalVariable>(SingleEndCI->getArgOperand(0)));
   EXPECT_EQ(SingleEndCI->getArgOperand(1), SingleEntryCI->getArgOperand(1));
   // br ExitBB
-  auto *ExitBBBI = FinalizeBBI.next<BranchInst>();
+  auto *ExitBBBI = ThenBBI.next<BranchInst>();
   EXPECT_NE(ExitBBBI, nullptr);
   EXPECT_TRUE(ExitBBBI->isUnconditional());
   EXPECT_EQ(ExitBBBI->getOperand(0), ExitBB);
-  EXPECT_FALSE(FinalizeBBI.hasNext());
+  EXPECT_FALSE(ThenBBI.hasNext());
 
   // check ExitBB
   BBInstIter ExitBBI(ExitBB);

diff  --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index cdab9f87a8758..0d5b553c8e652 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2729,7 +2729,6 @@ convertOmpParallel(omp::ParallelOp opInst, 
llvm::IRBuilderBase &builder,
   ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
   assert(isByRef.size() == opInst.getNumReductionVars());
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
-  bool isCancellable = constructIsCancellable(opInst);
 
   if (failed(checkImplementationStatus(*opInst)))
     return failure();
@@ -2868,18 +2867,6 @@ convertOmpParallel(omp::ParallelOp opInst, 
llvm::IRBuilderBase &builder,
                                   privateVarsInfo.privatizers)))
       return llvm::make_error<PreviouslyReportedError>();
 
-    // If we could be performing cancellation, add the cancellation barrier on
-    // the way out of the outlined region.
-    if (isCancellable) {
-      auto IPOrErr = ompBuilder->createBarrier(
-          llvm::OpenMPIRBuilder::LocationDescription(builder),
-          llvm::omp::Directive::OMPD_unknown,
-          /* ForceSimpleCall */ false,
-          /* CheckCancelFlag */ false);
-      if (!IPOrErr)
-        return IPOrErr.takeError();
-    }
-
     builder.restoreIP(oldIP);
     return llvm::Error::success();
   };
@@ -2893,6 +2880,7 @@ convertOmpParallel(omp::ParallelOp opInst, 
llvm::IRBuilderBase &builder,
   auto pbKind = llvm::omp::OMP_PROC_BIND_default;
   if (auto bind = opInst.getProcBindKind())
     pbKind = getProcBindKind(*bind);
+  bool isCancellable = constructIsCancellable(opInst);
 
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);

diff  --git a/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir 
b/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir
index 6585549de7f96..c4b245667a1f3 100644
--- a/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-barrier-cancel.mlir
@@ -29,24 +29,22 @@ llvm.func @test() {
 // CHECK:         %[[VAL_14:.*]] = icmp eq i32 %[[VAL_13]], 0
 // CHECK:         br i1 %[[VAL_14]], label %[[VAL_15:.*]], label %[[VAL_16:.*]]
 // CHECK:       omp.par.region1.cncl:                             ; preds = 
%[[VAL_11]]
-// CHECK:         br label %[[FINI:.*]]
-// CHECK:       .fini:
-// CHECK:         %[[TID:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
-// CHECK:         %[[CNCL_BARRIER:.*]] = call i32 @__kmpc_cancel_barrier(ptr 
@2, i32 %[[TID]])
-// CHECK:         br label %[[EXIT_STUB:.*]]
+// CHECK:         %[[VAL_17:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 
%[[VAL_17]])
+// CHECK:         br label %[[VAL_19:.*]]
 // CHECK:       omp.par.region1.split:                            ; preds = 
%[[VAL_11]]
 // CHECK:         %[[VAL_20:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         %[[VAL_21:.*]] = call i32 @__kmpc_cancel_barrier(ptr @3, i32 
%[[VAL_20]])
 // CHECK:         %[[VAL_22:.*]] = icmp eq i32 %[[VAL_21]], 0
 // CHECK:         br i1 %[[VAL_22]], label %[[VAL_23:.*]], label %[[VAL_24:.*]]
 // CHECK:       omp.par.region1.split.cncl:                       ; preds = 
%[[VAL_15]]
-// CHECK:         br label %[[FINI]]
+// CHECK:         br label %[[VAL_19]]
 // CHECK:       omp.par.region1.split.cont:                       ; preds = 
%[[VAL_15]]
 // CHECK:         br label %[[VAL_25:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_23]]
 // CHECK:         br label %[[VAL_26:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_25]]
-// CHECK:         br label %[[FINI]]
-// CHECK:       omp.par.exit.exitStub:
+// CHECK:         br label %[[VAL_19]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_26]], %[[VAL_24]], %[[VAL_16]]
 // CHECK:         ret void
 

diff  --git a/mlir/test/Target/LLVMIR/openmp-cancel.mlir 
b/mlir/test/Target/LLVMIR/openmp-cancel.mlir
index 5e20b8793f499..21241702ad569 100644
--- a/mlir/test/Target/LLVMIR/openmp-cancel.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-cancel.mlir
@@ -24,18 +24,16 @@ llvm.func @cancel_parallel() {
 // CHECK:         %[[VAL_15:.*]] = icmp eq i32 %[[VAL_14]], 0
 // CHECK:         br i1 %[[VAL_15]], label %[[VAL_16:.*]], label %[[VAL_17:.*]]
 // CHECK:       omp.par.region1.cncl:                             ; preds = 
%[[VAL_12]]
-// CHECK:         br label %[[VAL_20:.*]]
-// CHECK:       .fini:
 // CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         %[[VAL_19:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 
%[[VAL_18]])
-// CHECK:         br label %[[EXIT_STUB:.*]]
+// CHECK:         br label %[[VAL_20:.*]]
 // CHECK:       omp.par.region1.split:                            ; preds = 
%[[VAL_12]]
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_16]]
 // CHECK:         br label %[[VAL_22:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_21]]
 // CHECK:         br label %[[VAL_20]]
-// CHECK:       omp.par.exit.exitStub:
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_22]], %[[VAL_17]]
 // CHECK:         ret void
 
 llvm.func @cancel_parallel_if(%arg0 : i1) {
@@ -69,20 +67,18 @@ llvm.func @cancel_parallel_if(%arg0 : i1) {
 // CHECK:         br label %[[VAL_26:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_25]]
 // CHECK:         br label %[[VAL_27:.*]]
-// CHECK:       .fini:
-// CHECK:         %[[VAL_32:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
-// CHECK:         %[[VAL_33:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 
%[[VAL_32]])
-// CHECK:         br label %[[EXIT_STUB:.*]]
-// CHECK:       6:                                                ; preds = 
%[[VAL_20]]
+// CHECK:       5:                                                ; preds = 
%[[VAL_20]]
 // CHECK:         %[[VAL_28:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         %[[VAL_29:.*]] = call i32 @__kmpc_cancel(ptr @1, i32 
%[[VAL_28]], i32 1)
 // CHECK:         %[[VAL_30:.*]] = icmp eq i32 %[[VAL_29]], 0
 // CHECK:         br i1 %[[VAL_30]], label %[[VAL_24]], label %[[VAL_31:.*]]
 // CHECK:       .cncl:                                            ; preds = 
%[[VAL_21]]
+// CHECK:         %[[VAL_32:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK:         %[[VAL_33:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 
%[[VAL_32]])
 // CHECK:         br label %[[VAL_27]]
 // CHECK:       .split:                                           ; preds = 
%[[VAL_21]]
 // CHECK:         br label %[[VAL_23]]
-// CHECK:       omp.par.exit.exitStub:
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_31]], %[[VAL_26]]
 // CHECK:         ret void
 
 llvm.func @cancel_sections_if(%cond : i1) {
@@ -149,12 +145,14 @@ llvm.func @cancel_sections_if(%cond : i1) {
 // CHECK:       omp_section_loop.inc:                             ; preds = 
%[[VAL_23]]
 // CHECK:         %[[VAL_15]] = add nuw i32 %[[VAL_14]], 1
 // CHECK:         br label %[[VAL_12]]
-// CHECK:       omp_section_loop.exit:
+// CHECK:       omp_section_loop.exit:                            ; preds = 
%[[VAL_33]], %[[VAL_16]]
 // CHECK:         call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_7]])
 // CHECK:         %[[VAL_36:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]])
 // CHECK:         br label %[[VAL_37:.*]]
 // CHECK:       omp_section_loop.after:                           ; preds = 
%[[VAL_19]]
+// CHECK:         br label %[[VAL_38:.*]]
+// CHECK:       omp_section_loop.aftersections.fini:              ; preds = 
%[[VAL_37]]
 // CHECK:         ret void
 // CHECK:       .cncl:                                            ; preds = 
%[[VAL_27]]
 // CHECK:         br label %[[VAL_19]]
@@ -234,7 +232,7 @@ llvm.func @cancel_wsloop_if(%lb : i32, %ub : i32, %step : 
i32, %cond : i1) {
 // CHECK:       omp_loop.inc:                                     ; preds = 
%[[VAL_52]]
 // CHECK:         %[[VAL_34]] = add nuw i32 %[[VAL_33]], 1
 // CHECK:         br label %[[VAL_31]]
-// CHECK:       omp_loop.exit:
+// CHECK:       omp_loop.exit:                                    ; preds = 
%[[VAL_50]], %[[VAL_35]]
 // CHECK:         call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_26]])
 // CHECK:         %[[VAL_53:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_53]])

diff  --git a/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir 
b/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir
index 93fa2064ab99a..5e0d3f9f7e293 100644
--- a/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-cancellation-point.mlir
@@ -24,18 +24,16 @@ llvm.func @cancellation_point_parallel() {
 // CHECK:         %[[VAL_15:.*]] = icmp eq i32 %[[VAL_14]], 0
 // CHECK:         br i1 %[[VAL_15]], label %[[VAL_16:.*]], label %[[VAL_17:.*]]
 // CHECK:       omp.par.region1.cncl:                             ; preds = 
%[[VAL_12]]
-// CHECK:         br label %[[FINI:.*]]
-// CHECK:       .fini:
 // CHECK:         %[[VAL_18:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         %[[VAL_19:.*]] = call i32 @__kmpc_cancel_barrier(ptr @2, i32 
%[[VAL_18]])
-// CHECK:         br label %[[EXIT_STUB:.*]]
+// CHECK:         br label %[[VAL_20:.*]]
 // CHECK:       omp.par.region1.split:                            ; preds = 
%[[VAL_12]]
 // CHECK:         br label %[[VAL_21:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_16]]
 // CHECK:         br label %[[VAL_22:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_21]]
-// CHECK:         br label %[[FINI]]
-// CHECK:       omp.par.exit.exitStub:
+// CHECK:         br label %[[VAL_20]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_22]], %[[VAL_17]]
 // CHECK:         ret void
 
 llvm.func @cancellation_point_sections() {
@@ -96,12 +94,14 @@ llvm.func @cancellation_point_sections() {
 // CHECK:       omp_section_loop.inc:                             ; preds = 
%[[VAL_46]]
 // CHECK:         %[[VAL_38]] = add nuw i32 %[[VAL_37]], 1
 // CHECK:         br label %[[VAL_35]]
-// CHECK:       omp_section_loop.exit:
+// CHECK:       omp_section_loop.exit:                            ; preds = 
%[[VAL_53]], %[[VAL_39]]
 // CHECK:         call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_30]])
 // CHECK:         %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]])
 // CHECK:         br label %[[VAL_56:.*]]
 // CHECK:       omp_section_loop.after:                           ; preds = 
%[[VAL_42]]
+// CHECK:         br label %[[VAL_57:.*]]
+// CHECK:       omp_section_loop.aftersections.fini:              ; preds = 
%[[VAL_56]]
 // CHECK:         ret void
 // CHECK:       omp.section.region.cncl:                          ; preds = 
%[[VAL_48]]
 // CHECK:         br label %[[VAL_42]]
@@ -175,7 +175,7 @@ llvm.func @cancellation_point_wsloop(%lb : i32, %ub : i32, 
%step : i32) {
 // CHECK:       omp_loop.inc:                                     ; preds = 
%[[VAL_106]]
 // CHECK:         %[[VAL_92]] = add nuw i32 %[[VAL_91]], 1
 // CHECK:         br label %[[VAL_89]]
-// CHECK:       omp_loop.exit:
+// CHECK:       omp_loop.exit:                                    ; preds = 
%[[VAL_105]], %[[VAL_93]]
 // CHECK:         call void @__kmpc_for_static_fini(ptr @1, i32 %[[VAL_84]])
 // CHECK:         %[[VAL_107:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_107]])

diff  --git a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir 
b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
index 99f37c7e79be8..faccfc678adfe 100644
--- a/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-outline-infinite-loop.mlir
@@ -21,11 +21,9 @@ llvm.func @parallel_infinite_loop() -> () {
 // CHECK:       omp.region.cont:                                  ; No 
predecessors!
 // CHECK:         br label %[[VAL_4:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_5:.*]]
-// CHECK:         br label %[[FINI:.*]]
-// CHECK:       [[OMP_PAR_EXIT:omp.par.exit]]:                                 
    ; preds = %[[FINI]]
+// CHECK:         br label %[[VAL_6:.*]]
+// CHECK:       omp.par.exit:                                     ; preds = 
%[[VAL_4]]
 // CHECK:         ret void
-// CHECK:       [[FINI]]:
-// CHECK:         br label %[[OMP_PAR_EXIT]]
 // CHECK:       }
 
 // CHECK-LABEL: define internal void @parallel_infinite_loop..omp_par(

diff  --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir 
b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
index c79c369b69d7f..887d2977e45cc 100644
--- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-multiblock.mlir
@@ -108,8 +108,6 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}, %arg1: !
 // CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_49]], %[[VAL_43]]
 // CHECK:         br label %[[VAL_53:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_48]]
-// CHECK:         br label %[[FINI:.*]]
-// CHECK:       .fini:
 // CHECK:         %[[VAL_54:.*]] = load ptr, ptr %[[VAL_20]], align 8
 // CHECK:         %[[VAL_55:.*]] = load ptr, ptr %[[VAL_21]], align 8
 // CHECK:         br label %[[VAL_56:.*]]
@@ -117,5 +115,5 @@ llvm.func @missordered_blocks_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}, %arg1: !
 // CHECK:         br label %[[VAL_38]]
 // CHECK:       omp.reduction.neutral1:                           ; preds = 
%[[VAL_25]]
 // CHECK:         br label %[[VAL_30]]
-// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[FINI]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_53]]
 // CHECK:         ret void

diff  --git a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir 
b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
index 13f52f054869e..b302b4b20edd5 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-array-sections.mlir
@@ -127,6 +127,8 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}) attribute
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_36]])
 // CHECK:         br label %[[VAL_37:.*]]
 // CHECK:       omp_section_loop.after:                           ; preds = 
%[[VAL_35]]
+// CHECK:         br label %[[VAL_38:.*]]
+// CHECK:       omp_section_loop.aftersections.fini:              ; preds = 
%[[VAL_37]]
 // CHECK:         %[[VAL_39:.*]] = getelementptr inbounds [1 x ptr], ptr 
%[[VAL_14]], i64 0, i64 0
 // CHECK:         store ptr %[[VAL_21]], ptr %[[VAL_39]], align 8
 // CHECK:         %[[VAL_40:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
@@ -135,9 +137,9 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}) attribute
 // CHECK:           i32 1, label %[[VAL_43:.*]]
 // CHECK:           i32 2, label %[[VAL_44:.*]]
 // CHECK:         ]
-// CHECK:       reduce.switch.atomic:                             ; preds = 
%[[VAL_37]]
+// CHECK:       reduce.switch.atomic:                             ; preds = 
%[[VAL_38]]
 // CHECK:         unreachable
-// CHECK:       reduce.switch.nonatomic:                          ; preds = 
%[[VAL_37]]
+// CHECK:       reduce.switch.nonatomic:                          ; preds = 
%[[VAL_38]]
 // CHECK:         %[[VAL_45:.*]] = load ptr, ptr %[[VAL_21]], align 8
 // CHECK:         br label %[[VAL_46:.*]]
 // CHECK:       omp.reduction.nonatomic.body:                     ; preds = 
%[[VAL_43]]
@@ -155,7 +157,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}) attribute
 // CHECK:       omp.reduction.nonatomic.body17:                   ; preds = 
%[[VAL_47]]
 // CHECK:         %[[VAL_50]] = sub i64 %[[VAL_49]], 1
 // CHECK:         br label %[[VAL_47]]
-// CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_53]], %[[VAL_37]]
+// CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_53]], %[[VAL_38]]
 // CHECK:         %[[VAL_55:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_55]])
 // CHECK:         %[[VAL_56:.*]] = load ptr, ptr %[[VAL_21]], align 8
@@ -171,9 +173,7 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}) attribute
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_62]]
 // CHECK:         br label %[[VAL_64:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_63]]
-// CHECK:         br label %[[FINI:.fini.*]]
-// CHECK:       [[FINI]]:
-// CHECK:         br label %[[EXIT:.*]]
+// CHECK:         br label %[[VAL_65:.*]]
 // CHECK:       omp.reduction.cleanup21:                          ; preds = 
%[[VAL_57]]
 // CHECK:         br label %[[VAL_61]]
 // CHECK:       omp_section_loop.body:                            ; preds = 
%[[VAL_32]]
@@ -219,5 +219,5 @@ llvm.func @sectionsreduction_(%arg0: !llvm.ptr 
{fir.bindc_name = "x"}) attribute
 // CHECK:       omp_section_loop.inc:                             ; preds = 
%[[VAL_69]]
 // CHECK:         %[[VAL_31]] = add nuw i32 %[[VAL_30]], 1
 // CHECK:         br label %[[VAL_28]]
-// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[FINI]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_64]]
 // CHECK:         ret void

diff  --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir 
b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
index cb30d3b2f4473..a714ca68a1e95 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
@@ -96,10 +96,8 @@ module {
 // CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_34]], %[[VAL_28]]
 // CHECK:         br label %[[VAL_38:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_33]]
-// CHECK:         br label %[[FINI:.*]]
-// CHECK:       [[FINI]]:
 // CHECK:         br label %[[VAL_39:.*]]
-// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[FINI]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_38]]
 // CHECK:         ret void
 // CHECK:         %[[VAL_40:.*]] = getelementptr inbounds [2 x ptr], ptr 
%[[VAL_41:.*]], i64 0, i64 0
 // CHECK:         %[[VAL_42:.*]] = load ptr, ptr %[[VAL_40]], align 8

diff  --git a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir 
b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
index 00f6c1b02206e..19da6f8517fcd 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction-sections.mlir
@@ -86,6 +86,8 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = "x"}) 
attributes {fir.in
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_40]])
 // CHECK:         br label %[[VAL_41:.*]]
 // CHECK:       omp_section_loop.after:                           ; preds = 
%[[VAL_39]]
+// CHECK:         br label %[[VAL_42:.*]]
+// CHECK:       omp_section_loop.aftersections.fini:              ; preds = 
%[[VAL_41]]
 // CHECK:         %[[VAL_43:.*]] = getelementptr inbounds [1 x ptr], ptr 
%[[VAL_21]], i64 0, i64 0
 // CHECK:         store ptr %[[VAL_20]], ptr %[[VAL_43]], align 8
 // CHECK:         %[[VAL_44:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
@@ -94,25 +96,23 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = 
"x"}) attributes {fir.in
 // CHECK:           i32 1, label %[[VAL_47:.*]]
 // CHECK:           i32 2, label %[[VAL_48:.*]]
 // CHECK:         ]
-// CHECK:       reduce.switch.atomic:                             ; preds = 
%[[VAL_41]]
+// CHECK:       reduce.switch.atomic:                             ; preds = 
%[[VAL_42]]
 // CHECK:         unreachable
-// CHECK:       reduce.switch.nonatomic:                          ; preds = 
%[[VAL_41]]
+// CHECK:       reduce.switch.nonatomic:                          ; preds = 
%[[VAL_42]]
 // CHECK:         %[[VAL_49:.*]] = load float, ptr %[[VAL_11]], align 4
 // CHECK:         %[[VAL_50:.*]] = load float, ptr %[[VAL_20]], align 4
 // CHECK:         %[[VAL_51:.*]] = fadd contract float %[[VAL_49]], %[[VAL_50]]
 // CHECK:         store float %[[VAL_51]], ptr %[[VAL_11]], align 4
 // CHECK:         call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_44]], ptr 
@.gomp_critical_user_.reduction.var)
 // CHECK:         br label %[[VAL_46]]
-// CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_47]], %[[VAL_41]]
+// CHECK:       reduce.finalize:                                  ; preds = 
%[[VAL_47]], %[[VAL_42]]
 // CHECK:         %[[VAL_52:.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
 // CHECK:         call void @__kmpc_barrier(ptr @2, i32 %[[VAL_52]])
 // CHECK:         br label %[[VAL_53:.*]]
 // CHECK:       omp.region.cont:                                  ; preds = 
%[[VAL_46]]
 // CHECK:         br label %[[VAL_54:.*]]
 // CHECK:       omp.par.pre_finalize:                             ; preds = 
%[[VAL_53]]
-// CHECK:         br label %[[FINI:.fini.*]]
-// CHECK:       [[FINI]]:
-// CHECK:         br label %[[EXIT:.*]]
+// CHECK:         br label %[[VAL_55:.*]]
 // CHECK:       omp_section_loop.body:                            ; preds = 
%[[VAL_36]]
 // CHECK:         %[[VAL_56:.*]] = add i32 %[[VAL_34]], %[[VAL_28]]
 // CHECK:         %[[VAL_57:.*]] = mul i32 %[[VAL_56]], 1
@@ -144,10 +144,8 @@ llvm.func @sections_(%arg0: !llvm.ptr {fir.bindc_name = 
"x"}) attributes {fir.in
 // CHECK:       omp_section_loop.inc:                             ; preds = 
%[[VAL_59]]
 // CHECK:         %[[VAL_35]] = add nuw i32 %[[VAL_34]], 1
 // CHECK:         br label %[[VAL_32]]
-// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[FINI]]
+// CHECK:       omp.par.exit.exitStub:                            ; preds = 
%[[VAL_54]]
 // CHECK:         ret void
-
-// CHECK-LABEL: define internal void @.omp.reduction.func
 // CHECK:         %[[VAL_70:.*]] = getelementptr inbounds [1 x ptr], ptr 
%[[VAL_71:.*]], i64 0, i64 0
 // CHECK:         %[[VAL_72:.*]] = load ptr, ptr %[[VAL_70]], align 8
 // CHECK:         %[[VAL_73:.*]] = load float, ptr %[[VAL_72]], align 4


        
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to