peixin updated this revision to Diff 364389.
peixin added a comment.

@Meinersbur Thanks for the review.

Removed the typo fixes and gave the alloca register a name.

Also found some uses of the variable 'cur' with invalid case style in
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp; I will fix them when pushing
this patch to the main branch.
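
For reference, a minimal sketch of how the new createOrdered entry point is
driven, mirroring the OrderedDirective unit test below (M, BB, and DL are the
module, insertion block, and debug location provided by the test fixture; the
callback bodies are placeholders, not a prescribed usage):

  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
  OpenMPIRBuilder OMPBuilder(*M);
  OMPBuilder.initialize();
  IRBuilder<> Builder(BB);
  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});

  // Generates the body of the ordered region at CodeGenIP.
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &FiniBB) {
    Builder.restoreIP(CodeGenIP);
    // ... emit the statements of the ordered region here ...
  };

  // Finalization hook; nothing to clean up in this sketch.
  auto FiniCB = [&](InsertPointTy IP) {};

  // createOrdered wraps the generated body in __kmpc_ordered /
  // __kmpc_end_ordered calls and returns the insertion point after the region.
  Builder.restoreIP(OMPBuilder.createOrdered(Loc, BodyGenCB, FiniCB));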


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107430/new/

https://reviews.llvm.org/D107430

Files:
  clang/lib/CodeGen/CGStmtOpenMP.cpp
  clang/lib/Sema/SemaOpenMP.cpp
  clang/test/OpenMP/ordered_codegen.cpp
  llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
  llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
  llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp

Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -2113,6 +2113,77 @@
   EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
 }
 
+TEST_F(OpenMPIRBuilderTest, OrderedDirective) {
+  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.initialize();
+  F->setName("func");
+  IRBuilder<> Builder(BB);
+
+  OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+  AllocaInst *PrivAI =
+      Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
+
+  BasicBlock *EntryBB = nullptr;
+
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+                       BasicBlock &FiniBB) {
+    EntryBB = FiniBB.getUniquePredecessor();
+
+    llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+    llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
+    EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
+    EXPECT_EQ(EntryBB, CodeGenIPBB);
+
+    Builder.restoreIP(CodeGenIP);
+    Builder.CreateStore(F->arg_begin(), PrivAI);
+    Value *PrivLoad =
+        Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+    Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+  };
+
+  auto FiniCB = [&](InsertPointTy IP) {
+    BasicBlock *IPBB = IP.getBlock();
+    EXPECT_NE(IPBB->end(), IP.getPoint());
+  };
+
+  Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB));
+
+  Value *EntryBBTI = EntryBB->getTerminator();
+  EXPECT_EQ(EntryBBTI, nullptr);
+
+  CallInst *OrderedEntryCI = nullptr;
+  for (auto &EI : *EntryBB) {
+    Instruction *Cur = &EI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEntryCI = cast<CallInst>(Cur);
+      if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
+        break;
+      OrderedEntryCI = nullptr;
+    }
+  }
+  EXPECT_NE(OrderedEntryCI, nullptr);
+  EXPECT_EQ(OrderedEntryCI->getNumArgOperands(), 2U);
+  EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
+  EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
+
+  CallInst *OrderedEndCI = nullptr;
+  for (auto &FI : *EntryBB) {
+    Instruction *Cur = &FI;
+    if (isa<CallInst>(Cur)) {
+      OrderedEndCI = cast<CallInst>(Cur);
+      if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
+        break;
+      OrderedEndCI = nullptr;
+    }
+  }
+  EXPECT_NE(OrderedEndCI, nullptr);
+  EXPECT_EQ(OrderedEndCI->getNumArgOperands(), 2U);
+  EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
+  EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
+}
+
 TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1960,6 +1960,30 @@
                               /*Conditional*/ false, /*hasFinalize*/ true);
 }
 
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createOrdered(const LocationDescription &Loc,
+                               BodyGenCallbackTy BodyGenCB,
+                               FinalizeCallbackTy FiniCB) {
+  if (!updateToLocation(Loc))
+    return Loc.IP;
+
+  Directive OMPD = Directive::OMPD_ordered;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
+  Value *ThreadId = getOrCreateThreadID(Ident);
+  Value *Args[] = {Ident, ThreadId};
+
+  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
+  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+  Function *ExitRTLFn =
+      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
+  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+
+  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+                              /*Conditional*/ false, /*hasFinalize*/ true);
+}
+
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
     Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -724,6 +724,17 @@
                                FinalizeCallbackTy FiniCB,
                                StringRef CriticalName, Value *HintInst);
 
+  /// Generator for '#omp ordered'
+  ///
+  /// \param Loc The insert and source location description.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param FiniCB Callback to finalize variable copies.
+  ///
+  /// \returns The insertion position *after* the ordered.
+  InsertPointTy createOrdered(const LocationDescription &Loc,
+                              BodyGenCallbackTy BodyGenCB,
+                              FinalizeCallbackTy FiniCB);
+
   /// Generator for '#omp sections'
   ///
   /// \param Loc The insert and source location description.
Index: clang/test/OpenMP/ordered_codegen.cpp
===================================================================
--- clang/test/OpenMP/ordered_codegen.cpp
+++ clang/test/OpenMP/ordered_codegen.cpp
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL
 
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL
 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
@@ -128,7 +136,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -137,9 +145,12 @@
 // CHECK1-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -157,6 +168,7 @@
 // CHECK1-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK1-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK1-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -188,6 +200,7 @@
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK1-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -195,7 +208,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -213,7 +228,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -222,9 +237,11 @@
 // CHECK1-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK1-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -243,6 +260,7 @@
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK1-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK1-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -270,6 +288,7 @@
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK1-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -277,7 +296,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -303,7 +324,7 @@
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -336,9 +357,11 @@
 // CHECK1-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK1-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -369,6 +392,7 @@
 // CHECK1-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK1-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK1-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -400,6 +424,7 @@
 // CHECK1-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK1-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK1-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -409,7 +434,9 @@
 // CHECK1:       omp.dispatch.end:
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -430,7 +457,7 @@
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK1-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -440,9 +467,11 @@
 // CHECK1-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -470,6 +499,7 @@
 // CHECK1-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK1-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK1-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -501,6 +531,7 @@
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK1-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK1:       omp.inner.for.end:
@@ -508,7 +539,9 @@
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK1:       omp.dispatch.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
@@ -535,7 +568,7 @@
 // CHECK1-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK1-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK1-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK1-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -575,7 +608,15 @@
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
 // CHECK1-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK1-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK1-NORMAL-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: Fix this once the IRBuilder supports the ordered directive with the simd clause when -fopenmp-enable-irbuilder is enabled.
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK1-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
@@ -622,9 +663,11 @@
 // CHECK1-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK1-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK1-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK1:       omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK1-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK1-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK1-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -632,13 +675,15 @@
 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK1-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK1:       omp.inner.for.cond29:
+// CHECK1-IRBUILDER:       omp.inner.for.cond33:
+// CHECK1-NORMAL:       omp.inner.for.cond29:
 // CHECK1-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK1-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK1-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK1:       omp.inner.for.body32:
+// CHECK1-IRBUILDER:       omp.inner.for.body36:
+// CHECK1-NORMAL:       omp.inner.for.body32:
 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -648,17 +693,29 @@
 // CHECK1-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
 // CHECK1-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK1-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK1-NORMAL-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: Fix this once the IRBuilder supports the ordered directive with the simd clause when -fopenmp-enable-irbuilder is enabled.
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK1-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK1:       omp.body.continue37:
+// CHECK1-IRBUILDER:       omp.body.continue44:
+// CHECK1-NORMAL:       omp.body.continue37:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK1:       omp.inner.for.inc38:
+// CHECK1-IRBUILDER:       omp.inner.for.inc45:
+// CHECK1-NORMAL:       omp.inner.for.inc38:
 // CHECK1-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK1-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK1-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK1:       omp.inner.for.end40:
+// CHECK1-IRBUILDER:       omp.inner.for.end48:
+// CHECK1-NORMAL:       omp.inner.for.end40:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1:       omp.dispatch.inc:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -681,34 +738,36 @@
 // CHECK1:       .omp.final.done:
 // CHECK1-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK1:       omp.precond.end:
-// CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK1-NEXT:    ret void
 //
 //
-// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK1-NEXT:    ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK1-NEXT:  entry:
-// CHECK1-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK1-NEXT:    ret void
+// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK1-NORMAL-NEXT:  entry:
+// CHECK1-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK1-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK1-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK1-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK1-NORMAL-NEXT:    ret void
+//
+//
+// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK1-NORMAL-NEXT:  entry:
+// CHECK1-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK1-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK1-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK1-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK1-NORMAL-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -725,7 +784,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -734,9 +793,12 @@
 // CHECK2-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -754,6 +816,7 @@
 // CHECK2-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK2-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK2-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -785,6 +848,7 @@
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK2-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -792,7 +856,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -810,7 +876,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -819,9 +885,11 @@
 // CHECK2-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK2-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -840,6 +908,7 @@
 // CHECK2-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK2-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK2-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -867,6 +936,7 @@
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK2-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -874,7 +944,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -900,7 +972,7 @@
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK2-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -933,9 +1005,11 @@
 // CHECK2-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK2-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -966,6 +1040,7 @@
 // CHECK2-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK2-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK2-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -997,6 +1072,7 @@
 // CHECK2-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK2-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK2-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -1006,7 +1082,9 @@
 // CHECK2:       omp.dispatch.end:
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1027,7 +1105,7 @@
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK2-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1037,9 +1115,11 @@
 // CHECK2-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1067,6 +1147,7 @@
 // CHECK2-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK2-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK2-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1098,6 +1179,7 @@
 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK2-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK2-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK2:       omp.inner.for.end:
@@ -1105,7 +1187,9 @@
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK2:       omp.dispatch.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1132,7 +1216,7 @@
 // CHECK2-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK2-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK2-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1172,7 +1256,15 @@
 // CHECK2-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
 // CHECK2-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK2-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK2-NORMAL-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: Fix this once the IRBuilder supports the ordered directive with the simd clause when -fopenmp-enable-irbuilder is enabled.
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK2-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK2-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK2:       omp.body.continue:
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
@@ -1219,9 +1311,11 @@
 // CHECK2-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK2-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK2-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK2:       omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK2-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK2-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK2-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1229,13 +1323,15 @@
 // CHECK2-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK2-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK2:       omp.inner.for.cond29:
+// CHECK2-IRBUILDER:       omp.inner.for.cond33:
+// CHECK2-NORMAL:       omp.inner.for.cond29:
 // CHECK2-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK2-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK2-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK2:       omp.inner.for.body32:
+// CHECK2-IRBUILDER:       omp.inner.for.body36:
+// CHECK2-NORMAL:       omp.inner.for.body32:
 // CHECK2-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1245,17 +1341,29 @@
 // CHECK2-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK2-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
 // CHECK2-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK2-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK2-NORMAL-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: Fix this once the IRBuilder supports the ordered directive with the simd clause when -fopenmp-enable-irbuilder is enabled.
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK2-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK2-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK2-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK2:       omp.body.continue37:
+// CHECK2-IRBUILDER:       omp.body.continue44:
+// CHECK2-NORMAL:       omp.body.continue37:
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK2:       omp.inner.for.inc38:
+// CHECK2-IRBUILDER:       omp.inner.for.inc45:
+// CHECK2-NORMAL:       omp.inner.for.inc38:
 // CHECK2-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK2-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK2-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK2-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK2-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK2:       omp.inner.for.end40:
+// CHECK2-IRBUILDER:       omp.inner.for.end48:
+// CHECK2-NORMAL:       omp.inner.for.end40:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK2:       omp.dispatch.inc:
 // CHECK2-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -1278,34 +1386,36 @@
 // CHECK2:       .omp.final.done:
 // CHECK2-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK2:       omp.precond.end:
-// CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
-// CHECK2-NEXT:    ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK2-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK2-NEXT:    ret void
 //
 //
-// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK2-NEXT:  entry:
-// CHECK2-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK2-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK2-NEXT:    ret void
+// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK2-NORMAL-NEXT:  entry:
+// CHECK2-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK2-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK2-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK2-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-NORMAL-NEXT:    ret void
+//
+//
+// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK2-NORMAL-NEXT:  entry:
+// CHECK2-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK2-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK2-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK2-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-NORMAL-NEXT:    ret void
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -1322,7 +1432,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1331,9 +1441,12 @@
 // CHECK3-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1351,6 +1464,7 @@
 // CHECK3-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK3-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK3-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -1382,6 +1496,7 @@
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK3-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1389,7 +1504,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1407,7 +1524,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1416,9 +1533,11 @@
 // CHECK3-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK3-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1437,6 +1556,7 @@
 // CHECK3-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK3-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK3-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -1464,6 +1584,7 @@
 // CHECK3-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK3-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK3-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1471,7 +1592,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1497,7 +1620,7 @@
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1530,9 +1653,11 @@
 // CHECK3-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1563,6 +1688,7 @@
 // CHECK3-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK3-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK3-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -1594,6 +1720,7 @@
 // CHECK3-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK3-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK3-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1603,7 +1730,9 @@
 // CHECK3:       omp.dispatch.end:
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1624,7 +1753,7 @@
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK3-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1634,9 +1763,11 @@
 // CHECK3-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1664,6 +1795,7 @@
 // CHECK3-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK3-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK3-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK3-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1695,6 +1827,7 @@
 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK3-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK3:       omp.inner.for.end:
@@ -1702,7 +1835,9 @@
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK3:       omp.dispatch.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
@@ -1729,7 +1864,7 @@
 // CHECK3-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK3-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK3-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1769,7 +1904,15 @@
 // CHECK3-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
 // CHECK3-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK3-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK3-NORMAL-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: Fix these checks once IRBuilder codegen for the ordered directive with the simd clause is implemented under -fopenmp-enable-irbuilder.
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK3-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3:       omp.body.continue:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
@@ -1816,9 +1959,11 @@
 // CHECK3-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK3-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK3-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK3:       omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK3-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK3-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK3-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1826,13 +1971,15 @@
 // CHECK3-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK3-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK3:       omp.inner.for.cond29:
+// CHECK3-IRBUILDER:       omp.inner.for.cond33:
+// CHECK3-NORMAL:       omp.inner.for.cond29:
 // CHECK3-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK3-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK3-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK3:       omp.inner.for.body32:
+// CHECK3-IRBUILDER:       omp.inner.for.body36:
+// CHECK3-NORMAL:       omp.inner.for.body32:
 // CHECK3-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1842,17 +1989,29 @@
 // CHECK3-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK3-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
 // CHECK3-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK3-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK3-NORMAL-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: Fix these checks once IRBuilder codegen for the ordered directive with the simd clause is implemented under -fopenmp-enable-irbuilder.
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK3-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK3-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK3-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK3:       omp.body.continue37:
+// CHECK3-IRBUILDER:       omp.body.continue44:
+// CHECK3-NORMAL:       omp.body.continue37:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK3:       omp.inner.for.inc38:
+// CHECK3-IRBUILDER:       omp.inner.for.inc45:
+// CHECK3-NORMAL:       omp.inner.for.inc38:
 // CHECK3-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK3-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK3-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK3-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK3:       omp.inner.for.end40:
+// CHECK3-IRBUILDER:       omp.inner.for.end48:
+// CHECK3-NORMAL:       omp.inner.for.end40:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK3:       omp.dispatch.inc:
 // CHECK3-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -1875,34 +2034,36 @@
 // CHECK3:       .omp.final.done:
 // CHECK3-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK3:       omp.precond.end:
-// CHECK3-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK3-NEXT:    ret void
 //
 //
-// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK3-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK3-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK3-NEXT:    ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK3-NEXT:  entry:
-// CHECK3-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK3-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK3-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK3-NEXT:    ret void
+// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK3-NORMAL-NEXT:  entry:
+// CHECK3-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK3-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK3-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK3-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK3-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-NORMAL-NEXT:    ret void
+//
+//
+// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK3-NORMAL-NEXT:  entry:
+// CHECK3-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK3-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK3-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK3-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK3-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-NORMAL-NEXT:    ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -1919,7 +2080,7 @@
 // CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK4-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -1928,9 +2089,12 @@
 // CHECK4-NEXT:    store i32 4571423, i32* [[DOTOMP_UB]], align 4
 // CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK4:       omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK4-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK4-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1948,6 +2112,7 @@
 // CHECK4-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
 // CHECK4-NEXT:    [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
 // CHECK4-NEXT:    store i32 [[SUB]], i32* [[I]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -1979,6 +2144,7 @@
 // CHECK4-NEXT:    [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
 // CHECK4-NEXT:    store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK4:       omp.inner.for.end:
@@ -1986,7 +2152,9 @@
 // CHECK4:       omp.dispatch.inc:
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK4:       omp.dispatch.end:
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
@@ -2004,7 +2172,7 @@
 // CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I:%.*]] = alloca i64, align 8
-// CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -2013,9 +2181,11 @@
 // CHECK4-NEXT:    store i64 16908287, i64* [[DOTOMP_UB]], align 8
 // CHECK4-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK4:       omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK4-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK4-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2034,6 +2204,7 @@
 // CHECK4-NEXT:    [[MUL:%.*]] = mul i64 [[TMP5]], 127
 // CHECK4-NEXT:    [[ADD1:%.*]] = add i64 131071, [[MUL]]
 // CHECK4-NEXT:    store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -2061,6 +2232,7 @@
 // CHECK4-NEXT:    [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK4-NEXT:    [[ADD7:%.*]] = add i64 [[TMP17]], 1
 // CHECK4-NEXT:    store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK4:       omp.inner.for.end:
@@ -2068,7 +2240,9 @@
 // CHECK4:       omp.dispatch.inc:
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK4:       omp.dispatch.end:
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
@@ -2094,7 +2268,7 @@
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I8:%.*]] = alloca i8, align 1
 // CHECK4-NEXT:    [[X9:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -2127,9 +2301,11 @@
 // CHECK4-NEXT:    store i64 1, i64* [[DOTOMP_STRIDE]], align 8
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK4-NEXT:    [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK4:       omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
 // CHECK4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
 // CHECK4-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2160,6 +2336,7 @@
 // CHECK4-NEXT:    [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
 // CHECK4-NEXT:    [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
 // CHECK4-NEXT:    store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -2191,6 +2368,7 @@
 // CHECK4-NEXT:    [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
 // CHECK4-NEXT:    [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
 // CHECK4-NEXT:    store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK4:       omp.inner.for.end:
@@ -2200,7 +2378,9 @@
 // CHECK4:       omp.dispatch.end:
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       omp.precond.end:
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
@@ -2221,7 +2401,7 @@
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I:%.*]] = alloca i8, align 1
 // CHECK4-NEXT:    [[X2:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    store float* [[A]], float** [[A_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[B]], float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    store float* [[C]], float** [[C_ADDR]], align 8
@@ -2231,9 +2411,11 @@
 // CHECK4-NEXT:    store i32 199, i32* [[DOTOMP_UB]], align 4
 // CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK4:       omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK4-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK4-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2261,6 +2443,7 @@
 // CHECK4-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
 // CHECK4-NEXT:    [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
 // CHECK4-NEXT:    store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
 // CHECK4-NEXT:    [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -2292,6 +2475,7 @@
 // CHECK4-NEXT:    [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
 // CHECK4-NEXT:    [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
 // CHECK4-NEXT:    store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND]]
 // CHECK4:       omp.inner.for.end:
@@ -2299,7 +2483,9 @@
 // CHECK4:       omp.dispatch.inc:
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND]]
 // CHECK4:       omp.dispatch.end:
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
@@ -2326,7 +2512,7 @@
 // CHECK4-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[I28:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK4-NEXT:    store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
 // CHECK4-NEXT:    store i32 [[UP]], i32* [[UP_ADDR]], align 4
 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -2366,7 +2552,15 @@
 // CHECK4-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
 // CHECK4-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK4-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK4-NORMAL-NEXT:    call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: Fix these checks once IRBuilder codegen for the ordered directive with the simd clause is implemented under -fopenmp-enable-irbuilder.
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK4-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK4-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK4:       omp.body.continue:
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
@@ -2413,9 +2607,11 @@
 // CHECK4-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
 // CHECK4-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
 // CHECK4-NEXT:    [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND:%.*]]
 // CHECK4:       omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK4-NEXT:    [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
 // CHECK4-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
 // CHECK4-NEXT:    br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2423,13 +2619,15 @@
 // CHECK4-NEXT:    [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
 // CHECK4-NEXT:    store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK4:       omp.inner.for.cond29:
+// CHECK4-IRBUILDER:       omp.inner.for.cond33:
+// CHECK4-NORMAL:       omp.inner.for.cond29:
 // CHECK4-NEXT:    [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK4-NEXT:    [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
 // CHECK4-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK4-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK4-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK4:       omp.inner.for.body32:
+// CHECK4-IRBUILDER:       omp.inner.for.body36:
+// CHECK4-NORMAL:       omp.inner.for.body32:
 // CHECK4-NEXT:    [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
 // CHECK4-NEXT:    [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK4-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -2439,17 +2637,29 @@
 // CHECK4-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK4-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
 // CHECK4-NEXT:    store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK4-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK4-NORMAL-NEXT:    call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: Fix these checks once IRBuilder codegen for the ordered directive with the simd clause is implemented under -fopenmp-enable-irbuilder.
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK4-IRBUILDER-NEXT:    [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK4-IRBUILDER-NEXT:    [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK4-IRBUILDER-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 // CHECK4-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK4:       omp.body.continue37:
+// CHECK4-IRBUILDER:       omp.body.continue44:
+// CHECK4-NORMAL:       omp.body.continue37:
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK4:       omp.inner.for.inc38:
+// CHECK4-IRBUILDER:       omp.inner.for.inc45:
+// CHECK4-NORMAL:       omp.inner.for.inc38:
 // CHECK4-NEXT:    [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
 // CHECK4-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
 // CHECK4-NEXT:    store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
 // CHECK4-NEXT:    call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
 // CHECK4-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK4:       omp.inner.for.end40:
+// CHECK4-IRBUILDER:       omp.inner.for.end48:
+// CHECK4-NORMAL:       omp.inner.for.end40:
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK4:       omp.dispatch.inc:
 // CHECK4-NEXT:    br label [[OMP_DISPATCH_COND]]
@@ -2472,34 +2682,36 @@
 // CHECK4:       .omp.final.done:
 // CHECK4-NEXT:    br label [[OMP_PRECOND_END]]
 // CHECK4:       omp.precond.end:
-// CHECK4-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
 // CHECK4-NEXT:    ret void
 //
 //
-// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK4-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK4-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK4-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK4-NEXT:    ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK4-NEXT:  entry:
-// CHECK4-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK4-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK4-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK4-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK4-NEXT:    ret void
+// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK4-NORMAL-NEXT:  entry:
+// CHECK4-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK4-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK4-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK4-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK4-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK4-NORMAL-NEXT:    ret void
+//
+//
+// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK4-NORMAL-NEXT:  entry:
+// CHECK4-NORMAL-NEXT:    [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK4-NORMAL-NEXT:    store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK4-NORMAL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK4-NORMAL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK4-NORMAL-NEXT:    store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK4-NORMAL-NEXT:    ret void
 //
 //
 // CHECK5-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
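
The CHECKn-NORMAL and CHECKn-IRBUILDER prefixes let one set of checks cover both codegen modes: lines common to both keep the bare CHECKn prefix, while lines that differ carry the mode-specific suffix. A minimal sketch of the assumed RUN-line pairing (the actual RUN lines of ordered_codegen.cpp are not part of this hunk and may use different flags):

  // RUN: %clang_cc1 -fopenmp -emit-llvm %s -o - \
  // RUN:   | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL
  // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -emit-llvm %s -o - \
  // RUN:   | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER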
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -5320,8 +5320,9 @@
 
       if (Rel == BO_LE || Rel == BO_GE) {
         // Add one to the range if the relational operator is inclusive.
-        Range =
-            AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
+        Range = AssertSuccess(Actions.BuildBinOp(
+            nullptr, {}, BO_Add, Range,
+            Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
       }
 
       // Divide by the absolute step amount.
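
The Sema hunk above computes the inclusive-bound adjustment as an explicit addition of the integer constant 1 (BO_Add) instead of pre-incrementing the Range expression, so the widened range is built as a plain value computation. A rough standalone sketch of the arithmetic, with invented names, assuming an increasing loop with Range >= 0 (the real logic, including its exact rounding, stays in SemaOpenMP.cpp):

  long long loopTripCount(long long LB, long long UB, long long Step,
                          bool InclusiveBound) {
    long long Range = UB - LB;
    if (InclusiveBound)
      Range = Range + 1;              // mirrors BO_Add(Range, 1) for <= / >=
    long long AbsStep = Step < 0 ? -Step : Step;
    // Round up so a partial final step still counts as one iteration.
    return (Range + AbsStep - 1) / AbsStep;
  }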
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5314,6 +5314,33 @@
 }
 
 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+  if (CGM.getLangOpts().OpenMPIRBuilder) {
+    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+    // TODO: The ordered directive with depend or simd clause.
+    const CapturedStmt *CS = S.getInnermostCapturedStmt();
+    const Stmt *OrderedRegionBodyStmt = CS->getCapturedStmt();
+
+    auto FiniCB = [this](InsertPointTy IP) {
+      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+    };
+
+    auto BodyGenCB = [OrderedRegionBodyStmt, this](InsertPointTy AllocaIP,
+                                                   InsertPointTy CodeGenIP,
+                                                   llvm::BasicBlock &FiniBB) {
+      OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+      OMPBuilderCBHelpers::EmitOMPRegionBody(*this, OrderedRegionBodyStmt,
+                                             CodeGenIP, FiniBB);
+    };
+
+    LexicalScope Scope(*this, S.getSourceRange());
+    EmitStopPoint(&S);
+    Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB));
+
+    return;
+  }
+
   if (S.hasClausesOfKind<OMPDependClause>()) {
     assert(!S.hasAssociatedStmt() &&
            "No associated statement must be in ordered depend construct.");
