peixin updated this revision to Diff 364389.
peixin added a comment.
@Meinersbur Thanks for the review.
Removed the typo fixes and gave the alloca register one name.
Also found some typos of invalid case style for variable 'cur' in
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp and will fix them when pushing
this patch to main branch.
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D107430/new/
https://reviews.llvm.org/D107430
Files:
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/ordered_codegen.cpp
llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
Index: llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
===================================================================
--- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -2113,6 +2113,77 @@
EXPECT_EQ(CriticalEndCI->getArgOperand(2)->getType(), CriticalNamePtrTy);
}
+TEST_F(OpenMPIRBuilderTest, OrderedDirective) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ F->setName("func");
+ IRBuilder<> Builder(BB);
+
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+
+ AllocaInst *PrivAI =
+ Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
+
+ BasicBlock *EntryBB = nullptr;
+
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ BasicBlock &FiniBB) {
+ EntryBB = FiniBB.getUniquePredecessor();
+
+ llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
+ llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
+ EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
+ EXPECT_EQ(EntryBB, CodeGenIPBB);
+
+ Builder.restoreIP(CodeGenIP);
+ Builder.CreateStore(F->arg_begin(), PrivAI);
+ Value *PrivLoad =
+ Builder.CreateLoad(PrivAI->getAllocatedType(), PrivAI, "local.use");
+ Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+ };
+
+ auto FiniCB = [&](InsertPointTy IP) {
+ BasicBlock *IPBB = IP.getBlock();
+ EXPECT_NE(IPBB->end(), IP.getPoint());
+ };
+
+ Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB));
+
+ Value *EntryBBTI = EntryBB->getTerminator();
+ EXPECT_EQ(EntryBBTI, nullptr);
+
+ CallInst *OrderedEntryCI = nullptr;
+ for (auto &EI : *EntryBB) {
+ Instruction *Cur = &EI;
+ if (isa<CallInst>(Cur)) {
+ OrderedEntryCI = cast<CallInst>(Cur);
+ if (OrderedEntryCI->getCalledFunction()->getName() == "__kmpc_ordered")
+ break;
+ OrderedEntryCI = nullptr;
+ }
+ }
+ EXPECT_NE(OrderedEntryCI, nullptr);
+ EXPECT_EQ(OrderedEntryCI->getNumArgOperands(), 2U);
+ EXPECT_EQ(OrderedEntryCI->getCalledFunction()->getName(), "__kmpc_ordered");
+ EXPECT_TRUE(isa<GlobalVariable>(OrderedEntryCI->getArgOperand(0)));
+
+ CallInst *OrderedEndCI = nullptr;
+ for (auto &FI : *EntryBB) {
+ Instruction *Cur = &FI;
+ if (isa<CallInst>(Cur)) {
+ OrderedEndCI = cast<CallInst>(Cur);
+ if (OrderedEndCI->getCalledFunction()->getName() == "__kmpc_end_ordered")
+ break;
+ OrderedEndCI = nullptr;
+ }
+ }
+ EXPECT_NE(OrderedEndCI, nullptr);
+ EXPECT_EQ(OrderedEndCI->getNumArgOperands(), 2U);
+ EXPECT_TRUE(isa<GlobalVariable>(OrderedEndCI->getArgOperand(0)));
+ EXPECT_EQ(OrderedEndCI->getArgOperand(1), OrderedEntryCI->getArgOperand(1));
+}
+
TEST_F(OpenMPIRBuilderTest, CopyinBlocks) {
OpenMPIRBuilder OMPBuilder(*M);
OMPBuilder.initialize();
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1960,6 +1960,30 @@
/*Conditional*/ false, /*hasFinalize*/ true);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createOrdered(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Directive OMPD = Directive::OMPD_ordered;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId};
+
+ Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
+ Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+ Function *ExitRTLFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
+ Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+
+ return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ false, /*hasFinalize*/ true);
+}
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
Index: llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
===================================================================
--- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -724,6 +724,17 @@
FinalizeCallbackTy FiniCB,
StringRef CriticalName, Value *HintInst);
+ /// Generator for '#omp ordered'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param FiniCB Callback to finalize variable copies.
+ ///
+ /// \returns The insertion position *after* the ordered.
+ InsertPointTy createOrdered(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB);
+
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
Index: clang/test/OpenMP/ordered_codegen.cpp
===================================================================
--- clang/test/OpenMP/ordered_codegen.cpp
+++ clang/test/OpenMP/ordered_codegen.cpp
@@ -1,11 +1,19 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-NORMAL
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-NORMAL
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK1,CHECK1-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK2,CHECK2-IRBUILDER
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-NORMAL
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK4
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-NORMAL
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -fopenmp-version=45 -o - | FileCheck %s --check-prefixes=CHECK3,CHECK3-IRBUILDER
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -fopenmp-version=45 -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK4,CHECK4-IRBUILDER
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK5
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
@@ -128,7 +136,7 @@
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -137,9 +145,12 @@
// CHECK1-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK1-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -157,6 +168,7 @@
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -188,6 +200,7 @@
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@@ -195,7 +208,9 @@
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@@ -213,7 +228,7 @@
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -222,9 +237,11 @@
// CHECK1-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -243,6 +260,7 @@
// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -270,6 +288,7 @@
// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@@ -277,7 +296,9 @@
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@@ -303,7 +324,7 @@
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[X9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -336,9 +357,11 @@
// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -369,6 +392,7 @@
// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK1-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK1-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -400,6 +424,7 @@
// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK1-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@@ -409,7 +434,9 @@
// CHECK1: omp.dispatch.end:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@@ -430,7 +457,7 @@
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK1-NEXT: [[X2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK1-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK1-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -440,9 +467,11 @@
// CHECK1-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -470,6 +499,7 @@
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK1-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -501,6 +531,7 @@
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
@@ -508,7 +539,9 @@
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
@@ -535,7 +568,7 @@
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK1-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -575,7 +608,15 @@
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK1-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK1-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
@@ -622,9 +663,11 @@
// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK1-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -632,13 +675,15 @@
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK1: omp.inner.for.cond29:
+// CHECK1-IRBUILDER: omp.inner.for.cond33:
+// CHECK1-NORMAL: omp.inner.for.cond29:
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK1: omp.inner.for.body32:
+// CHECK1-IRBUILDER: omp.inner.for.body36:
+// CHECK1-NORMAL: omp.inner.for.body32:
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -648,17 +693,29 @@
// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK1-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK1-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK1: omp.body.continue37:
+// CHECK1-IRBUILDER: omp.body.continue44:
+// CHECK1-NORMAL: omp.body.continue37:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK1: omp.inner.for.inc38:
+// CHECK1-IRBUILDER: omp.inner.for.inc45:
+// CHECK1-NORMAL: omp.inner.for.inc38:
// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK1: omp.inner.for.end40:
+// CHECK1-IRBUILDER: omp.inner.for.end48:
+// CHECK1-NORMAL: omp.inner.for.end40:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
@@ -681,34 +738,36 @@
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK1-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK1-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK1-NEXT: ret void
-//
-//
-// CHECK1-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK1-NEXT: ret void
+// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK1-NORMAL-NEXT: entry:
+// CHECK1-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK1-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK1-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK1-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK1-NORMAL-NEXT: ret void
+//
+//
+// CHECK1-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK1-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK1-NORMAL-NEXT: entry:
+// CHECK1-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK1-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK1-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK1-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK1-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK1-NORMAL-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -725,7 +784,7 @@
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -734,9 +793,12 @@
// CHECK2-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK2-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -754,6 +816,7 @@
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -785,6 +848,7 @@
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@@ -792,7 +856,9 @@
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@@ -810,7 +876,7 @@
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -819,9 +885,11 @@
// CHECK2-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741891, i64 0, i64 16908287, i64 1, i64 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -840,6 +908,7 @@
// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -867,6 +936,7 @@
// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@@ -874,7 +944,9 @@
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@@ -900,7 +972,7 @@
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[X9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -933,9 +1005,11 @@
// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741894, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -966,6 +1040,7 @@
// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK2-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK2-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -997,6 +1072,7 @@
// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK2-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK2-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@@ -1006,7 +1082,9 @@
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
-// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@@ -1027,7 +1105,7 @@
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK2-NEXT: [[X2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK2-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK2-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1037,9 +1115,11 @@
// CHECK2-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 1073741893, i32 0, i32 199, i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1067,6 +1147,7 @@
// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK2-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1098,6 +1179,7 @@
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
@@ -1105,7 +1187,9 @@
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
@@ -1132,7 +1216,7 @@
// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[I28:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK2-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1172,7 +1256,15 @@
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK2-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK2-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK2-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
@@ -1219,9 +1311,11 @@
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK2-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1229,13 +1323,15 @@
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK2-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK2: omp.inner.for.cond29:
+// CHECK2-IRBUILDER: omp.inner.for.cond33:
+// CHECK2-NORMAL: omp.inner.for.cond29:
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK2-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK2-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK2: omp.inner.for.body32:
+// CHECK2-IRBUILDER: omp.inner.for.body36:
+// CHECK2-NORMAL: omp.inner.for.body32:
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1245,17 +1341,29 @@
// CHECK2-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK2-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK2-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK2-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK2-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK2-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK2-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK2: omp.body.continue37:
+// CHECK2-IRBUILDER: omp.body.continue44:
+// CHECK2-NORMAL: omp.body.continue37:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK2: omp.inner.for.inc38:
+// CHECK2-IRBUILDER: omp.inner.for.inc45:
+// CHECK2-NORMAL: omp.inner.for.inc38:
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK2-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK2: omp.inner.for.end40:
+// CHECK2-IRBUILDER: omp.inner.for.end48:
+// CHECK2-NORMAL: omp.inner.for.end40:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
@@ -1278,34 +1386,36 @@
// CHECK2: .omp.final.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
-// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
-// CHECK2-NEXT: ret void
-//
-//
-// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK2-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK2-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK2-NEXT: ret void
//
//
-// CHECK2-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK2-NEXT: ret void
+// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK2-NORMAL-NEXT: entry:
+// CHECK2-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK2-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK2-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK2-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-NORMAL-NEXT: ret void
+//
+//
+// CHECK2-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK2-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK2-NORMAL-NEXT: entry:
+// CHECK2-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK2-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK2-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK2-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK2-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK2-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK2-NORMAL-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -1322,7 +1432,7 @@
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1331,9 +1441,12 @@
// CHECK3-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK3-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1351,6 +1464,7 @@
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -1382,6 +1496,7 @@
// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@@ -1389,7 +1504,9 @@
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@@ -1407,7 +1524,7 @@
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1416,9 +1533,11 @@
// CHECK3-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1437,6 +1556,7 @@
// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -1464,6 +1584,7 @@
// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@@ -1471,7 +1592,9 @@
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@@ -1497,7 +1620,7 @@
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[X9:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1530,9 +1653,11 @@
// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1563,6 +1688,7 @@
// CHECK3-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK3-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -1594,6 +1720,7 @@
// CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK3-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK3-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@@ -1603,7 +1730,9 @@
// CHECK3: omp.dispatch.end:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
-// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@@ -1624,7 +1753,7 @@
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK3-NEXT: [[X2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK3-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK3-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1634,9 +1763,11 @@
// CHECK3-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1664,6 +1795,7 @@
// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK3-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -1695,6 +1827,7 @@
// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
@@ -1702,7 +1835,9 @@
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
@@ -1729,7 +1864,7 @@
// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[I28:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK3-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK3-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -1769,7 +1904,15 @@
// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK3-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK3-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK3-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
@@ -1816,9 +1959,11 @@
// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK3-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1826,13 +1971,15 @@
// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK3-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK3: omp.inner.for.cond29:
+// CHECK3-IRBUILDER: omp.inner.for.cond33:
+// CHECK3-NORMAL: omp.inner.for.cond29:
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK3: omp.inner.for.body32:
+// CHECK3-IRBUILDER: omp.inner.for.body36:
+// CHECK3-NORMAL: omp.inner.for.body32:
// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -1842,17 +1989,29 @@
// CHECK3-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK3-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK3-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK3-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK3-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK3: omp.body.continue37:
+// CHECK3-IRBUILDER: omp.body.continue44:
+// CHECK3-NORMAL: omp.body.continue37:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK3: omp.inner.for.inc38:
+// CHECK3-IRBUILDER: omp.inner.for.inc45:
+// CHECK3-NORMAL: omp.inner.for.inc38:
// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK3: omp.inner.for.end40:
+// CHECK3-IRBUILDER: omp.inner.for.end48:
+// CHECK3-NORMAL: omp.inner.for.end40:
// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3: omp.dispatch.inc:
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
@@ -1875,34 +2034,36 @@
// CHECK3: .omp.final.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.end:
-// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK3-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK3-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK3-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK3-NEXT: ret void
//
//
-// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK3-NEXT: ret void
-//
-//
-// CHECK3-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK3-NEXT: ret void
+// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK3-NORMAL-NEXT: entry:
+// CHECK3-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK3-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK3-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK3-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK3-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-NORMAL-NEXT: ret void
+//
+//
+// CHECK3-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK3-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK3-NORMAL-NEXT: entry:
+// CHECK3-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK3-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK3-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK3-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK3-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK3-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-NORMAL-NEXT: ret void
//
//
// CHECK4-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
@@ -1919,7 +2080,7 @@
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -1928,9 +2089,12 @@
// CHECK4-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
+// CHECK4-NORMAL-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 4571423, i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -1948,6 +2112,7 @@
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 7
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]]
// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[I]], align 4
@@ -1979,6 +2144,7 @@
// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1
// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@@ -1986,7 +2152,9 @@
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
-// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@@ -2004,7 +2172,7 @@
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -2013,9 +2181,11 @@
// CHECK4-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8
// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 67, i64 0, i64 16908287, i64 1, i64 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2034,6 +2204,7 @@
// CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP5]], 127
// CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]]
// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP6:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[I]], align 8
@@ -2061,6 +2232,7 @@
// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP17]], 1
// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_8u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@@ -2068,7 +2240,9 @@
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
-// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@@ -2094,7 +2268,7 @@
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I8:%.*]] = alloca i8, align 1
// CHECK4-NEXT: [[X9:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -2127,9 +2301,11 @@
// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 70, i64 0, i64 [[TMP6]], i64 1, i64 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2160,6 +2336,7 @@
// CHECK4-NEXT: [[SUB20:%.*]] = sub nsw i64 11, [[MUL19]]
// CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[SUB20]] to i32
// CHECK4-NEXT: store i32 [[CONV21]], i32* [[X9]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP15:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[I8]], align 1
@@ -2191,6 +2368,7 @@
// CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8
// CHECK4-NEXT: [[ADD30:%.*]] = add nsw i64 [[TMP26]], 1
// CHECK4-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB8]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@@ -2200,7 +2378,9 @@
// CHECK4: omp.dispatch.end:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
-// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@@ -2221,7 +2401,7 @@
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I:%.*]] = alloca i8, align 1
// CHECK4-NEXT: [[X2:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
// CHECK4-NEXT: store float* [[B]], float** [[B_ADDR]], align 8
// CHECK4-NEXT: store float* [[C]], float** [[C_ADDR]], align 8
@@ -2231,9 +2411,11 @@
// CHECK4-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 69, i32 0, i32 199, i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2261,6 +2443,7 @@
// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 -10, [[MUL5]]
// CHECK4-NEXT: store i32 [[ADD6]], i32* [[X2]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: [[TMP8:%.*]] = load float*, float** [[B_ADDR]], align 8
// CHECK4-NEXT: [[TMP9:%.*]] = load i8, i8* [[I]], align 1
@@ -2292,6 +2475,7 @@
// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4
// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB10]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
@@ -2299,7 +2483,9 @@
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK4: omp.dispatch.end:
-// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
@@ -2326,7 +2512,7 @@
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[I28:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK4-NEXT: store i32 [[LOW]], i32* [[LOW_ADDR]], align 4
// CHECK4-NEXT: store i32 [[UP]], i32* [[UP_ADDR]], align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[LOW_ADDR]], align 4
@@ -2366,7 +2552,15 @@
// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3
-// CHECK4-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// CHECK4-NORMAL-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I5]], align 4
+// CHECK4-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
@@ -2413,9 +2607,11 @@
// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4
// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_20]], align 4
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]])
// CHECK4-NEXT: call void @__kmpc_dispatch_init_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 66, i32 0, i32 [[TMP25]], i32 1, i32 1)
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK4: omp.dispatch.cond:
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK4-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]])
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
@@ -2423,13 +2619,15 @@
// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
// CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
-// CHECK4: omp.inner.for.cond29:
+// CHECK4-IRBUILDER: omp.inner.for.cond33:
+// CHECK4-NORMAL: omp.inner.for.cond29:
// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK4-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
-// CHECK4: omp.inner.for.body32:
+// CHECK4-IRBUILDER: omp.inner.for.body36:
+// CHECK4-NORMAL: omp.inner.for.body32:
// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
@@ -2439,17 +2637,29 @@
// CHECK4-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK4-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]]
// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7
-// CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// CHECK4-NORMAL-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7
+// TODO: To be fixed after IRBuilder of ordered directive with simd clause is implemented with the option -fopenmp-enable-irbuilder enabled.
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP1_IRBUILER:%.*]] = load i32, i32* [[I28]], align 4
+// CHECK4-IRBUILDER-NEXT: [[IDXPROM_IRBUILER:%.*]] = sext i32 [[TMP1_IRBUILER]] to i64
+// CHECK4-IRBUILDER-NEXT: [[ARRAYIDX_IRBUILER:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM_IRBUILER]]
+// CHECK4-IRBUILDER-NEXT: store float 1.000000e+00, float* [[ARRAYIDX_IRBUILER]], align 4
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_end_ordered(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
-// CHECK4: omp.body.continue37:
+// CHECK4-IRBUILDER: omp.body.continue44:
+// CHECK4-NORMAL: omp.body.continue37:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
-// CHECK4: omp.inner.for.inc38:
+// CHECK4-IRBUILDER: omp.inner.for.inc45:
+// CHECK4-NORMAL: omp.inner.for.inc38:
// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
// CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
// CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12]])
// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK4: omp.inner.for.end40:
+// CHECK4-IRBUILDER: omp.inner.for.end48:
+// CHECK4-NORMAL: omp.inner.for.end40:
// CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK4: omp.dispatch.inc:
// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]]
@@ -2472,34 +2682,36 @@
// CHECK4: .omp.final.done:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
// CHECK4: omp.precond.end:
-// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+// CHECK4-IRBUILDER-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+// CHECK4-IRBUILDER-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]])
+// CHECK4-NORMAL-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
// CHECK4-NEXT: ret void
//
//
-// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt
-// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
-// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK4-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK4-NEXT: ret void
-//
-//
-// CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1
-// CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
-// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
-// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
-// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
-// CHECK4-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
-// CHECK4-NEXT: ret void
+// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt
+// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK4-NORMAL-NEXT: entry:
+// CHECK4-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK4-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK4-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK4-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK4-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK4-NORMAL-NEXT: ret void
+//
+//
+// CHECK4-NORMAL-LABEL: define {{[^@]+}}@__captured_stmt.1
+// CHECK4-NORMAL-SAME: (i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] {
+// CHECK4-NORMAL-NEXT: entry:
+// CHECK4-NORMAL-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8
+// CHECK4-NORMAL-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8
+// CHECK4-NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// CHECK4-NORMAL-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK4-NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]]
+// CHECK4-NORMAL-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK4-NORMAL-NEXT: ret void
//
//
// CHECK5-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_
Index: clang/lib/Sema/SemaOpenMP.cpp
===================================================================
--- clang/lib/Sema/SemaOpenMP.cpp
+++ clang/lib/Sema/SemaOpenMP.cpp
@@ -5320,8 +5320,9 @@
if (Rel == BO_LE || Rel == BO_GE) {
// Add one to the range if the relational operator is inclusive.
- Range =
- AssertSuccess(Actions.BuildUnaryOp(nullptr, {}, UO_PreInc, Range));
+ Range = AssertSuccess(Actions.BuildBinOp(
+ nullptr, {}, BO_Add, Range,
+ Actions.ActOnIntegerConstant(SourceLocation(), 1).get()));
}
// Divide by the absolute step amount.
Index: clang/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5314,6 +5314,33 @@
}
void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ // TODO: The ordered directive with depend or simd clause.
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ const Stmt *OrderedRegionBodyStmt = CS->getCapturedStmt();
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [OrderedRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, OrderedRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ Builder.restoreIP(OMPBuilder.createOrdered(Builder, BodyGenCB, FiniCB));
+
+ return;
+ }
+
if (S.hasClausesOfKind<OMPDependClause>()) {
assert(!S.hasAssociatedStmt() &&
"No associated statement must be in ordered depend construct.");
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits