https://github.com/Parigi updated https://github.com/llvm/llvm-project/pull/181841
>From b937e1e308df7de84265fd57282cd2d33c40ecc2 Mon Sep 17 00:00:00 2001 From: Luca Parigi <[email protected]> Date: Tue, 17 Feb 2026 16:41:08 +0100 Subject: [PATCH 1/2] [CIR][OpenMP] Emit #pragma omp for as omp.wsloop + omp.loop_nest Emit OMPForDirective as omp.wsloop with omp.loop_nest, using CIR ops for bounds/step and converting them to standard MLIR integers via UnrealizedConversionCastOp. Add reconcile-unrealized-casts pass to the CIR-to-LLVM pipeline. Add CIR-level and LLVM IR lowering tests. --- clang/lib/CIR/CodeGen/CIRGenFunction.h | 13 + clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 170 ++++++--- clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp | 202 ++++++++++- .../CIR/Lowering/DirectToLLVM/CMakeLists.txt | 1 + .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 6 + clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c | 326 ++++++++++++++++++ clang/test/CIR/Lowering/pragma-omp-for.c | 188 ++++++++++ 7 files changed, 854 insertions(+), 52 deletions(-) create mode 100644 clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c create mode 100644 clang/test/CIR/Lowering/pragma-omp-for.c diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index cc0087ba2d6bd..32e3044dc08a5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -63,6 +63,19 @@ class CIRGenFunction : public CIRGenTypeCache { /// is where the next operations will be introduced. CIRGenBuilderTy &builder; + /// State used to communicate OpenMP loop bounds from `emitOMPForDirective` + /// to `emitForStmt`. + struct LoopBounds { + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value step; + mlir::Type inductionVarType; + const VarDecl *inductionVar; + bool inclusive; + }; + + std::optional<LoopBounds> currentOMPLoopBounds; + /// A jump destination is an abstract label, branching to which may /// require a jump out through normal cleanups. 
struct JumpDest { diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index db3827340c455..f6b3b976ac499 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -22,6 +22,10 @@ #include "clang/AST/StmtOpenMP.h" #include "clang/CIR/MissingFeatures.h" +// Required to construct OpenMP operations such as `omp.wsloop` and +// `omp.loop_nest` during lowering. +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" + using namespace clang; using namespace clang::CIRGen; using namespace cir; @@ -939,16 +943,36 @@ CIRGenFunction::emitCXXForRangeStmt(const CXXForRangeStmt &s, return mlir::success(); } +/// Emit a `for` statement as either a CIR `cir.for` or, when inside an +/// OpenMP `#pragma omp for`, an `omp.loop_nest` within the wsloop created +/// by emitOMPForDirective. + mlir::LogicalResult CIRGenFunction::emitForStmt(const ForStmt &s) { + + // CIR for-loop operation (used in the non-OpenMP case). cir::ForOp forOp; + // OpenMP loop nest operation (used when inside `omp.wsloop`). + mlir::omp::LoopNestOp loopNestOp; + + auto scopeLoc = getLoc(s.getSourceRange()); + bool isOpenMPFor = currentOMPLoopBounds.has_value(); + + // This lambda emits either an OpenMP `omp.loop_nest` or a regular CIR + // `cir.for`, depending on whether we are inside an OpenMP for directive. // TODO: pass in an array of attributes. auto forStmtBuilder = [&]() -> mlir::LogicalResult { mlir::LogicalResult loopRes = mlir::success(); - // Evaluate the first part before the loop. - if (s.getInit()) - if (emitStmt(s.getInit(), /*useCurrentScope=*/true).failed()) - return mlir::failure(); + + // For OpenMP loops, init is emitted by emitOMPForDirective before the + // wsloop so that the alloca lives outside the loop region. + if (!isOpenMPFor) { + // Evaluate the first part before the loop. 
+ if (s.getInit()) + if (emitStmt(s.getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + } + assert(!cir::MissingFeatures::loopInfoStack()); // In the classic codegen, if there are any cleanups between here and the // loop-exit scope, a block is created to stage the loop exit. We probably @@ -956,58 +980,110 @@ mlir::LogicalResult CIRGenFunction::emitForStmt(const ForStmt &s) { // to be sure we handle all cases. assert(!cir::MissingFeatures::requiresCleanups()); - forOp = builder.createFor( - getLoc(s.getSourceRange()), - /*condBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - assert(!cir::MissingFeatures::createProfileWeightsForLoop()); - assert(!cir::MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); - mlir::Value condVal; - if (s.getCond()) { - // If the for statement has a condition scope, - // emit the local variable declaration. - if (s.getConditionVariable()) - emitDecl(*s.getConditionVariable()); - // C99 6.8.5p2/p4: The first substatement is executed if the - // expression compares unequal to 0. The condition must be a - // scalar type. - condVal = evaluateExprAsBool(s.getCond()); - } else { - condVal = cir::ConstantOp::create(b, loc, builder.getTrueAttr()); - } - builder.createCondition(condVal); - }, - /*bodyBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - // The scope of the for loop body is nested within the scope of the - // for loop's init-statement and condition. - if (emitStmt(s.getBody(), /*useCurrentScope=*/false).failed()) - loopRes = mlir::failure(); - emitStopPoint(&s); - }, - /*stepBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - if (s.getInc()) - if (emitStmt(s.getInc(), /*useCurrentScope=*/true).failed()) + // OpenMP path: emit omp.loop_nest using bounds from emitOMPForDirective. 
+ if (isOpenMPFor) { + mlir::OpBuilder::InsertionGuard guard(builder); + + mlir::Type loopBoundsType = currentOMPLoopBounds->inductionVarType; + mlir::Value lb = currentOMPLoopBounds->lowerBound; + mlir::Value ub = currentOMPLoopBounds->upperBound; + mlir::Value step = currentOMPLoopBounds->step; + bool inclusive = currentOMPLoopBounds->inclusive; + const VarDecl *inductionVar = currentOMPLoopBounds->inductionVar; + + loopNestOp = loopNestOp.create(builder, scopeLoc, 1, lb, ub, step, + inclusive, nullptr); + + mlir::Region &region = loopNestOp.getRegion(); + mlir::Block *block = new mlir::Block(); + region.push_back(block); + + block->addArgument(loopBoundsType, scopeLoc); + builder.setInsertionPointToStart(block); + + // Store the IV block argument into the loop variable alloca, converting + // back from standard integer to CIR integer type. + mlir::Value iv = block->getArgument(0); + Address inductionAddr = getAddrOfLocalVar(inductionVar); + mlir::Value civVal = + mlir::UnrealizedConversionCastOp::create( + builder, scopeLoc, inductionAddr.getElementType(), iv) + .getResult(0); + cir::StoreOp::create(builder, scopeLoc, civVal, + inductionAddr.getPointer(), + /*is_volatile=*/nullptr, /*alignment=*/nullptr, + /*sync_scope=*/nullptr, /*mem_order=*/nullptr); + + // Emit the loop body. + if (s.getBody()) { + if (emitStmt(s.getBody(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + } + + mlir::omp::YieldOp::create(builder, getLoc(s.getEndLoc())); + } else { + forOp = builder.createFor( + getLoc(s.getSourceRange()), + /*condBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + assert(!cir::MissingFeatures::createProfileWeightsForLoop()); + assert( + !cir::MissingFeatures::emitCondLikelihoodViaExpectIntrinsic()); + mlir::Value condVal; + if (s.getCond()) { + // If the for statement has a condition scope, + // emit the local variable declaration. 
+ if (s.getConditionVariable()) + emitDecl(*s.getConditionVariable()); + // C99 6.8.5p2/p4: The first substatement is executed if the + // expression compares unequal to 0. The condition must be a + // scalar type. + condVal = evaluateExprAsBool(s.getCond()); + } else { + condVal = cir::ConstantOp::create(b, loc, builder.getTrueAttr()); + } + builder.createCondition(condVal); + }, + /*bodyBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + // The scope of the for loop body is nested within the scope of the + // for loop's init-statement and condition. + if (emitStmt(s.getBody(), /*useCurrentScope=*/false).failed()) loopRes = mlir::failure(); - builder.createYield(loc); - }); + emitStopPoint(&s); + }, + /*stepBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + if (s.getInc()) + if (emitStmt(s.getInc(), /*useCurrentScope=*/true).failed()) + loopRes = mlir::failure(); + builder.createYield(loc); + }); + } return loopRes; }; auto res = mlir::success(); - auto scopeLoc = getLoc(s.getSourceRange()); - cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/ - [&](mlir::OpBuilder &b, mlir::Location loc) { - LexicalScope lexScope{*this, loc, - builder.getInsertionBlock()}; - res = forStmtBuilder(); - }); + + if (isOpenMPFor) { + res = forStmtBuilder(); + } else { + cir::ScopeOp::create(builder, scopeLoc, /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Location loc) { + LexicalScope lexScope{*this, loc, + builder.getInsertionBlock()}; + res = forStmtBuilder(); + }); + } if (res.failed()) return res; - terminateBody(builder, forOp.getBody(), getLoc(s.getEndLoc())); + // Only regular CIR loops require explicit termination. + // OpenMP wsloop/loop_nest regions terminate via omp.yield. 
+ if (!isOpenMPFor) { + terminateBody(builder, forOp.getBody(), getLoc(s.getEndLoc())); + } return mlir::success(); } diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp index 0d3b44db98307..a3eab79fbba64 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenMP.cpp @@ -13,8 +13,11 @@ #include "CIRGenBuilder.h" #include "CIRGenFunction.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinOps.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" + using namespace clang; using namespace clang::CIRGen; @@ -65,6 +68,200 @@ CIRGenFunction::emitOMPParallelDirective(const OMPParallelDirective &s) { return res; } +// Helpers and implementation for emitOMPForDirective, which lowers an +// OMPForDirective into an omp.wsloop + omp.loop_nest. + +namespace { +/// Extract integer literal value from an expression, if present. +static std::optional<int64_t> getIntLiteralValue(const Expr *expr) { + if (const auto *intLit = dyn_cast<IntegerLiteral>(expr->IgnoreImpCasts())) + return intLit->getValue().getSExtValue(); + return std::nullopt; +} + +/// Ensure a CIR value has the given CIR integer type, inserting an integral +/// cast if necessary. Loads through CIR pointers first. +static mlir::Value ensureCIRIntType(CIRGenBuilderTy &builder, + mlir::Location loc, mlir::Value cirValue, + cir::IntType targetCIRType) { + if (mlir::isa<cir::PointerType>(cirValue.getType())) + cirValue = cir::LoadOp::create(builder, loc, cirValue).getResult(); + + if (cirValue.getType() == targetCIRType) + return cirValue; + + return builder.createCast(loc, cir::CastKind::integral, cirValue, + targetCIRType); +} + +/// Convert a CIR integer value to a standard MLIR integer type suitable for +/// use as an omp.loop_nest operand. 
+static mlir::Value cirIntToStdInt(mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value cirValue) { + auto cirIntType = mlir::cast<cir::IntType>(cirValue.getType()); + mlir::Type stdIntType = builder.getIntegerType(cirIntType.getWidth()); + return mlir::UnrealizedConversionCastOp::create(builder, loc, stdIntType, + cirValue) + .getResult(0); +} +} // anonymous namespace + +mlir::LogicalResult +CIRGenFunction::emitOMPForDirective(const OMPForDirective &s) { + + mlir::LogicalResult res = mlir::success(); + mlir::Location begin = getLoc(s.getBeginLoc()); + + // Extract the underlying canonical `for` loop from the CapturedStmt + const CapturedStmt *capturedStmt = s.getInnermostCapturedStmt(); + const ForStmt *forStmt = dyn_cast<ForStmt>(capturedStmt->getCapturedStmt()); + + if (!forStmt) { + return mlir::failure(); + } + + // Loop bounds are first built as CIR integer values, then converted to + // standard MLIR integers via UnrealizedConversionCastOp before being + // passed to omp.loop_nest (which requires IntLikeType operands). + mlir::Value lowerBound; + mlir::Value upperBound; + mlir::Value step; + bool inclusive = false; + + // Extract loop variable type and lower bound. + const auto *declStmt = dyn_cast_or_null<DeclStmt>(forStmt->getInit()); + const auto *varDecl = + declStmt ? dyn_cast<VarDecl>(declStmt->getSingleDecl()) : nullptr; + + if (!varDecl) + return mlir::failure(); + + // The loop variable's CIR integer type is the canonical type for all bounds. + QualType loopVarQType = varDecl->getType(); + auto cirType = convertType(loopVarQType); + auto cirIntType = mlir::cast<cir::IntType>(cirType); + + // Extract lower bound. 
+ if (!varDecl->hasInit()) + return mlir::failure(); + + if (auto constVal = getIntLiteralValue(varDecl->getInit())) { + lowerBound = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(varDecl->getInit()); + lowerBound = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + + // Extract upper bound and comparison operator. + const auto *condBinOp = dyn_cast_or_null<BinaryOperator>(forStmt->getCond()); + if (!condBinOp) + return mlir::failure(); + + BinaryOperatorKind opKind = condBinOp->getOpcode(); + + // Determine which side of the comparison holds the upper bound. + // Canonical forms: `i < ub`, `i <= ub` (var on LHS, bound on RHS) + // `ub > i`, `ub >= i` (bound on LHS, var on RHS) + const Expr *boundExpr = nullptr; + if (opKind == BO_LT || opKind == BO_LE) { + boundExpr = condBinOp->getRHS(); + inclusive = (opKind == BO_LE); + } else if (opKind == BO_GT || opKind == BO_GE) { + boundExpr = condBinOp->getLHS(); + inclusive = (opKind == BO_GE); + } else { + return mlir::failure(); + } + + if (auto constVal = getIntLiteralValue(boundExpr)) { + upperBound = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(boundExpr); + upperBound = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + + // Extract step. + if (const auto *unaryOp = + dyn_cast_or_null<UnaryOperator>(forStmt->getInc())) { + int64_t val = unaryOp->isIncrementOp() ? 
1 : -1; + step = builder.getConstInt(begin, cirIntType, val); + } else if (const auto *binOp = + dyn_cast_or_null<BinaryOperator>(forStmt->getInc())) { + const Expr *stepExpr = nullptr; + + if (binOp->isCompoundAssignmentOp()) { + stepExpr = binOp->getRHS(); + } else if (binOp->isAssignmentOp()) { + // i = i + step or i = step + i + if (auto *subBinOp = + dyn_cast<BinaryOperator>(binOp->getRHS()->IgnoreImpCasts())) { + const Expr *lhs = subBinOp->getLHS()->IgnoreImpCasts(); + const Expr *rhs = subBinOp->getRHS()->IgnoreImpCasts(); + // Identify which operand is the loop variable and which is the step. + if (auto *lhsRef = dyn_cast<DeclRefExpr>(lhs)) { + stepExpr = (lhsRef->getDecl() == varDecl) ? rhs : lhs; + } else if (auto *rhsRef = dyn_cast<DeclRefExpr>(rhs)) { + stepExpr = (rhsRef->getDecl() == varDecl) ? lhs : rhs; + } + } + } + + if (stepExpr) { + if (auto constVal = getIntLiteralValue(stepExpr)) { + step = builder.getConstInt(begin, cirIntType, *constVal); + } else { + mlir::Value cirValue = emitScalarExpr(stepExpr); + step = ensureCIRIntType(builder, begin, cirValue, cirIntType); + } + } + } + + // Default to unit step if not recognized. + if (!step) + step = builder.getConstInt(begin, cirIntType, 1); + + // Emit init, convert bounds to std integers, and create the wsloop. + + // Emit the loop init statement (e.g. `int i = 0`) to create the alloca + // for the induction variable *before* the wsloop. + if (forStmt->getInit()) + if (emitStmt(forStmt->getInit(), /*useCurrentScope=*/true).failed()) + return mlir::failure(); + + // Convert CIR integer bounds to standard MLIR integers at the boundary. + // omp.loop_nest requires IntLikeType (AnyInteger | Index), not CIR types. 
+ mlir::Value stdLB = cirIntToStdInt(builder, begin, lowerBound); + mlir::Value stdUB = cirIntToStdInt(builder, begin, upperBound); + mlir::Value stdStep = cirIntToStdInt(builder, begin, step); + mlir::Type loopBoundsType = stdLB.getType(); + + currentOMPLoopBounds = + LoopBounds{stdLB, stdUB, stdStep, loopBoundsType, varDecl, inclusive}; + + // Create wsloop with empty region + llvm::SmallVector<mlir::Type> retTy; + llvm::SmallVector<mlir::Value> operands; + auto wsloopOp = mlir::omp::WsloopOp::create(builder, begin, retTy, operands); + + mlir::Region &region = wsloopOp.getRegion(); + mlir::Block *block = new mlir::Block(); + region.push_back(block); + + // Emit the ForStmt body (will create loop_nest when it detects OpenMP + // context) + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(block); + + if (emitStmt(forStmt, /*useCurrentScope=*/false).failed()) { + res = mlir::failure(); + } + + // Clear loop-bound state + currentOMPLoopBounds = std::nullopt; + + return res; +} + mlir::LogicalResult CIRGenFunction::emitOMPTaskwaitDirective(const OMPTaskwaitDirective &s) { getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPTaskwaitDirective"); @@ -113,11 +310,6 @@ CIRGenFunction::emitOMPFuseDirective(const OMPFuseDirective &s) { return mlir::failure(); } mlir::LogicalResult -CIRGenFunction::emitOMPForDirective(const OMPForDirective &s) { - getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPForDirective"); - return mlir::failure(); -} -mlir::LogicalResult CIRGenFunction::emitOMPForSimdDirective(const OMPForSimdDirective &s) { getCIRGenModule().errorNYI(s.getSourceRange(), "OpenMP OMPForSimdDirective"); return mlir::failure(); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt index c7467fe40ba30..49864dcdb62d5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt +++ b/clang/lib/CIR/Lowering/DirectToLLVM/CMakeLists.txt @@ -22,6 +22,7 @@ 
add_clang_library(clangCIRLoweringDirectToLLVM MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation + MLIRReconcileUnrealizedCasts MLIRIR ) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 28b3454d20613..0feeaf748fd75 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -17,6 +17,7 @@ #include <optional> #include "mlir/Conversion/LLVMCommon/TypeConverter.h" +#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h" #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -3562,6 +3563,10 @@ void ConvertCIRToLLVMPass::runOnOperation() { target.addIllegalDialect<mlir::BuiltinDialect, cir::CIRDialect, mlir::func::FuncDialect>(); + // Allow unrealized conversion casts to survive CIR-to-LLVM conversion. + // They are resolved by the reconcile-unrealized-casts pass that runs after. 
+ target.addLegalOp<mlir::UnrealizedConversionCastOp>(); + llvm::SmallVector<mlir::Operation *> ops; ops.push_back(module); collectUnreachable(module, ops); @@ -4800,6 +4805,7 @@ std::unique_ptr<mlir::Pass> createConvertCIRToLLVMPass() { void populateCIRToLLVMPasses(mlir::OpPassManager &pm) { mlir::populateCIRPreLoweringPasses(pm); pm.addPass(createConvertCIRToLLVMPass()); + pm.addPass(mlir::createReconcileUnrealizedCastsPass()); } std::unique_ptr<llvm::Module> diff --git a/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c b/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c new file mode 100644 index 0000000000000..49a046f358e10 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenMP/pragma-omp-for.c @@ -0,0 +1,326 @@ +// RUN: %clang_cc1 -fopenmp -emit-cir -fclangir %s -o - | FileCheck %s + +void before(int); +void during(int); +void after(int); + +void emit_simple_for() { + // CHECK: cir.func{{.*}}@{{.*}}emit_simple_for + int j = 5; + before(j); + // CHECK: cir.call @{{.*}}before +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(j); + } + } + // CHECK: omp.parallel { + + // CIR constants for bounds, then cast to std integer + // CHECK: %[[C0_CIR:.*]] = cir.const #cir.int<0> : !s32i + // CHECK: %[[C10_CIR:.*]] = cir.const #cir.int<10> : !s32i + // CHECK: %[[C1_CIR:.*]] = cir.const #cir.int<1> : !s32i + + // induction variable alloca (emitted before wsloop) + // CHECK: %[[I_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer for omp.loop_nest + // CHECK: %[[C0:.*]] = builtin.unrealized_conversion_cast %[[C0_CIR]] : !s32i to i32 + // CHECK: %[[C10:.*]] = builtin.unrealized_conversion_cast %[[C10_CIR]] : !s32i to i32 + // CHECK: %[[C1:.*]] = builtin.unrealized_conversion_cast %[[C1_CIR]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV:.*]]) : i32 = (%[[C0]]) to (%[[C10]]) step (%[[C1]]) { + + // store induction variable block arg into alloca + // CHECK: 
%[[IV_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV]] : i32 to !s32i + // CHECK: cir.store %[[IV_CIR]], %[[I_ALLOCA]] : !s32i, !cir.ptr<!s32i> + + // during(j) + // CHECK: cir.load {{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } + after(j); + // CHECK: cir.call @{{.*}}after +} + +void emit_for_with_vars() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_with_vars + int j = 5; + before(j); + // CHECK: cir.call @{{.*}}before +#pragma omp parallel + { + int lb = 1; + long ub = 10; + short step = 1; +#pragma omp for + for (int i = 0; i < ub; i=i+step) { + during(j); + } + } + + // CHECK: omp.parallel { + + // allocas + // CHECK: %[[LB:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["lb", init] + // CHECK: %[[UB:.*]] = cir.alloca !s64i, !cir.ptr<!s64i>, ["ub", init] + // CHECK: %[[STEP:.*]] = cir.alloca !s16i, !cir.ptr<!s16i>, ["step", init] + + // stores + // CHECK: cir.store {{.*}}, %[[LB]] : !s32i, !cir.ptr<!s32i> + // CHECK: cir.store {{.*}}, %[[UB]] : !s64i, !cir.ptr<!s64i> + // CHECK: cir.store {{.*}}, %[[STEP]] : !s16i, !cir.ptr<!s16i> + + // lower bound (CIR constant + cast to i32) + // CHECK: %[[LB0_CIR:.*]] = cir.const #cir.int<0> : !s32i + + // upper bound: load, integral cast to i32, then unrealized cast + // CHECK: %[[UBLOAD:.*]] = cir.load {{.*}} %[[UB]] : !cir.ptr<!s64i>, !s64i + // CHECK: %[[UBCAST:.*]] = cir.cast integral %[[UBLOAD]] : !s64i -> !s32i + + // step: load, integral cast to i32, then unrealized cast + // CHECK: %[[STEPLOAD:.*]] = cir.load {{.*}} %[[STEP]] : !cir.ptr<!s16i>, !s16i + // CHECK: %[[STEPCONV:.*]] = cir.cast integral %[[STEPLOAD]] : !s16i -> !s32i + + // induction variable alloca (emitted before wsloop) + // CHECK: %[[I2_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer for omp.loop_nest + // CHECK: %[[LB0:.*]] = builtin.unrealized_conversion_cast %[[LB0_CIR]] : !s32i to 
i32 + // CHECK: %[[UBSTD:.*]] = builtin.unrealized_conversion_cast %[[UBCAST]] : !s32i to i32 + // CHECK: %[[STEPSTD:.*]] = builtin.unrealized_conversion_cast %[[STEPCONV]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV2:.*]]) : i32 = (%[[LB0]]) to (%[[UBSTD]]) step (%[[STEPSTD]]) { + + // store induction variable block arg into alloca + // CHECK: %[[IV2_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV2]] : i32 to !s32i + // CHECK: cir.store %[[IV2_CIR]], %[[I2_ALLOCA]] : !s32i, !cir.ptr<!s32i> + + // during(j) + // CHECK: cir.load {{.*}} %{{.*}} : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } + + after(j); + // CHECK: cir.call @{{.*}}after +} + +void emit_for_with_induction_var() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_with_induction_var +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CIR constants + // CHECK: %[[IC0_CIR:.*]] = cir.const #cir.int<0> : !s32i + // CHECK: %[[IC10_CIR:.*]] = cir.const #cir.int<10> : !s32i + // CHECK: %[[IC1_CIR:.*]] = cir.const #cir.int<1> : !s32i + + // induction variable alloca + // CHECK: %[[IV_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer + // CHECK: %[[IC0:.*]] = builtin.unrealized_conversion_cast %[[IC0_CIR]] : !s32i to i32 + // CHECK: %[[IC10:.*]] = builtin.unrealized_conversion_cast %[[IC10_CIR]] : !s32i to i32 + // CHECK: %[[IC1:.*]] = builtin.unrealized_conversion_cast %[[IC1_CIR]] : !s32i to i32 + + // omp loop + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%[[IV3:.*]]) : i32 = (%[[IC0]]) to (%[[IC10]]) step (%[[IC1]]) { + + // store induction variable into alloca + // CHECK: %[[IV3_CIR:.*]] = builtin.unrealized_conversion_cast %[[IV3]] : i32 to !s32i + // CHECK: cir.store %[[IV3_CIR]], %[[IV_ALLOCA]] : !s32i, !cir.ptr<!s32i> + 
+ // during(i) - loads the induction variable from the alloca + // CHECK: %[[I_VAL:.*]] = cir.load %[[IV_ALLOCA]] : !cir.ptr<!s32i>, !s32i + // CHECK: cir.call @{{.*}}during(%[[I_VAL]]) + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + + // CHECK: omp.terminator + // CHECK: } +} + +// Test inclusive upper bound (i <= 9) +void emit_for_inclusive_bound() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_inclusive_bound +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i <= 9; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<9> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: %[[INC_ALLOCA:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[INC_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[INC_C9:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[INC_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // inclusive = true + // CHECK-NEXT: omp.loop_nest (%[[INC_IV:.*]]) : i32 = (%[[INC_C0]]) to (%[[INC_C9]]) inclusive step (%[[INC_C1]]) { + + // CHECK: builtin.unrealized_conversion_cast %[[INC_IV]] : i32 to !s32i + // CHECK: cir.store + // CHECK: cir.call @{{.*}}during + + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test reversed comparison (10 > i) +void emit_for_reversed_cmp() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_reversed_cmp +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 10 > i; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<10> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[REV_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[REV_C10:.*]] = builtin.unrealized_conversion_cast 
{{.*}} : !s32i to i32 + // CHECK: %[[REV_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[REV_C0]]) to (%[[REV_C10]]) step (%[[REV_C1]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test reversed inclusive comparison (9 >= i) +void emit_for_reversed_inclusive_cmp() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_reversed_inclusive_cmp +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 9 >= i; i++) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<9> : !s32i + // CHECK: cir.const #cir.int<1> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[RI_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[RI_C9:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[RI_C1:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[RI_C0]]) to (%[[RI_C9]]) inclusive step (%[[RI_C1]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test compound assignment step (i += 2) +void emit_for_compound_step() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_compound_step +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 20; i += 2) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<20> : !s32i + // CHECK: cir.const #cir.int<2> : !s32i + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + // CHECK: %[[CS_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CS_C20:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CS_C2:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + + // 
CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[CS_C0]]) to (%[[CS_C20]]) step (%[[CS_C2]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} + +// Test commuted step expression (i = step + i) +void emit_for_commuted_step() { + // CHECK: cir.func{{.*}}@{{.*}}emit_for_commuted_step + short step = 3; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 30; i = step + i) { + during(i); + } + } + // CHECK: omp.parallel { + + // CHECK: cir.const #cir.int<0> : !s32i + // CHECK: cir.const #cir.int<30> : !s32i + + // step is loaded and cast to the loop variable type (i32) in CIR + // CHECK: %[[CM_STEP_LOAD:.*]] = cir.load {{.*}} : !cir.ptr<!s16i>, !s16i + // CHECK: %[[CM_STEP_CIR:.*]] = cir.cast integral %[[CM_STEP_LOAD]] : !s16i -> !s32i + + // CHECK: cir.alloca !s32i, !cir.ptr<!s32i>, ["i", init] + + // conversion to std integer + // CHECK: %[[CM_C0:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CM_C30:.*]] = builtin.unrealized_conversion_cast {{.*}} : !s32i to i32 + // CHECK: %[[CM_STEP:.*]] = builtin.unrealized_conversion_cast %[[CM_STEP_CIR]] : !s32i to i32 + + // CHECK: omp.wsloop { + // CHECK-NEXT: omp.loop_nest (%{{.*}}) : i32 = (%[[CM_C0]]) to (%[[CM_C30]]) step (%[[CM_STEP]]) { + // CHECK: omp.yield + // CHECK: } + // CHECK: } + // CHECK: omp.terminator + // CHECK: } +} diff --git a/clang/test/CIR/Lowering/pragma-omp-for.c b/clang/test/CIR/Lowering/pragma-omp-for.c new file mode 100644 index 0000000000000..76f069a4cd9a9 --- /dev/null +++ b/clang/test/CIR/Lowering/pragma-omp-for.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck %s --input-file %t-cir.ll + +void before(int); +void during(int); +void after(int); + +// Test simple for loop with constant bounds: for (int i = 0; i < 10; i++) +void emit_simple_for() { + int j = 5; + before(j); +#pragma omp parallel + { 
+#pragma omp for + for (int i = 0; i < 10; i++) { + during(j); + } + } + after(j); +} + +// CHECK-LABEL: define dso_local void @emit_simple_for() +// CHECK: call void @before(i32 %{{.*}}) +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_simple_for..omp_par, ptr %{{.*}}) +// CHECK: call void @after(i32 %{{.*}}) + +// CHECK-LABEL: define internal void @emit_simple_for..omp_par( +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: store i32 1, ptr %p.stride +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp_loop.body: +// CHECK: omp.loop_nest.region: +// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 4 +// CHECK: call void @during(i32 %{{.*}}) +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_barrier( + +// Test for loop with variable bounds and type conversions +void emit_for_with_vars() { + int j = 5; + before(j); +#pragma omp parallel + { + int lb = 1; + long ub = 10; + short step = 1; +#pragma omp for + for (int i = 0; i < ub; i = i + step) { + during(j); + } + } + after(j); +} + +// CHECK-LABEL: define dso_local void @emit_for_with_vars() +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_for_with_vars..omp_par, ptr %{{.*}}) + +// CHECK-LABEL: define internal void @emit_for_with_vars..omp_par( +// variable upper bound: loaded and truncated from i64 to i32 +// CHECK: %{{.*}} = trunc i64 %{{.*}} to i32 +// variable step: loaded and sign-extended from i16 to i32 +// CHECK: %{{.*}} = sext i16 %{{.*}} to i32 +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) +// CHECK: call void @__kmpc_for_static_fini( + +// Test induction variable is accessible in the loop body: during(i) +void emit_for_with_induction_var() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 10; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_with_induction_var..omp_par( +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: omp.loop_nest.region: +// IV is stored to the alloca and then loaded for during(i) +// CHECK: store i32 %{{.*}}, ptr %[[IV_PTR:.*]], align 4 +// CHECK: %[[IV_LOAD:.*]] = load i32, ptr %[[IV_PTR]], align 4 +// CHECK: call void @during(i32 %[[IV_LOAD]]) + +// Test inclusive upper bound: for (int i = 0; i <= 9; i++) +void emit_for_inclusive_bound() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i <= 9; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_inclusive_bound..omp_par( +// inclusive i <= 9 has same trip count as i < 10 +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) + +// Test reversed comparison: for (int i = 0; 10 > i; i++) +void emit_for_reversed_cmp() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 10 > i; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_reversed_cmp..omp_par( +// reversed cmp (10 > i) 
produces same bounds as (i < 10) +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( + +// Test reversed inclusive comparison: for (int i = 0; 9 >= i; i++) +void emit_for_reversed_inclusive_cmp() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; 9 >= i; i++) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_reversed_inclusive_cmp..omp_par( +// reversed inclusive cmp (9 >= i) produces same bounds as (i <= 9) +// CHECK: store i32 0, ptr %p.lowerbound +// CHECK: store i32 9, ptr %p.upperbound +// CHECK: call void @__kmpc_for_static_init_4u( + +// Test compound assignment step: for (int i = 0; i < 20; i += 2) +void emit_for_compound_step() { +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 20; i += 2) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_compound_step..omp_par( +// step = 2 visible in the loop body IV computation +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp_loop.body: +// CHECK: %{{.*}} = mul i32 %{{.*}}, 2 +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) + +// Test commuted step expression: for (int i = 0; i < 30; i = step + i) +void emit_for_commuted_step() { + short step = 3; +#pragma omp parallel + { +#pragma omp for + for (int i = 0; i < 30; i = step + i) { + during(i); + } + } +} + +// CHECK-LABEL: define internal void @emit_for_commuted_step..omp_par( +// variable step loaded and sign-extended from i16 +// CHECK: %{{.*}} = sext i16 %{{.*}} to i32 +// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp_loop.body: +// step is variable, multiplied into IV +// CHECK: %{{.*}} = mul i32 %{{.*}}, %{{.*}} +// CHECK: omp.loop_nest.region: +// CHECK: call void @during(i32 %{{.*}}) + +// Verify OpenMP runtime declarations +// CHECK: declare i32 @__kmpc_global_thread_num(ptr) +// CHECK: declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, 
ptr, ptr, i32, i32) +// CHECK: declare void @__kmpc_for_static_fini(ptr, i32) +// CHECK: declare void @__kmpc_barrier(ptr, i32) +// CHECK: declare {{.*}}void @__kmpc_fork_call(ptr, i32, ptr, ...) >From 1ccd034420e7482dda388717e30b6b1d9a65f65f Mon Sep 17 00:00:00 2001 From: Luca Parigi <[email protected]> Date: Mon, 2 Mar 2026 13:10:43 +0100 Subject: [PATCH 2/2] [CIR][OpenMP] Updated lowering test on pragma-omp-for.c Completed the check on lowered code which was not fully covered and reduced the test to two representative cases to avoid repetition. --- clang/test/CIR/Lowering/pragma-omp-for.c | 416 ++++++++++++++++------- 1 file changed, 285 insertions(+), 131 deletions(-) diff --git a/clang/test/CIR/Lowering/pragma-omp-for.c b/clang/test/CIR/Lowering/pragma-omp-for.c index 76f069a4cd9a9..03605c4c42933 100644 --- a/clang/test/CIR/Lowering/pragma-omp-for.c +++ b/clang/test/CIR/Lowering/pragma-omp-for.c @@ -1,9 +1,17 @@ // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fopenmp -fclangir -emit-llvm %s -o %t-cir.ll // RUN: FileCheck %s --input-file %t-cir.ll +// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr } +// CHECK: @[[LOC:.*]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// CHECK: @{{[0-9]+}} = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[LOC]] }, align 8 +// CHECK: @{{[0-9]+}} = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 0, i32 22, ptr @[[LOC]] }, align 8 + void before(int); +// CHECK: declare void @before(i32) void during(int); +// CHECK: declare void @during(i32) void after(int); +// CHECK: declare void @after(i32) // Test simple for loop with constant bounds: for (int i = 0; i < 10; i++) void emit_simple_for() { @@ -19,22 +27,133 @@ void emit_simple_for() { after(j); } -// CHECK-LABEL: define dso_local void @emit_simple_for() -// CHECK: call void @before(i32 %{{.*}}) -// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_simple_for..omp_par, ptr %{{.*}}) -// CHECK: call void @after(i32 %{{.*}}) +// CHECK-LABEL: define{{.*}} void @emit_simple_for() +// CHECK: %structArg = alloca { ptr, ptr }, align 8 +// CHECK: %[[I_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[J_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK: store i32 5, ptr %[[J_ALLOCA]], align 4 +// CHECK: %[[J_VAL:.*]] = load i32, ptr %[[J_ALLOCA]], align 4 +// CHECK: call void @before(i32 %[[J_VAL]]) +// CHECK: br label %entry + +// CHECK: entry: +// CHECK: %[[GTN_OUTER:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// CHECK: br label %omp_parallel + +// CHECK: omp_parallel: +// CHECK: %[[GEP_I:.*]] = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 0 +// CHECK: store ptr %[[I_ALLOCA]], ptr %[[GEP_I]], align 8 +// CHECK: %[[GEP_J:.*]] = getelementptr { ptr, ptr }, ptr %structArg, i32 0, i32 1 +// CHECK: store ptr %[[J_ALLOCA]], ptr %[[GEP_J]], align 8 +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{[0-9]+}}, i32 1, ptr @emit_simple_for..omp_par, ptr %structArg) +// CHECK: br label %omp.par.exit + +// CHECK: omp.par.exit: +// CHECK: %[[J_AFTER:.*]] = load i32, ptr %[[J_ALLOCA]], align 4 +// CHECK: call void @after(i32 %[[J_AFTER]]) +// CHECK: ret void + + +// CHECK-LABEL: define{{.*}} void @emit_simple_for..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) + +// CHECK: omp.par.entry: +// CHECK: %[[GEP_I_PAR:.*]] = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 0 +// CHECK: %[[LOADGEP_I:.*]] = load ptr, ptr %[[GEP_I_PAR]], align 8 +// CHECK: %[[GEP_J_PAR:.*]] = getelementptr { ptr, ptr }, ptr %0, i32 0, i32 1 +// CHECK: %[[LOADGEP_J:.*]] = load ptr, ptr %[[GEP_J_PAR]], align 8 +// CHECK: %p.lastiter = alloca i32, align 4 +// CHECK: %p.lowerbound = alloca i32, align 4 +// CHECK: %p.upperbound = alloca i32, align 4 +// CHECK: %p.stride = alloca i32, align 4 +// CHECK: %[[TID_LOCAL:.*]] = alloca i32, align 4 +// CHECK: %[[TID_LOAD:.*]] = load i32, ptr %tid.addr, align 4 +// CHECK: store i32 %[[TID_LOAD]], ptr %[[TID_LOCAL]], align 4 +// CHECK: %[[TID:.*]] = load i32, ptr %[[TID_LOCAL]], align 4 +// CHECK: br label %omp.region.after_alloca2 + +// CHECK: omp.region.after_alloca2: +// CHECK: br label %omp.region.after_alloca + +// CHECK: omp.region.after_alloca: +// CHECK: br label %omp.par.region + +// CHECK: omp.par.region: +// CHECK: br label %omp.par.region1 + +// CHECK: omp.par.region1: +// initialize i = 0 before the worksharing loop +// CHECK: store i32 0, ptr %[[LOADGEP_I]], align 4 +// CHECK: br label %omp.wsloop.region + +// CHECK: omp.wsloop.region: +// CHECK: br label %omp_loop.preheader + +// CHECK: omp_loop.preheader: +// set normalized loop bounds: lb=0, ub=9 (tripcount-1), stride=1 +// CHECK: store i32 0, ptr %p.lowerbound, align 4 +// CHECK: store i32 9, ptr %p.upperbound, align 4 +// CHECK: store i32 1, ptr %p.stride, align 4 +// CHECK: %[[GTN_WSLOOP:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// 
CHECK: call void @__kmpc_for_static_init_4u(ptr @{{[0-9]+}}, i32 %[[GTN_WSLOOP]], i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) +// reload thread-local lb/ub after static partitioning and compute local trip count +// CHECK: %[[SF_LB:.*]] = load i32, ptr %p.lowerbound, align 4 +// CHECK: %[[SF_UB:.*]] = load i32, ptr %p.upperbound, align 4 +// CHECK: %[[SF_DIFF:.*]] = sub i32 %[[SF_UB]], %[[SF_LB]] +// CHECK: %[[SF_TC:.*]] = add i32 %[[SF_DIFF]], 1 +// CHECK: br label %omp_loop.header + +// CHECK: omp_loop.header: +// CHECK: %omp_loop.iv = phi i32 [ 0, %omp_loop.preheader ], [ %[[LOOP_NEXT:.*]], %omp_loop.inc ] +// CHECK: br label %omp_loop.cond + +// CHECK: omp_loop.cond: +// CHECK: %[[LOOP_CMP:.*]] = icmp ult i32 %omp_loop.iv, %[[SF_TC]] +// CHECK: br i1 %[[LOOP_CMP]], label %omp_loop.body, label %omp_loop.exit + +// CHECK: omp_loop.exit: +// CHECK: call void @__kmpc_for_static_fini(ptr @{{[0-9]+}}, i32 %[[GTN_WSLOOP]]) +// CHECK: %[[GTN_BARRIER:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// CHECK: call void @__kmpc_barrier(ptr @{{[0-9]+}}, i32 %[[GTN_BARRIER]]) +// CHECK: br label %omp_loop.after + +// CHECK: omp_loop.after: +// CHECK: br label %omp.region.cont3 + +// CHECK: omp.region.cont3: +// CHECK: br label %omp.region.cont + +// CHECK: omp.region.cont: +// CHECK: br label %omp.par.pre_finalize + +// CHECK: omp.par.pre_finalize: +// CHECK: br label %.fini + +// CHECK: .fini: +// CHECK: br label %omp.par.exit.exitStub -// CHECK-LABEL: define internal void @emit_simple_for..omp_par( -// CHECK: store i32 0, ptr %p.lowerbound -// CHECK: store i32 9, ptr %p.upperbound -// CHECK: store i32 1, ptr %p.stride -// CHECK: call void @__kmpc_for_static_init_4u( // CHECK: omp_loop.body: +// real IV = (normalized_iv + lb_offset) * stride + init_val +// CHECK: %[[BODY_IV:.*]] = add i32 %omp_loop.iv, %[[SF_LB]] +// CHECK: %[[BODY_SCALED:.*]] = mul i32 %[[BODY_IV]], 1 +// CHECK: %[[BODY_FINAL:.*]] = add i32 
%[[BODY_SCALED]], 0 +// CHECK: br label %omp.loop_nest.region + // CHECK: omp.loop_nest.region: -// CHECK: store i32 %{{.*}}, ptr %{{.*}}, align 4 -// CHECK: call void @during(i32 %{{.*}}) -// CHECK: call void @__kmpc_for_static_fini( -// CHECK: call void @__kmpc_barrier( +// store computed IV to i's alloca; load j and call during(j) +// CHECK: store i32 %[[BODY_FINAL]], ptr %[[LOADGEP_I]], align 4 +// CHECK: %[[J_DURING:.*]] = load i32, ptr %[[LOADGEP_J]], align 4 +// CHECK: call void @during(i32 %[[J_DURING]]) +// CHECK: br label %omp.region.cont4 + +// CHECK: omp.region.cont4: +// CHECK: br label %omp_loop.inc + +// CHECK: omp_loop.inc: +// CHECK: %[[LOOP_NEXT]] = add nuw i32 %omp_loop.iv, 1 +// CHECK: br label %omp_loop.header + +// CHECK: omp.par.exit.exitStub: +// CHECK: ret void // Test for loop with variable bounds and type conversions void emit_for_with_vars() { @@ -53,136 +172,171 @@ void emit_for_with_vars() { after(j); } -// CHECK-LABEL: define dso_local void @emit_for_with_vars() -// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{.*}}, i32 1, ptr @emit_for_with_vars..omp_par, ptr %{{.*}}) +// CHECK-LABEL: define{{.*}} void @emit_for_with_vars() +// CHECK: %structArg = alloca { ptr, ptr, ptr, ptr, ptr }, align 8 +// CHECK: %[[LB_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[UB_ALLOCA:.*]] = alloca i64, i64 1, align 8 +// CHECK: %[[STEP_ALLOCA:.*]] = alloca i16, i64 1, align 2 +// CHECK: %[[I_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[J_ALLOCA:.*]] = alloca i32, i64 1, align 4 +// CHECK: store i32 5, ptr %[[J_ALLOCA]], align 4 +// CHECK: %[[J_VAL:.*]] = load i32, ptr %[[J_ALLOCA]], align 4 +// CHECK: call void @before(i32 %[[J_VAL]]) +// CHECK: br label %entry -// CHECK-LABEL: define internal void @emit_for_with_vars..omp_par( -// variable upper bound: loaded and truncated from i64 to i32 -// CHECK: %{{.*}} = trunc i64 %{{.*}} to i32 -// variable step: loaded and sign-extended from i16 to i32 -// CHECK: %{{.*}} = sext i16 %{{.*}} to i32 -// CHECK: call void @__kmpc_for_static_init_4u( -// CHECK: omp.loop_nest.region: -// CHECK: call void @during(i32 %{{.*}}) -// CHECK: call void @__kmpc_for_static_fini( +// CHECK: entry: +// CHECK: %[[GTN_OUTER:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// CHECK: br label %omp_parallel -// Test induction variable is accessible in the loop body: during(i) -void emit_for_with_induction_var() { -#pragma omp parallel - { -#pragma omp for - for (int i = 0; i < 10; i++) { - during(i); - } - } -} +// CHECK: omp_parallel: +// CHECK: %[[GEP_LB:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %structArg, i32 0, i32 0 +// CHECK: store ptr %[[LB_ALLOCA]], ptr %[[GEP_LB]], align 8 +// CHECK: %[[GEP_UB:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %structArg, i32 0, i32 1 +// CHECK: store ptr %[[UB_ALLOCA]], ptr %[[GEP_UB]], align 8 +// CHECK: %[[GEP_STEP:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %structArg, i32 0, i32 2 +// CHECK: store ptr %[[STEP_ALLOCA]], ptr %[[GEP_STEP]], align 8 
+// CHECK: %[[GEP_I:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %structArg, i32 0, i32 3 +// CHECK: store ptr %[[I_ALLOCA]], ptr %[[GEP_I]], align 8 +// CHECK: %[[GEP_J:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %structArg, i32 0, i32 4 +// CHECK: store ptr %[[J_ALLOCA]], ptr %[[GEP_J]], align 8 +// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @{{[0-9]+}}, i32 1, ptr @emit_for_with_vars..omp_par, ptr %structArg) +// CHECK: br label %omp.par.exit -// CHECK-LABEL: define internal void @emit_for_with_induction_var..omp_par( -// CHECK: store i32 0, ptr %p.lowerbound -// CHECK: store i32 9, ptr %p.upperbound -// CHECK: omp.loop_nest.region: -// IV is stored to the alloca and then loaded for during(i) -// CHECK: store i32 %{{.*}}, ptr %[[IV_PTR:.*]], align 4 -// CHECK: %[[IV_LOAD:.*]] = load i32, ptr %[[IV_PTR]], align 4 -// CHECK: call void @during(i32 %[[IV_LOAD]]) +// CHECK: omp.par.exit: +// CHECK: %[[J_AFTER:.*]] = load i32, ptr %[[J_ALLOCA]], align 4 +// CHECK: call void @after(i32 %[[J_AFTER]]) +// CHECK: ret void -// Test inclusive upper bound: for (int i = 0; i <= 9; i++) -void emit_for_inclusive_bound() { -#pragma omp parallel - { -#pragma omp for - for (int i = 0; i <= 9; i++) { - during(i); - } - } -} -// CHECK-LABEL: define internal void @emit_for_inclusive_bound..omp_par( -// inclusive i <= 9 has same trip count as i < 10 -// CHECK: store i32 0, ptr %p.lowerbound -// CHECK: store i32 9, ptr %p.upperbound -// CHECK: call void @__kmpc_for_static_init_4u( -// CHECK: omp.loop_nest.region: -// CHECK: call void @during(i32 %{{.*}}) +// CHECK-LABEL: define{{.*}} void @emit_for_with_vars..omp_par(ptr noalias %tid.addr, ptr noalias %zero.addr, ptr %0) -// Test reversed comparison: for (int i = 0; 10 > i; i++) -void emit_for_reversed_cmp() { -#pragma omp parallel - { -#pragma omp for - for (int i = 0; 10 > i; i++) { - during(i); - } - } -} +// CHECK: omp.par.entry: +// CHECK: %[[GEP_LB_PAR:.*]] = getelementptr { ptr, ptr, ptr, ptr, 
ptr }, ptr %0, i32 0, i32 0 +// CHECK: %[[LOADGEP_LB:.*]] = load ptr, ptr %[[GEP_LB_PAR]], align 8 +// CHECK: %[[GEP_UB_PAR:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %0, i32 0, i32 1 +// CHECK: %[[LOADGEP_UB:.*]] = load ptr, ptr %[[GEP_UB_PAR]], align 8 +// CHECK: %[[GEP_STEP_PAR:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %0, i32 0, i32 2 +// CHECK: %[[LOADGEP_STEP:.*]] = load ptr, ptr %[[GEP_STEP_PAR]], align 8 +// CHECK: %[[GEP_I_PAR:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %0, i32 0, i32 3 +// CHECK: %[[LOADGEP_I:.*]] = load ptr, ptr %[[GEP_I_PAR]], align 8 +// CHECK: %[[GEP_J_PAR:.*]] = getelementptr { ptr, ptr, ptr, ptr, ptr }, ptr %0, i32 0, i32 4 +// CHECK: %[[LOADGEP_J:.*]] = load ptr, ptr %[[GEP_J_PAR]], align 8 +// CHECK: %p.lastiter = alloca i32, align 4 +// CHECK: %p.lowerbound = alloca i32, align 4 +// CHECK: %p.upperbound = alloca i32, align 4 +// CHECK: %p.stride = alloca i32, align 4 +// CHECK: %[[TID_LOCAL:.*]] = alloca i32, align 4 +// CHECK: %[[TID_LOAD:.*]] = load i32, ptr %tid.addr, align 4 +// CHECK: store i32 %[[TID_LOAD]], ptr %[[TID_LOCAL]], align 4 +// CHECK: %[[TID:.*]] = load i32, ptr %[[TID_LOCAL]], align 4 +// CHECK: br label %omp.region.after_alloca2 -// CHECK-LABEL: define internal void @emit_for_reversed_cmp..omp_par( -// reversed cmp (10 > i) produces same bounds as (i < 10) -// CHECK: store i32 0, ptr %p.lowerbound -// CHECK: store i32 9, ptr %p.upperbound -// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.region.after_alloca2: +// CHECK: br label %omp.region.after_alloca -// Test reversed inclusive comparison: for (int i = 0; 9 >= i; i++) -void emit_for_reversed_inclusive_cmp() { -#pragma omp parallel - { -#pragma omp for - for (int i = 0; 9 >= i; i++) { - during(i); - } - } -} +// CHECK: omp.region.after_alloca: +// CHECK: br label %omp.par.region -// CHECK-LABEL: define internal void @emit_for_reversed_inclusive_cmp..omp_par( -// reversed inclusive cmp (9 >= i) produces same 
bounds as (i <= 9) -// CHECK: store i32 0, ptr %p.lowerbound -// CHECK: store i32 9, ptr %p.upperbound -// CHECK: call void @__kmpc_for_static_init_4u( +// CHECK: omp.par.region: +// CHECK: br label %omp.par.region1 -// Test compound assignment step: for (int i = 0; i < 20; i += 2) -void emit_for_compound_step() { -#pragma omp parallel - { -#pragma omp for - for (int i = 0; i < 20; i += 2) { - during(i); - } - } -} +// CHECK: omp.par.region1: +// initialize lb=1, ub=10, step=1 and i=0 +// CHECK: store i32 1, ptr %[[LOADGEP_LB]], align 4 +// CHECK: store i64 10, ptr %[[LOADGEP_UB]], align 8 +// CHECK: store i16 1, ptr %[[LOADGEP_STEP]], align 2 +// load ub and step, truncate/extend to i32 for trip count computation +// CHECK: %[[UB_VAL:.*]] = load i64, ptr %[[LOADGEP_UB]], align 8 +// CHECK: %[[UB_TRUNC:.*]] = trunc i64 %[[UB_VAL]] to i32 +// CHECK: %[[STEP_VAL:.*]] = load i16, ptr %[[LOADGEP_STEP]], align 2 +// CHECK: %[[STEP_SEXT:.*]] = sext i16 %[[STEP_VAL]] to i32 +// CHECK: store i32 0, ptr %[[LOADGEP_I]], align 4 +// CHECK: br label %omp.wsloop.region -// CHECK-LABEL: define internal void @emit_for_compound_step..omp_par( -// step = 2 visible in the loop body IV computation -// CHECK: call void @__kmpc_for_static_init_4u( -// CHECK: omp_loop.body: -// CHECK: %{{.*}} = mul i32 %{{.*}}, 2 -// CHECK: omp.loop_nest.region: -// CHECK: call void @during(i32 %{{.*}}) +// CHECK: omp.wsloop.region: +// compute absolute value of step to normalize direction +// CHECK: %[[STEP_NEG:.*]] = icmp slt i32 %[[STEP_SEXT]], 0 +// CHECK: %[[STEP_NEG_VAL:.*]] = sub i32 0, %[[STEP_SEXT]] +// CHECK: %[[STEP_ABS:.*]] = select i1 %[[STEP_NEG]], i32 %[[STEP_NEG_VAL]], i32 %[[STEP_SEXT]] +// select lb/ub based on step direction +// CHECK: %[[RANGE_LO:.*]] = select i1 %[[STEP_NEG]], i32 %[[UB_TRUNC]], i32 0 +// CHECK: %[[RANGE_HI:.*]] = select i1 %[[STEP_NEG]], i32 0, i32 %[[UB_TRUNC]] +// CHECK: %[[RANGE_DIFF:.*]] = sub nsw i32 %[[RANGE_HI]], %[[RANGE_LO]] +// CHECK: %[[RANGE_EMPTY:.*]] 
= icmp sle i32 %[[RANGE_HI]], %[[RANGE_LO]] +// compute trip count = (diff - 1) / abs(step) + 1 +// CHECK: %[[TC_SUB:.*]] = sub i32 %[[RANGE_DIFF]], 1 +// CHECK: %[[TC_DIV:.*]] = udiv i32 %[[TC_SUB]], %[[STEP_ABS]] +// CHECK: %[[TC_ADD:.*]] = add i32 %[[TC_DIV]], 1 +// CHECK: %[[TC_ONE:.*]] = icmp ule i32 %[[RANGE_DIFF]], %[[STEP_ABS]] +// CHECK: %[[TC_CLAMPED:.*]] = select i1 %[[TC_ONE]], i32 1, i32 %[[TC_ADD]] +// CHECK: %omp_loop.tripcount = select i1 %[[RANGE_EMPTY]], i32 0, i32 %[[TC_CLAMPED]] +// CHECK: br label %omp_loop.preheader -// Test commuted step expression: for (int i = 0; i < 30; i = step + i) -void emit_for_commuted_step() { - short step = 3; -#pragma omp parallel - { -#pragma omp for - for (int i = 0; i < 30; i = step + i) { - during(i); - } - } -} +// CHECK: omp_loop.preheader: +// set normalized loop bounds: lb=0, ub=tripcount-1, stride=1 +// CHECK: store i32 0, ptr %p.lowerbound, align 4 +// CHECK: %[[TC_MINUS1:.*]] = sub i32 %omp_loop.tripcount, 1 +// CHECK: store i32 %[[TC_MINUS1]], ptr %p.upperbound, align 4 +// CHECK: store i32 1, ptr %p.stride, align 4 +// CHECK: %[[GTN_WSLOOP:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// CHECK: call void @__kmpc_for_static_init_4u(ptr @{{[0-9]+}}, i32 %[[GTN_WSLOOP]], i32 34, ptr %p.lastiter, ptr %p.lowerbound, ptr %p.upperbound, ptr %p.stride, i32 1, i32 0) +// reload thread-local lb/ub after static partitioning and compute local trip count +// CHECK: %[[SF_LB:.*]] = load i32, ptr %p.lowerbound, align 4 +// CHECK: %[[SF_UB:.*]] = load i32, ptr %p.upperbound, align 4 +// CHECK: %[[SF_DIFF:.*]] = sub i32 %[[SF_UB]], %[[SF_LB]] +// CHECK: %[[SF_TC:.*]] = add i32 %[[SF_DIFF]], 1 +// CHECK: br label %omp_loop.header + +// CHECK: omp_loop.header: +// CHECK: %omp_loop.iv = phi i32 [ 0, %omp_loop.preheader ], [ %[[LOOP_NEXT:.*]], %omp_loop.inc ] +// CHECK: br label %omp_loop.cond + +// CHECK: omp_loop.cond: +// CHECK: %[[LOOP_CMP:.*]] = icmp ult i32 %omp_loop.iv, %[[SF_TC]] +// CHECK: br i1 
%[[LOOP_CMP]], label %omp_loop.body, label %omp_loop.exit + +// CHECK: omp_loop.exit: +// CHECK: call void @__kmpc_for_static_fini(ptr @{{[0-9]+}}, i32 %[[GTN_WSLOOP]]) +// CHECK: %[[GTN_BARRIER:.*]] = call i32 @__kmpc_global_thread_num(ptr @{{[0-9]+}}) +// CHECK: call void @__kmpc_barrier(ptr @{{[0-9]+}}, i32 %[[GTN_BARRIER]]) +// CHECK: br label %omp_loop.after + +// CHECK: omp_loop.after: +// CHECK: br label %omp.region.cont3 + +// CHECK: omp.region.cont3: +// CHECK: br label %omp.region.cont + +// CHECK: omp.region.cont: +// CHECK: br label %omp.par.pre_finalize + +// CHECK: omp.par.pre_finalize: +// CHECK: br label %.fini + +// CHECK: .fini: +// CHECK: br label %omp.par.exit.exitStub -// CHECK-LABEL: define internal void @emit_for_commuted_step..omp_par( -// variable step loaded and sign-extended from i16 -// CHECK: %{{.*}} = sext i16 %{{.*}} to i32 -// CHECK: call void @__kmpc_for_static_init_4u( // CHECK: omp_loop.body: -// step is variable, multiplied into IV -// CHECK: %{{.*}} = mul i32 %{{.*}}, %{{.*}} +// real IV = (normalized_iv + lb_offset) * step + init_val +// CHECK: %[[BODY_IV:.*]] = add i32 %omp_loop.iv, %[[SF_LB]] +// CHECK: %[[BODY_SCALED:.*]] = mul i32 %[[BODY_IV]], %[[STEP_SEXT]] +// CHECK: %[[BODY_FINAL:.*]] = add i32 %[[BODY_SCALED]], 0 +// CHECK: br label %omp.loop_nest.region + // CHECK: omp.loop_nest.region: -// CHECK: call void @during(i32 %{{.*}}) - -// Verify OpenMP runtime declarations -// CHECK: declare i32 @__kmpc_global_thread_num(ptr) -// CHECK: declare void @__kmpc_for_static_init_4u(ptr, i32, i32, ptr, ptr, ptr, ptr, i32, i32) -// CHECK: declare void @__kmpc_for_static_fini(ptr, i32) -// CHECK: declare void @__kmpc_barrier(ptr, i32) -// CHECK: declare {{.*}}void @__kmpc_fork_call(ptr, i32, ptr, ...) 
+// store computed IV to i's alloca; load j and call during(j) +// CHECK: store i32 %[[BODY_FINAL]], ptr %[[LOADGEP_I]], align 4 +// CHECK: %[[J_DURING:.*]] = load i32, ptr %[[LOADGEP_J]], align 4 +// CHECK: call void @during(i32 %[[J_DURING]]) +// CHECK: br label %omp.region.cont4 + +// CHECK: omp.region.cont4: +// CHECK: br label %omp_loop.inc + +// CHECK: omp_loop.inc: +// CHECK: %[[LOOP_NEXT]] = add nuw i32 %omp_loop.iv, 1 +// CHECK: br label %omp_loop.header + +// CHECK: omp.par.exit.exitStub: +// CHECK: ret void + _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
