https://github.com/Meinersbur updated 
https://github.com/llvm/llvm-project/pull/144785

>From fed2aa77ab2cd5d2354d128fcfbe70bad8a4ec22 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Wed, 18 Jun 2025 21:55:47 +0200
Subject: [PATCH 1/3] [Flang][MLIR] Add `!$omp unroll` and
 `omp.unroll_heuristic`

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 164 +++++++-
 .../test/Lower/OpenMP/unroll-heuristic01.f90  |  39 ++
 .../test/Lower/OpenMP/unroll-heuristic02.f90  |  70 ++++
 flang/test/Parser/OpenMP/unroll-heuristic.f90 |  43 +++
 .../OpenMP/{unroll.f90 => unroll-partial.f90} |   0
 .../Dialect/OpenMP/OpenMPClauseOperands.h     |   6 +-
 .../mlir/Dialect/OpenMP/OpenMPDialect.h       |   8 +-
 .../mlir/Dialect/OpenMP/OpenMPInterfaces.h    |   1 +
 .../mlir/Dialect/OpenMP/OpenMPOpBase.td       |  11 +
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 207 ++++++++++
 .../mlir/Dialect/OpenMP/OpenMPOpsAttributes.h |  17 +
 .../mlir/Dialect/OpenMP/OpenMPOpsEnums.h      |  14 +
 .../Dialect/OpenMP/OpenMPOpsInterfaces.td     |  58 +++
 .../mlir/Target/LLVMIR/ModuleTranslation.h    |  43 +++
 .../Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp  |  10 +
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 361 ++++++++++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  78 ++++
 .../OpenMP/cli-canonical_loop-invalid.mlir    |  50 +++
 .../Dialect/OpenMP/cli-canonical_loop.mlir    | 157 ++++++++
 .../Dialect/OpenMP/cli-unroll-heuristic.mlir  |  59 +++
 .../LLVMIR/openmp-cli-canonical_loop.mlir     | 175 +++++++++
 .../LLVMIR/openmp-cli-unroll-heuristic01.mlir |  56 +++
 .../LLVMIR/openmp-cli-unroll-heuristic02.mlir |  93 +++++
 mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp   |  11 +
 24 files changed, 1718 insertions(+), 13 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/unroll-heuristic01.f90
 create mode 100644 flang/test/Lower/OpenMP/unroll-heuristic02.f90
 create mode 100644 flang/test/Parser/OpenMP/unroll-heuristic.f90
 rename flang/test/Parser/OpenMP/{unroll.f90 => unroll-partial.f90} (100%)
 create mode 100644 mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h
 create mode 100644 mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h
 create mode 100644 mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
 create mode 100644 mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
 create mode 100644 mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp 
b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 82673f0948a5b..3a8c7dcb0690a 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2128,6 +2128,161 @@ genLoopOp(lower::AbstractConverter &converter, 
lower::SymMap &symTable,
   return loopOp;
 }
 
+static mlir::omp::CanonicalLoopOp
+genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap 
&symTable,
+                   semantics::SemanticsContext &semaCtx,
+                   lower::pft::Evaluation &eval, mlir::Location loc,
+                   const ConstructQueue &queue,
+                   ConstructQueue::const_iterator item,
+                   llvm::ArrayRef<const semantics::Symbol *> ivs,
+                   llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+  assert(ivs.size() == 1 && "Nested loops not yet implemented");
+  const semantics::Symbol *iv = ivs[0];
+
+  auto &nestedEval = eval.getFirstNestedEvaluation();
+  if (nestedEval.getIf<parser::DoConstruct>()->IsDoConcurrent()) {
+    TODO(loc, "Do Concurrent in unroll construct");
+  }
+
+  // Get the loop bounds (and increment) from the DO statement's loop control
+  auto &doLoopEval = nestedEval.getFirstNestedEvaluation();
+  auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>();
+  assert(doStmt && "Expected do loop to be in the nested evaluation");
+  auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t);
+  assert(loopControl.has_value());
+  auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
+  assert(bounds && "Expected bounds for canonical loop");
+  lower::StatementContext stmtCtx;
+  mlir::Value loopLBVar = fir::getBase(
+      converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx));
+  mlir::Value loopUBVar = fir::getBase(
+      converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx));
+  mlir::Value loopStepVar = [&]() {
+    if (bounds->step) {
+      return fir::getBase(
+          converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx));
+    } else {
+      // If `step` is not present, assume it is `1`.
+      return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(),
+                                                1);
+    }
+  }();
+
+  // Get the integer type of the loop variable and cast bounds and step to it
+  size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size();
+  mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+  loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar);
+  loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar);
+  loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar);
+
+  // Start lowering: constants and the (signed) test for a negative step
+  mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0);
+  mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1);
+  mlir::Value isDownwards = firOpBuilder.create<mlir::arith::CmpIOp>(
+      loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero);
+
+  // Ensure we are counting upwards. If not, negate step and swap lb and ub.
+  mlir::Value negStep =
+      firOpBuilder.create<mlir::arith::SubIOp>(loc, zero, loopStepVar);
+  mlir::Value incr = firOpBuilder.create<mlir::arith::SelectOp>(
+      loc, isDownwards, negStep, loopStepVar);
+  mlir::Value lb = firOpBuilder.create<mlir::arith::SelectOp>(
+      loc, isDownwards, loopUBVar, loopLBVar);
+  mlir::Value ub = firOpBuilder.create<mlir::arith::SelectOp>(
+      loc, isDownwards, loopLBVar, loopUBVar);
+
+  // Compute the trip count assuming lb <= ub. This guarantees that the result
+  // is non-negative and we can use unsigned arithmetic.
+  mlir::Value span = firOpBuilder.create<mlir::arith::SubIOp>(
+      loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw);
+  mlir::Value tcMinusOne =
+      firOpBuilder.create<mlir::arith::DivUIOp>(loc, span, incr);
+  mlir::Value tcIfLooping = firOpBuilder.create<mlir::arith::AddIOp>(
+      loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw);
+
+  // Fall back to a trip count of 0 if lb > ub (signed comparison)
+  mlir::Value isZeroTC = firOpBuilder.create<mlir::arith::CmpIOp>(
+      loc, mlir::arith::CmpIPredicate::slt, ub, lb);
+  mlir::Value tripcount = firOpBuilder.create<mlir::arith::SelectOp>(
+      loc, isZeroTC, zero, tcIfLooping);
+
+  // Create the CLI handle that is passed to the canonical loop created below.
+  auto newcli = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+  mlir::Value cli = newcli.getResult();
+
+  auto ivCallback = [&](mlir::Operation *op)
+      -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
+    mlir::Region &region = op->getRegion(0);
+
+    // Create the op's region skeleton (BB taking the iv as argument)
+    firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc});
+
+    // Compute the value of the loop variable from the logical iteration 
number.
+    mlir::Value natIterNum = fir::getBase(region.front().getArgument(0));
+    mlir::Value scaled =
+        firOpBuilder.create<mlir::arith::MulIOp>(loc, natIterNum, loopStepVar);
+    mlir::Value userVal =
+        firOpBuilder.create<mlir::arith::AddIOp>(loc, loopLBVar, scaled);
+
+    // The argument is not currently in memory, so make a temporary for the
+    // argument, and store it there, then bind that location to the argument.
+    mlir::Operation *storeOp =
+        createAndSetPrivatizedLoopVar(converter, loc, userVal, iv);
+
+    firOpBuilder.setInsertionPointAfter(storeOp);
+    return {iv};
+  };
+
+  // Create the omp.canonical_loop operation from the trip count and the CLI
+  auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>(
+      OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
+                        directive)
+          .setClauses(&item->clauses)
+          .setDataSharingProcessor(&dsp)
+          .setGenRegionEntryCb(ivCallback),
+      queue, item, tripcount, cli);
+
+  firOpBuilder.setInsertionPointAfter(canonLoop);
+  return canonLoop;
+}
+
+static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
+                        Fortran::lower::SymMap &symTable,
+                        lower::StatementContext &stmtCtx,
+                        Fortran::semantics::SemanticsContext &semaCtx,
+                        Fortran::lower::pft::Evaluation &eval,
+                        mlir::Location loc, const ConstructQueue &queue,
+                        ConstructQueue::const_iterator item) {
+  fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+  mlir::omp::LoopRelatedClauseOps loopInfo;
+  llvm::SmallVector<const semantics::Symbol *> iv;
+  collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv);
+
+  // Clauses for unrolling (`partial`, `full`) are not yet implemented
+  ClauseProcessor cp(converter, semaCtx, item->clauses);
+  cp.processTODO<clause::Partial, clause::Full>(
+      loc, llvm::omp::Directive::OMPD_unroll);
+
+  // Even though unroll does not support data-sharing clauses, this is still
+  // required to fill the symbol table.
+  DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
+                           /*shouldCollectPreDeterminedSymbols=*/true,
+                           /*useDelayedPrivatization=*/false, symTable);
+  dsp.processStep1();
+
+  // Emit the associated loop as an omp.canonical_loop
+  auto canonLoop =
+      genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item,
+                         iv, llvm::omp::Directive::OMPD_unroll, dsp);
+
+  // Apply heuristic unrolling to it through the loop's CLI handle
+  auto cli = canonLoop.getCli();
+  firOpBuilder.create<mlir::omp::UnrollHeuristicOp>(loc, cli);
+}
+
 static mlir::omp::MaskedOp
 genMaskedOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             lower::StatementContext &stmtCtx,
@@ -3516,12 +3671,9 @@ static void genOMPDispatch(lower::AbstractConverter 
&converter,
     newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
     break;
-  case llvm::omp::Directive::OMPD_tile:
-  case llvm::omp::Directive::OMPD_unroll: {
-    unsigned version = semaCtx.langOptions().OpenMPVersion;
-    TODO(loc, "Unhandled loop directive (" +
-                  llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
-  }
+  case llvm::omp::Directive::OMPD_unroll:
+    genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
+    break;
   // case llvm::omp::Directive::OMPD_workdistribute:
   case llvm::omp::Directive::OMPD_workshare:
     newOp = genWorkshareOp(converter, symTable, stmtCtx, semaCtx, eval, loc,
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic01.f90 
b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
new file mode 100644
index 0000000000000..a5f5c003b8a7c
--- /dev/null
+++ b/flang/test/Lower/OpenMP/unroll-heuristic01.f90
@@ -0,0 +1,39 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | 
FileCheck %s
+
+
+subroutine omp_unroll_heuristic01(lb, ub, inc)
+  integer res, i, lb, ub, inc
+
+  !$omp unroll
+  do i = lb, ub, inc
+    res = i
+  end do
+  !$omp end unroll
+
+end subroutine omp_unroll_heuristic01
+
+
+!CHECK-LABEL: func.func @_QPomp_unroll_heuristic01(
+!CHECK:      %c0_i32 = arith.constant 0 : i32
+!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
+!CHECK-NEXT: %13 = arith.cmpi slt, %12, %c0_i32 : i32
+!CHECK-NEXT: %14 = arith.subi %c0_i32, %12 : i32
+!CHECK-NEXT: %15 = arith.select %13, %14, %12 : i32
+!CHECK-NEXT: %16 = arith.select %13, %11, %10 : i32
+!CHECK-NEXT: %17 = arith.select %13, %10, %11 : i32
+!CHECK-NEXT: %18 = arith.subi %17, %16 overflow<nuw> : i32
+!CHECK-NEXT: %19 = arith.divui %18, %15 : i32
+!CHECK-NEXT: %20 = arith.addi %19, %c1_i32 overflow<nuw> : i32
+!CHECK-NEXT: %21 = arith.cmpi slt, %17, %16 : i32
+!CHECK-NEXT: %22 = arith.select %21, %c0_i32, %20 : i32
+!CHECK-NEXT: %canonloop_s0 = omp.new_cli
+!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%22) {
+!CHECK-NEXT:   %23 = arith.muli %iv, %12 : i32
+!CHECK-NEXT:   %24 = arith.addi %10, %23 : i32
+!CHECK-NEXT:   hlfir.assign %24 to %9#0 : i32, !fir.ref<i32>
+!CHECK-NEXT:   %25 = fir.load %9#0 : !fir.ref<i32>
+!CHECK-NEXT:   hlfir.assign %25 to %6#0 : i32, !fir.ref<i32>
+!CHECK-NEXT:   omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0)
+!CHECK-NEXT: return
diff --git a/flang/test/Lower/OpenMP/unroll-heuristic02.f90 
b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
new file mode 100644
index 0000000000000..669f185f910c4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/unroll-heuristic02.f90
@@ -0,0 +1,70 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | 
FileCheck %s
+
+
+subroutine omp_unroll_heuristic_nested02(outer_lb, outer_ub, outer_inc, 
inner_lb, inner_ub, inner_inc)
+  integer res, i, j, inner_lb, inner_ub, inner_inc, outer_lb, outer_ub, 
outer_inc
+
+  !$omp unroll
+  do i = outer_lb, outer_ub, outer_inc
+    !$omp unroll
+    do j = inner_lb, inner_ub, inner_inc
+      res = i + j
+    end do
+    !$omp end unroll
+  end do
+  !$omp end unroll
+
+end subroutine omp_unroll_heuristic_nested02
+
+
+!CHECK-LABEL: func.func @_QPomp_unroll_heuristic_nested02(%arg0: !fir.ref<i32> 
{fir.bindc_name = "outer_lb"}, %arg1: !fir.ref<i32> {fir.bindc_name = 
"outer_ub"}, %arg2: !fir.ref<i32> {fir.bindc_name = "outer_inc"}, %arg3: 
!fir.ref<i32> {fir.bindc_name = "inner_lb"}, %arg4: !fir.ref<i32> 
{fir.bindc_name = "inner_ub"}, %arg5: !fir.ref<i32> {fir.bindc_name = 
"inner_inc"}) {
+!CHECK:      %c0_i32 = arith.constant 0 : i32
+!CHECK-NEXT: %c1_i32 = arith.constant 1 : i32
+!CHECK-NEXT: %18 = arith.cmpi slt, %17, %c0_i32 : i32
+!CHECK-NEXT: %19 = arith.subi %c0_i32, %17 : i32
+!CHECK-NEXT: %20 = arith.select %18, %19, %17 : i32
+!CHECK-NEXT: %21 = arith.select %18, %16, %15 : i32
+!CHECK-NEXT: %22 = arith.select %18, %15, %16 : i32
+!CHECK-NEXT: %23 = arith.subi %22, %21 overflow<nuw> : i32
+!CHECK-NEXT: %24 = arith.divui %23, %20 : i32
+!CHECK-NEXT: %25 = arith.addi %24, %c1_i32 overflow<nuw> : i32
+!CHECK-NEXT: %26 = arith.cmpi slt, %22, %21 : i32
+!CHECK-NEXT: %27 = arith.select %26, %c0_i32, %25 : i32
+!CHECK-NEXT: %canonloop_s0 = omp.new_cli
+!CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%27) {
+!CHECK-NEXT:   %28 = arith.muli %iv, %17 : i32
+!CHECK-NEXT:   %29 = arith.addi %15, %28 : i32
+!CHECK-NEXT:   hlfir.assign %29 to %14#0 : i32, !fir.ref<i32>
+!CHECK-NEXT:   %30 = fir.alloca i32 {bindc_name = "j", pinned, uniq_name = 
"_QFomp_unroll_heuristic_nested02Ej"}
+!CHECK-NEXT:   %31:2 = hlfir.declare %30 {uniq_name = 
"_QFomp_unroll_heuristic_nested02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, 
!fir.ref<i32>)
+!CHECK-NEXT:   %32 = fir.load %4#0 : !fir.ref<i32>
+!CHECK-NEXT:   %33 = fir.load %5#0 : !fir.ref<i32>
+!CHECK-NEXT:   %34 = fir.load %3#0 : !fir.ref<i32>
+!CHECK-NEXT:   %c0_i32_0 = arith.constant 0 : i32
+!CHECK-NEXT:   %c1_i32_1 = arith.constant 1 : i32
+!CHECK-NEXT:   %35 = arith.cmpi slt, %34, %c0_i32_0 : i32
+!CHECK-NEXT:   %36 = arith.subi %c0_i32_0, %34 : i32
+!CHECK-NEXT:   %37 = arith.select %35, %36, %34 : i32
+!CHECK-NEXT:   %38 = arith.select %35, %33, %32 : i32
+!CHECK-NEXT:   %39 = arith.select %35, %32, %33 : i32
+!CHECK-NEXT:   %40 = arith.subi %39, %38 overflow<nuw> : i32
+!CHECK-NEXT:   %41 = arith.divui %40, %37 : i32
+!CHECK-NEXT:   %42 = arith.addi %41, %c1_i32_1 overflow<nuw> : i32
+!CHECK-NEXT:   %43 = arith.cmpi slt, %39, %38 : i32
+!CHECK-NEXT:   %44 = arith.select %43, %c0_i32_0, %42 : i32
+!CHECK-NEXT:   %canonloop_s0_s0 = omp.new_cli
+!CHECK-NEXT:   omp.canonical_loop(%canonloop_s0_s0) %iv_2 : i32 in range(%44) {
+!CHECK-NEXT:     %45 = arith.muli %iv_2, %34 : i32
+!CHECK-NEXT:     %46 = arith.addi %32, %45 : i32
+!CHECK-NEXT:     hlfir.assign %46 to %31#0 : i32, !fir.ref<i32>
+!CHECK-NEXT:     %47 = fir.load %14#0 : !fir.ref<i32>
+!CHECK-NEXT:     %48 = fir.load %31#0 : !fir.ref<i32>
+!CHECK-NEXT:     %49 = arith.addi %47, %48 : i32
+!CHECK-NEXT:     hlfir.assign %49 to %12#0 : i32, !fir.ref<i32>
+!CHECK-NEXT:     omp.terminator
+!CHECK-NEXT:   }
+!CHECK-NEXT:   omp.unroll_heuristic(%canonloop_s0_s0)
+!CHECK-NEXT:   omp.terminator
+!CHECK-NEXT: }
+!CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0)
+!CHECK-NEXT: return
diff --git a/flang/test/Parser/OpenMP/unroll-heuristic.f90 
b/flang/test/Parser/OpenMP/unroll-heuristic.f90
new file mode 100644
index 0000000000000..2f589af0c83ca
--- /dev/null
+++ b/flang/test/Parser/OpenMP/unroll-heuristic.f90
@@ -0,0 +1,43 @@
+! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 %s -fdebug-unparse         | 
FileCheck --check-prefix=UNPARSE %s
+! RUN: %flang_fc1 -fopenmp -fopenmp-version=51 %s -fdebug-dump-parse-tree | 
FileCheck --check-prefix=PTREE %s
+
+subroutine openmp_parse_unroll_heuristic
+  integer i
+
+  !$omp unroll
+  do i = 1, 100
+    call func(i)
+  end do
+  !$omp end unroll
+END subroutine openmp_parse_unroll_heuristic
+
+
+!UNPARSE:      !$OMP UNROLL
+!UNPARSE-NEXT: DO i=1_4,100_4
+!UNPARSE-NEXT:   CALL func(i)
+!UNPARSE-NEXT: END DO
+!UNPARSE-NEXT: !$OMP END UNROLL
+
+!PTREE:      OpenMPConstruct -> OpenMPLoopConstruct
+!PTREE-NEXT: | OmpBeginLoopDirective
+!PTREE-NEXT: | | OmpLoopDirective -> llvm::omp::Directive = unroll
+!PTREE-NEXT: | | OmpClauseList ->
+!PTREE-NEXT: | DoConstruct
+!PTREE-NEXT: | | NonLabelDoStmt
+!PTREE-NEXT: | | | LoopControl -> LoopBounds
+!PTREE-NEXT: | | | | Scalar -> Name = 'i'
+!PTREE-NEXT: | | | | Scalar -> Expr = '1_4'
+!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '1'
+!PTREE-NEXT: | | | | Scalar -> Expr = '100_4'
+!PTREE-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '100'
+!PTREE-NEXT: | | Block
+!PTREE-NEXT: | | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt 
-> CallStmt = 'CALL func(i)'
+!PTREE-NEXT: | | | | | | Call
+!PTREE-NEXT: | | | | | ProcedureDesignator -> Name = 'func'
+!PTREE-NEXT: | | | | | ActualArgSpec
+!PTREE-NEXT: | | | | | | ActualArg -> Expr = 'i'
+!PTREE-NEXT: | | | | | | | Designator -> DataRef -> Name = 'i'
+!PTREE-NEXT: | | EndDoStmt ->
+!PTREE-NEXT: | OmpEndLoopDirective
+!PTREE-NEXT: | | OmpLoopDirective -> llvm::omp::Directive = unroll
+!PTREE-NEXT: | | OmpClauseList ->
diff --git a/flang/test/Parser/OpenMP/unroll.f90 
b/flang/test/Parser/OpenMP/unroll-partial.f90
similarity index 100%
rename from flang/test/Parser/OpenMP/unroll.f90
rename to flang/test/Parser/OpenMP/unroll-partial.f90
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
index f9a85626a3f14..faf820dcfdb29 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h
@@ -15,14 +15,10 @@
 #ifndef MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_
 #define MLIR_DIALECT_OPENMP_OPENMPCLAUSEOPERANDS_H_
 
+#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "llvm/ADT/SmallVector.h"
 
-#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
-
-#define GET_ATTRDEF_CLASSES
-#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc"
-
 #include "mlir/Dialect/OpenMP/OpenMPClauseOps.h.inc"
 
 namespace mlir {
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
index 248ac2eb72c61..0a844fc2380bf 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/OpenACCMPCommon/Interfaces/AtomicInterfaces.h"
 #include "mlir/Dialect/OpenACCMPCommon/Interfaces/OpenACCMPOpsInterfaces.h"
+#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/PatternMatch.h"
@@ -24,6 +25,11 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
 
+namespace mlir::omp {
+/// Find the omp.new_cli, generator, and consumer of a canonical loop info.
+std::tuple<NewCliOp, OpOperand *, OpOperand *> decodeCli(mlir::Value cli);
+} // namespace mlir::omp
+
 #define GET_TYPEDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.h.inc"
 
@@ -33,8 +39,6 @@
 
 #include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc"
 
-#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
-
 #define GET_OP_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOps.h.inc"
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h
index 989ab1710c211..bc9534974d21f 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h
@@ -14,6 +14,7 @@
 #define MLIR_DIALECT_OPENMP_OPENMPINTERFACES_H_
 
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/PatternMatch.h"
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index f3dd44d2c0717..bbcfb87fa03c6 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -204,4 +204,15 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
   let regions = !if(singleRegion, (region AnyRegion:$region), (region));
 }
 
+
+// Base class for OpenMP loop transformations (that either consume or generate
+// loops)
+//
+// Doesn't actually create a C++ base class (only defines default values for
+// tablegen classes that derive from this). Use LoopTransformationInterface
+// instead for common operations.
+class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
+      OpenMP_Op<mnemonic, 
!listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits)  
> {
+}
+
 #endif  // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index ac80926053a2d..8641c9b8150ee 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/OpBase.td"
 include "mlir/IR/SymbolInterfaces.td"
 
@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
   let hasVerifier = 1;
 }
 
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+  let summary = "Type for representing a reference to a canonical loop";
+  let description = [{
+    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+    canonical loop in the same function. Values of this type are not
+    available at runtime and therefore cannot be used by the program itself,
+    i.e. an opaque type. It is similar to the transform dialect's
+    `!transform.interface` type, but instead of implementing an interface
+    for each transformation, the OpenMP dialect itself defines possible
+    operations on this type.
+
+    A value of type CanonicalLoopInfoType (in the following: CLI) can be
+
+    1. created by omp.new_cli.
+    2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+       can only be associated once.
+    3. passed to an omp loop transformation operation that modifies the loop
+       associated with the CLI. The CLI is the "applyee" and the operation is
+       the consumer. A CLI can only be consumed once.
+    4. passed to an omp loop transformation operation to associate the cli with
+       a result of that transformation. The CLI is the "generatee" and the
+       operation is the generator.
+
+    A CLI cannot
+
+    1. be returned from a function.
+    2. be passed to operations that are not specifically designed to take a
+       CanonicalLoopInfoType, including AnyType.
+
+    A CLI directly corresponds to an object of
+    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+  }];
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Creation
+//===---------------------------------------------------------------------===//
+
+def NewCliOp : OpenMP_Op<"new_cli",
+    [DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>]> {
+  let summary = "Create a new Canonical Loop Info value.";
+  let description = [{
+    Create a new CLI that can be passed as an argument to a CanonicalLoopOp
+    and to loop transformation operations to handle dependencies between
+    loop transformation operations.
+  }];
+
+  let arguments = (ins );
+  let results = (outs CanonicalLoopInfoType:$result);
+  let assemblyFormat = [{
+      attr-dict
+  }];
+
+  let builders = [
+    OpBuilder<(ins )>,
+  ];
+
+  let hasVerifier = 1;
+}
+
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Operation
+//===---------------------------------------------------------------------===//
+def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop", 
+    [DeclareOpInterfaceMethods<OpAsmOpInterface, [ "getAsmBlockNames", 
"getAsmBlockArgumentNames"]>]> {
+  let summary = "OpenMP Canonical Loop Operation";
+  let description = [{
+    All loops that conform to OpenMP's definition of a canonical loop can be
+    simplified to a CanonicalLoopOp. In particular, there are no loop-carried
+    variables and the number of iterations it will execute is known before the
+    operation. This allows e.g. to determine the number of threads and chunks
+    the iterations space is split into before executing any iteration. More
+    restrictions may apply in cases such as (collapsed) loop nests, doacross
+    loops, etc.
+
+    In contrast to other loop operations such as `scf.for`, the number of
+    iterations is determined by only a single variable, the trip-count. The
+    induction variable value is the logical iteration number of that iteration,
+    which OpenMP defines to be between 0 and the trip-count (exclusive).
+    Loop representations having lower-bound, upper-bound, and step-size 
operands
+    require passes to do more work than necessary, including handling special
+    cases such as upper-bound smaller than lower-bound, upper-bound equal to
+    the integer type's maximal value, negative step size, etc. This complexity
+    is better handled only once by the front-end, which can apply its semantics
+    for such cases while still being able to represent any kind of loop, which
+    is kind of the point of a mid-end intermediate representation. User-defined
+    types such as random-access iterators in C++ could not directly be
+    represented anyway.
+
+    The induction variable is always of the same type as the tripcount 
argument.
+    Since it can never be negative, tripcount is always interpreted as an
+    unsigned integer. It is the caller's responsibility to ensure the tripcount
+    is not negative when its interpretation is signed, i.e.
+    `%tripcount = max(0,%tripcount)`.
+
+    An optional argument to an omp.canonical_loop that can be passed in
+    is a CanonicalLoopInfo value that can be used to refer to the canonical
+    loop to apply transformations -- such as tiling, unrolling, or
+    work-sharing -- to the loop, similar to the transform dialect but
+    with OpenMP-specific semantics. Because it is optional, it has to be the
+    last of the operands, but appears first in the pretty format printing.
+
+    The pretty assembly format is inspired by python syntax, where `range(n)`
+    returns an iterator that runs from $0$ to $n-1$. The pretty assembly syntax
+    is one of:
+
+     omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
+     omp.canonical_loop       %iv : !type in range(%tripcount)
+
+    A CanonicalLoopOp is lowered to LLVM-IR using
+    `OpenMPIRBuilder::createCanonicalLoop`.
+
+    #### Examples
+
+    Translation from lower-bound, upper-bound, step-size to trip-count.
+    ```c
+    for (int i = 3; i < 42; i+=2) {
+      B[i] = A[i];
+    }
+    ```
+
+    ```mlir
+    %lb = arith.constant 3 : i32
+    %ub = arith.constant 42 : i32
+    %step = arith.constant 2 : i32
+    %range = arith.sub %ub, %lb : i32
+    %tripcount = arith.div %range, %step : i32
+    omp.canonical_loop %iv : i32 in range(%tripcount) {
+      %offset = arith.mul %iv, %step : i32
+      %i = arith.add %offset, %lb : i32
+      %a = load %arrA[%i] : memref<?xf32>
+      store %a, %arrB[%i] : memref<?xf32>
+    }
+    ```
+
+    Nested canonical loop with transformation of the inner loop.
+    ```mlir
+    %outer = omp.new_cli : !omp.cli
+    %inner = omp.new_cli : !omp.cli
+    omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
+      omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
+        %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
+        store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
+      }
+    }
+    omp.unroll_full(%inner)
+    ```
+  }];
+
+
+  let arguments = (ins IntLikeType:$tripCount,
+                       Optional<CanonicalLoopInfoType>:$cli);
+  let regions = (region AnyRegion:$region);
+
+  let extraClassDeclaration = [{
+    ::mlir::Value getInductionVar();
+  }];
+
+  let builders = [
+    OpBuilder<(ins "::mlir::Value":$tripCount)>,
+    OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
+  ];
+
+  let hasCustomAssemblyFormat = 1;
+  let hasVerifier = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// OpenMP unroll_heuristic operation
+//===----------------------------------------------------------------------===//
+
+def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
+  let summary = "OpenMP heuristic unroll operation";
+  let description = [{
+    Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.
+
+    The operation has one applyee and no generatees. The applyee is unrolled
+    according to implementation-defined heuristics. Implementations may choose
+    to not unroll the loop, partially unroll by a chosen factor, or fully
+    unroll it. Even if the implementation chooses to partially unroll the
+    applyee, the resulting unrolled loop is not accessible as a generatee. Use
+    omp.unroll_partial if a generatee is required.
+
+    The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
+    which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
+    unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is
+    only actually performed in optimized builds.
+
+    Assembly formats:
+      omp.unroll_heuristic(%cli)
+      omp.unroll_heuristic(%cli) -> ()
+  }];
+
+  let arguments = (ins CanonicalLoopInfoType:$applyee);
+
+  let builders = [
+    OpBuilder<(ins "::mlir::Value":$cli)>,
+  ];
+
+  let hasCustomAssemblyFormat = 1;
+}
+
 
//===----------------------------------------------------------------------===//
 // 2.8.3 Workshare Construct
 
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h
new file mode 100644
index 0000000000000..9a653c4b557b5
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h
@@ -0,0 +1,17 @@
+//===- OpenMPOpsAttributes.h ------------------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_
+#define MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_
+
+#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h"
+
+#define GET_ATTRDEF_CLASSES
+#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc"
+
+#endif // MLIR_DIALECT_OPENMP_OPENMPOPSATTRIBUTES_H_
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h
new file mode 100644
index 0000000000000..0f6c41a179536
--- /dev/null
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsEnums.h
@@ -0,0 +1,14 @@
+//===- OpenMPOpsEnums.h -----------------------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_
+#define MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_
+
+#include "mlir/Dialect/OpenMP/OpenMPOpsEnums.h.inc"
+
+#endif // MLIR_DIALECT_OPENMP_OPENMPOPSENUMS_H_
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td 
b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
index 92bf34ef3145f..02ec95c10d67d 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
@@ -551,4 +551,62 @@ def OffloadModuleInterface : 
OpInterface<"OffloadModuleInterface"> {
   ];
 }
 
+def LoopTransformationInterface : OpInterface<"LoopTransformationInterface"> {
+  let description = [{
+    Methods that are common for OpenMP loop transformation operations.
+  }];
+
+  let cppNamespace = "::mlir::omp";
+
+  let methods = [
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        applyees, i.e. loops that are transformed/consumed by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getApplyeesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Get the indices for the arguments that represent CanonicalLoopInfo
+        generatees, i.e. loops that are created by this operation.
+      }],
+      /*returnType=*/ "std::pair<unsigned, unsigned>",
+      /*methodName=*/ "getGenerateesODSOperandIndexAndLength",
+      /*args=*/(ins)
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of applyees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumApplyees",
+      /*args=*/       (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return  $_op.getApplyeesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+    InterfaceMethod<
+      /*description=*/[{
+        Return the number of generatees of this loop transformation.
+      }],
+      /*returnType=*/ "unsigned",
+      /*methodName=*/ "getNumGeneratees",
+      /*args=*/       (ins),
+      /*methodBody=*/ "",
+      /*defaultImpl=*/[{
+        return  $_op.getGenerateesODSOperandIndexAndLength().second;
+      }]
+    >,
+
+  ];
+}
+
 #endif // OPENMP_OPS_INTERFACES
diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h 
b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 97ae14aa0d6af..7de9021694438 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -15,6 +15,7 @@
 #define MLIR_TARGET_LLVMIR_MODULETRANSLATION_H
 
 #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h"
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/IR/SymbolTable.h"
 #include "mlir/IR/Value.h"
@@ -23,6 +24,7 @@
 #include "mlir/Target/LLVMIR/TypeToLLVM.h"
 
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
 #include "llvm/IR/FPEnv.h"
 
 namespace llvm {
@@ -107,6 +109,41 @@ class ModuleTranslation {
     return blockMapping.lookup(block);
   }
 
+  /// Find the LLVM-IR loop that represents an MLIR loop.
+  ///
+  /// Looks up \p mlir in `loopMapping` and asserts that an entry exists, i.e.
+  /// the loop must currently be present in the mapping.
+  llvm::CanonicalLoopInfo *lookupOMPLoop(omp::NewCliOp mlir) const {
+    llvm::CanonicalLoopInfo *result = loopMapping.lookup(mlir);
+    assert(result && "attempt to get non-existing loop");
+    return result;
+  }
+
+  /// Find the LLVM-IR loop that represents an MLIR loop.
+  ///
+  /// Convenience overload: \p mlir is resolved to the omp.new_cli operation
+  /// that defines it before the lookup is performed.
+  llvm::CanonicalLoopInfo *lookupOMPLoop(Value mlir) const {
+    return lookupOMPLoop(mlir.getDefiningOp<omp::NewCliOp>());
+  }
+
+  /// Mark an OpenMP loop as having been consumed.
+  ///
+  /// Removes the entry for \p mlir from `loopMapping`.
+  void invalidateOmpLoop(omp::NewCliOp mlir) { loopMapping.erase(mlir); }
+
+  /// Mark an OpenMP loop as having been consumed.
+  ///
+  /// Convenience overload: \p mlir is resolved to the omp.new_cli operation
+  /// that defines it.
+  void invalidateOmpLoop(Value mlir) {
+    invalidateOmpLoop(mlir.getDefiningOp<omp::NewCliOp>());
+  }
+
+  /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR
+  /// OpenMPIRBuilder CanonicalLoopInfo
+  ///
+  /// \p llvm must be non-null and \p mlir must not have a mapping yet; both
+  /// conditions are checked by assertions.
+  void mapOmpLoop(omp::NewCliOp mlir, llvm::CanonicalLoopInfo *llvm) {
+    assert(llvm && "argument must be non-null");
+    // operator[] default-constructs a nullptr entry if none exists yet.
+    llvm::CanonicalLoopInfo *&cur = loopMapping[mlir];
+    assert(cur == nullptr && "attempting to map a loop that is already mapped");
+    cur = llvm;
+  }
+
+  /// Map an MLIR OpenMP dialect CanonicalLoopInfo to its lowered LLVM-IR
+  /// OpenMPIRBuilder CanonicalLoopInfo
+  ///
+  /// Convenience overload: \p mlir is resolved to the omp.new_cli operation
+  /// that defines it.
+  void mapOmpLoop(Value mlir, llvm::CanonicalLoopInfo *llvm) {
+    mapOmpLoop(mlir.getDefiningOp<omp::NewCliOp>(), llvm);
+  }
+
   /// Stores the mapping between an MLIR operation with successors and a
   /// corresponding LLVM IR instruction.
   void mapBranch(Operation *mlir, llvm::Instruction *llvm) {
@@ -431,6 +468,12 @@ class ModuleTranslation {
   DenseMap<Value, llvm::Value *> valueMapping;
   DenseMap<Block *, llvm::BasicBlock *> blockMapping;
 
+  /// List of not yet consumed MLIR loop handles (represented by an omp.new_cli
+  /// operation which creates a value of type CanonicalLoopInfoType) and their
+  /// LLVM-IR representation as CanonicalLoopInfo which is managed by the
+  /// OpenMPIRBuilder.
+  DenseMap<omp::NewCliOp, llvm::CanonicalLoopInfo *> loopMapping;
+
   /// A mapping between MLIR LLVM dialect terminators and LLVM IR terminators
   /// they are converted to. This allows for connecting PHI nodes to the source
   /// values after all operations are converted.
diff --git a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp 
b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
index 7a0a7f86bc1e9..e77c4a0b94de9 100644
--- a/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp
@@ -42,6 +42,16 @@ template <typename T>
 struct OpenMPOpConversion : public ConvertOpToLLVMPattern<T> {
   using ConvertOpToLLVMPattern<T>::ConvertOpToLLVMPattern;
 
+  /// Construct the pattern and register an identity conversion for
+  /// CanonicalLoopInfoType on \p typeConverter.
+  ///
+  /// NOTE(review): this mutates the type converter passed in every time a
+  /// pattern is constructed; presumably the converter is shared between all
+  /// patterns, in which case the same conversion is registered repeatedly --
+  /// confirm whether it should instead be added once where the patterns are
+  /// populated.
+  OpenMPOpConversion(LLVMTypeConverter &typeConverter,
+                     PatternBenefit benefit = 1)
+      : ConvertOpToLLVMPattern<T>(typeConverter, benefit) {
+    // Operations using CanonicalLoopInfoType are lowered only by
+    // mlir::translateModuleToLLVMIR() using the OpenMPIRBuilder. Until then,
+    // the type and operations using it must be preserved.
+    typeConverter.addConversion(
+        [&](::mlir::omp::CanonicalLoopInfoType type) { return type; });
+  }
+
   LogicalResult
   matchAndRewrite(T op, typename T::Adaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp 
b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index e94d570b57122..64b6dd32b0c37 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -24,6 +24,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/SmallString.h"
@@ -3012,6 +3013,366 @@ void LoopNestOp::gatherWrappers(
   }
 }
 
+//===----------------------------------------------------------------------===//
+// OpenMP canonical loop handling
+//===----------------------------------------------------------------------===//
+
+/// Decode the def-use structure of a CanonicalLoopInfo (CLI) value.
+///
+/// Returns a tuple of
+///  * the omp.new_cli operation that defines \p cli,
+///  * the operand use through which the CLI is generated, i.e. where it
+///    appears among its user's generatees (nullptr if there is none), and
+///  * the operand use through which the CLI is consumed, i.e. where it
+///    appears among its user's applyees (nullptr if there is none).
+///
+/// If \p cli is a null value, a null NewCliOp and two nullptrs are returned.
+/// Every user of \p cli is expected to implement LoopTransformationInterface
+/// and to reference the CLI as either a generatee or an applyee.
+std::tuple<NewCliOp, OpOperand *, OpOperand *>
+mlir::omp::decodeCli(Value cli) {
+
+  // Defining a CLI for a generated loop is optional; if there is none then
+  // there is no followup-transformation
+  if (!cli)
+    return {{}, nullptr, nullptr};
+
+  // The context is queried inside the assert only, so that builds without
+  // assertions do not end up with an unused local variable.
+  assert(cli.getType() == CanonicalLoopInfoType::get(cli.getContext()) &&
+         "Unexpected type of cli");
+
+  NewCliOp create = cast<NewCliOp>(cli.getDefiningOp());
+  OpOperand *gen = nullptr;
+  OpOperand *cons = nullptr;
+  for (OpOperand &use : cli.getUses()) {
+    auto op = cast<LoopTransformationInterface>(use.getOwner());
+    auto applyees = op.getApplyeesODSOperandIndexAndLength();
+    auto generatees = op.getGenerateesODSOperandIndexAndLength();
+
+    // Classify the use by the operand range its operand number falls into.
+    unsigned opnum = use.getOperandNumber();
+    if (generatees.first <= opnum &&
+        opnum < generatees.first + generatees.second) {
+      assert(!gen && "Each CLI may have at most one generator");
+      gen = &use;
+    } else if (applyees.first <= opnum &&
+               opnum < applyees.first + applyees.second) {
+      assert(!cons && "Each CLI may have at most one consumer");
+      cons = &use;
+    } else {
+      llvm_unreachable("Unexpected operand for a CLI");
+    }
+  }
+
+  return {create, gen, cons};
+}
+
+/// Build an omp.new_cli operation; its only result type is
+/// CanonicalLoopInfoType, obtained from the builder's context.
+void NewCliOp::build(::mlir::OpBuilder &odsBuilder,
+                     ::mlir::OperationState &odsState) {
+  odsState.addTypes(CanonicalLoopInfoType::get(odsBuilder.getContext()));
+}
+
+/// Suggest a name for the CLI result in the printed assembly, derived from
+/// the operation that generates the loop (if any).
+void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
+  Value result = getResult();
+  auto [newCli, gen, cons] = decodeCli(result);
+
+  // Derive the CLI variable name from its generator:
+  //  * "canonloop" for omp.canonical_loop
+  //  * custom name for loop transformation generatees
+  //  * "cli" as fallback if no generator
+  //  * "_s<idx>" suffix for nested loops, where <idx> is the sequential order
+  //  at that level
+  //  * "_r<idx>" suffix for operations with multiple regions, where <idx> is
+  //  the index of that region
+  std::string cliName{"cli"};
+  if (gen) {
+    cliName =
+        TypeSwitch<Operation *, std::string>(gen->getOwner())
+            .Case([&](CanonicalLoopOp op) {
+              // Find the canonical loop nesting: For each ancestor add a
+              // "_s<idx>" suffix, preceded by a "_r<idx>" suffix if the
+              // ancestor has more than one region (collected innermost
+              // first, i.e. in reverse order)
+              SmallVector<std::string> components;
+              Operation *o = op.getOperation();
+              while (o) {
+                // Stop walking up once an isolated-from-above operation is
+                // reached.
+                if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+                  break;
+
+                Region *r = o->getParentRegion();
+                if (!r)
+                  break;
+
+                Operation *parent = r->getParentOp();
+                // Position of `o` among the region-holding operations of `r`,
+                // visiting blocks in reverse post-order from the entry block.
+                auto getSequentialIndex = [](Region *r, Operation *o) {
+                  llvm::ReversePostOrderTraversal<Block *> traversal(
+                      &r->getBlocks().front());
+                  size_t idx = 0;
+                  for (Block *b : traversal) {
+                    for (Operation &op : *b) {
+                      if (&op == o)
+                        return idx;
+                      // Only consider operations that are containers as
+                      // possible children
+                      if (!op.getRegions().empty())
+                        idx += 1;
+                    }
+                  }
+                  llvm_unreachable("Operation not part of the region");
+                };
+                size_t sequentialIdx = getSequentialIndex(r, o);
+                components.push_back(("s" + Twine(sequentialIdx)).str());
+
+                if (!parent)
+                  break;
+
+                // If the operation has more than one region, also count in
+                // which of the regions
+                if (parent->getRegions().size() > 1) {
+                  auto getRegionIndex = [](Operation *o, Region *r) {
+                    for (auto [idx, region] :
+                         llvm::enumerate(o->getRegions())) {
+                      if (&region == r)
+                        return idx;
+                    }
+                    llvm_unreachable("Region not child its parent operation");
+                  };
+                  size_t regionIdx = getRegionIndex(parent, r);
+                  components.push_back(("r" + Twine(regionIdx)).str());
+                }
+
+                // next parent
+                o = parent;
+              }
+
+              // Components were collected innermost-first; emit them
+              // outermost-first.
+              SmallString<64> Name("canonloop");
+              for (const std::string &s : reverse(components)) {
+                Name += '_';
+                Name += s;
+              }
+
+              return Name;
+            })
+            .Case([&](UnrollHeuristicOp op) -> std::string {
+              llvm_unreachable("heuristic unrolling does not generate a loop");
+            })
+            .Default([&](Operation *op) {
+              assert(!"TODO: Custom name for this operation");
+              return "transformed";
+            });
+  }
+
+  setNameFn(result, cliName);
+}
+
+/// Verify the def-use structure of the CLI created by this operation:
+///  * every user must be a loop transformation that references the CLI as
+///    either a generatee or an applyee,
+///  * there is at most one generator and at most one consumer, and
+///  * a CLI that is consumed must also have a generator.
+LogicalResult NewCliOp::verify() {
+  Value cli = getResult();
+
+  // The context is queried inside the assert only, so that builds without
+  // assertions do not end up with an unused local variable.
+  assert(cli.getType() == CanonicalLoopInfoType::get(cli.getContext()) &&
+         "Unexpected type of cli");
+
+  // Check that the CLI is used in at most one generator and one consumer
+  OpOperand *gen = nullptr;
+  OpOperand *cons = nullptr;
+  for (mlir::OpOperand &use : cli.getUses()) {
+    // A verifier has to diagnose invalid IR rather than crash on it, hence
+    // dyn_cast instead of cast.
+    auto op = dyn_cast<mlir::omp::LoopTransformationInterface>(use.getOwner());
+    if (!op) {
+      InFlightDiagnostic error = emitOpError(
+          "CLI must only be used by loop transformation operations");
+      error.attachNote(use.getOwner()->getLoc()).append("invalid user here:");
+      return error;
+    }
+    auto applyees = op.getApplyeesODSOperandIndexAndLength();
+    auto generatees = op.getGenerateesODSOperandIndexAndLength();
+
+    unsigned opnum = use.getOperandNumber();
+    if (generatees.first <= opnum &&
+        opnum < generatees.first + generatees.second) {
+      if (gen) {
+        InFlightDiagnostic error =
+            emitOpError("CLI must have at most one generator");
+        error.attachNote(gen->getOwner()->getLoc())
+            .append("first generator here:");
+        error.attachNote(use.getOwner()->getLoc())
+            .append("second generator here:");
+        return error;
+      }
+
+      gen = &use;
+    } else if (applyees.first <= opnum &&
+               opnum < applyees.first + applyees.second) {
+      if (cons) {
+        InFlightDiagnostic error =
+            emitOpError("CLI must have at most one consumer");
+        error.attachNote(cons->getOwner()->getLoc())
+            .append("first consumer here:")
+            .appendOp(*cons->getOwner(),
+                      OpPrintingFlags().printGenericOpForm());
+        error.attachNote(use.getOwner()->getLoc())
+            .append("second consumer here:")
+            .appendOp(*use.getOwner(), OpPrintingFlags().printGenericOpForm());
+        return error;
+      }
+
+      cons = &use;
+    } else {
+      // The user is a loop transformation, but the CLI is passed in an
+      // operand position that is neither an applyee nor a generatee.
+      InFlightDiagnostic error =
+          emitOpError("CLI must be used as either an applyee or a generatee");
+      error.attachNote(use.getOwner()->getLoc()).append("invalid use here:");
+      return error;
+    }
+  }
+
+  // If the CLI is source of a transformation, it must have a generator
+  if (cons && !gen) {
+    InFlightDiagnostic error = emitOpError("CLI has no generator");
+    error.attachNote(cons->getOwner()->getLoc())
+        .append("see consumer here: ")
+        .appendOp(*cons->getOwner(), OpPrintingFlags().printGenericOpForm());
+    return error;
+  }
+
+  return success();
+}
+
+/// Build an omp.canonical_loop without a CLI handle.
+///
+/// Adds \p tripCount as the only operand and an empty body region.
+void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState,
+                            Value tripCount) {
+  odsState.addOperands(tripCount);
+  // The optional `cli` operand is absent: nothing is appended for it, since a
+  // null Value is not a valid operand.
+  (void)odsState.addRegion();
+}
+
+/// Build an omp.canonical_loop with an optional CLI handle.
+///
+/// Adds \p tripCount, then \p cli if it is non-null, as operands and an empty
+/// body region.
+void CanonicalLoopOp::build(OpBuilder &odsBuilder, OperationState &odsState,
+                            Value tripCount, ::mlir::Value cli) {
+  odsState.addOperands(tripCount);
+  // `cli` is an optional operand; a null value means "no handle" and must be
+  // omitted rather than appended as a null operand.
+  if (cli)
+    odsState.addOperands(cli);
+  (void)odsState.addRegion();
+}
+
+/// Name the first block of the loop region "body_entry" in printed assembly.
+/// Calls front() on the region unconditionally, so the region is assumed to
+/// have at least one block here.
+void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
+  setNameFn(&getRegion().front(), "body_entry");
+}
+
+/// Name the first argument of \p region "iv" in printed assembly.
+void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
+                                               OpAsmSetValueNameFn setNameFn) {
+  setNameFn(region.getArgument(0), "iv");
+}
+
+/// Print the custom assembly form:
+///   [`(` cli `)`] iv `:` type `in range(` tripCount `)` region [attr-dict]
+void CanonicalLoopOp::print(OpAsmPrinter &p) {
+  // The CLI handle is optional and only printed when present.
+  if (getCli())
+    p << '(' << getCli() << ')';
+  p << ' ' << getInductionVar() << " : " << getInductionVar().getType()
+    << " in range(" << getTripCount() << ") ";
+
+  // The induction variable was already printed above, so the entry block
+  // arguments are not repeated.
+  p.printRegion(getRegion(), /*printEntryBlockArgs=*/false,
+                /*printBlockTerminators=*/true);
+
+  p.printOptionalAttrDict((*this)->getAttrs());
+}
+
+/// Parse the custom assembly form:
+///   [`(` cli `)`] iv `:` type `in range(` tripCount `)` region [attr-dict]
+///
+/// The resulting operand order is tripCount followed by the optional cli.
+mlir::ParseResult CanonicalLoopOp::parse(::mlir::OpAsmParser &parser,
+                                         ::mlir::OperationState &result) {
+  CanonicalLoopInfoType cliType =
+      CanonicalLoopInfoType::get(parser.getContext());
+
+  // Parse (optional) omp.cli identifier
+  OpAsmParser::UnresolvedOperand cli;
+  SmallVector<mlir::Value, 1> cliOperand;
+  if (!parser.parseOptionalLParen()) {
+    if (parser.parseOperand(cli) ||
+        parser.resolveOperand(cli, cliType, cliOperand) || parser.parseRParen())
+      return failure();
+  }
+
+  // We derive the type of tripCount from inductionVariable. MLIR requires the
+  // type of tripCount to be known when calling resolveOperand so we have to
+  // parse the type before processing the inductionVariable.
+  OpAsmParser::Argument inductionVariable;
+  OpAsmParser::UnresolvedOperand tripcount;
+  if (parser.parseArgument(inductionVariable, /*allowType*/ true) ||
+      parser.parseKeyword("in") || parser.parseKeyword("range") ||
+      parser.parseLParen() || parser.parseOperand(tripcount) ||
+      parser.parseRParen() ||
+      parser.resolveOperand(tripcount, inductionVariable.type, result.operands))
+    return failure();
+
+  // Parse the loop body.
+  Region *region = result.addRegion();
+  if (parser.parseRegion(*region, {inductionVariable}))
+    return failure();
+
+  // We parsed the cli operand first, but because it is optional, it must be
+  // last in the operand list.
+  result.operands.append(cliOperand);
+
+  // Parse the optional attribute list.
+  if (parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+
+  return mlir::success();
+}
+
+/// Check that a non-empty loop region takes exactly one argument (the
+/// induction variable) whose type equals the type of the trip count.
+LogicalResult CanonicalLoopOp::verify() {
+  Region &body = getRegion();
+
+  // A freshly created operation may not have a body yet; there is nothing to
+  // check in that case.
+  if (body.empty())
+    return success();
+
+  // The region's entry must accept the induction variable and nothing else.
+  if (body.getNumArguments() != 1)
+    return emitOpError(
+        "Canonical loop region must have exactly one argument");
+
+  Type ivType = body.getArgument(0).getType();
+  if (ivType != getTripCount().getType())
+    return emitOpError(
+        "Region argument must be the same type as the trip count");
+
+  return success();
+}
+
+/// Return the induction variable, i.e. the first argument of the loop region.
+Value CanonicalLoopOp::getInductionVar() { return getRegion().getArgument(0); }
+
+/// Return the (start index, length) of the operands that are applyees; the
+/// returned range is always empty for omp.canonical_loop.
+std::pair<unsigned, unsigned>
+CanonicalLoopOp::getApplyeesODSOperandIndexAndLength() {
+  // No applyees
+  return {0, 0};
+}
+
+/// Return the (start index, length) of the operands that are generatees,
+/// which is the operand range of the `cli` operand.
+std::pair<unsigned, unsigned>
+CanonicalLoopOp::getGenerateesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_cli);
+}
+
+//===----------------------------------------------------------------------===//
+// UnrollHeuristicOp
+//===----------------------------------------------------------------------===//
+
+/// Build an omp.unroll_heuristic operation with \p cli as its only operand.
+void UnrollHeuristicOp::build(::mlir::OpBuilder &odsBuilder,
+                              ::mlir::OperationState &odsState,
+                              ::mlir::Value cli) {
+  odsState.addOperands(cli);
+}
+
+/// Print the custom assembly form: `(` applyee `)` [attr-dict]
+void UnrollHeuristicOp::print(OpAsmPrinter &p) {
+  p << '(' << getApplyee() << ')';
+
+  p.printOptionalAttrDict((*this)->getAttrs());
+}
+
+/// Parse the custom assembly form:
+///   `(` applyee `)` [`->` `(` `)`] [attr-dict]
+mlir::ParseResult UnrollHeuristicOp::parse(::mlir::OpAsmParser &parser,
+                                           ::mlir::OperationState &result) {
+  CanonicalLoopInfoType handleType =
+      CanonicalLoopInfoType::get(parser.getContext());
+
+  // The loop to unroll, enclosed in parentheses.
+  OpAsmParser::UnresolvedOperand loopHandle;
+  if (parser.parseLParen() || parser.parseOperand(loopHandle) ||
+      parser.resolveOperand(loopHandle, handleType, result.operands) ||
+      parser.parseRParen())
+    return failure();
+
+  // An explicit, always empty, list of output loops may follow an arrow.
+  if (!parser.parseOptionalArrow() &&
+      (parser.parseLParen() || parser.parseRParen()))
+    return failure();
+
+  // The trailing attribute dictionary is optional; its parse status is the
+  // overall result.
+  return parser.parseOptionalAttrDict(result.attributes);
+}
+
+/// Return the (start index, length) of the operands that are applyees, which
+/// is the operand range of the `applyee` operand.
+std::pair<unsigned, unsigned>
+UnrollHeuristicOp::getApplyeesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_applyee);
+}
+
+/// Return the (start index, length) of the operands that are generatees; the
+/// returned range is always empty for omp.unroll_heuristic.
+std::pair<unsigned, unsigned>
+UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
+  return {0, 0};
+}
+
 
//===----------------------------------------------------------------------===//
 // Critical construct (2.17.1)
 
//===----------------------------------------------------------------------===//
diff --git 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp 
b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 6bccc1d6f5d30..d3cfdf26a344b 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3014,6 +3014,67 @@ convertOmpLoopNest(Operation &opInst, 
llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Convert an omp.canonical_loop to LLVM-IR
+///
+/// The loop skeleton is created with OpenMPIRBuilder::createCanonicalLoop and
+/// the loop region is translated from within its body callback. On success,
+/// \p builder is positioned after the loop and, if the operation has a CLI
+/// operand, the resulting llvm::CanonicalLoopInfo is registered for it in
+/// \p moduleTranslation. On failure, the error is emitted on \p op.
+static LogicalResult
+convertOmpCanonicalLoopOp(omp::CanonicalLoopOp op, llvm::IRBuilderBase &builder,
+                          LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+
+  llvm::OpenMPIRBuilder::LocationDescription loopLoc(builder);
+  Value loopIV = op.getInductionVar();
+  Value loopTC = op.getTripCount();
+
+  llvm::Value *llvmTC = moduleTranslation.lookupValue(loopTC);
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> llvmOrError =
+      ompBuilder->createCanonicalLoop(
+          loopLoc,
+          [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *llvmIV) {
+            // Register the mapping of MLIR induction variable to LLVM-IR
+            // induction variable
+            moduleTranslation.mapValue(loopIV, llvmIV);
+
+            // Emit the loop body at the insertion point provided by the
+            // OpenMPIRBuilder.
+            builder.restoreIP(ip);
+            llvm::Expected<llvm::BasicBlock *> bodyGenStatus =
+                convertOmpOpRegions(op.getRegion(), "omp.loop.region", builder,
+                                    moduleTranslation);
+
+            return bodyGenStatus.takeError();
+          },
+          llvmTC, "omp.loop");
+  if (!llvmOrError)
+    return op.emitError(llvm::toString(llvmOrError.takeError()));
+
+  // Continue emitting code after the loop.
+  llvm::CanonicalLoopInfo *llvmCLI = *llvmOrError;
+  llvm::IRBuilderBase::InsertPoint afterIP = llvmCLI->getAfterIP();
+  builder.restoreIP(afterIP);
+
+  // Register the mapping of MLIR loop to LLVM-IR OpenMPIRBuilder loop
+  if (Value cli = op.getCli())
+    moduleTranslation.mapOmpLoop(cli, llvmCLI);
+
+  return success();
+}
+
+/// Lower omp.unroll_heuristic (`#pragma omp unroll` / "!$omp unroll") by
+/// handing the applyee's already-lowered loop to
+/// OpenMPIRBuilder::unrollLoopHeuristic. The applyee's loop mapping is
+/// invalidated afterwards.
+static LogicalResult
+applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *irBuilder = moduleTranslation.getOpenMPBuilder();
+
+  Value loopHandle = op.getApplyee();
+  assert(loopHandle && "Loop to apply unrolling on required");
+
+  // The loop must already have been lowered and registered under its handle.
+  llvm::CanonicalLoopInfo *loopInfo =
+      moduleTranslation.lookupOMPLoop(loopHandle);
+  llvm::OpenMPIRBuilder::LocationDescription here(builder);
+  irBuilder->unrollLoopHeuristic(here.DL, loopInfo);
+
+  // The handle has been consumed by this transformation.
+  moduleTranslation.invalidateOmpLoop(loopHandle);
+  return success();
+}
+
 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 static llvm::AtomicOrdering
 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -5866,6 +5927,23 @@ convertHostOrTargetOperation(Operation *op, 
llvm::IRBuilderBase &builder,
                 // etc. and then discarded
                 return success();
               })
+          .Case([&](omp::NewCliOp op) {
+            // Meta-operation: Doesn't do anything by itself, but used to
+            // identify a loop.
+            return success();
+          })
+          .Case([&](omp::CanonicalLoopOp op) {
+            return convertOmpCanonicalLoopOp(op, builder, moduleTranslation);
+          })
+          .Case([&](omp::UnrollHeuristicOp op) {
+            // FIXME: Handling omp.unroll_heuristic as an executable requires
+            // that the generator (e.g. omp.canonical_loop) has been seen first.
+            // For constructs that require all codegen to occur inside a callback
+            // (e.g. OpenMPIRBuilder::createParallel), all codegen of that
+            // contained region including their transformations must occur at
+            // the omp.canonical_loop.
+            return applyUnrollHeuristic(op, builder, moduleTranslation);
+          })
           .Default([&](Operation *inst) {
             return inst->emitError()
                    << "not yet implemented: " << inst->getName();
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir 
b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
new file mode 100644
index 0000000000000..345c53d2890b8
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop-invalid.mlir
@@ -0,0 +1,50 @@
+// RUN: mlir-opt %s -verify-diagnostics
+
+
+func.func @omp_canonloop_block_arg(%tc : i32) -> () {
+  // expected-error@below {{Canonical loop region must have exactly one 
argument}}
+  "omp.canonical_loop" (%tc) ({
+    ^bb0(%iv: i32, %somearg: i32):
+      omp.terminator
+  }) : (i32) -> ()
+  return
+}
+
+
+func.func @omp_canonloop_multiple_generators(%tc : i32) -> () {
+  // expected-error@below {{'omp.new_cli' op CLI must have at most one 
generator}}
+  %cli = omp.new_cli
+  // expected-note@below {{second generator here}}
+  omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) {
+    omp.terminator
+  }
+  // expected-note@below {{first generator here}}
+  omp.canonical_loop(%cli) %iv2 : i32 in range(%tc) {
+    omp.terminator
+  }
+  return
+}
+
+
+func.func @omp_canonloop_multiple_consumers() -> () {
+  // expected-error@below {{'omp.new_cli' op CLI must have at most one 
consumer}}
+  %cli = omp.new_cli
+  %tc = llvm.mlir.constant(4 : i32) : i32
+  omp.canonical_loop(%cli) %iv1 : i32 in range(%tc) {
+    omp.terminator
+  }
+  // expected-note@below {{second consumer here}}
+  omp.unroll_heuristic(%cli)
+  // expected-note@below {{first consumer here}}
+  omp.unroll_heuristic(%cli)
+  return
+}
+
+
+func.func @omp_canonloop_no_generator() -> () {
+  // expected-error@below {{'omp.new_cli' op CLI has no generator}}
+  %cli = omp.new_cli
+  // expected-note@below {{see consumer here}}
+  omp.unroll_heuristic(%cli)
+  return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir 
b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
new file mode 100644
index 0000000000000..adadb8bbac49d
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -0,0 +1,157 @@
+// RUN: mlir-opt %s | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+
+
+// CHECK-LABEL: @omp_canonloop_raw(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_raw(%tc : i32) -> () {
+  // CHECK: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+  "omp.canonical_loop" (%tc) ({
+    ^bb0(%iv: i32):
+      // CHECK-NEXT: = llvm.add %iv, %iv : i32
+      %newval = llvm.add %iv, %iv : i32
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+  // CHECK-NEXT: }
+  }) : (i32) -> ()
+  // CHECK-NEXT: return
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_raw(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in 
range(%[[tc]]) {
+  "omp.canonical_loop" (%tc, %canonloop_s0) ({
+    ^bb_first(%iv_first: i32):
+      // CHECK-NEXT: = llvm.add %iv, %iv : i32
+      %newval = llvm.add %iv_first, %iv_first : i32
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  // CHECK-NEXT: }
+  }) : (i32, !omp.cli) -> ()
+
+  // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) 
{
+  "omp.canonical_loop" (%tc, %canonloop_s1) ({
+    ^bb_second(%iv_second: i32):
+    // CHECK: omp.terminator
+    omp.terminator
+  // CHECK-NEXT: }
+  }) : (i32, !omp.cli) -> ()
+
+  // CHECK-NEXT: return
+  return
+}
+
+
+// CHECK-LABEL: @omp_nested_canonloop_raw(
+// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
+func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %outer = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  %inner = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in 
range(%[[tc_outer]]) {
+  "omp.canonical_loop" (%tc_outer, %outer) ({
+    ^bb_outer(%iv_outer: i32):
+      // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in 
range(%[[tc_inner]]) {
+      "omp.canonical_loop" (%tc_inner, %inner) ({
+        ^bb_inner(%iv_inner: i32):
+          // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+          %newval = llvm.add %iv_outer, %iv_inner: i32
+          // CHECK-NEXT: omp.terminator
+          omp.terminator
+      }) : (i32, !omp.cli) -> ()
+      // CHECK: omp.terminator
+      omp.terminator
+  }) : (i32, !omp.cli) -> ()
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop %iv : i32 in range(%tc) {
+    // CHECK-NEXT: llvm.add %iv, %iv : i32
+    %newval = llvm.add %iv, %iv: i32
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_constant_pretty()
+func.func @omp_canonloop_constant_pretty() -> () {
+  // CHECK-NEXT:  %[[tc:.+]] = llvm.mlir.constant(4 : i32) : i32
+  %tc = llvm.mlir.constant(4 : i32) : i32
+  // CHECK-NEXT: omp.canonical_loop %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop %iv : i32 in range(%tc) {
+    // CHECK-NEXT: llvm.add %iv, %iv : i32
+    %newval = llvm.add %iv, %iv: i32
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in 
range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1) %iv : i32 in 
range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  %canonloop_s0_s0 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in 
range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in 
range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+      // CHECK: omp.terminator
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  return
+}
+
+
+// CHECK-LABEL: @omp_newcli_unused(
+// CHECK-SAME: )
+func.func @omp_newcli_unused() -> () {
+  // CHECK-NEXT:  %cli = omp.new_cli
+  %cli = omp.new_cli
+  // CHECK-NEXT: return
+  return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir 
b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
new file mode 100644
index 0000000000000..cda7d0b500166
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -0,0 +1,59 @@
+// RUN: mlir-opt %s            | FileCheck %s
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_raw(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  "omp.canonical_loop" (%tc, %canonloop) ({
+    ^bb0(%iv: i32):
+      omp.terminator
+  }) : (i32, !omp.cli) -> ()
+  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
+  return
+}
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  omp.unroll_heuristic(%canonloop)
+  return
+}
+
+
+// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32) {
+func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %cli_outer = omp.new_cli
+  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  %cli_inner = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
+      // CHECK: omp.terminator
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  omp.unroll_heuristic(%cli_outer)
+  // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+  omp.unroll_heuristic(%cli_inner)
+  return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
new file mode 100644
index 0000000000000..9abef003d6183
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-canonical_loop.mlir
@@ -0,0 +1,175 @@
+// Test lowering of standalone omp.canonical_loop
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK-LABEL: define void @anon_loop(
+// CHECK-SAME:    ptr %[[ptr:.+]],
+// CHECK-SAME:    i32 %[[tc:.+]]) {
+// CHECK-NEXT:    br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.preheader:
+// CHECK-NEXT:    br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.header:
+// CHECK-NEXT:    %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:    br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.cond:
+// CHECK-NEXT:    %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT:    br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.body:
+// CHECK-NEXT:    br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp.loop.region:
+// CHECK-NEXT:    store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT:    br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp.region.cont:
+// CHECK-NEXT:    br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.inc:
+// CHECK-NEXT:    %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT:    br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.exit:
+// CHECK-NEXT:    br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.after:
+// CHECK-NEXT:    ret void
+// CHECK-NEXT:  }
+llvm.func @anon_loop(%ptr: !llvm.ptr, %tc : i32) -> () {
+  omp.canonical_loop %iv : i32 in range(%tc) {
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+
+
+// CHECK-LABEL: define void @trivial_loop(
+// CHECK-SAME:    ptr %[[ptr:.+]],
+// CHECK-SAME:    i32 %[[tc:.+]]) {
+// CHECK-NEXT:    br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.preheader:
+// CHECK-NEXT:    br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.header:
+// CHECK-NEXT:    %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:    br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.cond:
+// CHECK-NEXT:    %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT:    br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.body:
+// CHECK-NEXT:    br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp.loop.region:
+// CHECK-NEXT:    store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT:    br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp.region.cont:
+// CHECK-NEXT:    br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.inc:
+// CHECK-NEXT:    %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT:    br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.exit:
+// CHECK-NEXT:    br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT:  omp_omp.loop.after:
+// CHECK-NEXT:    ret void
+// CHECK-NEXT:  }
+llvm.func @trivial_loop(%ptr: !llvm.ptr, %tc : i32) -> () {
+  %cli = omp.new_cli
+  omp.canonical_loop(%cli) %iv : i32 in range(%tc) {
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  llvm.return
+}
+
+
+// CHECK-LABEL: define void @nested_loop(
+// CHECK-SAME:    ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) {
+// CHECK-NEXT:  br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT:  br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT:  %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:  br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT:  %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]]
+// CHECK-NEXT:  br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT:  br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT:  br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1:
+// CHECK-NEXT:  br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header2:
+// CHECK-NEXT:  %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ]
+// CHECK-NEXT:  br label %omp_omp.loop.cond3
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond3:
+// CHECK-NEXT:  %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]]
+// CHECK-NEXT:  br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4:
+// CHECK-NEXT:  br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12:
+// CHECK-NEXT:  store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT:  br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11:
+// CHECK-NEXT:  br label %omp_omp.loop.inc5
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc5:
+// CHECK-NEXT:  %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1
+// CHECK-NEXT:  br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit6:
+// CHECK-NEXT:  br label %omp_omp.loop.after7
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after7:
+// CHECK-NEXT:  br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT:  br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT:  %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT:  br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT:  br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT:  ret void
+// CHECK-NEXT: }
+llvm.func @nested_loop(%ptr: !llvm.ptr, %outer_tc : i32, %inner_tc : i32) -> () {
+  %outer_cli = omp.new_cli
+  %inner_cli = omp.new_cli
+  omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) {
+    omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) {
+      %val = llvm.mlir.constant(42.0 : f32) : f32
+      llvm.store %val, %ptr : f32, !llvm.ptr
+      omp.terminator
+    }
+    omp.terminator
+  }
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
new file mode 100644
index 0000000000000..0f0448e15f983
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic01.mlir
@@ -0,0 +1,56 @@
+// Test lowering of the omp.unroll_heuristic
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+// CHECK-LABEL: define void @unroll_heuristic_trivial_loop(
+// CHECK-SAME:    ptr %[[ptr:.+]], i32 %[[tc:.+]]) {
+// CHECK-NEXT:   br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT:   br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT:   %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:   br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT:   %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[tc]]
+// CHECK-NEXT:   br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT:   br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT:   store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT:   br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT:   br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT:   %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT:   br label %omp_omp.loop.header, !llvm.loop ![[$MD1:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT:   br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+llvm.func @unroll_heuristic_trivial_loop(%ptr: !llvm.ptr, %tc: i32) -> () {
+  %literal_cli = omp.new_cli
+  omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) {
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  omp.unroll_heuristic(%literal_cli)
+  llvm.return
+}
+
+
+// Start of metadata
+// CHECK-LABEL: !llvm.module.flags
+
+// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]}
+// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir
new file mode 100644
index 0000000000000..f82b4990e378e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-unroll-heuristic02.mlir
@@ -0,0 +1,93 @@
+// Test lowering of the omp.unroll_heuristic
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+// CHECK-LABEL: define void @unroll_heuristic_nested_loop(
+// CHECK-SAME:    ptr %[[ptr:.+]], i32 %[[outer_tc:.+]], i32 %[[inner_tc:.+]]) {
+// CHECK-NEXT:   br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:
+// CHECK-NEXT:   br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:
+// CHECK-NEXT:   %omp_omp.loop.iv = phi i32 [ 0, %omp_omp.loop.preheader ], [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:   br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:
+// CHECK-NEXT:   %omp_omp.loop.cmp = icmp ult i32 %omp_omp.loop.iv, %[[outer_tc]]
+// CHECK-NEXT:   br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:
+// CHECK-NEXT:   br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:
+// CHECK-NEXT:   br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1:
+// CHECK-NEXT:   br label %omp_omp.loop.header2
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header2:
+// CHECK-NEXT:   %omp_omp.loop.iv8 = phi i32 [ 0, %omp_omp.loop.preheader1 ], [ %omp_omp.loop.next10, %omp_omp.loop.inc5 ]
+// CHECK-NEXT:   br label %omp_omp.loop.cond3
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond3:
+// CHECK-NEXT:   %omp_omp.loop.cmp9 = icmp ult i32 %omp_omp.loop.iv8, %[[inner_tc]]
+// CHECK-NEXT:   br i1 %omp_omp.loop.cmp9, label %omp_omp.loop.body4, label %omp_omp.loop.exit6
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4:
+// CHECK-NEXT:   br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12:
+// CHECK-NEXT:   store float 4.200000e+01, ptr %[[ptr]], align 4
+// CHECK-NEXT:   br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11:
+// CHECK-NEXT:   br label %omp_omp.loop.inc5
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc5:
+// CHECK-NEXT:   %omp_omp.loop.next10 = add nuw i32 %omp_omp.loop.iv8, 1
+// CHECK-NEXT:   br label %omp_omp.loop.header2, !llvm.loop ![[$MD1:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit6:
+// CHECK-NEXT:   br label %omp_omp.loop.after7
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after7:
+// CHECK-NEXT:   br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:
+// CHECK-NEXT:   br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:
+// CHECK-NEXT:   %omp_omp.loop.next = add nuw i32 %omp_omp.loop.iv, 1
+// CHECK-NEXT:   br label %omp_omp.loop.header, !llvm.loop ![[$MD3:[0-9]+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:
+// CHECK-NEXT:   br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+llvm.func @unroll_heuristic_nested_loop(%ptr: !llvm.ptr, %outer_tc: i32, %inner_tc: i32) -> () {
+  %outer_cli = omp.new_cli
+  %inner_cli = omp.new_cli
+  omp.canonical_loop(%outer_cli) %outer_iv : i32 in range(%outer_tc) {
+    omp.canonical_loop(%inner_cli) %inner_iv : i32 in range(%inner_tc) {
+      %val = llvm.mlir.constant(42.0 : f32) : f32
+      llvm.store %val, %ptr : f32, !llvm.ptr
+      omp.terminator
+    }
+    omp.terminator
+  }
+  omp.unroll_heuristic(%outer_cli)
+  omp.unroll_heuristic(%inner_cli)
+  llvm.return
+}
+
+
+// Start of metadata
+// CHECK-LABEL: !llvm.module.flags
+
+// CHECK: ![[$MD1]] = distinct !{![[$MD1]], ![[$MD2:[0-9]+]]}
+// CHECK: ![[$MD2]] = !{!"llvm.loop.unroll.enable"}
+// CHECK: ![[$MD3]] = distinct !{![[$MD3]], ![[$MD2]]}
+
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 6008ed4673d1b..cbb4030f3adb4 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -2223,6 +2223,17 @@ generateNamedOperandGetters(const Operator &op, Class &opClass,
                     "'SameVariadicOperandSize' traits");
   }
 
+  // Print the ods names so they don't need to be hardcoded in the source.
+  for (int i = 0; i != numOperands; ++i) {
+    const auto &operand = op.getOperand(i);
+    if (operand.name.empty())
+      continue;
+
+    opClass.declare<Field>("static constexpr int", Twine("odsIndex_") +
+                                                       operand.name + " = " +
+                                                       Twine(i));
+  }
+
   // First emit a few "sink" getter methods upon which we layer all nicer named
   // getter methods.
   // If generating for an adaptor, the method is put into the non-templated

>From a12dca3b1d6976ff7c1b9647f4a9650a8ec3018b Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Tue, 8 Jul 2025 01:49:13 +0200
Subject: [PATCH 2/3] Emit error on unsupported composition; Post-merge fix;
 Address comments; Introduce isApplyee/isGeneratee

---
 flang/lib/Lower/OpenMP/OpenMP.cpp             | 46 +++++++++++++++++--
 ...nested-loop-transformation-construct01.f90 |  2 +-
 ...nested-loop-transformation-construct02.f90 |  4 +-
 .../mlir/Dialect/OpenMP/OpenMPDialect.h       |  5 --
 4 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9d8cab02b2c9e..0f568b3626515 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -64,6 +64,28 @@ static void processHostEvalClauses(lower::AbstractConverter &converter,
                                    lower::pft::Evaluation &eval,
                                    mlir::Location loc);
 
+static llvm::omp::Directive
+getOpenMPDirectiveEnum(const parser::OmpLoopDirective &beginStatment) {
+  return beginStatment.v;
+}
+
+static llvm::omp::Directive getOpenMPDirectiveEnum(
+    const parser::OmpBeginLoopDirective &beginLoopDirective) {
+  return getOpenMPDirectiveEnum(
+      std::get<parser::OmpLoopDirective>(beginLoopDirective.t));
+}
+
+static llvm::omp::Directive
+getOpenMPDirectiveEnum(const parser::OpenMPLoopConstruct &ompLoopConstruct) {
+  return getOpenMPDirectiveEnum(
+      std::get<parser::OmpBeginLoopDirective>(ompLoopConstruct.t));
+}
+
+static llvm::omp::Directive getOpenMPDirectiveEnum(
+    const common::Indirection<parser::OpenMPLoopConstruct> &ompLoopConstruct) {
+  return getOpenMPDirectiveEnum(ompLoopConstruct.value());
+}
+
 namespace {
 /// Structure holding information that is needed to pass host-evaluated
 /// information to later lowering stages.
@@ -3489,6 +3511,11 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
     newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
                        item);
     break;
+  case llvm::omp::Directive::OMPD_tile: {
+    unsigned version = semaCtx.langOptions().OpenMPVersion;
+    TODO(loc, "Unhandled loop directive (" +
+                  llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+  }
   case llvm::omp::Directive::OMPD_unroll:
     genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
     break;
@@ -3927,12 +3954,25 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable,
     if (auto *ompNestedLoopCons{
             std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
                 &*optLoopCons)}) {
-      genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value());
+      llvm::omp::Directive nestedDirective =
+          getOpenMPDirectiveEnum(*ompNestedLoopCons);
+      switch (nestedDirective) {
+      case llvm::omp::Directive::OMPD_tile:
+        // Emit the omp.loop_nest with annotation for tiling
+        genOMP(converter, symTable, semaCtx, eval, ompNestedLoopCons->value());
+        break;
+      default: {
+        unsigned version = semaCtx.langOptions().OpenMPVersion;
+        TODO(currentLocation,
+             "Applying a loop-associated on the loop generated by the " +
+                 llvm::omp::getOpenMPDirectiveName(nestedDirective, version) +
+                 " construct");
+      }
+      }
     }
   }
 
-  llvm::omp::Directive directive =
-      std::get<parser::OmpLoopDirective>(beginLoopDirective.t).v;
+  llvm::omp::Directive directive = getOpenMPDirectiveEnum(beginLoopDirective);
   const parser::CharBlock &source =
       std::get<parser::OmpLoopDirective>(beginLoopDirective.t).source;
   ConstructQueue queue{
diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
index a76e7e52100db..17eba93a7405d 100644
--- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
+++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct01.f90
@@ -1,6 +1,6 @@
 ! Test to ensure TODO message is emitted for tile OpenMP 5.1 Directives when they are nested.
 
-!RUN: not %flang -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s
+!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
 
 subroutine loop_transformation_construct
   implicit none
diff --git a/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90 b/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90
index 33b7c5a917619..cdc628a3b2e64 100644
--- a/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90
+++ b/flang/test/Lower/OpenMP/nested-loop-transformation-construct02.f90
@@ -1,6 +1,6 @@
 ! Test to ensure TODO message is emitted for unroll OpenMP 5.1 Directives when they are nested.
 
-!RUN: not %flang -fopenmp -fopenmp-version=51 %s 2>&1 | FileCheck %s
+!RUN: not %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s 2>&1 | FileCheck %s
 
 program loop_transformation_construct
   implicit none
@@ -17,4 +17,4 @@ program loop_transformation_construct
   !$omp end do
 end program loop_transformation_construct
 
-!CHECK: not yet implemented: Unhandled loop directive (unroll)
+!CHECK: not yet implemented: Applying a loop-associated on the loop generated by the unroll construct
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
index 0233f9c19e726..7cf738352ba47 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h
@@ -25,11 +25,6 @@
 #include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
 
-namespace mlir::omp {
-/// Find the omp.new_cli, generator, and consumer of a canonical loop info.
-std::tuple<NewCliOp, OpOperand *, OpOperand *> decodeCli(mlir::Value cli);
-} // namespace mlir::omp
-
 #define GET_TYPEDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsTypes.h.inc"
 

>From 45773330d7f0ef3095badb82cf937fdf9fb0f2ee Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-proj...@meinersbur.de>
Date: Tue, 8 Jul 2025 12:41:52 +0200
Subject: [PATCH 3/3] Address review comments

---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 0f568b3626515..db353247bc2fd 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -2191,7 +2191,9 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
 
   auto &nestedEval = eval.getFirstNestedEvaluation();
   if (nestedEval.getIf<parser::DoConstruct>()->IsDoConcurrent()) {
-    TODO(loc, "Do Concurrent in unroll construct");
+    // OpenMP specifies DO CONCURRENT only with the `!$omp loop` construct. Will
+    // need to add special cases for this combination.
+    TODO(loc, "DO CONCURRENT as canonical loop not supported");
   }
 
   // Get the loop bounds (and increment)
@@ -2211,11 +2213,11 @@ genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
     if (bounds->step) {
       return fir::getBase(
           converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx));
-    } else {
-      // If `step` is not present, assume it is `1`.
-      return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(),
-                                                1);
     }
+
+    // If `step` is not present, assume it is `1`.
+    return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(),
+                                              1);
   }();
 
   // Get the integer kind for the loop variable and cast the loop bounds

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to