[flang-commits] [flang] [mlir] [mlir][omp] Add omp.tile operation (PR #160292)

Michael Kruse via flang-commits flang-commits at lists.llvm.org
Thu Oct 2 09:52:55 PDT 2025


https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/160292

>From b3919715ebe223b39dd789dcd471a864666d7008 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 19 Sep 2025 14:43:48 +0200
Subject: [PATCH 1/8] Improve canonloop/iv naming

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 237 +++++++++++++-----
 .../Dialect/OpenMP/cli-canonical_loop.mlir    | 127 ++++++++--
 .../Dialect/OpenMP/cli-unroll-heuristic.mlir  |  28 +--
 3 files changed, 292 insertions(+), 100 deletions(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3d70e28ed23ab..cf549a6bb50b4 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -77,6 +77,178 @@ struct LLVMPointerPointerLikeModel
 };
 } // namespace
 
+/// Generate a name of a canonical loop nest of the format
+/// `<prefix>(_s<num>_r<num>)*` that describes its nesting inside parent
+/// operations (`_r<num>`) and that operation's region (`_s<num>`). The region
+/// number is omitted if the parent operation has just one region. If a loop
+/// nest just consists of canonical loops nested inside each other, also uses
+/// `d<num>` where <num> is the nesting depth of the loop.
+static std::string generateLoopNestingName(StringRef prefix,
+                                           CanonicalLoopOp op) {
+  struct Component {
+    // An region argument of an operation
+    Operation *parentOp;
+    size_t regionInOpIdx;
+    bool isOnlyRegionInOp;
+    bool skipRegion;
+
+    // An operation somewhere in a parent region
+    Operation *thisOp;
+    Region *parentRegion;
+    size_t opInRegionIdx;
+    bool isOnlyOpInRegion;
+    bool skipOp;
+    int depth = -1;
+  };
+  SmallVector<Component> components;
+
+  // Gather a list of parent regions and operations, and the position within
+  // their parent
+  Operation *o = op.getOperation();
+  while (o) {
+    if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+      break;
+
+    // Operation within a region
+    Region *r = o->getParentRegion();
+    if (!r)
+      break;
+
+    llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+    size_t idx = 0;
+    bool found = false;
+    size_t sequentialIdx = -1;
+    bool isOnlyLoop = true;
+    for (Block *b : traversal) {
+      for (Operation &op : *b) {
+        if (&op == o && !found) {
+          sequentialIdx = idx;
+          found = true;
+        }
+        if (op.getNumRegions()) {
+          idx += 1;
+          if (idx > 1)
+            isOnlyLoop = false;
+        }
+        if (found && !isOnlyLoop)
+          break;
+      }
+    }
+
+    Component &comp = components.emplace_back();
+    comp.thisOp = o;
+    comp.parentRegion = r;
+    comp.opInRegionIdx = sequentialIdx;
+    comp.isOnlyOpInRegion = isOnlyLoop;
+
+    // Region argument of an operation
+    Operation *parent = r->getParentOp();
+
+    comp.parentOp = parent;
+    comp.regionInOpIdx = 0;
+    comp.isOnlyRegionInOp = true;
+    if (parent && parent->getRegions().size() > 1) {
+      auto getRegionIndex = [](Operation *o, Region *r) {
+        for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+          if (&region == r)
+            return idx;
+        }
+        llvm_unreachable("Region not child of its parent operation");
+      };
+      comp.regionInOpIdx = getRegionIndex(parent, r);
+      comp.isOnlyRegionInOp = false;
+    }
+
+    if (!parent)
+      break;
+
+    // next parent
+    o = parent;
+  }
+
+  // Reorder components from outermost to innermost
+  std::reverse(components.begin(), components.end());
+
+  // Determine whether a component is not needed
+  for (auto &c : components) {
+    c.skipRegion = c.isOnlyRegionInOp;
+    c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
+  }
+
+  // Find runs of perfect nests and merge them into a single component
+  int curNestRoot = 0;
+  int curNestDepth = 1;
+  auto mergeLoopNest = [&](int innermost) {
+    auto outermost = curNestRoot;
+
+    // Don't do enything if it does not consist of at least 2 loops
+    if (outermost < innermost) {
+      for (auto i : llvm::seq<int>(outermost + 1, innermost)) {
+        components[i].skipOp = true;
+      }
+      components[innermost].depth = curNestDepth;
+    }
+
+    // Start new root
+    curNestRoot = innermost + 1;
+    curNestDepth = 1;
+  };
+  for (auto &&[i, c] : llvm::enumerate(components)) {
+    if (i <= curNestRoot)
+      continue;
+
+    // Check whether this region can be included
+    if (!c.skipRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    if (c.skipOp)
+      continue;
+
+    if (!c.isOnlyOpInRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    curNestDepth += 1;
+  }
+
+  // Finalize innermost loop nest
+  mergeLoopNest(components.size() - 1);
+
+  // Outermost loop does not need a suffix if it has no sibling
+  for (auto &c : components) {
+    if (c.skipOp)
+      continue;
+    if (c.isOnlyOpInRegion)
+      c.skipOp = true;
+    break;
+  }
+
+  // Compile name
+  SmallString<64> Name{prefix};
+  for (auto &c : components) {
+    auto addComponent = [&Name](char letter, int64_t idx) {
+      Name += '_';
+      Name += letter;
+      Name += idx;
+    };
+
+    if (!c.skipRegion)
+      addComponent('r', c.regionInOpIdx);
+
+    if (!c.skipOp) {
+      if (c.depth >= 0)
+        addComponent('d', c.depth - 1);
+      else
+        addComponent('s', c.opInRegionIdx);
+    }
+  }
+
+  return Name.str().str();
+}
+
 void OpenMPDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
@@ -3141,67 +3313,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
     cliName =
         TypeSwitch<Operation *, std::string>(gen->getOwner())
             .Case([&](CanonicalLoopOp op) {
-              // Find the canonical loop nesting: For each ancestor add a
-              // "+_r<idx>" suffix (in reverse order)
-              SmallVector<std::string> components;
-              Operation *o = op.getOperation();
-              while (o) {
-                if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
-                  break;
-
-                Region *r = o->getParentRegion();
-                if (!r)
-                  break;
-
-                auto getSequentialIndex = [](Region *r, Operation *o) {
-                  llvm::ReversePostOrderTraversal<Block *> traversal(
-                      &r->getBlocks().front());
-                  size_t idx = 0;
-                  for (Block *b : traversal) {
-                    for (Operation &op : *b) {
-                      if (&op == o)
-                        return idx;
-                      // Only consider operations that are containers as
-                      // possible children
-                      if (!op.getRegions().empty())
-                        idx += 1;
-                    }
-                  }
-                  llvm_unreachable("Operation not part of the region");
-                };
-                size_t sequentialIdx = getSequentialIndex(r, o);
-                components.push_back(("s" + Twine(sequentialIdx)).str());
-
-                Operation *parent = r->getParentOp();
-                if (!parent)
-                  break;
-
-                // If the operation has more than one region, also count in
-                // which of the regions
-                if (parent->getRegions().size() > 1) {
-                  auto getRegionIndex = [](Operation *o, Region *r) {
-                    for (auto [idx, region] :
-                         llvm::enumerate(o->getRegions())) {
-                      if (&region == r)
-                        return idx;
-                    }
-                    llvm_unreachable("Region not child its parent operation");
-                  };
-                  size_t regionIdx = getRegionIndex(parent, r);
-                  components.push_back(("r" + Twine(regionIdx)).str());
-                }
-
-                // next parent
-                o = parent;
-              }
-
-              SmallString<64> Name("canonloop");
-              for (const std::string &s : reverse(components)) {
-                Name += '_';
-                Name += s;
-              }
-
-              return Name;
+              return generateLoopNestingName("canonloop", op);
             })
             .Case([&](UnrollHeuristicOp op) -> std::string {
               llvm_unreachable("heuristic unrolling does not generate a loop");
@@ -3292,7 +3404,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
 
 void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
                                                OpAsmSetValueNameFn setNameFn) {
-  setNameFn(region.getArgument(0), "iv");
+  std::string ivName = generateLoopNestingName("iv", *this);
+  setNameFn(region.getArgument(0), ivName);
 }
 
 void CanonicalLoopOp::print(OpAsmPrinter &p) {
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
index adadb8bbac49d..874e3922805ec 100644
--- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
 
 
 // CHECK-LABEL: @omp_canonloop_raw(
@@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () {
 func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
   // CHECK-NEXT: %canonloop_s0 = omp.new_cli
   %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop_s0) ({
     ^bb_first(%iv_first: i32):
-      // CHECK-NEXT: = llvm.add %iv, %iv : i32
+      // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32
       %newval = llvm.add %iv_first, %iv_first : i32
     // CHECK-NEXT: omp.terminator
     omp.terminator
@@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
 
   // CHECK-NEXT: %canonloop_s1 = omp.new_cli
   %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop_s1) ({
     ^bb_second(%iv_second: i32):
     // CHECK: omp.terminator
@@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
 // CHECK-LABEL: @omp_nested_canonloop_raw(
 // CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
 func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %outer = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
   %inner = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) {
   "omp.canonical_loop" (%tc_outer, %outer) ({
     ^bb_outer(%iv_outer: i32):
-      // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+      // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) {
       "omp.canonical_loop" (%tc_inner, %inner) ({
         ^bb_inner(%iv_inner: i32):
-          // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+          // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32
           %newval = llvm.add %iv_outer, %iv_inner: i32
           // CHECK-NEXT: omp.terminator
           omp.terminator
@@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () {
 func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
   // CHECK-NEXT: %canonloop_s0 = omp.new_cli
   %canonloop_s0 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
     // CHECK-NEXT: omp.terminator
     omp.terminator
   }
 
   // CHECK: %canonloop_s1 = omp.new_cli
   %canonloop_s1 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: %canonloop_s2 = omp.new_cli
+  %canonloop_s2 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) {
     // CHECK-NEXT: omp.terminator
     omp.terminator
   }
@@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
 }
 
 
-// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-LABEL: @omp_canonloop_2d_nested_pretty(
 // CHECK-SAME: %[[tc:.+]]: i32)
-func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
-  %canonloop_s0 = omp.new_cli
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
-  %canonloop_s0_s0 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
-    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
-    omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+  %canonloop_d1 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) {
       // CHECK: omp.terminator
       omp.terminator
     }
@@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
 }
 
 
+// CHECK-LABEL: @omp_canonloop_3d_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () {
+  // CHECK: %canonloop = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK: %canonloop_d1 = omp.new_cli
+  %canonloop_d1 = omp.new_cli
+  // CHECK: %canonloop_d2 = omp.new_cli
+  %canonloop_d2 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) {
+      // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+      omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) {
+        // CHECK-NEXT: omp.terminator
+        omp.terminator
+      // CHECK-NEXT: }
+      }
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli
+  %canonloop_s0_d1 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
+   // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) {
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  // CHECK-NEXT: }
+  }
+
+  // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli
+  %canonloop_s1_d1 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+    // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) {
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  return
+}
+
+
 // CHECK-LABEL: @omp_newcli_unused(
 // CHECK-SAME: )
 func.func @omp_newcli_unused() -> () {
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
index cda7d0b500166..16884f4245e76 100644
--- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -1,18 +1,18 @@
-// RUN: mlir-opt %s            | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s            | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
 
 
 // CHECK-LABEL: @omp_unroll_heuristic_raw(
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %canonloop = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop) ({
     ^bb0(%iv: i32):
       omp.terminator
   }) : (i32, !omp.cli) -> ()
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
   return
 }
@@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
   // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
-  %canonloop = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  %canonloop = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
     omp.terminator
   }
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   omp.unroll_heuristic(%canonloop)
   return
 }
@@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
 // CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %cli_outer = omp.new_cli
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
   %cli_inner = omp.new_cli
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
-    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
     omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
       // CHECK: omp.terminator
       omp.terminator
@@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   omp.unroll_heuristic(%cli_outer)
-  // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+  // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1)
   omp.unroll_heuristic(%cli_inner)
   return
 }

>From ce66eec648f6415c199d6115f3be4d188eee59ba Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 12:19:41 +0200
Subject: [PATCH 2/8] Avoid compiler warning

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index cf549a6bb50b4..1674891410194 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -170,22 +170,21 @@ static std::string generateLoopNestingName(StringRef prefix,
   std::reverse(components.begin(), components.end());
 
   // Determine whether a component is not needed
-  for (auto &c : components) {
+  for (Component &c : components) {
     c.skipRegion = c.isOnlyRegionInOp;
     c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
   }
 
   // Find runs of perfect nests and merge them into a single component
-  int curNestRoot = 0;
-  int curNestDepth = 1;
-  auto mergeLoopNest = [&](int innermost) {
-    auto outermost = curNestRoot;
+  size_t curNestRoot = 0;
+  size_t curNestDepth = 1;
+  auto mergeLoopNest = [&](size_t innermost) {
+    size_t outermost = curNestRoot;
 
     // Don't do enything if it does not consist of at least 2 loops
     if (outermost < innermost) {
-      for (auto i : llvm::seq<int>(outermost + 1, innermost)) {
+      for (auto i : llvm::seq<int>(outermost + 1, innermost))
         components[i].skipOp = true;
-      }
       components[innermost].depth = curNestDepth;
     }
 

>From 3a141d6729e26a7eab821b86eee240ab4bfa322f Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 12:59:14 +0200
Subject: [PATCH 3/8] Add perfect-nest and rectangular loop nest tests

---
 flang/lib/Semantics/resolve-directives.cpp | 146 ++++++++++++++++++++-
 flang/test/Semantics/OpenMP/do08.f90       |   1 +
 flang/test/Semantics/OpenMP/do13.f90       |   1 +
 flang/test/Semantics/OpenMP/do22.f90       |  73 +++++++++++
 4 files changed, 215 insertions(+), 6 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/do22.f90

diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 2d1bec9968593..5f2c9f676099c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -149,6 +149,9 @@ template <typename T> class DirectiveAttributeVisitor {
     dataSharingAttributeObjects_.clear();
   }
   bool HasDataSharingAttributeObject(const Symbol &);
+  std::tuple<const parser::Name *, const parser::ScalarExpr *,
+      const parser::ScalarExpr *, const parser::ScalarExpr *>
+  GetLoopBounds(const parser::DoConstruct &);
   const parser::Name *GetLoopIndex(const parser::DoConstruct &);
   const parser::DoConstruct *GetDoConstructIf(
       const parser::ExecutionPartConstruct &);
@@ -933,6 +936,13 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
     privateDataSharingAttributeObjects_.clear();
   }
 
+  /// Check that loops in the loop nest are perfectly nested, as well that lower
+  /// bound, upper bound, and step expressions do not use the iv
+  /// of a surrounding loop of the associated loops nest.
+  /// We do not support non-perfectly nested loops not non-rectangular loops yet
+  /// (both introduced in OpenMP 5.0)
+  void CheckPerfectNestAndRectangularLoop(const parser::OpenMPLoopConstruct &x);
+
   // Predetermined DSA rules
   void PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
       const parser::OpenMPLoopConstruct &);
@@ -1009,14 +1019,15 @@ bool DirectiveAttributeVisitor<T>::HasDataSharingAttributeObject(
 }
 
 template <typename T>
-const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
-    const parser::DoConstruct &x) {
+std::tuple<const parser::Name *, const parser::ScalarExpr *,
+    const parser::ScalarExpr *, const parser::ScalarExpr *>
+DirectiveAttributeVisitor<T>::GetLoopBounds(const parser::DoConstruct &x) {
   using Bounds = parser::LoopControl::Bounds;
   if (x.GetLoopControl()) {
     if (const Bounds * b{std::get_if<Bounds>(&x.GetLoopControl()->u)}) {
-      return &b->name.thing;
-    } else {
-      return nullptr;
+      auto &&step = b->step;
+      return {&b->name.thing, &b->lower, &b->upper,
+          step.has_value() ? &step.value() : nullptr};
     }
   } else {
     context_
@@ -1024,8 +1035,15 @@ const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
             "Loop control is not present in the DO LOOP"_err_en_US)
         .Attach(GetContext().directiveSource,
             "associated with the enclosing LOOP construct"_en_US);
-    return nullptr;
   }
+  return {nullptr, nullptr, nullptr, nullptr};
+}
+
+template <typename T>
+const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
+    const parser::DoConstruct &x) {
+  auto &&[iv, lb, ub, step] = GetLoopBounds(x);
+  return iv;
 }
 
 template <typename T>
@@ -1957,6 +1975,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
       }
     }
   }
+  CheckPerfectNestAndRectangularLoop(x);
   PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
   ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1;
   return true;
@@ -2152,6 +2171,121 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses(
   }
 }
 
+void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
+    const parser::OpenMPLoopConstruct
+        &x) { // GetAssociatedLoopLevelFromClauses(clauseList);
+  auto &&dirContext = GetContext();
+  std::int64_t dirDepth{dirContext.associatedLoopLevel};
+  if (dirDepth <= 0)
+    return;
+
+  Symbol::Flag ivDSA;
+  if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
+    ivDSA = Symbol::Flag::OmpPrivate;
+  } else if (dirDepth == 1) {
+    ivDSA = Symbol::Flag::OmpLinear;
+  } else {
+    ivDSA = Symbol::Flag::OmpLastPrivate;
+  }
+
+  auto checkExprHasSymbols = [&](llvm::SmallVector<Symbol *> &ivs,
+                                 const parser::ScalarExpr *bound) {
+    if (ivs.empty())
+      return;
+
+    if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) {
+      semantics::UnorderedSymbolSet boundSyms =
+          evaluate::CollectSymbols(*boundExpr);
+      for (auto iv : ivs) {
+        if (boundSyms.count(*iv) != 0) {
+          // TODO: Point to occurence of iv in boundExpr, directiveSource as a
+          // note
+          context_.Say(dirContext.directiveSource,
+              "Trip count must be computable and invariant"_err_en_US);
+        }
+      }
+    }
+  };
+
+  // Skip over loop transformation directives
+  const parser::OpenMPLoopConstruct *innerMostLoop = &x;
+  const parser::NestedConstruct *innerMostNest = nullptr;
+  while (auto &optLoopCons{
+      std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) {
+    innerMostNest = &(optLoopCons.value());
+    if (const auto *innerLoop{
+            std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+                innerMostNest)}) {
+      innerMostLoop = &(innerLoop->value());
+    } else
+      break;
+  }
+
+  if (!innerMostNest)
+    return;
+  const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)};
+  if (!outer)
+    return;
+
+  llvm::SmallVector<Symbol *> ivs;
+  int curLevel = 0;
+  const parser::DoConstruct *loop{outer};
+  while (true) {
+    auto [iv, lb, ub, step] = GetLoopBounds(*loop);
+
+    if (lb)
+      checkExprHasSymbols(ivs, lb);
+    if (ub)
+      checkExprHasSymbols(ivs, ub);
+    if (step)
+      checkExprHasSymbols(ivs, step);
+    if (iv) {
+      if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())})
+        ivs.push_back(symbol);
+    }
+
+    // Stop after processing all affected loops
+    if (curLevel + 1 >= dirDepth)
+      break;
+
+    // Recurse into nested loop
+    const auto &block{std::get<parser::Block>(loop->t)};
+    if (block.empty()) {
+      // Insufficient number of nested loops already reported by
+      // CheckAssocLoopLevel()
+      break;
+    }
+
+    loop = GetDoConstructIf(block.front());
+    if (!loop) {
+      // Insufficient number of nested loops already reported by
+      // CheckAssocLoopLevel()
+      break;
+    }
+
+    auto checkPerfectNest = [&, this]() {
+      auto blockSize = block.size();
+      if (blockSize <= 1)
+        return;
+
+      if (parser::Unwrap<parser::ContinueStmt>(x))
+        blockSize -= 1;
+
+      if (blockSize <= 1)
+        return;
+
+      // Non-perfectly nested loop
+      // TODO: Point to non-DO statement, directiveSource as a note
+      context_.Say(dirContext.directiveSource,
+          "Canonical loop nest must be perfectly nested."_err_en_US);
+    };
+
+    checkPerfectNest();
+
+    ++curLevel;
+  }
+}
+
 // 2.15.1.1 Data-sharing Attribute Rules - Predetermined
 //   - The loop iteration variable(s) in the associated do-loop(s) of a do,
 //     parallel do, taskloop, or distribute construct is (are) private.
diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90
index 5143dff0dd315..bb3c1d0cd3855 100644
--- a/flang/test/Semantics/OpenMP/do08.f90
+++ b/flang/test/Semantics/OpenMP/do08.f90
@@ -61,6 +61,7 @@ program omp
   !$omp end do
 
 
+  !ERROR: Canonical loop nest must be perfectly nested.
   !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
   !$omp do collapse(3)
   do 60 i=2,200,2
diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90
index 6e9d1dddade4c..8f7844f4136f9 100644
--- a/flang/test/Semantics/OpenMP/do13.f90
+++ b/flang/test/Semantics/OpenMP/do13.f90
@@ -59,6 +59,7 @@ program omp
   !$omp end do
 
 
+  !ERROR: Canonical loop nest must be perfectly nested.
   !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
   !$omp do collapse(3)
   do 60 i=1,10
diff --git a/flang/test/Semantics/OpenMP/do22.f90 b/flang/test/Semantics/OpenMP/do22.f90
new file mode 100644
index 0000000000000..9d96d3af54e5c
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/do22.f90
@@ -0,0 +1,73 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp
+! Check for existence of loop following a DO directive
+
+subroutine do_imperfectly_nested_before
+  integer i, j
+
+  !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
+  !$omp do collapse(2)
+  do i = 1, 10
+    print *, i
+    do j = 1, 10
+      print *, i, j
+    end do
+  end do
+  !$omp end do
+end subroutine
+
+
+subroutine do_imperfectly_nested_behind
+  integer i, j
+
+  !ERROR: Canonical loop nest must be perfectly nested.
+  !$omp do collapse(2)
+  do i = 1, 10
+    do j = 1, 10
+      print *, i, j
+    end do
+    print *, i
+  end do
+  !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_lb
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp do collapse(2)
+  do i = 1, 10
+    do j = i, 10
+      print *, i, j
+    end do
+  end do
+  !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_ub
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp do collapse(2)
+  do i = 1, 10
+    do j = 0, i
+      print *, i, j
+    end do
+  end do
+  !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_step
+  integer i, j
+
+  !ERROR: Trip count must be computable and invariant
+  !$omp do collapse(2)
+  do i = 1, 10
+    do j = 1, 10, i
+      print *, i, j
+    end do
+  end do
+  !$omp end do
+end subroutine

>From ff4eccb41e2973e1179734dd455f1e797e89d9be Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 14:45:45 +0200
Subject: [PATCH 4/8] Add omp.tile operation

---
 .../mlir/Dialect/OpenMP/OpenMPClauses.td      |  29 ++
 .../mlir/Dialect/OpenMP/OpenMPOpBase.td       |  69 +++-
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  63 ++-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 387 +++++++++++++++---
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      |  42 ++
 .../Dialect/OpenMP/cli-canonical_loop.mlir    | 127 ++++--
 mlir/test/Dialect/OpenMP/cli-tile.mlir        | 138 +++++++
 .../Dialect/OpenMP/cli-unroll-heuristic.mlir  |  28 +-
 mlir/test/Dialect/OpenMP/invalid-tile.mlir    | 119 ++++++
 .../test/Target/LLVMIR/openmp-cli-tile01.mlir | 101 +++++
 .../test/Target/LLVMIR/openmp-cli-tile02.mlir | 190 +++++++++
 mlir/test/mlir-tblgen/op-format-invalid.td    |   2 +-
 .../tools/mlir-tblgen/AttrOrTypeFormatGen.cpp |   1 +
 mlir/tools/mlir-tblgen/FormatGen.cpp          |   2 +-
 mlir/tools/mlir-tblgen/OpFormatGen.cpp        |   1 +
 15 files changed, 1157 insertions(+), 142 deletions(-)
 create mode 100644 mlir/test/Dialect/OpenMP/cli-tile.mlir
 create mode 100644 mlir/test/Dialect/OpenMP/invalid-tile.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
 create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 1eda5e4bc1618..8e43c4284d078 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -995,6 +995,35 @@ class OpenMP_NumTeamsClauseSkip<
 
 def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>;
 
+//===----------------------------------------------------------------------===//
+//  V5.1: [10.1.2] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_SizesClauseSkip<
+    bit traits = false, bit arguments = false, bit assemblyFormat = false,
+    bit description = false, bit extraClassDeclaration = false
+  > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+                    extraClassDeclaration> {
+  let arguments = (ins
+    Variadic<IntLikeType>:$sizes
+  );
+
+  let optAssemblyFormat = [{
+    `sizes` `(` $sizes `:` type($sizes) `)`
+  }];
+
+  let description = [{
+    The `sizes` clauses defines the size of a grid over a multi-dimensional
+    logical iteration space. This grid is used for loop transformations such as
+    `tile` and `strip`. The size per dimension can be a variable, but only
+    values that are not at least 2 make sense. It is not specified what happens
+    when smaller values are used, but should still result in a loop nest that
+    executes each logical iteration once.
+  }];
+}
+
+def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>;
+
 //===----------------------------------------------------------------------===//
 // V5.2: [10.1.2] `num_threads` clause
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index bbcfb87fa03c6..5ad4e4b5b61d1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> {
   let summary = "Type for representing omp map clause bounds information";
 }
 
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+  let summary = "Type for representing a reference to a canonical loop";
+  let description = [{
+    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+    canonical loop in the same function. Values of this type are not
+    available at runtime and therefore cannot be used by the program itself,
+    i.e. an opaque type. It is similar to the transform dialect's
+    `!transform.interface` type, but instead of implementing an interface
+    for each transformation, the OpenMP dialect itself defines possible
+    operations on this type.
+
+    A value of type CanonicalLoopInfoType (in the following: CLI) value can be
+
+    1. created by omp.new_cli.
+    2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+       can only be associated once.
+    3. passed to an omp loop transformation operation that modifies the loop
+       associated with the CLI. The CLI is the "applyee" and the operation is
+       the consumer. A CLI can only be consumed once.
+    4. passed to an omp loop transformation operation to associate the cli with
+       a result of that transformation. The CLI is the "generatee" and the
+       operation is the generator.
+
+    A CLI cannot
+
+    1. be returned from a function.
+    2. be passed to operations that are not specifically designed to take a
+       CanonicalLoopInfoType, including AnyType.
+
+    A CLI directly corresponds to an object of
+    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Base classes for OpenMP dialect operations.
 //===----------------------------------------------------------------------===//
@@ -211,8 +249,35 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
 // Doesn't actually create a C++ base class (only defines default values for
 // tablegen classes that derive from this). Use LoopTransformationInterface
 // instead for common operations.
-class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
-      OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits)  > {
+class OpenMPTransform_Op<string mnemonic,
+                         list<Trait> traits = [],
+                         list<OpenMP_Clause> clauses = []> :
+      OpenMP_Op<mnemonic,
+                traits = !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits),
+                clauses = clauses> {
+}
+
+// Base clause for loop transformations using the standard syntax.
+//
+//     omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... <attr-dicr>
+//     omp.opname                  ($applyees) clause(...) clause(...) ... <attr-dict>
+//
+// $generatees is optional and is assumed to be empty if omitted
+class OpenMPTransformBase_Op<string mnemonic,
+                         list<Trait> traits = [],
+                         list<OpenMP_Clause> clauses = []> :
+      OpenMPTransform_Op<mnemonic,
+                         traits = !listconcat(traits, [AttrSizedOperandSegments]),
+                         clauses = clauses> {
+
+  let arguments = !con(
+                       (ins Variadic<CanonicalLoopInfoType>:$generatees,
+                            Variadic<CanonicalLoopInfoType>:$applyees
+                      ), clausesArgs);
+
+  let assemblyFormat = [{ custom<LoopTransformClis>($generatees, $applyees) }]
+                         # clausesAssemblyFormat
+                         # [{ attr-dict }];
 }
 
 #endif  // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5c77e215467e4..b73091ea0ca53 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -357,44 +357,6 @@ def SingleOp : OpenMP_Op<"single", traits = [
   let hasVerifier = 1;
 }
 
-//===---------------------------------------------------------------------===//
-// OpenMP Canonical Loop Info Type
-//===---------------------------------------------------------------------===//
-
-def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
-  let summary = "Type for representing a reference to a canonical loop";
-  let description = [{
-    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
-    canonical loop in the same function. Values of this type are not
-    available at runtime and therefore cannot be used by the program itself,
-    i.e. an opaque type. It is similar to the transform dialect's
-    `!transform.interface` type, but instead of implementing an interface
-    for each transformation, the OpenMP dialect itself defines possible
-    operations on this type.
-
-    A value of type CanonicalLoopInfoType (in the following: CLI) value can be
-
-    1. created by omp.new_cli.
-    2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
-       can only be associated once.
-    3. passed to an omp loop transformation operation that modifies the loop
-       associated with the CLI. The CLI is the "applyee" and the operation is
-       the consumer. A CLI can only be consumed once.
-    4. passed to an omp loop transformation operation to associate the cli with
-       a result of that transformation. The CLI is the "generatee" and the
-       operation is the generator.
-
-    A CLI cannot
-
-    1. be returned from a function.
-    2. be passed to operations that are not specifically designed to take a
-       CanonicalLoopInfoType, including AnyType.
-
-    A CLI directly corresponds to an object of
-    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
-  }];
-}
-
 //===---------------------------------------------------------------------===//
 // OpenMP Canonical Loop Info Creation
 //===---------------------------------------------------------------------===//
@@ -563,6 +525,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
   let hasCustomAssemblyFormat = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// OpenMP tile operation
+//===----------------------------------------------------------------------===//
+
+def TileOp : OpenMPTransformBase_Op<"tile",
+                                clauses = [OpenMP_SizesClause]> {
+  let summary = "OpenMP tile operation";
+  let description = [{
+    Represents the OpenMP tile directive introduced in OpenMP 5.1.
+
+    The construct partitions the logical iteration space of the affected loops
+    into equally-sized tiles, then creates two sets of nested loops. The outer
+    loops, called the grid loops, iterate over all tiles. The inner loops,
+    called the intratile loops, iterate over the logical iterations of a tile.
+    The sizes clause determines the size of a tile.
+
+    Currently, the affected loops must be rectangular (the tripcount of the
+    inner loop must not depend on any iv of an surrounding affected loop) and
+    perfectly nested (except for the innermost affected loop, no operations
+    other than the nested loop and the terminator in the loop body).
+  }] # clausesDescription;
+
+  let hasVerifier = 1;
+}
+
 //===----------------------------------------------------------------------===//
 // 2.8.3 Workshare Construct
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3d70e28ed23ab..d3cc7e55ae155 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/ADT/bit.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/InterleavedRange.h"
 #include <cstddef>
 #include <iterator>
 #include <optional>
@@ -77,6 +78,177 @@ struct LLVMPointerPointerLikeModel
 };
 } // namespace
 
+/// Generate a name of a canonical loop nest of the format
+/// `<prefix>(_s<num>_r<num>)*` that describes its nesting inside parent
+/// operations (`_r<num>`) and that operation's region (`_s<num>`). The region
+/// number is omitted if the parent operation has just one region. If a loop
+/// nest just consists of canonical loops nested inside each other, also uses
+/// `d<num>` where <num> is the nesting depth of the loop.
+static std::string generateLoopNestingName(StringRef prefix,
+                                           CanonicalLoopOp op) {
+  struct Component {
+    // An region argument of an operation
+    Operation *parentOp;
+    size_t regionInOpIdx;
+    bool isOnlyRegionInOp;
+    bool skipRegion;
+
+    // An operation somewhere in a parent region
+    Operation *thisOp;
+    Region *parentRegion;
+    size_t opInRegionIdx;
+    bool isOnlyOpInRegion;
+    bool skipOp;
+    int depth = -1;
+  };
+  SmallVector<Component> components;
+
+  // Gather a list of parent regions and operations, and the position within
+  // their parent
+  Operation *o = op.getOperation();
+  while (o) {
+    if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+      break;
+
+    // Operation within a region
+    Region *r = o->getParentRegion();
+    if (!r)
+      break;
+
+    llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+    size_t idx = 0;
+    bool found = false;
+    size_t sequentialIdx = -1;
+    bool isOnlyLoop = true;
+    for (Block *b : traversal) {
+      for (Operation &op : *b) {
+        if (&op == o && !found) {
+          sequentialIdx = idx;
+          found = true;
+        }
+        if (op.getNumRegions()) {
+          idx += 1;
+          if (idx > 1)
+            isOnlyLoop = false;
+        }
+        if (found && !isOnlyLoop)
+          break;
+      }
+    }
+
+    Component &comp = components.emplace_back();
+    comp.thisOp = o;
+    comp.parentRegion = r;
+    comp.opInRegionIdx = sequentialIdx;
+    comp.isOnlyOpInRegion = isOnlyLoop;
+
+    // Region argument of an operation
+    Operation *parent = r->getParentOp();
+
+    comp.parentOp = parent;
+    comp.regionInOpIdx = 0;
+    comp.isOnlyRegionInOp = true;
+    if (parent && parent->getRegions().size() > 1) {
+      auto getRegionIndex = [](Operation *o, Region *r) {
+        for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+          if (&region == r)
+            return idx;
+        }
+        llvm_unreachable("Region not child of its parent operation");
+      };
+      comp.regionInOpIdx = getRegionIndex(parent, r);
+      comp.isOnlyRegionInOp = false;
+    }
+
+    if (!parent)
+      break;
+
+    // next parent
+    o = parent;
+  }
+
+  // Reorder components from outermost to innermost
+  std::reverse(components.begin(), components.end());
+
+  // Determine whether a component is not needed
+  for (Component &c : components) {
+    c.skipRegion = c.isOnlyRegionInOp;
+    c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
+  }
+
+  // Find runs of perfect nests and merge them into a single component
+  size_t curNestRoot = 0;
+  size_t curNestDepth = 1;
+  auto mergeLoopNest = [&](size_t innermost) {
+    size_t outermost = curNestRoot;
+
+    // Don't do enything if it does not consist of at least 2 loops
+    if (outermost < innermost) {
+      for (auto i : llvm::seq<int>(outermost + 1, innermost))
+        components[i].skipOp = true;
+      components[innermost].depth = curNestDepth;
+    }
+
+    // Start new root
+    curNestRoot = innermost + 1;
+    curNestDepth = 1;
+  };
+  for (auto &&[i, c] : llvm::enumerate(components)) {
+    if (i <= curNestRoot)
+      continue;
+
+    // Check whether this region can be included
+    if (!c.skipRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    if (c.skipOp)
+      continue;
+
+    if (!c.isOnlyOpInRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    curNestDepth += 1;
+  }
+
+  // Finalize innermost loop nest
+  mergeLoopNest(components.size() - 1);
+
+  // Outermost loop does not need a suffix if it has no sibling
+  for (auto &c : components) {
+    if (c.skipOp)
+      continue;
+    if (c.isOnlyOpInRegion)
+      c.skipOp = true;
+    break;
+  }
+
+  // Compile name
+  SmallString<64> Name{prefix};
+  for (auto &c : components) {
+    auto addComponent = [&Name](char letter, int64_t idx) {
+      Name += '_';
+      Name += letter;
+      Name += idx;
+    };
+
+    if (!c.skipRegion)
+      addComponent('r', c.regionInOpIdx);
+
+    if (!c.skipOp) {
+      if (c.depth >= 0)
+        addComponent('d', c.depth - 1);
+      else
+        addComponent('s', c.opInRegionIdx);
+    }
+  }
+
+  return Name.str().str();
+}
+
 void OpenMPDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
@@ -3141,71 +3313,29 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
     cliName =
         TypeSwitch<Operation *, std::string>(gen->getOwner())
             .Case([&](CanonicalLoopOp op) {
-              // Find the canonical loop nesting: For each ancestor add a
-              // "+_r<idx>" suffix (in reverse order)
-              SmallVector<std::string> components;
-              Operation *o = op.getOperation();
-              while (o) {
-                if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
-                  break;
-
-                Region *r = o->getParentRegion();
-                if (!r)
-                  break;
-
-                auto getSequentialIndex = [](Region *r, Operation *o) {
-                  llvm::ReversePostOrderTraversal<Block *> traversal(
-                      &r->getBlocks().front());
-                  size_t idx = 0;
-                  for (Block *b : traversal) {
-                    for (Operation &op : *b) {
-                      if (&op == o)
-                        return idx;
-                      // Only consider operations that are containers as
-                      // possible children
-                      if (!op.getRegions().empty())
-                        idx += 1;
-                    }
-                  }
-                  llvm_unreachable("Operation not part of the region");
-                };
-                size_t sequentialIdx = getSequentialIndex(r, o);
-                components.push_back(("s" + Twine(sequentialIdx)).str());
-
-                Operation *parent = r->getParentOp();
-                if (!parent)
-                  break;
-
-                // If the operation has more than one region, also count in
-                // which of the regions
-                if (parent->getRegions().size() > 1) {
-                  auto getRegionIndex = [](Operation *o, Region *r) {
-                    for (auto [idx, region] :
-                         llvm::enumerate(o->getRegions())) {
-                      if (&region == r)
-                        return idx;
-                    }
-                    llvm_unreachable("Region not child its parent operation");
-                  };
-                  size_t regionIdx = getRegionIndex(parent, r);
-                  components.push_back(("r" + Twine(regionIdx)).str());
-                }
-
-                // next parent
-                o = parent;
-              }
-
-              SmallString<64> Name("canonloop");
-              for (const std::string &s : reverse(components)) {
-                Name += '_';
-                Name += s;
-              }
-
-              return Name;
+              return generateLoopNestingName("canonloop", op);
             })
             .Case([&](UnrollHeuristicOp op) -> std::string {
               llvm_unreachable("heuristic unrolling does not generate a loop");
             })
+            .Case([&](TileOp op) -> std::string {
+              auto [generateesFirst, generateesCount] =
+                  op.getGenerateesODSOperandIndexAndLength();
+              unsigned firstGrid = generateesFirst;
+              unsigned firstIntratile = generateesFirst + generateesCount / 2;
+              unsigned end = generateesFirst + generateesCount;
+              unsigned opnum = gen->getOperandNumber();
+              // In the OpenMP apply and looprange clauses, indices are 1-based
+              if (firstGrid <= opnum && opnum < firstIntratile) {
+                unsigned gridnum = opnum - firstGrid + 1;
+                return ("grid" + Twine(gridnum)).str();
+              }
+              if (firstIntratile <= opnum && opnum < end) {
+                unsigned intratilenum = opnum - firstIntratile + 1;
+                return ("intratile" + Twine(intratilenum)).str();
+              }
+              llvm_unreachable("Unexpected generatee argument");
+            })
             .Default([&](Operation *op) {
               assert(false && "TODO: Custom name for this operation");
               return "transformed";
@@ -3292,7 +3422,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
 
 void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
                                                OpAsmSetValueNameFn setNameFn) {
-  setNameFn(region.getArgument(0), "iv");
+  std::string ivName = generateLoopNestingName("iv", *this);
+  setNameFn(region.getArgument(0), ivName);
 }
 
 void CanonicalLoopOp::print(OpAsmPrinter &p) {
@@ -3433,6 +3564,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
   return {0, 0};
 }
 
+//===----------------------------------------------------------------------===//
+// TileOp
+//===----------------------------------------------------------------------===//
+
+static void printLoopTransformClis(OpAsmPrinter &p, TileOp op,
+                                   OperandRange generatees,
+                                   OperandRange applyees) {
+  if (!generatees.empty())
+    p << '(' << llvm::interleaved(generatees) << ')';
+
+  if (!applyees.empty())
+    p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+static ParseResult parseLoopTransformClis(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) {
+  if (parser.parseOptionalLess()) {
+    // Syntax 1: generatees present
+
+    if (parser.parseOperandList(generateesOperands,
+                                mlir::OpAsmParser::Delimiter::Paren))
+      return failure();
+
+    if (parser.parseLess())
+      return failure();
+  } else {
+    // Syntax 2: generatees omitted
+  }
+
+  // Parse `<-` (`<` has already been parsed)
+  if (parser.parseMinus())
+    return failure();
+
+  if (parser.parseOperandList(applyeesOperands,
+                              mlir::OpAsmParser::Delimiter::Paren))
+    return failure();
+
+  return success();
+}
+
+LogicalResult TileOp::verify() {
+  if (getApplyees().empty())
+    return emitOpError() << "must apply to at least one loop";
+
+  if (getSizes().size() != getApplyees().size())
+    return emitOpError() << "there must be one tile size for each applyee";
+
+  if (!getGeneratees().empty() &&
+      2 * getSizes().size() != getGeneratees().size())
+    return emitOpError()
+           << "expecting two times the number of generatees than applyees";
+
+  DenseSet<Value> parentIVs;
+
+  Value parent = getApplyees().front();
+  for (auto &&applyee : llvm::drop_begin(getApplyees())) {
+    auto [parentCreate, parentGen, parentCons] = decodeCli(parent);
+    auto [create, gen, cons] = decodeCli(applyee);
+
+    if (!parentGen)
+      return emitOpError() << "applyee CLI has no generator";
+
+    auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner());
+    if (!parentGen)
+      return emitOpError()
+             << "currently only supports omp.canonical_loop as applyee";
+
+    parentIVs.insert(parentLoop.getInductionVar());
+
+    if (!gen)
+      return emitOpError() << "applyee CLI has no generator";
+    auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
+    if (!loop)
+      return emitOpError()
+             << "currently only supports omp.canonical_loop as applyee";
+
+    // Canonical loop must be perfectly nested, i.e. the body of the parent must
+    // only contain the omp.canonical_loop of the nested loops, and
+    // omp.terminator
+    bool isPerfectlyNested = [&]() {
+      auto &parentBody = parentLoop.getRegion();
+      if (!parentBody.hasOneBlock())
+        return false;
+      auto &parentBlock = parentBody.getBlocks().front();
+
+      auto nestedLoopIt = parentBlock.begin();
+      if (nestedLoopIt == parentBlock.end() ||
+          (&*nestedLoopIt != loop.getOperation()))
+        return false;
+
+      auto termIt = std::next(nestedLoopIt);
+      if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt))
+        return false;
+
+      if (std::next(termIt) != parentBlock.end())
+        return false;
+
+      return true;
+    }();
+    if (!isPerfectlyNested)
+      return emitOpError() << "tiled loop nest must be perfectly nested";
+
+    if (parentIVs.contains(loop.getTripCount()))
+      return emitOpError() << "tiled loop nest must be rectangular";
+
+    parent = applyee;
+  }
+
+  // TODO: The tile sizes must be computed before the loop, but checking this
+  // requires dominance analysis. For instance:
+  //
+  //      %canonloop = omp.new_cli
+  //      omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+  //        // write to %x
+  //        omp.terminator
+  //      }
+  //      %ts = llvm.load %x
+  //      omp.tile <- (%canonloop) sizes(%ts : i32)
+
+  return success();
+}
+
+std::pair<unsigned, unsigned> TileOp ::getApplyeesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_applyees);
+}
+
+std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
+  return getODSOperandIndexAndLength(odsIndex_generatees);
+}
+
 //===----------------------------------------------------------------------===//
 // Critical construct (2.17.1)
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4921a1990b6e8..171ac61dd66fe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3154,6 +3154,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
   return success();
 }
 
+/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
+/// OpenMPIRBuilder.
+static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
+                               LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+  SmallVector<llvm::CanonicalLoopInfo *> translatedLoops;
+  SmallVector<llvm::Value *> translatedSizes;
+
+  for (Value size : op.getSizes()) {
+    llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
+    assert(translatedSize &&
+           "sizes clause arguments must already be translated");
+    translatedSizes.push_back(translatedSize);
+  }
+
+  for (Value applyee : op.getApplyees()) {
+    llvm::CanonicalLoopInfo *consBuilderCLI =
+        moduleTranslation.lookupOMPLoop(applyee);
+    assert(applyee && "Canonical loop must already been translated");
+    translatedLoops.push_back(consBuilderCLI);
+  }
+
+  auto generatedLoops =
+      ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
+  if (!op.getGeneratees().empty()) {
+    for (auto [mlirLoop, genLoop] :
+         zip_equal(op.getGeneratees(), generatedLoops))
+      moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
+  }
+
+  // CLIs can only be consumed once
+  for (Value applyee : op.getApplyees())
+    moduleTranslation.invalidateOmpLoop(applyee);
+
+  return success();
+}
+
 /// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 static llvm::AtomicOrdering
 convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -6196,6 +6235,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
             // the omp.canonical_loop.
             return applyUnrollHeuristic(op, builder, moduleTranslation);
           })
+          .Case([&](omp::TileOp op) {
+            return applyTile(op, builder, moduleTranslation);
+          })
           .Case([&](omp::TargetAllocMemOp) {
             return convertTargetAllocMemOp(*op, builder, moduleTranslation);
           })
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
index adadb8bbac49d..874e3922805ec 100644
--- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
 
 
 // CHECK-LABEL: @omp_canonloop_raw(
@@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () {
 func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
   // CHECK-NEXT: %canonloop_s0 = omp.new_cli
   %canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop_s0) ({
     ^bb_first(%iv_first: i32):
-      // CHECK-NEXT: = llvm.add %iv, %iv : i32
+      // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32
       %newval = llvm.add %iv_first, %iv_first : i32
     // CHECK-NEXT: omp.terminator
     omp.terminator
@@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
 
   // CHECK-NEXT: %canonloop_s1 = omp.new_cli
   %canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop_s1) ({
     ^bb_second(%iv_second: i32):
     // CHECK: omp.terminator
@@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
 // CHECK-LABEL: @omp_nested_canonloop_raw(
 // CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
 func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %outer = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
   %inner = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) {
   "omp.canonical_loop" (%tc_outer, %outer) ({
     ^bb_outer(%iv_outer: i32):
-      // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+      // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) {
       "omp.canonical_loop" (%tc_inner, %inner) ({
         ^bb_inner(%iv_inner: i32):
-          // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+          // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32
           %newval = llvm.add %iv_outer, %iv_inner: i32
           // CHECK-NEXT: omp.terminator
           omp.terminator
@@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () {
 func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
   // CHECK-NEXT: %canonloop_s0 = omp.new_cli
   %canonloop_s0 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
     // CHECK-NEXT: omp.terminator
     omp.terminator
   }
 
   // CHECK: %canonloop_s1 = omp.new_cli
   %canonloop_s1 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  // CHECK: %canonloop_s2 = omp.new_cli
+  %canonloop_s2 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) {
     // CHECK-NEXT: omp.terminator
     omp.terminator
   }
@@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
 }
 
 
-// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-LABEL: @omp_canonloop_2d_nested_pretty(
 // CHECK-SAME: %[[tc:.+]]: i32)
-func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
-  %canonloop_s0 = omp.new_cli
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
-  %canonloop_s0_s0 = omp.new_cli
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
-  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
-    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
-    omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+  %canonloop_d1 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) {
       // CHECK: omp.terminator
       omp.terminator
     }
@@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
 }
 
 
+// CHECK-LABEL: @omp_canonloop_3d_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () {
+  // CHECK: %canonloop = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK: %canonloop_d1 = omp.new_cli
+  %canonloop_d1 = omp.new_cli
+  // CHECK: %canonloop_d2 = omp.new_cli
+  %canonloop_d2 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) {
+      // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+      omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) {
+        // CHECK-NEXT: omp.terminator
+        omp.terminator
+      // CHECK-NEXT: }
+      }
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () {
+  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli
+  %canonloop_s0_d1 = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
+   // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) {
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  // CHECK-NEXT: }
+  }
+
+  // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+  %canonloop_s1 = omp.new_cli
+  // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli
+  %canonloop_s1_d1 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+    // CHECK-NEXT:  omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) {
+      // CHECK-NEXT: omp.terminator
+      omp.terminator
+    // CHECK-NEXT: }
+    }
+    // CHECK-NEXT: omp.terminator
+    omp.terminator
+  }
+
+  return
+}
+
+
 // CHECK-LABEL: @omp_newcli_unused(
 // CHECK-SAME: )
 func.func @omp_newcli_unused() -> () {
diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir
new file mode 100644
index 0000000000000..73d54784c52b7
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir
@@ -0,0 +1,138 @@
+// RUN: mlir-opt %s            | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
+
+
+// Raw syntax check (MLIR output is always pretty-printed)
+// CHECK-LABEL: @omp_tile_raw(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: %grid1 = omp.new_cli
+  %grid = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: %intratile1 = omp.new_cli
+  %intratile = "omp.new_cli" () : () -> (!omp.cli)
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  "omp.canonical_loop" (%tc, %canonloop) ({
+    ^bb0(%iv: i32):
+      // CHECK: omp.terminator
+      omp.terminator
+  }) : (i32, !omp.cli) -> ()
+  // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+  "omp.tile"(%grid,  %intratile, %canonloop, %ts) <{operandSegmentSizes = array<i32: 2, 1, 1>}> : (!omp.cli,  !omp.cli, !omp.cli, i32) -> ()
+  //"omp.tile" (%canonloop) : (!omp.cli) -> ()
+  return
+}
+
+
+// Pretty syntax check
+// CHECK-LABEL: @omp_tile_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_pretty(%tc : i32, %ts : i32) -> () {
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %grid = omp.new_cli
+  // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+  %intratile = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+  omp.tile(%grid, %intratile) <- (%canonloop) sizes(%ts : i32)
+  return
+}
+
+
+// Specifying the generatees for omp.tile is optional
+// CHECK-LABEL: @omp_tile_optionalgen_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_optionalgen_pretty(%tc : i32, %ts : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %canonloop = omp.new_cli
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK: omp.tile <- (%canonloop) sizes(%[[ts]] : i32)
+  omp.tile <- (%canonloop) sizes(%ts : i32)
+  return
+}
+
+
+// Two-dimensional tiling
+// CHECK-LABEL: @omp_tile_2d_pretty(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[ts1:.+]]: i32, %[[ts2:.+]]: i32) {
+func.func @omp_tile_2d_pretty(%tc1 : i32, %tc2 : i32, %ts1 : i32, %ts2 : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %cli_outer = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+  %cli_inner = omp.new_cli
+  // CHECK-NEXT: %grid1 = omp.new_cli
+  %grid1 = omp.new_cli
+  // CHECK-NEXT: %grid2 = omp.new_cli
+  %grid2 = omp.new_cli
+  // CHECK-NEXT: %intratile1 = omp.new_cli
+  %intratile1 = omp.new_cli
+  // CHECK-NEXT: %intratile2 = omp.new_cli
+  %intratile2 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc1]]) {
+  omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc1) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc2]]) {
+    omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc2) {
+      // CHECK: omp.terminator
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK:  omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%canonloop, %canonloop_d1) sizes(%[[ts1]], %[[ts2]] : i32, i32)
+  omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%cli_outer, %cli_inner) sizes(%ts1, %ts2 : i32, i32)
+  return
+}
+
+
+// Three-dimensional tiling
+// CHECK-LABEL: @omp_tile_3d_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_3d_pretty(%tc : i32, %ts : i32) -> () {
+  // CHECK-NEXT: %canonloop = omp.new_cli
+  %cli_outer = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+  %cli_middle = omp.new_cli
+  // CHECK-NEXT: %canonloop_d2 = omp.new_cli
+  %cli_inner = omp.new_cli
+  // CHECK-NEXT: %grid1 = omp.new_cli
+  %grid1 = omp.new_cli
+  // CHECK-NEXT: %grid2 = omp.new_cli
+  %grid2 = omp.new_cli
+  // CHECK-NEXT: %grid3 = omp.new_cli
+  %grid3 = omp.new_cli
+  // CHECK-NEXT: %intratile1 = omp.new_cli
+  %intratile1 = omp.new_cli
+  // CHECK-NEXT: %intratile2 = omp.new_cli
+  %intratile2 = omp.new_cli
+  // CHECK-NEXT: %intratile3 = omp.new_cli
+  %intratile3 = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+  omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+    omp.canonical_loop(%cli_middle) %iv_middle : i32 in range(%tc) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+      omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
+        // CHECK: omp.terminator
+        omp.terminator
+      }
+      // CHECK: omp.terminator
+      omp.terminator
+    }
+    // CHECK: omp.terminator
+    omp.terminator
+  }
+  // CHECK:  omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%canonloop, %canonloop_d1, %canonloop_d2) sizes(%[[ts]], %[[ts]], %[[ts]] : i32, i32, i32)
+  omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%cli_outer, %cli_middle, %cli_inner) sizes(%ts, %ts, %ts: i32, i32, i32)
+  return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
index cda7d0b500166..16884f4245e76 100644
--- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -1,18 +1,18 @@
-// RUN: mlir-opt %s            | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s            | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
 
 
 // CHECK-LABEL: @omp_unroll_heuristic_raw(
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %canonloop = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   "omp.canonical_loop" (%tc, %canonloop) ({
     ^bb0(%iv: i32):
       omp.terminator
   }) : (i32, !omp.cli) -> ()
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   "omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
   return
 }
@@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
   // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
-  %canonloop = "omp.new_cli" () : () -> (!omp.cli)
-  // CHECK-NEXT:  omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  %canonloop = omp.new_cli
+  // CHECK-NEXT:  omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
     omp.terminator
   }
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   omp.unroll_heuristic(%canonloop)
   return
 }
@@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
 // CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
 // CHECK-SAME: %[[tc:.+]]: i32) {
 func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
-  // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop = omp.new_cli
   %cli_outer = omp.new_cli
-  // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+  // CHECK-NEXT: %canonloop_d1 = omp.new_cli
   %cli_inner = omp.new_cli
-  // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+  // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
   omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
-    // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+    // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
     omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
       // CHECK: omp.terminator
       omp.terminator
@@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
     omp.terminator
   }
 
-  // CHECK: omp.unroll_heuristic(%canonloop_s0)
+  // CHECK: omp.unroll_heuristic(%canonloop)
   omp.unroll_heuristic(%cli_outer)
-  // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+  // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1)
   omp.unroll_heuristic(%cli_inner)
   return
 }
diff --git a/mlir/test/Dialect/OpenMP/invalid-tile.mlir b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
new file mode 100644
index 0000000000000..e63a062d810ed
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
@@ -0,0 +1,119 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+
+
+func.func @missing_sizes(%tc : i32, %ts : i32) {
+  %canonloop = omp.new_cli
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+
+  // expected-error at +1 {{'omp.tile' op there must be one tile size for each applyee}}
+  omp.tile <-(%canonloop)
+
+  llvm.return
+}
+
+// -----
+
+func.func @no_loop(%tc : i32, %ts : i32) {
+  // expected-error at +1 {{'omp.tile' op must apply to at least one loop}}
+  omp.tile <-()
+
+  return
+}
+
+// -----
+
+func.func @missing_generator(%tc : i32, %ts : i32) {
+  // expected-error at +1 {{'omp.new_cli' op CLI has no generator}}
+  %canonloop = omp.new_cli
+
+  // expected-note at +1 {{see consumer here: "omp.tile"(%0, %arg1) <{operandSegmentSizes = array<i32: 0, 1, 1>}> : (!omp.cli, i32) -> ()}}
+  omp.tile <-(%canonloop) sizes(%ts : i32)
+
+  return
+}
+
+// -----
+
+func.func @insufficient_sizes(%tc : i32, %ts : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+  omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+
+  // expected-error at +1 {{'omp.tile' op there must be one tile size for each applyee}}
+  omp.tile <-(%canonloop1, %canonloop2) sizes(%ts : i32)
+
+  llvm.return
+}
+
+// -----
+
+func.func @insufficient_applyees(%tc : i32, %ts : i32) {
+  %canonloop = omp.new_cli
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+
+  // expected-error at +1 {{omp.tile' op there must be one tile size for each applyee}}
+  omp.tile <- (%canonloop) sizes(%ts, %ts : i32, i32)
+
+  return
+}
+
+// -----
+
+func.func @insufficient_generatees(%tc : i32, %ts : i32) {
+  %canonloop = omp.new_cli
+  %grid = omp.new_cli
+  omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+    omp.terminator
+  }
+
+  // expected-error at +1 {{'omp.tile' op expecting two times the number of generatees than applyees}}
+  omp.tile (%grid) <- (%canonloop) sizes(%ts : i32)
+
+  return
+}
+
+// -----
+
+func.func @not_perfectly_nested(%tc : i32, %ts : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+    %v = arith.constant 42 : i32
+    omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%tc) {
+      omp.terminator
+    }
+    omp.terminator
+  }
+
+  // expected-error at +1 {{'omp.tile' op tiled loop nest must be perfectly nested}}
+  omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+  llvm.return
+}
+
+// -----
+
+func.func @non_nectangular(%tc : i32, %ts : i32) {
+  %canonloop1 = omp.new_cli
+  %canonloop2 = omp.new_cli
+  omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+    omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%iv1) {
+      omp.terminator
+    }
+    omp.terminator
+  }
+
+  // expected-error at +1 {{'omp.tile' op tiled loop nest must be rectangular}}
+  omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+  llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
new file mode 100644
index 0000000000000..4ac4f02103e8c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
@@ -0,0 +1,101 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+
+llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () {
+  %literal_cli = omp.new_cli
+  omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) {
+    %ptr = llvm.getelementptr inbounds %baseptr[%iv] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+    %val = llvm.mlir.constant(42.0 : f32) : f32
+    llvm.store %val, %ptr : f32, !llvm.ptr
+    omp.terminator
+  }
+  omp.tile <- (%literal_cli) sizes(%ts : i32)
+  llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) {
+// CHECK-NEXT:   br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:                           ; preds = %3
+// CHECK-NEXT:   %4 = udiv i32 %1, %2
+// CHECK-NEXT:   %5 = urem i32 %1, %2
+// CHECK-NEXT:   %6 = icmp ne i32 %5, 0
+// CHECK-NEXT:   %7 = zext i1 %6 to i32
+// CHECK-NEXT:   %omp_floor0.tripcount = add nuw i32 %4, %7
+// CHECK-NEXT:   br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader:                             ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT:   br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header:                                ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT:   %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT:   br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond:                                  ; preds = %omp_floor0.header
+// CHECK-NEXT:   %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT:   br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body:                                  ; preds = %omp_floor0.cond
+// CHECK-NEXT:   %8 = icmp eq i32 %omp_floor0.iv, %4
+// CHECK-NEXT:   %9 = select i1 %8, i32 %5, i32 %2
+// CHECK-NEXT:   br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader:                              ; preds = %omp_floor0.body
+// CHECK-NEXT:   br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header:                                 ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT:   %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT:   br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond:                                   ; preds = %omp_tile0.header
+// CHECK-NEXT:   %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9
+// CHECK-NEXT:   br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body:                                   ; preds = %omp_tile0.cond
+// CHECK-NEXT:   %10 = mul nuw i32 %2, %omp_floor0.iv
+// CHECK-NEXT:   %11 = add nuw i32 %10, %omp_tile0.iv
+// CHECK-NEXT:   br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:                                ; preds = %omp_tile0.body
+// CHECK-NEXT:   br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:                                  ; preds = %omp_omp.loop.body
+// CHECK-NEXT:   %12 = getelementptr inbounds float, ptr %0, i32 %11
+// CHECK-NEXT:   store float 4.200000e+01, ptr %12, align 4
+// CHECK-NEXT:   br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:                                  ; preds = %omp.loop.region
+// CHECK-NEXT:   br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc:                                    ; preds = %omp.region.cont
+// CHECK-NEXT:   %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT:   br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit:                                   ; preds = %omp_tile0.cond
+// CHECK-NEXT:   br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after:                                  ; preds = %omp_tile0.exit
+// CHECK-NEXT:   br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc:                                   ; preds = %omp_tile0.after
+// CHECK-NEXT:   %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT:   br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit:                                  ; preds = %omp_floor0.cond
+// CHECK-NEXT:   br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after:                                 ; preds = %omp_floor0.exit
+// CHECK-NEXT:   br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:                               ; preds = %omp_floor0.after
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
new file mode 100644
index 0000000000000..6fad81cd0c299
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
@@ -0,0 +1,190 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () {
+  %literal_outer = omp.new_cli
+  %literal_inner = omp.new_cli
+  omp.canonical_loop(%literal_outer) %iv1 : i32 in range(%tc1) {
+    omp.canonical_loop(%literal_inner) %iv2 : i32 in range(%tc2) {
+      %idx = llvm.add %iv1, %iv2 : i32
+      %ptr = llvm.getelementptr inbounds %baseptr[%idx] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+      %val = llvm.mlir.constant(42.0 : f32) : f32
+      llvm.store %val, %ptr : f32, !llvm.ptr
+      omp.terminator
+    }
+    omp.terminator
+  }
+  omp.tile <- (%literal_outer, %literal_inner) sizes(%ts1, %ts2 : i32,i32)
+  llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+// CHECK-NEXT:   br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader:                           ; preds = %5
+// CHECK-NEXT:   %6 = udiv i32 %1, %3
+// CHECK-NEXT:   %7 = urem i32 %1, %3
+// CHECK-NEXT:   %8 = icmp ne i32 %7, 0
+// CHECK-NEXT:   %9 = zext i1 %8 to i32
+// CHECK-NEXT:   %omp_floor0.tripcount = add nuw i32 %6, %9
+// CHECK-NEXT:   %10 = udiv i32 %2, %4
+// CHECK-NEXT:   %11 = urem i32 %2, %4
+// CHECK-NEXT:   %12 = icmp ne i32 %11, 0
+// CHECK-NEXT:   %13 = zext i1 %12 to i32
+// CHECK-NEXT:   %omp_floor1.tripcount = add nuw i32 %10, %13
+// CHECK-NEXT:   br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header:                              ; preds = %omp_omp.loop.inc
+// CHECK-NEXT:   %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT:   br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond:                                ; preds = %omp_omp.loop.header
+// CHECK-NEXT:   %omp_omp.loop.cmp = icmp ult i32 %19, %1
+// CHECK-NEXT:   br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body:                                ; preds = %omp_tile1.body, %omp_omp.loop.cond
+// CHECK-NEXT:   br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region:                                  ; preds = %omp_omp.loop.body
+// CHECK-NEXT:   br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1:                          ; preds = %omp.loop.region
+// CHECK-NEXT:   br label %omp_omp.loop.body4
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader:                             ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT:   br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header:                                ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT:   %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT:   br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond:                                  ; preds = %omp_floor0.header
+// CHECK-NEXT:   %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT:   br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body:                                  ; preds = %omp_floor0.cond
+// CHECK-NEXT:   br label %omp_floor1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.preheader:                             ; preds = %omp_floor0.body
+// CHECK-NEXT:   br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.header:                                ; preds = %omp_floor1.inc, %omp_floor1.preheader
+// CHECK-NEXT:   %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ]
+// CHECK-NEXT:   br label %omp_floor1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.cond:                                  ; preds = %omp_floor1.header
+// CHECK-NEXT:   %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount
+// CHECK-NEXT:   br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.body:                                  ; preds = %omp_floor1.cond
+// CHECK-NEXT:   %14 = icmp eq i32 %omp_floor0.iv, %6
+// CHECK-NEXT:   %15 = select i1 %14, i32 %7, i32 %3
+// CHECK-NEXT:   %16 = icmp eq i32 %omp_floor1.iv, %10
+// CHECK-NEXT:   %17 = select i1 %16, i32 %11, i32 %4
+// CHECK-NEXT:   br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader:                              ; preds = %omp_floor1.body
+// CHECK-NEXT:   br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header:                                 ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT:   %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT:   br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond:                                   ; preds = %omp_tile0.header
+// CHECK-NEXT:   %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15
+// CHECK-NEXT:   br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body:                                   ; preds = %omp_tile0.cond
+// CHECK-NEXT:   br label %omp_tile1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.preheader:                              ; preds = %omp_tile0.body
+// CHECK-NEXT:   br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.header:                                 ; preds = %omp_tile1.inc, %omp_tile1.preheader
+// CHECK-NEXT:   %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ]
+// CHECK-NEXT:   br label %omp_tile1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.cond:                                   ; preds = %omp_tile1.header
+// CHECK-NEXT:   %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17
+// CHECK-NEXT:   br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.body:                                   ; preds = %omp_tile1.cond
+// CHECK-NEXT:   %18 = mul nuw i32 %3, %omp_floor0.iv
+// CHECK-NEXT:   %19 = add nuw i32 %18, %omp_tile0.iv
+// CHECK-NEXT:   %20 = mul nuw i32 %4, %omp_floor1.iv
+// CHECK-NEXT:   %21 = add nuw i32 %20, %omp_tile1.iv
+// CHECK-NEXT:   br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4:                               ; preds = %omp_omp.loop.preheader1
+// CHECK-NEXT:   br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12:                                ; preds = %omp_omp.loop.body4
+// CHECK-NEXT:   %22 = add i32 %19, %21
+// CHECK-NEXT:   %23 = getelementptr inbounds float, ptr %0, i32 %22
+// CHECK-NEXT:   store float 4.200000e+01, ptr %23, align 4
+// CHECK-NEXT:   br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11:                                ; preds = %omp.loop.region12
+// CHECK-NEXT:   br label %omp_tile1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.inc:                                    ; preds = %omp.region.cont11
+// CHECK-NEXT:   %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1
+// CHECK-NEXT:   br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.exit:                                   ; preds = %omp_tile1.cond
+// CHECK-NEXT:   br label %omp_tile1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.after:                                  ; preds = %omp_tile1.exit
+// CHECK-NEXT:   br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc:                                    ; preds = %omp_tile1.after
+// CHECK-NEXT:   %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT:   br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit:                                   ; preds = %omp_tile0.cond
+// CHECK-NEXT:   br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after:                                  ; preds = %omp_tile0.exit
+// CHECK-NEXT:   br label %omp_floor1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.inc:                                   ; preds = %omp_tile0.after
+// CHECK-NEXT:   %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1
+// CHECK-NEXT:   br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.exit:                                  ; preds = %omp_floor1.cond
+// CHECK-NEXT:   br label %omp_floor1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.after:                                 ; preds = %omp_floor1.exit
+// CHECK-NEXT:   br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc:                                   ; preds = %omp_floor1.after
+// CHECK-NEXT:   %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT:   br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit:                                  ; preds = %omp_floor0.cond
+// CHECK-NEXT:   br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after:                                 ; preds = %omp_floor0.exit
+// CHECK-NEXT:   br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont:                                  ; No predecessors!
+// CHECK-NEXT:   br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc:                                 ; preds = %omp.region.cont
+// CHECK-NEXT:   %omp_omp.loop.next = add nuw i32 %19, 1
+// CHECK-NEXT:   br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit:                                ; preds = %omp_omp.loop.cond
+// CHECK-NEXT:   br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after:                               ; preds = %omp_floor0.after, %omp_omp.loop.exit
+// CHECK-NEXT:   ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/mlir/test/mlir-tblgen/op-format-invalid.td b/mlir/test/mlir-tblgen/op-format-invalid.td
index 2f29543f67381..0a022ad43a749 100644
--- a/mlir/test/mlir-tblgen/op-format-invalid.td
+++ b/mlir/test/mlir-tblgen/op-format-invalid.td
@@ -307,7 +307,7 @@ def DirectiveTypeZOperandInvalidI : TestFormat_Op<[{
 def LiteralInvalidA : TestFormat_Op<[{
   `a:`
 }]>;
-// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+*'
+// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+-*'
 def LiteralInvalidB : TestFormat_Op<[{
   `1`
 }]>;
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index a1899a81afcce..8dd971374fa21 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -403,6 +403,7 @@ void DefFormat::genLiteralParser(StringRef value, FmtContext &ctx,
               .Case("]", "RSquare")
               .Case("?", "Question")
               .Case("+", "Plus")
+              .Case("-", "Minus")
               .Case("*", "Star")
               .Case("...", "Ellipsis")
        << "()";
diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp
index 4dfdde2146679..04d3ed1f3b70d 100644
--- a/mlir/tools/mlir-tblgen/FormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/FormatGen.cpp
@@ -518,7 +518,7 @@ bool mlir::tblgen::isValidLiteral(StringRef value,
   // If there is only one character, this must either be punctuation or a
   // single character bare identifier.
   if (value.size() == 1) {
-    StringRef bare = "_:,=<>()[]{}?+*";
+    StringRef bare = "_:,=<>()[]{}?+-*";
     if (isalpha(front) || bare.contains(front))
       return true;
     if (emitError)
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 0d113b3748354..ccf21d16005af 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -852,6 +852,7 @@ static void genLiteralParser(StringRef value, MethodBody &body) {
               .Case("]", "RSquare()")
               .Case("?", "Question()")
               .Case("+", "Plus()")
+              .Case("-", "Minus()")
               .Case("*", "Star()")
               .Case("...", "Ellipsis()");
 }

>From 493442453cbac2366aa89abde361f7badaad4948 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 16:39:44 +0200
Subject: [PATCH 5/8] Fix symbol resolution

---
 flang/lib/Semantics/resolve-directives.cpp | 48 ++++++++++------------
 1 file changed, 22 insertions(+), 26 deletions(-)

diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 5f2c9f676099c..a0109324e546c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1975,7 +1975,10 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
       }
     }
   }
+
+  // Must be done before iv privatization
   CheckPerfectNestAndRectangularLoop(x);
+
   PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
   ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1;
   return true;
@@ -2172,37 +2175,29 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses(
 }
 
 void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
-    const parser::OpenMPLoopConstruct
-        &x) { // GetAssociatedLoopLevelFromClauses(clauseList);
-  auto &&dirContext = GetContext();
+    const parser::OpenMPLoopConstruct &x) {
+  auto &dirContext = GetContext();
   std::int64_t dirDepth{dirContext.associatedLoopLevel};
   if (dirDepth <= 0)
     return;
 
-  Symbol::Flag ivDSA;
-  if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
-    ivDSA = Symbol::Flag::OmpPrivate;
-  } else if (dirDepth == 1) {
-    ivDSA = Symbol::Flag::OmpLinear;
-  } else {
-    ivDSA = Symbol::Flag::OmpLastPrivate;
-  }
-
   auto checkExprHasSymbols = [&](llvm::SmallVector<Symbol *> &ivs,
                                  const parser::ScalarExpr *bound) {
     if (ivs.empty())
       return;
-
-    if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) {
-      semantics::UnorderedSymbolSet boundSyms =
-          evaluate::CollectSymbols(*boundExpr);
-      for (auto iv : ivs) {
-        if (boundSyms.count(*iv) != 0) {
-          // TODO: Point to occurence of iv in boundExpr, directiveSource as a
-          // note
-          context_.Say(dirContext.directiveSource,
-              "Trip count must be computable and invariant"_err_en_US);
-        }
+    auto boundExpr{semantics::AnalyzeExpr(context_, *bound)};
+    if (!boundExpr)
+      return;
+    semantics::UnorderedSymbolSet boundSyms =
+        evaluate::CollectSymbols(*boundExpr);
+    if (boundSyms.empty())
+      return;
+    for (Symbol *iv : ivs) {
+      if (boundSyms.count(*iv) != 0) {
+        // TODO: Point to occurence of iv in boundExpr, directiveSource as a
+        //       note
+        context_.Say(dirContext.directiveSource,
+            "Trip count must be computable and invariant"_err_en_US);
       }
     }
   };
@@ -2217,8 +2212,9 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
             std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
                 innerMostNest)}) {
       innerMostLoop = &(innerLoop->value());
-    } else
+    } else {
       break;
+    }
   }
 
   if (!innerMostNest)
@@ -2228,7 +2224,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
     return;
 
   llvm::SmallVector<Symbol *> ivs;
-  int curLevel = 0;
+  int curLevel{0};
   const parser::DoConstruct *loop{outer};
   while (true) {
     auto [iv, lb, ub, step] = GetLoopBounds(*loop);
@@ -2240,7 +2236,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
     if (step)
       checkExprHasSymbols(ivs, step);
     if (iv) {
-      if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())})
+      if (auto *symbol{currScope().FindSymbol(iv->source)})
         ivs.push_back(symbol);
     }
 

>From 7655a11ac10f0cca877e56a73e2948a265db00ab Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Wed, 24 Sep 2025 12:45:18 +0200
Subject: [PATCH 6/8] Avoid structured binding capture to appease compiler

---
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index d3cc7e55ae155..f681b0346f489 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3300,6 +3300,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
   Value result = getResult();
   auto [newCli, gen, cons] = decodeCli(result);
 
+  // Structured binding `gen` cannot be captured in lambdas before C++20
+  OpOperand *generator = gen;
+
   // Derive the CLI variable name from its generator:
   //  * "canonloop" for omp.canonical_loop
   //  * custom name for loop transformation generatees
@@ -3324,7 +3327,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
               unsigned firstGrid = generateesFirst;
               unsigned firstIntratile = generateesFirst + generateesCount / 2;
               unsigned end = generateesFirst + generateesCount;
-              unsigned opnum = gen->getOperandNumber();
+              unsigned opnum = generator->getOperandNumber();
               // In the OpenMP apply and looprange clauses, indices are 1-based
               if (firstGrid <= opnum && opnum < firstIntratile) {
                 unsigned gridnum = opnum - firstGrid + 1;

>From 44ea4603871557d460b1a147245bd9fc6241de95 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 2 Oct 2025 17:45:03 +0200
Subject: [PATCH 7/8] remove merge conflict leftovers

---
 flang/test/Semantics/OpenMP/do08.f90 | 1 -
 flang/test/Semantics/OpenMP/do13.f90 | 1 -
 2 files changed, 2 deletions(-)

diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90
index bb3c1d0cd3855..5143dff0dd315 100644
--- a/flang/test/Semantics/OpenMP/do08.f90
+++ b/flang/test/Semantics/OpenMP/do08.f90
@@ -61,7 +61,6 @@ program omp
   !$omp end do
 
 
-  !ERROR: Canonical loop nest must be perfectly nested.
   !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
   !$omp do collapse(3)
   do 60 i=2,200,2
diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90
index 8f7844f4136f9..6e9d1dddade4c 100644
--- a/flang/test/Semantics/OpenMP/do13.f90
+++ b/flang/test/Semantics/OpenMP/do13.f90
@@ -59,7 +59,6 @@ program omp
   !$omp end do
 
 
-  !ERROR: Canonical loop nest must be perfectly nested.
   !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
   !$omp do collapse(3)
   do 60 i=1,10

>From bcc5cd5ac3586d72574630d39ec59a81e272a3f3 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 2 Oct 2025 17:46:51 +0200
Subject: [PATCH 8/8] Add - token to format test

---
 mlir/test/mlir-tblgen/op-format-spec.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td
index 1541cd09f53e0..1ac231116454b 100644
--- a/mlir/test/mlir-tblgen/op-format-spec.td
+++ b/mlir/test/mlir-tblgen/op-format-spec.td
@@ -123,7 +123,7 @@ def DirectiveTypeValid : TestFormat_Op<[{
 
 // CHECK-NOT: error
 def LiteralValid : TestFormat_Op<[{
-  `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `*` ` ` `` `->` `\n` `abc$._`
+  `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `-` `*` ` ` `` `->` `\n` `abc$._`
   attr-dict
 }]>;
 



More information about the flang-commits mailing list