[Openmp-commits] [flang] [llvm] [mlir] [openmp] [Flang] Add standalone tile support (PR #160298)
Michael Kruse via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 2 14:43:21 PDT 2025
https://github.com/Meinersbur updated https://github.com/llvm/llvm-project/pull/160298
>From b3919715ebe223b39dd789dcd471a864666d7008 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Fri, 19 Sep 2025 14:43:48 +0200
Subject: [PATCH 01/12] Improve canonloop/iv naming
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 237 +++++++++++++-----
.../Dialect/OpenMP/cli-canonical_loop.mlir | 127 ++++++++--
.../Dialect/OpenMP/cli-unroll-heuristic.mlir | 28 +--
3 files changed, 292 insertions(+), 100 deletions(-)
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3d70e28ed23ab..cf549a6bb50b4 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -77,6 +77,178 @@ struct LLVMPointerPointerLikeModel
};
} // namespace
+/// Generate a name of a canonical loop nest of the format
+/// `<prefix>(_r<num>_s<num>)*` that describes its nesting inside parent
+/// operations' regions (`_r<num>`) and its position within that region
+/// (`_s<num>`). The region number is omitted if the parent operation has just
+/// one region. If a loop nest just consists of canonical loops nested inside
+/// each other, also uses `d<num>` where <num> is the nesting depth of the loop.
+static std::string generateLoopNestingName(StringRef prefix,
+ CanonicalLoopOp op) {
+ struct Component {
+ // A region argument of an operation
+ Operation *parentOp;
+ size_t regionInOpIdx;
+ bool isOnlyRegionInOp;
+ bool skipRegion;
+
+ // An operation somewhere in a parent region
+ Operation *thisOp;
+ Region *parentRegion;
+ size_t opInRegionIdx;
+ bool isOnlyOpInRegion;
+ bool skipOp;
+ int depth = -1;
+ };
+ SmallVector<Component> components;
+
+ // Gather a list of parent regions and operations, and the position within
+ // their parent
+ Operation *o = op.getOperation();
+ while (o) {
+ if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+ break;
+
+ // Operation within a region
+ Region *r = o->getParentRegion();
+ if (!r)
+ break;
+
+ llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+ size_t idx = 0;
+ bool found = false;
+ size_t sequentialIdx = -1;
+ bool isOnlyLoop = true;
+ for (Block *b : traversal) {
+ for (Operation &op : *b) {
+ if (&op == o && !found) {
+ sequentialIdx = idx;
+ found = true;
+ }
+ if (op.getNumRegions()) {
+ idx += 1;
+ if (idx > 1)
+ isOnlyLoop = false;
+ }
+ if (found && !isOnlyLoop)
+ break;
+ }
+ }
+
+ Component &comp = components.emplace_back();
+ comp.thisOp = o;
+ comp.parentRegion = r;
+ comp.opInRegionIdx = sequentialIdx;
+ comp.isOnlyOpInRegion = isOnlyLoop;
+
+ // Region argument of an operation
+ Operation *parent = r->getParentOp();
+
+ comp.parentOp = parent;
+ comp.regionInOpIdx = 0;
+ comp.isOnlyRegionInOp = true;
+ if (parent && parent->getRegions().size() > 1) {
+ auto getRegionIndex = [](Operation *o, Region *r) {
+ for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+ if (&region == r)
+ return idx;
+ }
+ llvm_unreachable("Region not child of its parent operation");
+ };
+ comp.regionInOpIdx = getRegionIndex(parent, r);
+ comp.isOnlyRegionInOp = false;
+ }
+
+ if (!parent)
+ break;
+
+ // next parent
+ o = parent;
+ }
+
+ // Reorder components from outermost to innermost
+ std::reverse(components.begin(), components.end());
+
+ // Determine whether a component is not needed
+ for (auto &c : components) {
+ c.skipRegion = c.isOnlyRegionInOp;
+ c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
+ }
+
+ // Find runs of perfect nests and merge them into a single component
+ int curNestRoot = 0;
+ int curNestDepth = 1;
+ auto mergeLoopNest = [&](int innermost) {
+ auto outermost = curNestRoot;
+
+ // Don't do enything if it does not consist of at least 2 loops
+ if (outermost < innermost) {
+ for (auto i : llvm::seq<int>(outermost + 1, innermost)) {
+ components[i].skipOp = true;
+ }
+ components[innermost].depth = curNestDepth;
+ }
+
+ // Start new root
+ curNestRoot = innermost + 1;
+ curNestDepth = 1;
+ };
+ for (auto &&[i, c] : llvm::enumerate(components)) {
+ if (i <= curNestRoot)
+ continue;
+
+ // Check whether this region can be included
+ if (!c.skipRegion) {
+ mergeLoopNest(i);
+ continue;
+ }
+
+ if (c.skipOp)
+ continue;
+
+ if (!c.isOnlyOpInRegion) {
+ mergeLoopNest(i);
+ continue;
+ }
+
+ curNestDepth += 1;
+ }
+
+ // Finalize innermost loop nest
+ mergeLoopNest(components.size() - 1);
+
+ // Outermost loop does not need a suffix if it has no sibling
+ for (auto &c : components) {
+ if (c.skipOp)
+ continue;
+ if (c.isOnlyOpInRegion)
+ c.skipOp = true;
+ break;
+ }
+
+ // Compile name
+ SmallString<64> Name{prefix};
+ for (auto &c : components) {
+ auto addComponent = [&Name](char letter, int64_t idx) {
+ Name += '_';
+ Name += letter;
+ Name += idx;
+ };
+
+ if (!c.skipRegion)
+ addComponent('r', c.regionInOpIdx);
+
+ if (!c.skipOp) {
+ if (c.depth >= 0)
+ addComponent('d', c.depth - 1);
+ else
+ addComponent('s', c.opInRegionIdx);
+ }
+ }
+
+ return Name.str().str();
+}
+
void OpenMPDialect::initialize() {
addOperations<
#define GET_OP_LIST
@@ -3141,67 +3313,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
cliName =
TypeSwitch<Operation *, std::string>(gen->getOwner())
.Case([&](CanonicalLoopOp op) {
- // Find the canonical loop nesting: For each ancestor add a
- // "+_r<idx>" suffix (in reverse order)
- SmallVector<std::string> components;
- Operation *o = op.getOperation();
- while (o) {
- if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
- break;
-
- Region *r = o->getParentRegion();
- if (!r)
- break;
-
- auto getSequentialIndex = [](Region *r, Operation *o) {
- llvm::ReversePostOrderTraversal<Block *> traversal(
- &r->getBlocks().front());
- size_t idx = 0;
- for (Block *b : traversal) {
- for (Operation &op : *b) {
- if (&op == o)
- return idx;
- // Only consider operations that are containers as
- // possible children
- if (!op.getRegions().empty())
- idx += 1;
- }
- }
- llvm_unreachable("Operation not part of the region");
- };
- size_t sequentialIdx = getSequentialIndex(r, o);
- components.push_back(("s" + Twine(sequentialIdx)).str());
-
- Operation *parent = r->getParentOp();
- if (!parent)
- break;
-
- // If the operation has more than one region, also count in
- // which of the regions
- if (parent->getRegions().size() > 1) {
- auto getRegionIndex = [](Operation *o, Region *r) {
- for (auto [idx, region] :
- llvm::enumerate(o->getRegions())) {
- if (&region == r)
- return idx;
- }
- llvm_unreachable("Region not child its parent operation");
- };
- size_t regionIdx = getRegionIndex(parent, r);
- components.push_back(("r" + Twine(regionIdx)).str());
- }
-
- // next parent
- o = parent;
- }
-
- SmallString<64> Name("canonloop");
- for (const std::string &s : reverse(components)) {
- Name += '_';
- Name += s;
- }
-
- return Name;
+ return generateLoopNestingName("canonloop", op);
})
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
@@ -3292,7 +3404,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
OpAsmSetValueNameFn setNameFn) {
- setNameFn(region.getArgument(0), "iv");
+ std::string ivName = generateLoopNestingName("iv", *this);
+ setNameFn(region.getArgument(0), ivName);
}
void CanonicalLoopOp::print(OpAsmPrinter &p) {
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
index adadb8bbac49d..874e3922805ec 100644
--- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_canonloop_raw(
@@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () {
func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s0) ({
^bb_first(%iv_first: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv : i32
+ // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32
%newval = llvm.add %iv_first, %iv_first : i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s1 = omp.new_cli
%canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s1) ({
^bb_second(%iv_second: i32):
// CHECK: omp.terminator
@@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-LABEL: @omp_nested_canonloop_raw(
// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%outer = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%inner = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) {
"omp.canonical_loop" (%tc_outer, %outer) ({
^bb_outer(%iv_outer: i32):
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) {
"omp.canonical_loop" (%tc_inner, %inner) ({
^bb_inner(%iv_inner: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+ // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32
%newval = llvm.add %iv_outer, %iv_inner: i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () {
func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
// CHECK: %canonloop_s1 = omp.new_cli
%canonloop_s1 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: %canonloop_s2 = omp.new_cli
+ %canonloop_s2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
@@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
}
-// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-LABEL: @omp_canonloop_2d_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32)
-func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
- %canonloop_s0 = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
- %canonloop_s0_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
}
@@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
}
+// CHECK-LABEL: @omp_canonloop_3d_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () {
+ // CHECK: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK: %canonloop_d2 = omp.new_cli
+ %canonloop_d2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli
+ %canonloop_s0_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+
+ // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+ %canonloop_s1 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli
+ %canonloop_s1_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
// CHECK-LABEL: @omp_newcli_unused(
// CHECK-SAME: )
func.func @omp_newcli_unused() -> () {
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
index cda7d0b500166..16884f4245e76 100644
--- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -1,18 +1,18 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_unroll_heuristic_raw(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop) ({
^bb0(%iv: i32):
omp.terminator
}) : (i32, !omp.cli) -> ()
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
"omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
return
}
@@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
- %canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%canonloop)
return
}
@@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%cli_outer = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%cli_inner = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
@@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%cli_outer)
- // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+ // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1)
omp.unroll_heuristic(%cli_inner)
return
}
>From ce66eec648f6415c199d6115f3be4d188eee59ba Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 12:19:41 +0200
Subject: [PATCH 02/12] Avoid compiler warning
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index cf549a6bb50b4..1674891410194 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -170,22 +170,21 @@ static std::string generateLoopNestingName(StringRef prefix,
std::reverse(components.begin(), components.end());
// Determine whether a component is not needed
- for (auto &c : components) {
+ for (Component &c : components) {
c.skipRegion = c.isOnlyRegionInOp;
c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
}
// Find runs of perfect nests and merge them into a single component
- int curNestRoot = 0;
- int curNestDepth = 1;
- auto mergeLoopNest = [&](int innermost) {
- auto outermost = curNestRoot;
+ size_t curNestRoot = 0;
+ size_t curNestDepth = 1;
+ auto mergeLoopNest = [&](size_t innermost) {
+ size_t outermost = curNestRoot;
// Don't do enything if it does not consist of at least 2 loops
if (outermost < innermost) {
- for (auto i : llvm::seq<int>(outermost + 1, innermost)) {
+ for (auto i : llvm::seq<int>(outermost + 1, innermost))
components[i].skipOp = true;
- }
components[innermost].depth = curNestDepth;
}
>From 3a141d6729e26a7eab821b86eee240ab4bfa322f Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 12:59:14 +0200
Subject: [PATCH 03/12] Add perfect-nest and rectangular loop nest tests
---
flang/lib/Semantics/resolve-directives.cpp | 146 ++++++++++++++++++++-
flang/test/Semantics/OpenMP/do08.f90 | 1 +
flang/test/Semantics/OpenMP/do13.f90 | 1 +
flang/test/Semantics/OpenMP/do22.f90 | 73 +++++++++++
4 files changed, 215 insertions(+), 6 deletions(-)
create mode 100644 flang/test/Semantics/OpenMP/do22.f90
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 2d1bec9968593..5f2c9f676099c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -149,6 +149,9 @@ template <typename T> class DirectiveAttributeVisitor {
dataSharingAttributeObjects_.clear();
}
bool HasDataSharingAttributeObject(const Symbol &);
+ std::tuple<const parser::Name *, const parser::ScalarExpr *,
+ const parser::ScalarExpr *, const parser::ScalarExpr *>
+ GetLoopBounds(const parser::DoConstruct &);
const parser::Name *GetLoopIndex(const parser::DoConstruct &);
const parser::DoConstruct *GetDoConstructIf(
const parser::ExecutionPartConstruct &);
@@ -933,6 +936,13 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
privateDataSharingAttributeObjects_.clear();
}
+ /// Check that loops in the loop nest are perfectly nested, and that the lower
+ /// bound, upper bound, and step expressions do not use the iv
+ /// of a surrounding loop of the associated loop nest.
+ /// We do not support non-perfectly nested loops nor non-rectangular loops yet
+ /// (both introduced in OpenMP 5.0)
+ void CheckPerfectNestAndRectangularLoop(const parser::OpenMPLoopConstruct &x);
+
// Predetermined DSA rules
void PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
const parser::OpenMPLoopConstruct &);
@@ -1009,14 +1019,15 @@ bool DirectiveAttributeVisitor<T>::HasDataSharingAttributeObject(
}
template <typename T>
-const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
- const parser::DoConstruct &x) {
+std::tuple<const parser::Name *, const parser::ScalarExpr *,
+ const parser::ScalarExpr *, const parser::ScalarExpr *>
+DirectiveAttributeVisitor<T>::GetLoopBounds(const parser::DoConstruct &x) {
using Bounds = parser::LoopControl::Bounds;
if (x.GetLoopControl()) {
if (const Bounds * b{std::get_if<Bounds>(&x.GetLoopControl()->u)}) {
- return &b->name.thing;
- } else {
- return nullptr;
+ auto &&step = b->step;
+ return {&b->name.thing, &b->lower, &b->upper,
+ step.has_value() ? &step.value() : nullptr};
}
} else {
context_
@@ -1024,8 +1035,15 @@ const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
"Loop control is not present in the DO LOOP"_err_en_US)
.Attach(GetContext().directiveSource,
"associated with the enclosing LOOP construct"_en_US);
- return nullptr;
}
+ return {nullptr, nullptr, nullptr, nullptr};
+}
+
+template <typename T>
+const parser::Name *DirectiveAttributeVisitor<T>::GetLoopIndex(
+ const parser::DoConstruct &x) {
+ auto &&[iv, lb, ub, step] = GetLoopBounds(x);
+ return iv;
}
template <typename T>
@@ -1957,6 +1975,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
}
}
}
+ CheckPerfectNestAndRectangularLoop(x);
PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1;
return true;
@@ -2152,6 +2171,121 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses(
}
}
+void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
+ const parser::OpenMPLoopConstruct
+ &x) { // GetAssociatedLoopLevelFromClauses(clauseList);
+ auto &&dirContext = GetContext();
+ std::int64_t dirDepth{dirContext.associatedLoopLevel};
+ if (dirDepth <= 0)
+ return;
+
+ Symbol::Flag ivDSA;
+ if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
+ ivDSA = Symbol::Flag::OmpPrivate;
+ } else if (dirDepth == 1) {
+ ivDSA = Symbol::Flag::OmpLinear;
+ } else {
+ ivDSA = Symbol::Flag::OmpLastPrivate;
+ }
+
+ auto checkExprHasSymbols = [&](llvm::SmallVector<Symbol *> &ivs,
+ const parser::ScalarExpr *bound) {
+ if (ivs.empty())
+ return;
+
+ if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) {
+ semantics::UnorderedSymbolSet boundSyms =
+ evaluate::CollectSymbols(*boundExpr);
+ for (auto iv : ivs) {
+ if (boundSyms.count(*iv) != 0) {
+ // TODO: Point to occurrence of iv in boundExpr, directiveSource as a
+ // note
+ context_.Say(dirContext.directiveSource,
+ "Trip count must be computable and invariant"_err_en_US);
+ }
+ }
+ }
+ };
+
+ // Skip over loop transformation directives
+ const parser::OpenMPLoopConstruct *innerMostLoop = &x;
+ const parser::NestedConstruct *innerMostNest = nullptr;
+ while (auto &optLoopCons{
+ std::get<std::optional<parser::NestedConstruct>>(innerMostLoop->t)}) {
+ innerMostNest = &(optLoopCons.value());
+ if (const auto *innerLoop{
+ std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
+ innerMostNest)}) {
+ innerMostLoop = &(innerLoop->value());
+ } else
+ break;
+ }
+
+ if (!innerMostNest)
+ return;
+ const auto &outer{std::get_if<parser::DoConstruct>(innerMostNest)};
+ if (!outer)
+ return;
+
+ llvm::SmallVector<Symbol *> ivs;
+ int curLevel = 0;
+ const parser::DoConstruct *loop{outer};
+ while (true) {
+ auto [iv, lb, ub, step] = GetLoopBounds(*loop);
+
+ if (lb)
+ checkExprHasSymbols(ivs, lb);
+ if (ub)
+ checkExprHasSymbols(ivs, ub);
+ if (step)
+ checkExprHasSymbols(ivs, step);
+ if (iv) {
+ if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())})
+ ivs.push_back(symbol);
+ }
+
+ // Stop after processing all affected loops
+ if (curLevel + 1 >= dirDepth)
+ break;
+
+ // Recurse into nested loop
+ const auto &block{std::get<parser::Block>(loop->t)};
+ if (block.empty()) {
+ // Insufficient number of nested loops already reported by
+ // CheckAssocLoopLevel()
+ break;
+ }
+
+ loop = GetDoConstructIf(block.front());
+ if (!loop) {
+ // Insufficient number of nested loops already reported by
+ // CheckAssocLoopLevel()
+ break;
+ }
+
+ auto checkPerfectNest = [&, this]() {
+ auto blockSize = block.size();
+ if (blockSize <= 1)
+ return;
+
+ if (parser::Unwrap<parser::ContinueStmt>(x))
+ blockSize -= 1;
+
+ if (blockSize <= 1)
+ return;
+
+ // Non-perfectly nested loop
+ // TODO: Point to non-DO statement, directiveSource as a note
+ context_.Say(dirContext.directiveSource,
+ "Canonical loop nest must be perfectly nested."_err_en_US);
+ };
+
+ checkPerfectNest();
+
+ ++curLevel;
+ }
+}
+
// 2.15.1.1 Data-sharing Attribute Rules - Predetermined
// - The loop iteration variable(s) in the associated do-loop(s) of a do,
// parallel do, taskloop, or distribute construct is (are) private.
diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90
index 5143dff0dd315..bb3c1d0cd3855 100644
--- a/flang/test/Semantics/OpenMP/do08.f90
+++ b/flang/test/Semantics/OpenMP/do08.f90
@@ -61,6 +61,7 @@ program omp
!$omp end do
+ !ERROR: Canonical loop nest must be perfectly nested.
!ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!$omp do collapse(3)
do 60 i=2,200,2
diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90
index 6e9d1dddade4c..8f7844f4136f9 100644
--- a/flang/test/Semantics/OpenMP/do13.f90
+++ b/flang/test/Semantics/OpenMP/do13.f90
@@ -59,6 +59,7 @@ program omp
!$omp end do
+ !ERROR: Canonical loop nest must be perfectly nested.
!ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!$omp do collapse(3)
do 60 i=1,10
diff --git a/flang/test/Semantics/OpenMP/do22.f90 b/flang/test/Semantics/OpenMP/do22.f90
new file mode 100644
index 0000000000000..9d96d3af54e5c
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/do22.f90
@@ -0,0 +1,73 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp
+! Check that loop nests associated with a DO directive are perfectly nested and rectangular
+
+subroutine do_imperfectly_nested_before
+ integer i, j
+
+ !ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
+ !$omp do collapse(2)
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfectly_nested_behind
+ integer i, j
+
+ !ERROR: Canonical loop nest must be perfectly nested.
+ !$omp do collapse(2)
+ do i = 1, 10
+ do j = 1, 10
+ print *, i, j
+ end do
+ print *, i
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_lb
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp do collapse(2)
+ do i = 1, 10
+ do j = i, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_ub
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp do collapse(2)
+ do i = 1, 10
+ do j = 0, i
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_nonrectangular_step
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp do collapse(2)
+ do i = 1, 10
+ do j = 1, 10, i
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
>From ff4eccb41e2973e1179734dd455f1e797e89d9be Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 14:45:45 +0200
Subject: [PATCH 04/12] Add omp.tile operation
---
.../mlir/Dialect/OpenMP/OpenMPClauses.td | 29 ++
.../mlir/Dialect/OpenMP/OpenMPOpBase.td | 69 +++-
mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 63 ++-
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 387 +++++++++++++++---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 42 ++
.../Dialect/OpenMP/cli-canonical_loop.mlir | 127 ++++--
mlir/test/Dialect/OpenMP/cli-tile.mlir | 138 +++++++
.../Dialect/OpenMP/cli-unroll-heuristic.mlir | 28 +-
mlir/test/Dialect/OpenMP/invalid-tile.mlir | 119 ++++++
.../test/Target/LLVMIR/openmp-cli-tile01.mlir | 101 +++++
.../test/Target/LLVMIR/openmp-cli-tile02.mlir | 190 +++++++++
mlir/test/mlir-tblgen/op-format-invalid.td | 2 +-
.../tools/mlir-tblgen/AttrOrTypeFormatGen.cpp | 1 +
mlir/tools/mlir-tblgen/FormatGen.cpp | 2 +-
mlir/tools/mlir-tblgen/OpFormatGen.cpp | 1 +
15 files changed, 1157 insertions(+), 142 deletions(-)
create mode 100644 mlir/test/Dialect/OpenMP/cli-tile.mlir
create mode 100644 mlir/test/Dialect/OpenMP/invalid-tile.mlir
create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
create mode 100644 mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
index 1eda5e4bc1618..8e43c4284d078 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td
@@ -995,6 +995,35 @@ class OpenMP_NumTeamsClauseSkip<
def OpenMP_NumTeamsClause : OpenMP_NumTeamsClauseSkip<>;
+//===----------------------------------------------------------------------===//
+// V5.1: [10.1.2] `sizes` clause
+//===----------------------------------------------------------------------===//
+
+class OpenMP_SizesClauseSkip<
+ bit traits = false, bit arguments = false, bit assemblyFormat = false,
+ bit description = false, bit extraClassDeclaration = false
+ > : OpenMP_Clause<traits, arguments, assemblyFormat, description,
+ extraClassDeclaration> {
+ let arguments = (ins
+ Variadic<IntLikeType>:$sizes
+ );
+
+ let optAssemblyFormat = [{
+ `sizes` `(` $sizes `:` type($sizes) `)`
+ }];
+
+ let description = [{
+ The `sizes` clause defines the size of a grid over a multi-dimensional
+ logical iteration space. This grid is used for loop transformations such as
+ `tile` and `strip`. The size per dimension can be a variable, but only
+ values that are at least 2 make sense. It is not specified what happens
+ when smaller values are used, but it should still result in a loop nest that
+ executes each logical iteration once.
+ }];
+}
+
+def OpenMP_SizesClause : OpenMP_SizesClauseSkip<>;
+
//===----------------------------------------------------------------------===//
// V5.2: [10.1.2] `num_threads` clause
//===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
index bbcfb87fa03c6..5ad4e4b5b61d1 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpBase.td
@@ -38,6 +38,44 @@ def OpenMP_MapBoundsType : OpenMP_Type<"MapBounds", "map_bounds_ty"> {
let summary = "Type for representing omp map clause bounds information";
}
+//===---------------------------------------------------------------------===//
+// OpenMP Canonical Loop Info Type
+//===---------------------------------------------------------------------===//
+
+def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
+ let summary = "Type for representing a reference to a canonical loop";
+ let description = [{
+ A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
+ canonical loop in the same function. Values of this type are not
+ available at runtime and therefore cannot be used by the program itself,
+ i.e. an opaque type. It is similar to the transform dialect's
+ `!transform.interface` type, but instead of implementing an interface
+ for each transformation, the OpenMP dialect itself defines possible
+ operations on this type.
+
+    A value of type CanonicalLoopInfoType (in the following: CLI) can be
+
+ 1. created by omp.new_cli.
+ 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
+ can only be associated once.
+ 3. passed to an omp loop transformation operation that modifies the loop
+ associated with the CLI. The CLI is the "applyee" and the operation is
+ the consumer. A CLI can only be consumed once.
+ 4. passed to an omp loop transformation operation to associate the cli with
+ a result of that transformation. The CLI is the "generatee" and the
+ operation is the generator.
+
+ A CLI cannot
+
+ 1. be returned from a function.
+ 2. be passed to operations that are not specifically designed to take a
+ CanonicalLoopInfoType, including AnyType.
+
+ A CLI directly corresponds to an object of
+ OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
+ }];
+}
+
//===----------------------------------------------------------------------===//
// Base classes for OpenMP dialect operations.
//===----------------------------------------------------------------------===//
@@ -211,8 +249,35 @@ class OpenMP_Op<string mnemonic, list<Trait> traits = [],
// Doesn't actually create a C++ base class (only defines default values for
// tablegen classes that derive from this). Use LoopTransformationInterface
// instead for common operations.
-class OpenMPTransform_Op<string mnemonic, list<Trait> traits = []> :
- OpenMP_Op<mnemonic, !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits) > {
+class OpenMPTransform_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMP_Op<mnemonic,
+ traits = !listconcat([DeclareOpInterfaceMethods<LoopTransformationInterface>], traits),
+ clauses = clauses> {
+}
+
+// Base class for loop transformations using the standard syntax.
+//
+//   omp.opname ($generatees) <- ($applyees) clause(...) clause(...) ... <attr-dict>
+//   omp.opname <- ($applyees) clause(...) clause(...) ... <attr-dict>
+//
+// $generatees is optional and is assumed to be empty if omitted.
+class OpenMPTransformBase_Op<string mnemonic,
+ list<Trait> traits = [],
+ list<OpenMP_Clause> clauses = []> :
+ OpenMPTransform_Op<mnemonic,
+ traits = !listconcat(traits, [AttrSizedOperandSegments]),
+ clauses = clauses> {
+
+ let arguments = !con(
+ (ins Variadic<CanonicalLoopInfoType>:$generatees,
+ Variadic<CanonicalLoopInfoType>:$applyees
+ ), clausesArgs);
+
+ let assemblyFormat = [{ custom<LoopTransformClis>($generatees, $applyees) }]
+ # clausesAssemblyFormat
+ # [{ attr-dict }];
}
#endif // OPENMP_OP_BASE
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 5c77e215467e4..b73091ea0ca53 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -357,44 +357,6 @@ def SingleOp : OpenMP_Op<"single", traits = [
let hasVerifier = 1;
}
-//===---------------------------------------------------------------------===//
-// OpenMP Canonical Loop Info Type
-//===---------------------------------------------------------------------===//
-
-def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
- let summary = "Type for representing a reference to a canonical loop";
- let description = [{
- A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
- canonical loop in the same function. Values of this type are not
- available at runtime and therefore cannot be used by the program itself,
- i.e. an opaque type. It is similar to the transform dialect's
- `!transform.interface` type, but instead of implementing an interface
- for each transformation, the OpenMP dialect itself defines possible
- operations on this type.
-
- A value of type CanonicalLoopInfoType (in the following: CLI) value can be
-
- 1. created by omp.new_cli.
- 2. passed to omp.canonical_loop to associate the loop to that CLI. A CLI
- can only be associated once.
- 3. passed to an omp loop transformation operation that modifies the loop
- associated with the CLI. The CLI is the "applyee" and the operation is
- the consumer. A CLI can only be consumed once.
- 4. passed to an omp loop transformation operation to associate the cli with
- a result of that transformation. The CLI is the "generatee" and the
- operation is the generator.
-
- A CLI cannot
-
- 1. be returned from a function.
- 2. be passed to operations that are not specifically designed to take a
- CanonicalLoopInfoType, including AnyType.
-
- A CLI directly corresponds to an object of
- OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
- }];
-}
-
//===---------------------------------------------------------------------===//
// OpenMP Canonical Loop Info Creation
//===---------------------------------------------------------------------===//
@@ -563,6 +525,31 @@ def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
let hasCustomAssemblyFormat = 1;
}
+//===----------------------------------------------------------------------===//
+// OpenMP tile operation
+//===----------------------------------------------------------------------===//
+
+def TileOp : OpenMPTransformBase_Op<"tile",
+ clauses = [OpenMP_SizesClause]> {
+ let summary = "OpenMP tile operation";
+ let description = [{
+ Represents the OpenMP tile directive introduced in OpenMP 5.1.
+
+ The construct partitions the logical iteration space of the affected loops
+ into equally-sized tiles, then creates two sets of nested loops. The outer
+ loops, called the grid loops, iterate over all tiles. The inner loops,
+ called the intratile loops, iterate over the logical iterations of a tile.
+ The sizes clause determines the size of a tile.
+
+    Currently, the affected loops must be rectangular (the tripcount of the
+    inner loop must not depend on any iv of a surrounding affected loop) and
+    perfectly nested (except for the innermost affected loop, no operations
+    other than the nested loop and the terminator in the loop body).
+ }] # clausesDescription;
+
+ let hasVerifier = 1;
+}
+
//===----------------------------------------------------------------------===//
// 2.8.3 Workshare Construct
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 3d70e28ed23ab..d3cc7e55ae155 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/ADT/bit.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Support/InterleavedRange.h"
#include <cstddef>
#include <iterator>
#include <optional>
@@ -77,6 +78,177 @@ struct LLVMPointerPointerLikeModel
};
} // namespace
+/// Generate a name of a canonical loop nest of the format
+/// `<prefix>(_r<num>_s<num>)*` that describes its nesting inside parent
+/// operations: `_r<num>` is the index of the region within its parent
+/// operation and `_s<num>` is the sequential index of the operation among the
+/// region-holding operations of that region. The region number is omitted if
+/// the parent operation has just one region. If a loop nest just consists of
+/// canonical loops nested inside each other, also uses `_d<num>` where <num>
+/// is the nesting depth of the loop.
+///
+/// \param prefix  Leading part of the generated name (e.g. "canonloop", "iv").
+/// \param op      The canonical loop whose position is to be described.
+/// \returns \p prefix followed by zero or more `_<letter><decimal index>`
+///          components, ordered from outermost to innermost.
+static std::string generateLoopNestingName(StringRef prefix,
+                                           CanonicalLoopOp op) {
+  struct Component {
+    // A region argument of an operation
+    Operation *parentOp;
+    size_t regionInOpIdx;
+    bool isOnlyRegionInOp;
+    bool skipRegion;
+
+    // An operation somewhere in a parent region
+    Operation *thisOp;
+    Region *parentRegion;
+    size_t opInRegionIdx;
+    bool isOnlyOpInRegion;
+    bool skipOp;
+    int depth = -1;
+  };
+  SmallVector<Component> components;
+
+  // Gather a list of parent regions and operations, and the position within
+  // their parent
+  Operation *o = op.getOperation();
+  while (o) {
+    if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
+      break;
+
+    // Operation within a region
+    Region *r = o->getParentRegion();
+    if (!r)
+      break;
+
+    // Count the region-holding operations of `r` in reverse post-order of its
+    // blocks; `sequentialIdx` is the count at the point where `o` is reached.
+    llvm::ReversePostOrderTraversal<Block *> traversal(&r->getBlocks().front());
+    size_t idx = 0;
+    bool found = false;
+    size_t sequentialIdx = -1;
+    bool isOnlyLoop = true;
+    for (Block *b : traversal) {
+      for (Operation &candidate : *b) {
+        if (&candidate == o && !found) {
+          sequentialIdx = idx;
+          found = true;
+        }
+        if (candidate.getNumRegions()) {
+          idx += 1;
+          if (idx > 1)
+            isOnlyLoop = false;
+        }
+        // Both answers are known; no need to look at the rest of this block.
+        if (found && !isOnlyLoop)
+          break;
+      }
+    }
+
+    Component &comp = components.emplace_back();
+    comp.thisOp = o;
+    comp.parentRegion = r;
+    comp.opInRegionIdx = sequentialIdx;
+    comp.isOnlyOpInRegion = isOnlyLoop;
+
+    // Region argument of an operation
+    Operation *parent = r->getParentOp();
+
+    comp.parentOp = parent;
+    comp.regionInOpIdx = 0;
+    comp.isOnlyRegionInOp = true;
+    if (parent && parent->getRegions().size() > 1) {
+      auto getRegionIndex = [](Operation *o, Region *r) {
+        for (auto [idx, region] : llvm::enumerate(o->getRegions())) {
+          if (&region == r)
+            return idx;
+        }
+        llvm_unreachable("Region not child of its parent operation");
+      };
+      comp.regionInOpIdx = getRegionIndex(parent, r);
+      comp.isOnlyRegionInOp = false;
+    }
+
+    if (!parent)
+      break;
+
+    // next parent
+    o = parent;
+  }
+
+  // Without any enclosing region there is nothing to describe. Returning here
+  // also keeps `components.size() - 1` below from wrapping around.
+  if (components.empty())
+    return prefix.str();
+
+  // Reorder components from outermost to innermost
+  std::reverse(components.begin(), components.end());
+
+  // Determine whether a component is not needed
+  for (Component &c : components) {
+    c.skipRegion = c.isOnlyRegionInOp;
+    c.skipOp = c.isOnlyOpInRegion && !isa<CanonicalLoopOp>(c.thisOp);
+  }
+
+  // Find runs of perfect nests and merge them into a single component
+  size_t curNestRoot = 0;
+  size_t curNestDepth = 1;
+  auto mergeLoopNest = [&](size_t innermost) {
+    size_t outermost = curNestRoot;
+
+    // Don't do anything if it does not consist of at least 2 loops
+    if (outermost < innermost) {
+      for (auto i : llvm::seq<int>(outermost + 1, innermost))
+        components[i].skipOp = true;
+      components[innermost].depth = curNestDepth;
+    }
+
+    // Start new root
+    curNestRoot = innermost + 1;
+    curNestDepth = 1;
+  };
+  for (auto &&[i, c] : llvm::enumerate(components)) {
+    if (i <= curNestRoot)
+      continue;
+
+    // Check whether this region can be included
+    if (!c.skipRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    if (c.skipOp)
+      continue;
+
+    if (!c.isOnlyOpInRegion) {
+      mergeLoopNest(i);
+      continue;
+    }
+
+    curNestDepth += 1;
+  }
+
+  // Finalize innermost loop nest
+  mergeLoopNest(components.size() - 1);
+
+  // Outermost loop does not need a suffix if it has no sibling
+  for (auto &c : components) {
+    if (c.skipOp)
+      continue;
+    if (c.isOnlyOpInRegion)
+      c.skipOp = true;
+    break;
+  }
+
+  // Compile name
+  SmallString<64> Name{prefix};
+  for (auto &c : components) {
+    auto addComponent = [&Name](char letter, int64_t idx) {
+      Name += '_';
+      Name += letter;
+      // Append the index as decimal text. A plain `Name += idx` would select
+      // the `char` overload and append the raw character code instead.
+      Name += Twine(idx).str();
+    };
+
+    if (!c.skipRegion)
+      addComponent('r', c.regionInOpIdx);
+
+    if (!c.skipOp) {
+      if (c.depth >= 0)
+        addComponent('d', c.depth - 1);
+      else
+        addComponent('s', c.opInRegionIdx);
+    }
+  }
+
+  return Name.str().str();
+}
+
void OpenMPDialect::initialize() {
addOperations<
#define GET_OP_LIST
@@ -3141,71 +3313,29 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
cliName =
TypeSwitch<Operation *, std::string>(gen->getOwner())
.Case([&](CanonicalLoopOp op) {
- // Find the canonical loop nesting: For each ancestor add a
- // "+_r<idx>" suffix (in reverse order)
- SmallVector<std::string> components;
- Operation *o = op.getOperation();
- while (o) {
- if (o->hasTrait<mlir::OpTrait::IsIsolatedFromAbove>())
- break;
-
- Region *r = o->getParentRegion();
- if (!r)
- break;
-
- auto getSequentialIndex = [](Region *r, Operation *o) {
- llvm::ReversePostOrderTraversal<Block *> traversal(
- &r->getBlocks().front());
- size_t idx = 0;
- for (Block *b : traversal) {
- for (Operation &op : *b) {
- if (&op == o)
- return idx;
- // Only consider operations that are containers as
- // possible children
- if (!op.getRegions().empty())
- idx += 1;
- }
- }
- llvm_unreachable("Operation not part of the region");
- };
- size_t sequentialIdx = getSequentialIndex(r, o);
- components.push_back(("s" + Twine(sequentialIdx)).str());
-
- Operation *parent = r->getParentOp();
- if (!parent)
- break;
-
- // If the operation has more than one region, also count in
- // which of the regions
- if (parent->getRegions().size() > 1) {
- auto getRegionIndex = [](Operation *o, Region *r) {
- for (auto [idx, region] :
- llvm::enumerate(o->getRegions())) {
-                  if (&region == r)
- return idx;
- }
- llvm_unreachable("Region not child its parent operation");
- };
- size_t regionIdx = getRegionIndex(parent, r);
- components.push_back(("r" + Twine(regionIdx)).str());
- }
-
- // next parent
- o = parent;
- }
-
- SmallString<64> Name("canonloop");
- for (const std::string &s : reverse(components)) {
- Name += '_';
- Name += s;
- }
-
- return Name;
+ return generateLoopNestingName("canonloop", op);
})
.Case([&](UnrollHeuristicOp op) -> std::string {
llvm_unreachable("heuristic unrolling does not generate a loop");
})
+ .Case([&](TileOp op) -> std::string {
+ auto [generateesFirst, generateesCount] =
+ op.getGenerateesODSOperandIndexAndLength();
+ unsigned firstGrid = generateesFirst;
+ unsigned firstIntratile = generateesFirst + generateesCount / 2;
+ unsigned end = generateesFirst + generateesCount;
+ unsigned opnum = gen->getOperandNumber();
+ // In the OpenMP apply and looprange clauses, indices are 1-based
+ if (firstGrid <= opnum && opnum < firstIntratile) {
+ unsigned gridnum = opnum - firstGrid + 1;
+ return ("grid" + Twine(gridnum)).str();
+ }
+ if (firstIntratile <= opnum && opnum < end) {
+ unsigned intratilenum = opnum - firstIntratile + 1;
+ return ("intratile" + Twine(intratilenum)).str();
+ }
+ llvm_unreachable("Unexpected generatee argument");
+ })
.Default([&](Operation *op) {
assert(false && "TODO: Custom name for this operation");
return "transformed";
@@ -3292,7 +3422,8 @@ void CanonicalLoopOp::getAsmBlockNames(OpAsmSetBlockNameFn setNameFn) {
void CanonicalLoopOp::getAsmBlockArgumentNames(Region &region,
OpAsmSetValueNameFn setNameFn) {
- setNameFn(region.getArgument(0), "iv");
+ std::string ivName = generateLoopNestingName("iv", *this);
+ setNameFn(region.getArgument(0), ivName);
}
void CanonicalLoopOp::print(OpAsmPrinter &p) {
@@ -3433,6 +3564,138 @@ UnrollHeuristicOp::getGenerateesODSOperandIndexAndLength() {
return {0, 0};
}
+//===----------------------------------------------------------------------===//
+// TileOp
+//===----------------------------------------------------------------------===//
+
+/// Print the CLI operands of a loop transformation in the form
+/// `(<generatees>) <- (<applyees>)`. The parenthesized generatee list is left
+/// out when there are no generatees; ` <- (<applyees>)` is left out when there
+/// are no applyees.
+static void printLoopTransformClis(OpAsmPrinter &p, TileOp op,
+                                   OperandRange generatees,
+                                   OperandRange applyees) {
+  const bool hasGeneratees = !generatees.empty();
+  const bool hasApplyees = !applyees.empty();
+
+  if (hasGeneratees)
+    p << '(' << llvm::interleaved(generatees) << ')';
+  if (hasApplyees)
+    p << " <- (" << llvm::interleaved(applyees) << ')';
+}
+
+/// Parse the CLI operands of a loop transformation. Two spellings are
+/// accepted:
+///
+///   (<generatees>) <- (<applyees>)
+///   <- (<applyees>)
+///
+/// In the second spelling nothing is added to \p generateesOperands.
+static ParseResult parseLoopTransformClis(
+    OpAsmParser &parser,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &generateesOperands,
+    SmallVectorImpl<OpAsmParser::UnresolvedOperand> &applyeesOperands) {
+  constexpr auto parens = mlir::OpAsmParser::Delimiter::Paren;
+
+  // No leading `<`: the generatee list comes first and has to be followed by
+  // the `<` that starts the arrow.
+  if (parser.parseOptionalLess()) {
+    if (parser.parseOperandList(generateesOperands, parens) ||
+        parser.parseLess())
+      return failure();
+  }
+
+  // On either path the `<` of `<-` has been consumed by now; what remains is
+  // the `-` and the parenthesized applyee list.
+  if (parser.parseMinus() ||
+      parser.parseOperandList(applyeesOperands, parens))
+    return failure();
+
+  return success();
+}
+
+/// Verify the structural requirements of `omp.tile`:
+///  - there is at least one applyee,
+///  - there is exactly one tile size per applyee,
+///  - if generatees are given, there are two of them per tile size,
+///  - every applyee after the first is an omp.canonical_loop that is the only
+///    operation besides the terminator in the body of the preceding applyee
+///    (perfect nesting), and
+///  - the trip count of such a loop is not the induction variable of one of
+///    the preceding applyees (rectangular nest).
+LogicalResult TileOp::verify() {
+  if (getApplyees().empty())
+    return emitOpError() << "must apply to at least one loop";
+
+  if (getSizes().size() != getApplyees().size())
+    return emitOpError() << "there must be one tile size for each applyee";
+
+  if (!getGeneratees().empty() &&
+      2 * getSizes().size() != getGeneratees().size())
+    return emitOpError()
+           << "expecting two times the number of generatees than applyees";
+
+  // Induction variables of all applyee loops visited so far as `parent`.
+  DenseSet<Value> parentIVs;
+
+  // Walk over adjacent (parent, applyee) pairs, outermost first.
+  // NOTE(review): with a single applyee the loop body never runs, so that
+  // applyee is not checked for having a generator here.
+  Value parent = getApplyees().front();
+  for (auto &&applyee : llvm::drop_begin(getApplyees())) {
+    auto [parentCreate, parentGen, parentCons] = decodeCli(parent);
+    auto [create, gen, cons] = decodeCli(applyee);
+
+    if (!parentGen)
+      return emitOpError() << "applyee CLI has no generator";
+
+    // Test the result of the cast, not `parentGen` again: a generator that is
+    // not an omp.canonical_loop would otherwise be dereferenced as null below.
+    auto parentLoop = dyn_cast_or_null<CanonicalLoopOp>(parentGen->getOwner());
+    if (!parentLoop)
+      return emitOpError()
+             << "currently only supports omp.canonical_loop as applyee";
+
+    parentIVs.insert(parentLoop.getInductionVar());
+
+    if (!gen)
+      return emitOpError() << "applyee CLI has no generator";
+    auto loop = dyn_cast_or_null<CanonicalLoopOp>(gen->getOwner());
+    if (!loop)
+      return emitOpError()
+             << "currently only supports omp.canonical_loop as applyee";
+
+    // Canonical loop must be perfectly nested, i.e. the body of the parent must
+    // only contain the omp.canonical_loop of the nested loops, and
+    // omp.terminator
+    bool isPerfectlyNested = [&]() {
+      auto &parentBody = parentLoop.getRegion();
+      if (!parentBody.hasOneBlock())
+        return false;
+      auto &parentBlock = parentBody.getBlocks().front();
+
+      // First operation must be the nested loop itself ...
+      auto nestedLoopIt = parentBlock.begin();
+      if (nestedLoopIt == parentBlock.end() ||
+          (&*nestedLoopIt != loop.getOperation()))
+        return false;
+
+      // ... directly followed by the terminator ...
+      auto termIt = std::next(nestedLoopIt);
+      if (termIt == parentBlock.end() || !isa<TerminatorOp>(termIt))
+        return false;
+
+      // ... and nothing else after it.
+      if (std::next(termIt) != parentBlock.end())
+        return false;
+
+      return true;
+    }();
+    if (!isPerfectlyNested)
+      return emitOpError() << "tiled loop nest must be perfectly nested";
+
+    if (parentIVs.contains(loop.getTripCount()))
+      return emitOpError() << "tiled loop nest must be rectangular";
+
+    parent = applyee;
+  }
+
+  // TODO: The tile sizes must be computed before the loop, but checking this
+  // requires dominance analysis. For instance:
+  //
+  //      %canonloop = omp.new_cli
+  //      omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+  //        // write to %x
+  //        omp.terminator
+  //      }
+  //      %ts = llvm.load %x
+  //      omp.tile <- (%canonloop) sizes(%ts : i32)
+
+  return success();
+}
+
+/// Position of the `applyees` operand group of this operation, as returned by
+/// getODSOperandIndexAndLength() for that group.
+std::pair<unsigned, unsigned> TileOp::getApplyeesODSOperandIndexAndLength() {
+  auto applyeesSegment = getODSOperandIndexAndLength(odsIndex_applyees);
+  return applyeesSegment;
+}
+
+/// Position of the `generatees` operand group of this operation, as returned
+/// by getODSOperandIndexAndLength() for that group.
+std::pair<unsigned, unsigned> TileOp::getGenerateesODSOperandIndexAndLength() {
+  auto generateesSegment = getODSOperandIndexAndLength(odsIndex_generatees);
+  return generateesSegment;
+}
+
//===----------------------------------------------------------------------===//
// Critical construct (2.17.1)
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 4921a1990b6e8..171ac61dd66fe 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3154,6 +3154,45 @@ applyUnrollHeuristic(omp::UnrollHeuristicOp op, llvm::IRBuilderBase &builder,
return success();
}
+/// Apply a `#pragma omp tile` / `!$omp tile` transformation using the
+/// OpenMPIRBuilder.
+///
+/// Looks up the already-translated tile sizes and applyee loops, hands them to
+/// OpenMPIRBuilder::tileLoops, maps the generatees (if any were given) to the
+/// loops it returns, and finally invalidates the applyee CLIs.
+///
+/// \param op                 The omp.tile operation to translate.
+/// \param builder            Supplies the location used for the generated code.
+/// \param moduleTranslation  Provides the MLIR-to-LLVM value and loop mappings.
+/// \returns success(); missing translations are caught by assertions only.
+static LogicalResult applyTile(omp::TileOp op, llvm::IRBuilderBase &builder,
+                               LLVM::ModuleTranslation &moduleTranslation) {
+  llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
+  llvm::OpenMPIRBuilder::LocationDescription loc(builder);
+
+  SmallVector<llvm::CanonicalLoopInfo *> translatedLoops;
+  SmallVector<llvm::Value *> translatedSizes;
+
+  for (Value size : op.getSizes()) {
+    llvm::Value *translatedSize = moduleTranslation.lookupValue(size);
+    assert(translatedSize &&
+           "sizes clause arguments must already be translated");
+    translatedSizes.push_back(translatedSize);
+  }
+
+  for (Value applyee : op.getApplyees()) {
+    llvm::CanonicalLoopInfo *consBuilderCLI =
+        moduleTranslation.lookupOMPLoop(applyee);
+    // Check the result of the lookup; `applyee` itself is the MLIR operand and
+    // says nothing about whether the loop has been translated.
+    assert(consBuilderCLI && "Canonical loop must already been translated");
+    translatedLoops.push_back(consBuilderCLI);
+  }
+
+  auto generatedLoops =
+      ompBuilder->tileLoops(loc.DL, translatedLoops, translatedSizes);
+  // Generatees are optional; when present they are paired one-to-one, in
+  // order, with the loops returned by tileLoops.
+  if (!op.getGeneratees().empty()) {
+    for (auto [mlirLoop, genLoop] :
+         zip_equal(op.getGeneratees(), generatedLoops))
+      moduleTranslation.mapOmpLoop(mlirLoop, genLoop);
+  }
+
+  // CLIs can only be consumed once
+  for (Value applyee : op.getApplyees())
+    moduleTranslation.invalidateOmpLoop(applyee);
+
+  return success();
+}
+
/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
static llvm::AtomicOrdering
convertAtomicOrdering(std::optional<omp::ClauseMemoryOrderKind> ao) {
@@ -6196,6 +6235,9 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
// the omp.canonical_loop.
return applyUnrollHeuristic(op, builder, moduleTranslation);
})
+ .Case([&](omp::TileOp op) {
+ return applyTile(op, builder, moduleTranslation);
+ })
.Case([&](omp::TargetAllocMemOp) {
return convertTargetAllocMemOp(*op, builder, moduleTranslation);
})
diff --git a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
index adadb8bbac49d..874e3922805ec 100644
--- a/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-canonical_loop.mlir
@@ -1,5 +1,5 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_canonloop_raw(
@@ -24,10 +24,10 @@ func.func @omp_canonloop_raw(%tc : i32) -> () {
func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s0) ({
^bb_first(%iv_first: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv : i32
+ // CHECK-NEXT: = llvm.add %iv_s0, %iv_s0 : i32
%newval = llvm.add %iv_first, %iv_first : i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -36,7 +36,7 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s1 = omp.new_cli
%canonloop_s1 = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop_s1) ({
^bb_second(%iv_second: i32):
// CHECK: omp.terminator
@@ -52,17 +52,17 @@ func.func @omp_canonloop_sequential_raw(%tc : i32) -> () {
// CHECK-LABEL: @omp_nested_canonloop_raw(
// CHECK-SAME: %[[tc_outer:.+]]: i32, %[[tc_inner:.+]]: i32)
func.func @omp_nested_canonloop_raw(%tc_outer : i32, %tc_inner : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%outer = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%inner = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc_outer]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc_outer]]) {
"omp.canonical_loop" (%tc_outer, %outer) ({
^bb_outer(%iv_outer: i32):
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc_inner]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc_inner]]) {
"omp.canonical_loop" (%tc_inner, %inner) ({
^bb_inner(%iv_inner: i32):
- // CHECK-NEXT: = llvm.add %iv, %iv_0 : i32
+ // CHECK-NEXT: = llvm.add %iv, %iv_d1 : i32
%newval = llvm.add %iv_outer, %iv_inner: i32
// CHECK-NEXT: omp.terminator
omp.terminator
@@ -108,16 +108,24 @@ func.func @omp_canonloop_constant_pretty() -> () {
func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
// CHECK-NEXT: %canonloop_s0 = omp.new_cli
%canonloop_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
// CHECK: %canonloop_s1 = omp.new_cli
%canonloop_s1 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s1) %iv_0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ // CHECK: %canonloop_s2 = omp.new_cli
+ %canonloop_s2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s2) %iv_s2 : i32 in range(%tc) {
// CHECK-NEXT: omp.terminator
omp.terminator
}
@@ -126,17 +134,17 @@ func.func @omp_canonloop_sequential_pretty(%tc : i32) -> () {
}
-// CHECK-LABEL: @omp_canonloop_nested_pretty(
+// CHECK-LABEL: @omp_canonloop_2d_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32)
-func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
- %canonloop_s0 = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
- %canonloop_s0_s0 = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
- omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%tc) {
+func.func @omp_canonloop_2d_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
}
@@ -147,6 +155,77 @@ func.func @omp_canonloop_nested_pretty(%tc : i32) -> () {
}
+// CHECK-LABEL: @omp_canonloop_3d_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_3d_nested_pretty(%tc : i32) -> () {
+ // CHECK: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK: %canonloop_d1 = omp.new_cli
+ %canonloop_d1 = omp.new_cli
+ // CHECK: %canonloop_d2 = omp.new_cli
+ %canonloop_d2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d1) %iv_1d : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
+// CHECK-LABEL: @omp_canonloop_sequential_nested_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32)
+func.func @omp_canonloop_sequential_nested_pretty(%tc : i32) -> () {
+ // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s0_d1 = omp.new_cli
+ %canonloop_s0_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0) %iv_s0 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s0_d1) %iv_s0_d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+
+ // CHECK-NEXT: %canonloop_s1 = omp.new_cli
+ %canonloop_s1 = omp.new_cli
+ // CHECK-NEXT: %canonloop_s1_d1 = omp.new_cli
+ %canonloop_s1_d1 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1) %iv_s1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_s1_d1) %iv_s1_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop_s1_d1) %iv_s1d1 : i32 in range(%tc) {
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ // CHECK-NEXT: }
+ }
+ // CHECK-NEXT: omp.terminator
+ omp.terminator
+ }
+
+ return
+}
+
+
// CHECK-LABEL: @omp_newcli_unused(
// CHECK-SAME: )
func.func @omp_newcli_unused() -> () {
diff --git a/mlir/test/Dialect/OpenMP/cli-tile.mlir b/mlir/test/Dialect/OpenMP/cli-tile.mlir
new file mode 100644
index 0000000000000..73d54784c52b7
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/cli-tile.mlir
@@ -0,0 +1,138 @@
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
+
+
+// Raw syntax check (MLIR output is always pretty-printed)
+// CHECK-LABEL: @omp_tile_raw(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_raw(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile = "omp.new_cli" () : () -> (!omp.cli)
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ "omp.canonical_loop" (%tc, %canonloop) ({
+ ^bb0(%iv: i32):
+ // CHECK: omp.terminator
+ omp.terminator
+ }) : (i32, !omp.cli) -> ()
+ // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+ "omp.tile"(%grid, %intratile, %canonloop, %ts) <{operandSegmentSizes = array<i32: 2, 1, 1>}> : (!omp.cli, !omp.cli, !omp.cli, i32) -> ()
+ //"omp.tile" (%canonloop) : (!omp.cli) -> ()
+ return
+}
+
+
+// Pretty syntax check
+// CHECK-LABEL: @omp_tile_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %grid = omp.new_cli
+ // CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
+ %intratile = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %intratile1) <- (%canonloop) sizes(%[[ts]] : i32)
+ omp.tile(%grid, %intratile) <- (%canonloop) sizes(%ts : i32)
+ return
+}
+
+
+// Specifying the generatees for omp.tile is optional
+// CHECK-LABEL: @omp_tile_optionalgen_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_optionalgen_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile <- (%canonloop) sizes(%[[ts]] : i32)
+ omp.tile <- (%canonloop) sizes(%ts : i32)
+ return
+}
+
+
+// Two-dimensional tiling
+// CHECK-LABEL: @omp_tile_2d_pretty(
+// CHECK-SAME: %[[tc1:.+]]: i32, %[[tc2:.+]]: i32, %[[ts1:.+]]: i32, %[[ts2:.+]]: i32) {
+func.func @omp_tile_2d_pretty(%tc1 : i32, %tc2 : i32, %ts1 : i32, %ts2 : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %cli_outer = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %cli_inner = omp.new_cli
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid1 = omp.new_cli
+ // CHECK-NEXT: %grid2 = omp.new_cli
+ %grid2 = omp.new_cli
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile1 = omp.new_cli
+ // CHECK-NEXT: %intratile2 = omp.new_cli
+ %intratile2 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc1]]) {
+ omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc1) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc2]]) {
+ omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc2) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%canonloop, %canonloop_d1) sizes(%[[ts1]], %[[ts2]] : i32, i32)
+ omp.tile (%grid1, %grid2, %intratile1, %intratile2) <- (%cli_outer, %cli_inner) sizes(%ts1, %ts2 : i32, i32)
+ return
+}
+
+
+// Three-dimensional tiling
+// CHECK-LABEL: @omp_tile_3d_pretty(
+// CHECK-SAME: %[[tc:.+]]: i32, %[[ts:.+]]: i32) {
+func.func @omp_tile_3d_pretty(%tc : i32, %ts : i32) -> () {
+ // CHECK-NEXT: %canonloop = omp.new_cli
+ %cli_outer = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
+ %cli_middle = omp.new_cli
+ // CHECK-NEXT: %canonloop_d2 = omp.new_cli
+ %cli_inner = omp.new_cli
+ // CHECK-NEXT: %grid1 = omp.new_cli
+ %grid1 = omp.new_cli
+ // CHECK-NEXT: %grid2 = omp.new_cli
+ %grid2 = omp.new_cli
+ // CHECK-NEXT: %grid3 = omp.new_cli
+ %grid3 = omp.new_cli
+ // CHECK-NEXT: %intratile1 = omp.new_cli
+ %intratile1 = omp.new_cli
+ // CHECK-NEXT: %intratile2 = omp.new_cli
+ %intratile2 = omp.new_cli
+ // CHECK-NEXT: %intratile3 = omp.new_cli
+ %intratile3 = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_middle) %iv_middle : i32 in range(%tc) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d2) %iv_d2 : i32 in range(%[[tc]]) {
+ omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.terminator
+ omp.terminator
+ }
+ // CHECK: omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%canonloop, %canonloop_d1, %canonloop_d2) sizes(%[[ts]], %[[ts]], %[[ts]] : i32, i32, i32)
+ omp.tile (%grid1, %grid2, %grid3, %intratile1, %intratile2, %intratile3) <- (%cli_outer, %cli_middle, %cli_inner) sizes(%ts, %ts, %ts: i32, i32, i32)
+ return
+}
diff --git a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
index cda7d0b500166..16884f4245e76 100644
--- a/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
+++ b/mlir/test/Dialect/OpenMP/cli-unroll-heuristic.mlir
@@ -1,18 +1,18 @@
-// RUN: mlir-opt %s | FileCheck %s
-// RUN: mlir-opt %s | mlir-opt | FileCheck %s
+// RUN: mlir-opt %s | FileCheck %s --enable-var-scope
+// RUN: mlir-opt %s | mlir-opt | FileCheck %s --enable-var-scope
// CHECK-LABEL: @omp_unroll_heuristic_raw(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
"omp.canonical_loop" (%tc, %canonloop) ({
^bb0(%iv: i32):
omp.terminator
}) : (i32, !omp.cli) -> ()
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
"omp.unroll_heuristic" (%canonloop) : (!omp.cli) -> ()
return
}
@@ -22,12 +22,12 @@ func.func @omp_unroll_heuristic_raw(%tc : i32) -> () {
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-NEXT: %[[CANONLOOP:.+]] = omp.new_cli
- %canonloop = "omp.new_cli" () : () -> (!omp.cli)
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ %canonloop = omp.new_cli
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%canonloop)
return
}
@@ -36,13 +36,13 @@ func.func @omp_unroll_heuristic_pretty(%tc : i32) -> () {
// CHECK-LABEL: @omp_unroll_heuristic_nested_pretty(
// CHECK-SAME: %[[tc:.+]]: i32) {
func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
- // CHECK-NEXT: %canonloop_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop = omp.new_cli
%cli_outer = omp.new_cli
- // CHECK-NEXT: %canonloop_s0_s0 = omp.new_cli
+ // CHECK-NEXT: %canonloop_d1 = omp.new_cli
%cli_inner = omp.new_cli
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0) %iv : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop) %iv : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_outer) %iv_outer : i32 in range(%tc) {
- // CHECK-NEXT: omp.canonical_loop(%canonloop_s0_s0) %iv_0 : i32 in range(%[[tc]]) {
+ // CHECK-NEXT: omp.canonical_loop(%canonloop_d1) %iv_d1 : i32 in range(%[[tc]]) {
omp.canonical_loop(%cli_inner) %iv_inner : i32 in range(%tc) {
// CHECK: omp.terminator
omp.terminator
@@ -51,9 +51,9 @@ func.func @omp_unroll_heuristic_nested_pretty(%tc : i32) -> () {
omp.terminator
}
- // CHECK: omp.unroll_heuristic(%canonloop_s0)
+ // CHECK: omp.unroll_heuristic(%canonloop)
omp.unroll_heuristic(%cli_outer)
- // CHECK-NEXT: omp.unroll_heuristic(%canonloop_s0_s0)
+ // CHECK-NEXT: omp.unroll_heuristic(%canonloop_d1)
omp.unroll_heuristic(%cli_inner)
return
}
diff --git a/mlir/test/Dialect/OpenMP/invalid-tile.mlir b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
new file mode 100644
index 0000000000000..e63a062d810ed
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/invalid-tile.mlir
@@ -0,0 +1,119 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+
+
+func.func @missing_sizes(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <-(%canonloop)
+
+ llvm.return
+}
+
+// -----
+
+func.func @no_loop(%tc : i32, %ts : i32) {
+ // expected-error@+1 {{'omp.tile' op must apply to at least one loop}}
+ omp.tile <-()
+
+ return
+}
+
+// -----
+
+func.func @missing_generator(%tc : i32, %ts : i32) {
+ // expected-error@+1 {{'omp.new_cli' op CLI has no generator}}
+ %canonloop = omp.new_cli
+
+ // expected-note@+1 {{see consumer here: "omp.tile"(%0, %arg1) <{operandSegmentSizes = array<i32: 0, 1, 1>}> : (!omp.cli, i32) -> ()}}
+ omp.tile <-(%canonloop) sizes(%ts : i32)
+
+ return
+}
+
+// -----
+
+func.func @insufficient_sizes(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+ omp.canonical_loop(%canonloop2) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts : i32)
+
+ llvm.return
+}
+
+// -----
+
+func.func @insufficient_applyees(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op there must be one tile size for each applyee}}
+ omp.tile <- (%canonloop) sizes(%ts, %ts : i32, i32)
+
+ return
+}
+
+// -----
+
+func.func @insufficient_generatees(%tc : i32, %ts : i32) {
+ %canonloop = omp.new_cli
+ %grid = omp.new_cli
+ omp.canonical_loop(%canonloop) %iv : i32 in range(%tc) {
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op expecting two times the number of generatees than applyees}}
+ omp.tile (%grid) <- (%canonloop) sizes(%ts : i32)
+
+ return
+}
+
+// -----
+
+func.func @not_perfectly_nested(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+ %v = arith.constant 42 : i32
+ omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%tc) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op tiled loop nest must be perfectly nested}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+ llvm.return
+}
+
+// -----
+
+func.func @non_nectangular(%tc : i32, %ts : i32) {
+ %canonloop1 = omp.new_cli
+ %canonloop2 = omp.new_cli
+ omp.canonical_loop(%canonloop1) %iv1 : i32 in range(%tc) {
+ omp.canonical_loop(%canonloop2) %iv2 : i32 in range(%iv1) {
+ omp.terminator
+ }
+ omp.terminator
+ }
+
+ // expected-error@+1 {{'omp.tile' op tiled loop nest must be rectangular}}
+ omp.tile <-(%canonloop1, %canonloop2) sizes(%ts, %ts : i32, i32)
+
+ llvm.return
+}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
new file mode 100644
index 0000000000000..4ac4f02103e8c
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
@@ -0,0 +1,101 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+
+llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () {
+ %literal_cli = omp.new_cli
+ omp.canonical_loop(%literal_cli) %iv : i32 in range(%tc) {
+ %ptr = llvm.getelementptr inbounds %baseptr[%iv] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.tile <- (%literal_cli) sizes(%ts : i32)
+ llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %3
+// CHECK-NEXT: %4 = udiv i32 %1, %2
+// CHECK-NEXT: %5 = urem i32 %1, %2
+// CHECK-NEXT: %6 = icmp ne i32 %5, 0
+// CHECK-NEXT: %7 = zext i1 %6 to i32
+// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %4, %7
+// CHECK-NEXT: br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
+// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
+// CHECK-NEXT: %8 = icmp eq i32 %omp_floor0.iv, %4
+// CHECK-NEXT: %9 = select i1 %8, i32 %5, i32 %2
+// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor0.body
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
+// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9
+// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
+// CHECK-NEXT: %10 = mul nuw i32 %2, %omp_floor0.iv
+// CHECK-NEXT: %11 = add nuw i32 %10, %omp_tile0.iv
+// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile0.body
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
+// CHECK-NEXT: %12 = getelementptr inbounds float, ptr %0, i32 %11
+// CHECK-NEXT: store float 4.200000e+01, ptr %12, align 4
+// CHECK-NEXT: br label %omp.region.cont
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont: ; preds = %omp.loop.region
+// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc: ; preds = %omp.region.cont
+// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
+// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_tile0.after
+// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
new file mode 100644
index 0000000000000..6fad81cd0c299
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
@@ -0,0 +1,190 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+
+llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () {
+ %literal_outer = omp.new_cli
+ %literal_inner = omp.new_cli
+ omp.canonical_loop(%literal_outer) %iv1 : i32 in range(%tc1) {
+ omp.canonical_loop(%literal_inner) %iv2 : i32 in range(%tc2) {
+ %idx = llvm.add %iv1, %iv2 : i32
+ %ptr = llvm.getelementptr inbounds %baseptr[%idx] : (!llvm.ptr, i32) -> !llvm.ptr, f32
+ %val = llvm.mlir.constant(42.0 : f32) : f32
+ llvm.store %val, %ptr : f32, !llvm.ptr
+ omp.terminator
+ }
+ omp.terminator
+ }
+ omp.tile <- (%literal_outer, %literal_inner) sizes(%ts1, %ts2 : i32,i32)
+ llvm.return
+}
+
+
+// CHECK: ; ModuleID = 'LLVMDialectModule'
+// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-EMPTY:
+// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) {
+// CHECK-NEXT: br label %omp_omp.loop.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %5
+// CHECK-NEXT: %6 = udiv i32 %1, %3
+// CHECK-NEXT: %7 = urem i32 %1, %3
+// CHECK-NEXT: %8 = icmp ne i32 %7, 0
+// CHECK-NEXT: %9 = zext i1 %8 to i32
+// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %6, %9
+// CHECK-NEXT: %10 = udiv i32 %2, %4
+// CHECK-NEXT: %11 = urem i32 %2, %4
+// CHECK-NEXT: %12 = icmp ne i32 %11, 0
+// CHECK-NEXT: %13 = zext i1 %12 to i32
+// CHECK-NEXT: %omp_floor1.tripcount = add nuw i32 %10, %13
+// CHECK-NEXT: br label %omp_floor0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.header: ; preds = %omp_omp.loop.inc
+// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ]
+// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.cond: ; preds = %omp_omp.loop.header
+// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %19, %1
+// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile1.body, %omp_omp.loop.cond
+// CHECK-NEXT: br label %omp.loop.region
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
+// CHECK-NEXT: br label %omp_omp.loop.preheader1
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.preheader1: ; preds = %omp.loop.region
+// CHECK-NEXT: br label %omp_omp.loop.body4
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
+// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
+// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
+// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
+// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.preheader: ; preds = %omp_floor0.body
+// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.header: ; preds = %omp_floor1.inc, %omp_floor1.preheader
+// CHECK-NEXT: %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ]
+// CHECK-NEXT: br label %omp_floor1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.cond: ; preds = %omp_floor1.header
+// CHECK-NEXT: %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount
+// CHECK-NEXT: br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.body: ; preds = %omp_floor1.cond
+// CHECK-NEXT: %14 = icmp eq i32 %omp_floor0.iv, %6
+// CHECK-NEXT: %15 = select i1 %14, i32 %7, i32 %3
+// CHECK-NEXT: %16 = icmp eq i32 %omp_floor1.iv, %10
+// CHECK-NEXT: %17 = select i1 %16, i32 %11, i32 %4
+// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor1.body
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
+// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
+// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
+// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15
+// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile1.preheader
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.preheader: ; preds = %omp_tile0.body
+// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.header: ; preds = %omp_tile1.inc, %omp_tile1.preheader
+// CHECK-NEXT: %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ]
+// CHECK-NEXT: br label %omp_tile1.cond
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.cond: ; preds = %omp_tile1.header
+// CHECK-NEXT: %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17
+// CHECK-NEXT: br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.body: ; preds = %omp_tile1.cond
+// CHECK-NEXT: %18 = mul nuw i32 %3, %omp_floor0.iv
+// CHECK-NEXT: %19 = add nuw i32 %18, %omp_tile0.iv
+// CHECK-NEXT: %20 = mul nuw i32 %4, %omp_floor1.iv
+// CHECK-NEXT: %21 = add nuw i32 %20, %omp_tile1.iv
+// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.body4: ; preds = %omp_omp.loop.preheader1
+// CHECK-NEXT: br label %omp.loop.region12
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.loop.region12: ; preds = %omp_omp.loop.body4
+// CHECK-NEXT: %22 = add i32 %19, %21
+// CHECK-NEXT: %23 = getelementptr inbounds float, ptr %0, i32 %22
+// CHECK-NEXT: store float 4.200000e+01, ptr %23, align 4
+// CHECK-NEXT: br label %omp.region.cont11
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont11: ; preds = %omp.loop.region12
+// CHECK-NEXT: br label %omp_tile1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.inc: ; preds = %omp.region.cont11
+// CHECK-NEXT: %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1
+// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.exit: ; preds = %omp_tile1.cond
+// CHECK-NEXT: br label %omp_tile1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile1.after: ; preds = %omp_tile1.exit
+// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.inc: ; preds = %omp_tile1.after
+// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
+// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
+// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
+// CHECK-NEXT: br label %omp_floor1.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.inc: ; preds = %omp_tile0.after
+// CHECK-NEXT: %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1
+// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.exit: ; preds = %omp_floor1.cond
+// CHECK-NEXT: br label %omp_floor1.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor1.after: ; preds = %omp_floor1.exit
+// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_floor1.after
+// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
+// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
+// CHECK-NEXT: br label %omp_floor0.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp.region.cont: ; No predecessors!
+// CHECK-NEXT: br label %omp_omp.loop.inc
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.inc: ; preds = %omp.region.cont
+// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %19, 1
+// CHECK-NEXT: br label %omp_omp.loop.header
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.exit: ; preds = %omp_omp.loop.cond
+// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-EMPTY:
+// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after, %omp_omp.loop.exit
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
+// CHECK-EMPTY:
+// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-EMPTY:
+// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/mlir/test/mlir-tblgen/op-format-invalid.td b/mlir/test/mlir-tblgen/op-format-invalid.td
index 2f29543f67381..0a022ad43a749 100644
--- a/mlir/test/mlir-tblgen/op-format-invalid.td
+++ b/mlir/test/mlir-tblgen/op-format-invalid.td
@@ -307,7 +307,7 @@ def DirectiveTypeZOperandInvalidI : TestFormat_Op<[{
def LiteralInvalidA : TestFormat_Op<[{
`a:`
}]>;
-// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+*'
+// CHECK: error: expected valid literal but got '1': single character literal must be a letter or one of '_:,=<>()[]{}?+-*'
def LiteralInvalidB : TestFormat_Op<[{
`1`
}]>;
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index a1899a81afcce..8dd971374fa21 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -403,6 +403,7 @@ void DefFormat::genLiteralParser(StringRef value, FmtContext &ctx,
.Case("]", "RSquare")
.Case("?", "Question")
.Case("+", "Plus")
+ .Case("-", "Minus")
.Case("*", "Star")
.Case("...", "Ellipsis")
<< "()";
diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp
index 4dfdde2146679..04d3ed1f3b70d 100644
--- a/mlir/tools/mlir-tblgen/FormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/FormatGen.cpp
@@ -518,7 +518,7 @@ bool mlir::tblgen::isValidLiteral(StringRef value,
// If there is only one character, this must either be punctuation or a
// single character bare identifier.
if (value.size() == 1) {
- StringRef bare = "_:,=<>()[]{}?+*";
+ StringRef bare = "_:,=<>()[]{}?+-*";
if (isalpha(front) || bare.contains(front))
return true;
if (emitError)
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 0d113b3748354..ccf21d16005af 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -852,6 +852,7 @@ static void genLiteralParser(StringRef value, MethodBody &body) {
.Case("]", "RSquare()")
.Case("?", "Question()")
.Case("+", "Plus()")
+ .Case("-", "Minus()")
.Case("*", "Star()")
.Case("...", "Ellipsis()");
}
>From bfe9c6b642ebc01f113dbf0a574e424e83f7162a Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 15:33:52 +0200
Subject: [PATCH 05/12] [flang] Add standalone tile support
---
flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 13 +
flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 +
flang/lib/Lower/OpenMP/OpenMP.cpp | 360 ++++++++++++------
flang/lib/Lower/OpenMP/Utils.cpp | 23 +-
flang/lib/Lower/OpenMP/Utils.h | 7 +
.../lib/Semantics/check-directive-structure.h | 7 +-
flang/lib/Semantics/check-omp-structure.cpp | 8 +-
flang/lib/Semantics/resolve-directives.cpp | 16 +-
flang/test/Lower/OpenMP/tile01.f90 | 58 +++
flang/test/Lower/OpenMP/tile02.f90 | 88 +++++
.../loop-transformation-construct02.f90 | 5 +-
flang/test/Parser/OpenMP/tile-fail.f90 | 32 ++
flang/test/Parser/OpenMP/tile.f90 | 15 +-
flang/test/Semantics/OpenMP/tile01.f90 | 26 ++
flang/test/Semantics/OpenMP/tile02.f90 | 15 +
flang/test/Semantics/OpenMP/tile03.f90 | 15 +
flang/test/Semantics/OpenMP/tile04.f90 | 38 ++
flang/test/Semantics/OpenMP/tile05.f90 | 14 +
flang/test/Semantics/OpenMP/tile06.f90 | 44 +++
flang/test/Semantics/OpenMP/tile07.f90 | 35 ++
flang/test/Semantics/OpenMP/tile08.f90 | 15 +
llvm/include/llvm/Frontend/OpenMP/OMP.td | 3 +
openmp/runtime/test/transform/tile/intfor.f90 | 31 ++
.../runtime/test/transform/tile/intfor_2d.f90 | 53 +++
.../transform/tile/intfor_2d_varsizes.F90 | 60 +++
25 files changed, 841 insertions(+), 142 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/tile01.f90
create mode 100644 flang/test/Lower/OpenMP/tile02.f90
create mode 100644 flang/test/Parser/OpenMP/tile-fail.f90
create mode 100644 flang/test/Semantics/OpenMP/tile01.f90
create mode 100644 flang/test/Semantics/OpenMP/tile02.f90
create mode 100644 flang/test/Semantics/OpenMP/tile03.f90
create mode 100644 flang/test/Semantics/OpenMP/tile04.f90
create mode 100644 flang/test/Semantics/OpenMP/tile05.f90
create mode 100644 flang/test/Semantics/OpenMP/tile06.f90
create mode 100644 flang/test/Semantics/OpenMP/tile07.f90
create mode 100644 flang/test/Semantics/OpenMP/tile08.f90
create mode 100644 openmp/runtime/test/transform/tile/intfor.f90
create mode 100644 openmp/runtime/test/transform/tile/intfor_2d.f90
create mode 100644 openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index a96884f5680ba..55eda7e3404c1 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -431,6 +431,19 @@ bool ClauseProcessor::processNumTasks(
return false;
}
+bool ClauseProcessor::processSizes(StatementContext &stmtCtx,
+ mlir::omp::SizesClauseOps &result) const {
+ if (auto *clause = findUniqueClause<omp::clause::Sizes>()) {
+ result.sizes.reserve(clause->v.size());
+ for (const ExprTy &vv : clause->v)
+ result.sizes.push_back(fir::getBase(converter.genExprValue(vv, stmtCtx)));
+
+ return true;
+ }
+
+ return false;
+}
+
bool ClauseProcessor::processNumTeams(
lower::StatementContext &stmtCtx,
mlir::omp::NumTeamsClauseOps &result) const {
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h
index 324ea3c1047a5..9e352fa574a97 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.h
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h
@@ -66,6 +66,8 @@ class ClauseProcessor {
mlir::omp::LoopRelatedClauseOps &loopResult,
mlir::omp::CollapseClauseOps &collapseResult,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) const;
+ bool processSizes(StatementContext &stmtCtx,
+ mlir::omp::SizesClauseOps &result) const;
bool processDevice(lower::StatementContext &stmtCtx,
mlir::omp::DeviceClauseOps &result) const;
bool processDeviceType(mlir::omp::DeviceTypeClauseOps &result) const;
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 5681be664d450..7812d9fe00be2 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1984,125 +1984,241 @@ genLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
return loopOp;
}
-static mlir::omp::CanonicalLoopOp
-genCanonicalLoopOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
- semantics::SemanticsContext &semaCtx,
- lower::pft::Evaluation &eval, mlir::Location loc,
- const ConstructQueue &queue,
- ConstructQueue::const_iterator item,
- llvm::ArrayRef<const semantics::Symbol *> ivs,
- llvm::omp::Directive directive) {
+static void genCanonicalLoopNest(
+ lower::AbstractConverter &converter, lower::SymMap &symTable,
+ semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
+ mlir::Location loc, const ConstructQueue &queue,
+ ConstructQueue::const_iterator item, size_t numLoops,
+ llvm::SmallVectorImpl<mlir::omp::CanonicalLoopOp> &loops) {
+ assert(loops.empty() && "Expecting empty list to fill");
+ assert(numLoops >= 1 && "Expecting at least one loop");
+
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
- assert(ivs.size() == 1 && "Nested loops not yet implemented");
- const semantics::Symbol *iv = ivs[0];
+ mlir::omp::LoopRelatedClauseOps loopInfo;
+ llvm::SmallVector<const semantics::Symbol *, 3> ivs;
+ collectLoopRelatedInfo(converter, loc, eval, numLoops, loopInfo, ivs);
+ assert(ivs.size() == numLoops &&
+ "Expected to parse as many loop variables as there are loops");
+
+ // Steps that follow:
+ // 1. Emit all of the loop's prologues (compute the tripcount)
+ // 2. Emit omp.canonical_loop nested inside each other (iteratively)
+ // 2.1. In the innermost omp.canonical_loop, emit the loop body prologue (in
+ // the body callback)
+ //
+ // Since emitting prologues and body code is split, remember prologue values
+ // for use when emitting the same loop's epilogues.
+ llvm::SmallVector<mlir::Value> tripcounts;
+ llvm::SmallVector<mlir::Value> clis;
+ llvm::SmallVector<lower::pft::Evaluation *> evals;
+ llvm::SmallVector<mlir::Type> loopVarTypes;
+ llvm::SmallVector<mlir::Value> loopStepVars;
+ llvm::SmallVector<mlir::Value> loopLBVars;
+ llvm::SmallVector<mlir::Value> blockArgs;
+
+ // Step 1: Loop prologues
+ // Computing the trip count must happen before entering the outermost loop
+ lower::pft::Evaluation *innermostEval = &eval.getFirstNestedEvaluation();
+ for ([[maybe_unused]] auto iv : ivs) {
+ if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
+ // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
+ // Will need to add special cases for this combination.
+ TODO(loc, "DO CONCURRENT as canonical loop not supported");
+ }
+
+ auto &doLoopEval = innermostEval->getFirstNestedEvaluation();
+ evals.push_back(innermostEval);
+
+ // Get the loop bounds (and increment)
+ // auto &doLoopEval = nestedEval.getFirstNestedEvaluation();
+ auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>();
+ assert(doStmt && "Expected do loop to be in the nested evaluation");
+ auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t);
+ assert(loopControl.has_value());
+ auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
+ assert(bounds && "Expected bounds for canonical loop");
+ lower::StatementContext stmtCtx;
+ mlir::Value loopLBVar = fir::getBase(
+ converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx));
+ mlir::Value loopUBVar = fir::getBase(
+ converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx));
+ mlir::Value loopStepVar = [&]() {
+ if (bounds->step) {
+ return fir::getBase(
+ converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx));
+ }
- auto &nestedEval = eval.getFirstNestedEvaluation();
- if (nestedEval.getIf<parser::DoConstruct>()->IsDoConcurrent()) {
- // OpenMP specifies DO CONCURRENT only with the `!omp loop` construct. Will
- // need to add special cases for this combination.
- TODO(loc, "DO CONCURRENT as canonical loop not supported");
+ // If `step` is not present, assume it is `1`.
+ auto intTy = firOpBuilder.getI32Type();
+ return firOpBuilder.createIntegerConstant(loc, intTy, 1);
+ }();
+
+ // Get the integer kind for the loop variable and cast the loop bounds
+ size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size();
+ mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
+ loopVarTypes.push_back(loopVarType);
+ loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar);
+ loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar);
+ loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar);
+ loopLBVars.push_back(loopLBVar);
+ loopStepVars.push_back(loopStepVar);
+
+ // Start lowering
+ mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0);
+ mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1);
+ mlir::Value isDownwards = firOpBuilder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero);
+
+ // Ensure we are counting upwards. If not, negate step and swap lb and ub.
+ mlir::Value negStep =
+ firOpBuilder.create<mlir::arith::SubIOp>(loc, zero, loopStepVar);
+ mlir::Value incr = firOpBuilder.create<mlir::arith::SelectOp>(
+ loc, isDownwards, negStep, loopStepVar);
+ mlir::Value lb = firOpBuilder.create<mlir::arith::SelectOp>(
+ loc, isDownwards, loopUBVar, loopLBVar);
+ mlir::Value ub = firOpBuilder.create<mlir::arith::SelectOp>(
+ loc, isDownwards, loopLBVar, loopUBVar);
+
+ // Compute the trip count assuming lb <= ub. This guarantees that the result
+ // is non-negative and we can use unsigned arithmetic.
+ mlir::Value span = firOpBuilder.create<mlir::arith::SubIOp>(
+ loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw);
+ mlir::Value tcMinusOne =
+ firOpBuilder.create<mlir::arith::DivUIOp>(loc, span, incr);
+ mlir::Value tcIfLooping = firOpBuilder.create<mlir::arith::AddIOp>(
+ loc, tcMinusOne, one, ::mlir::arith::IntegerOverflowFlags::nuw);
+
+ // Fall back to 0 if lb > ub
+ mlir::Value isZeroTC = firOpBuilder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::slt, ub, lb);
+ mlir::Value tripcount = firOpBuilder.create<mlir::arith::SelectOp>(
+ loc, isZeroTC, zero, tcIfLooping);
+ tripcounts.push_back(tripcount);
+
+ // Create the CLI handle.
+ auto newcli = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+ mlir::Value cli = newcli.getResult();
+ clis.push_back(cli);
+
+ innermostEval = &*std::next(innermostEval->getNestedEvaluations().begin());
}
- // Get the loop bounds (and increment)
- auto &doLoopEval = nestedEval.getFirstNestedEvaluation();
- auto *doStmt = doLoopEval.getIf<parser::NonLabelDoStmt>();
- assert(doStmt && "Expected do loop to be in the nested evaluation");
- auto &loopControl = std::get<std::optional<parser::LoopControl>>(doStmt->t);
- assert(loopControl.has_value());
- auto *bounds = std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
- assert(bounds && "Expected bounds for canonical loop");
- lower::StatementContext stmtCtx;
- mlir::Value loopLBVar = fir::getBase(
- converter.genExprValue(*semantics::GetExpr(bounds->lower), stmtCtx));
- mlir::Value loopUBVar = fir::getBase(
- converter.genExprValue(*semantics::GetExpr(bounds->upper), stmtCtx));
- mlir::Value loopStepVar = [&]() {
- if (bounds->step) {
- return fir::getBase(
- converter.genExprValue(*semantics::GetExpr(bounds->step), stmtCtx));
- }
+ // Step 2: Create nested canonical loops
+ for (auto i : llvm::seq<size_t>(numLoops)) {
+ bool isInnermost = (i == numLoops - 1);
+ mlir::Type loopVarType = loopVarTypes[i];
+ mlir::Value tripcount = tripcounts[i];
+ mlir::Value cli = clis[i];
+ auto &&eval = evals[i];
+
+ auto ivCallback = [&, i, isInnermost](mlir::Operation *op)
+ -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
+ mlir::Region &region = op->getRegion(0);
+
+ // Create the op's region skeleton (BB taking the iv as argument)
+ firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc});
+ blockArgs.push_back(region.front().getArgument(0));
+
+ // Step 2.1: Emit body prologue code
+ // Compute the translation from logical iteration number to the value of
+ // the loop's iteration variable only in the innermost body. Currently,
+ // loop transformations do not allow any instruction between loops, but
+ // this is expected to change.
+ if (isInnermost) {
+ assert(blockArgs.size() == numLoops &&
+ "Expecting all block args to have been collected by now");
+ for (auto j : llvm::seq<size_t>(numLoops)) {
+ mlir::Value natIterNum = fir::getBase(blockArgs[j]);
+ mlir::Value scaled = firOpBuilder.create<mlir::arith::MulIOp>(
+ loc, natIterNum, loopStepVars[j]);
+ mlir::Value userVal = firOpBuilder.create<mlir::arith::AddIOp>(
+ loc, loopLBVars[j], scaled);
+
+ mlir::OpBuilder::InsertPoint insPt =
+ firOpBuilder.saveInsertionPoint();
+ firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
+ mlir::Type tempTy = converter.genType(*ivs[j]);
+ firOpBuilder.restoreInsertionPoint(insPt);
+
+ // Write the loop value into loop variable
+ mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, userVal);
+ hlfir::Entity lhs{converter.getSymbolAddress(*ivs[j])};
+ lhs = hlfir::derefPointersAndAllocatables(loc, firOpBuilder, lhs);
+ mlir::Operation *storeOp =
+ hlfir::AssignOp::create(firOpBuilder, loc, cvtVal, lhs);
+ firOpBuilder.setInsertionPointAfter(storeOp);
+ }
+ }
- // If `step` is not present, assume it is `1`.
- return firOpBuilder.createIntegerConstant(loc, firOpBuilder.getI32Type(),
- 1);
- }();
+ return {ivs[i]};
+ };
- // Get the integer kind for the loop variable and cast the loop bounds
- size_t loopVarTypeSize = bounds->name.thing.symbol->GetUltimate().size();
- mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize);
- loopLBVar = firOpBuilder.createConvert(loc, loopVarType, loopLBVar);
- loopUBVar = firOpBuilder.createConvert(loc, loopVarType, loopUBVar);
- loopStepVar = firOpBuilder.createConvert(loc, loopVarType, loopStepVar);
-
- // Start lowering
- mlir::Value zero = firOpBuilder.createIntegerConstant(loc, loopVarType, 0);
- mlir::Value one = firOpBuilder.createIntegerConstant(loc, loopVarType, 1);
- mlir::Value isDownwards = mlir::arith::CmpIOp::create(
- firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, loopStepVar, zero);
-
- // Ensure we are counting upwards. If not, negate step and swap lb and ub.
- mlir::Value negStep =
- mlir::arith::SubIOp::create(firOpBuilder, loc, zero, loopStepVar);
- mlir::Value incr = mlir::arith::SelectOp::create(
- firOpBuilder, loc, isDownwards, negStep, loopStepVar);
- mlir::Value lb = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards,
- loopUBVar, loopLBVar);
- mlir::Value ub = mlir::arith::SelectOp::create(firOpBuilder, loc, isDownwards,
- loopLBVar, loopUBVar);
-
- // Compute the trip count assuming lb <= ub. This guarantees that the result
- // is non-negative and we can use unsigned arithmetic.
- mlir::Value span = mlir::arith::SubIOp::create(
- firOpBuilder, loc, ub, lb, ::mlir::arith::IntegerOverflowFlags::nuw);
- mlir::Value tcMinusOne =
- mlir::arith::DivUIOp::create(firOpBuilder, loc, span, incr);
- mlir::Value tcIfLooping =
- mlir::arith::AddIOp::create(firOpBuilder, loc, tcMinusOne, one,
- ::mlir::arith::IntegerOverflowFlags::nuw);
-
- // Fall back to 0 if lb > ub
- mlir::Value isZeroTC = mlir::arith::CmpIOp::create(
- firOpBuilder, loc, mlir::arith::CmpIPredicate::slt, ub, lb);
- mlir::Value tripcount = mlir::arith::SelectOp::create(
- firOpBuilder, loc, isZeroTC, zero, tcIfLooping);
-
- // Create the CLI handle.
- auto newcli = mlir::omp::NewCliOp::create(firOpBuilder, loc);
- mlir::Value cli = newcli.getResult();
-
- auto ivCallback = [&](mlir::Operation *op)
- -> llvm::SmallVector<const Fortran::semantics::Symbol *> {
- mlir::Region &region = op->getRegion(0);
-
- // Create the op's region skeleton (BB taking the iv as argument)
- firOpBuilder.createBlock(&region, {}, {loopVarType}, {loc});
-
- // Compute the value of the loop variable from the logical iteration number.
- mlir::Value natIterNum = fir::getBase(region.front().getArgument(0));
- mlir::Value scaled =
- mlir::arith::MulIOp::create(firOpBuilder, loc, natIterNum, loopStepVar);
- mlir::Value userVal =
- mlir::arith::AddIOp::create(firOpBuilder, loc, loopLBVar, scaled);
-
- // Write loop value to loop variable
- mlir::Operation *storeOp = setLoopVar(converter, loc, userVal, iv);
-
- firOpBuilder.setInsertionPointAfter(storeOp);
- return {iv};
- };
+ // Create the omp.canonical_loop operation
+ auto opGenInfo = OpWithBodyGenInfo(converter, symTable, semaCtx, loc, *eval,
+ llvm::omp::Directive::OMPD_unknown)
+ .setGenSkeletonOnly(!isInnermost)
+ .setClauses(&item->clauses)
+ .setPrivatize(false)
+ .setGenRegionEntryCb(ivCallback);
+ auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>(
+ std::move(opGenInfo), queue, item, tripcount, cli);
+ loops.push_back(canonLoop);
+
+ // Insert next loop nested inside last loop
+ firOpBuilder.setInsertionPoint(
+ canonLoop.getRegion().back().getTerminator());
+ }
- // Create the omp.canonical_loop operation
- auto canonLoop = genOpWithBody<mlir::omp::CanonicalLoopOp>(
- OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
- directive)
- .setClauses(&item->clauses)
- .setPrivatize(false)
- .setGenRegionEntryCb(ivCallback),
- queue, item, tripcount, cli);
+ firOpBuilder.setInsertionPointAfter(loops.front());
+}
+
+static void genTileOp(Fortran::lower::AbstractConverter &converter,
+ Fortran::lower::SymMap &symTable,
+ lower::StatementContext &stmtCtx,
+ Fortran::semantics::SemanticsContext &semaCtx,
+ Fortran::lower::pft::Evaluation &eval, mlir::Location loc,
+ const ConstructQueue &queue,
+ ConstructQueue::const_iterator item) {
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
- firOpBuilder.setInsertionPointAfter(canonLoop);
- return canonLoop;
+ mlir::omp::SizesClauseOps sizesClause;
+ ClauseProcessor cp(converter, semaCtx, item->clauses);
+ cp.processSizes(stmtCtx, sizesClause);
+
+ size_t numLoops = sizesClause.sizes.size();
+ llvm::SmallVector<mlir::omp::CanonicalLoopOp, 3> canonLoops;
+ canonLoops.reserve(numLoops);
+
+ genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item,
+ numLoops, canonLoops);
+ assert((canonLoops.size() == numLoops) &&
+ "Expecting the predetermined number of loops");
+
+ llvm::SmallVector<mlir::Value, 3> applyees;
+ applyees.reserve(numLoops);
+ for (mlir::omp::CanonicalLoopOp l : canonLoops)
+ applyees.push_back(l.getCli());
+
+ // Emit the associated loops and create a CLI for each affected loop
+ llvm::SmallVector<mlir::Value, 3> gridGeneratees;
+ llvm::SmallVector<mlir::Value, 3> intratileGeneratees;
+ gridGeneratees.reserve(numLoops);
+ intratileGeneratees.reserve(numLoops);
+ for ([[maybe_unused]] auto i : llvm::seq<int>(0, sizesClause.sizes.size())) {
+ auto gridCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+ gridGeneratees.push_back(gridCLI.getResult());
+ auto intratileCLI = firOpBuilder.create<mlir::omp::NewCliOp>(loc);
+ intratileGeneratees.push_back(intratileCLI.getResult());
+ }
+
+ llvm::SmallVector<mlir::Value, 6> generatees;
+ generatees.reserve(2 * numLoops);
+ generatees.append(gridGeneratees);
+ generatees.append(intratileGeneratees);
+
+ firOpBuilder.create<mlir::omp::TileOp>(loc, generatees, applyees,
+ sizesClause.sizes);
}
static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
@@ -2114,22 +2230,22 @@ static void genUnrollOp(Fortran::lower::AbstractConverter &converter,
ConstructQueue::const_iterator item) {
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
- mlir::omp::LoopRelatedClauseOps loopInfo;
- llvm::SmallVector<const semantics::Symbol *> iv;
- collectLoopRelatedInfo(converter, loc, eval, item->clauses, loopInfo, iv);
-
// Clauses for unrolling not yet implemnted
ClauseProcessor cp(converter, semaCtx, item->clauses);
cp.processTODO<clause::Partial, clause::Full>(
loc, llvm::omp::Directive::OMPD_unroll);
// Emit the associated loop
- auto canonLoop =
- genCanonicalLoopOp(converter, symTable, semaCtx, eval, loc, queue, item,
- iv, llvm::omp::Directive::OMPD_unroll);
+ llvm::SmallVector<mlir::omp::CanonicalLoopOp, 1> canonLoops;
+ genCanonicalLoopNest(converter, symTable, semaCtx, eval, loc, queue, item, 1,
+ canonLoops);
+
+ llvm::SmallVector<mlir::Value, 1> applyees;
+ for (auto &&canonLoop : canonLoops)
+ applyees.push_back(canonLoop.getCli());
// Apply unrolling to it
- auto cli = canonLoop.getCli();
+ auto cli = llvm::getSingleElement(canonLoops).getCli();
mlir::omp::UnrollHeuristicOp::create(firOpBuilder, loc, cli);
}
@@ -3362,13 +3478,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
newOp = genTeamsOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue,
item);
break;
- case llvm::omp::Directive::OMPD_tile: {
- unsigned version = semaCtx.langOptions().OpenMPVersion;
- if (!semaCtx.langOptions().OpenMPSimd)
- TODO(loc, "Unhandled loop directive (" +
- llvm::omp::getOpenMPDirectiveName(dir, version) + ")");
+ case llvm::omp::Directive::OMPD_tile:
+ genTileOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
break;
- }
case llvm::omp::Directive::OMPD_unroll:
genUnrollOp(converter, symTable, stmtCtx, semaCtx, eval, loc, queue, item);
break;
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 83b7ccb1ce0ee..4a392d4944fc8 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -667,6 +667,25 @@ int64_t collectLoopRelatedInfo(
numCollapse = collapseValue;
}
+ collectLoopRelatedInfo(converter, currentLocation, eval, numCollapse, result,
+ iv);
+ return numCollapse;
+}
+
+void collectLoopRelatedInfo(
+ lower::AbstractConverter &converter, mlir::Location currentLocation,
+ lower::pft::Evaluation &eval, int64_t numCollapse,
+ mlir::omp::LoopRelatedClauseOps &result,
+ llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
+
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+
+ // Collect the loops to collapse.
+ lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();
+ if (doConstructEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
+ TODO(currentLocation, "Do Concurrent in Worksharing loop construct");
+ }
+
// Collect sizes from tile directive if present.
std::int64_t sizesLengthValue = 0l;
if (auto *ompCons{eval.getIf<parser::OpenMPConstruct>()}) {
@@ -676,7 +695,7 @@ int64_t collectLoopRelatedInfo(
});
}
- collapseValue = std::max(collapseValue, sizesLengthValue);
+ std::int64_t collapseValue = std::max(numCollapse, sizesLengthValue);
std::size_t loopVarTypeSize = 0;
do {
lower::pft::Evaluation *doLoop =
@@ -709,8 +728,6 @@ int64_t collectLoopRelatedInfo(
} while (collapseValue > 0);
convertLoopBounds(converter, currentLocation, result, loopVarTypeSize);
-
- return numCollapse;
}
} // namespace omp
diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h
index 5f191d89ae205..69499f9c7b621 100644
--- a/flang/lib/Lower/OpenMP/Utils.h
+++ b/flang/lib/Lower/OpenMP/Utils.h
@@ -165,6 +165,13 @@ int64_t collectLoopRelatedInfo(
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+void collectLoopRelatedInfo(
+ lower::AbstractConverter &converter, mlir::Location currentLocation,
+ lower::pft::Evaluation &eval, std::int64_t collapseValue,
+ // const omp::List<omp::Clause> &clauses,
+ mlir::omp::LoopRelatedClauseOps &result,
+ llvm::SmallVectorImpl<const semantics::Symbol *> &iv);
+
void collectTileSizesFromOpenMPConstruct(
const parser::OpenMPConstruct *ompCons,
llvm::SmallVectorImpl<int64_t> &tileSizes,
diff --git a/flang/lib/Semantics/check-directive-structure.h b/flang/lib/Semantics/check-directive-structure.h
index b1bf3e550aebc..bd78d3cfe91e7 100644
--- a/flang/lib/Semantics/check-directive-structure.h
+++ b/flang/lib/Semantics/check-directive-structure.h
@@ -383,7 +383,8 @@ class DirectiveStructureChecker : public virtual BaseChecker {
const C &clause, const parser::ScalarIntConstantExpr &i);
void RequiresPositiveParameter(const C &clause,
- const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter");
+ const parser::ScalarIntExpr &i, llvm::StringRef paramName = "parameter",
+ bool allowZero = true);
void OptionalConstantPositiveParameter(
const C &clause, const std::optional<parser::ScalarIntConstantExpr> &o);
@@ -657,9 +658,9 @@ void DirectiveStructureChecker<D, C, PC, ClauseEnumSize>::SayNotMatching(
template <typename D, typename C, typename PC, std::size_t ClauseEnumSize>
void DirectiveStructureChecker<D, C, PC,
ClauseEnumSize>::RequiresPositiveParameter(const C &clause,
- const parser::ScalarIntExpr &i, llvm::StringRef paramName) {
+ const parser::ScalarIntExpr &i, llvm::StringRef paramName, bool allowZero) {
if (const auto v{GetIntValue(i)}) {
- if (*v < 0) {
+ if (*v < (allowZero ? 0 : 1)) {
context_.Say(GetContext().clauseSource,
"The %s of the %s clause must be "
"a positive integer expression"_err_en_US,
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index c39daef6b0ea9..ab182c7674062 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -2749,6 +2749,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause &x) {
}
}
+void OmpStructureChecker::Enter(const parser::OmpClause::Sizes &c) {
+ CheckAllowedClause(llvm::omp::Clause::OMPC_sizes);
+ for (const parser::Cosubscript &v : c.v)
+ RequiresPositiveParameter(llvm::omp::Clause::OMPC_sizes, v,
+ /*paramName=*/"parameter", /*allowZero=*/false);
+}
+
// Following clauses do not have a separate node in parse-tree.h.
CHECK_SIMPLE_CLAUSE(Absent, OMPC_absent)
CHECK_SIMPLE_CLAUSE(Affinity, OMPC_affinity)
@@ -2790,7 +2797,6 @@ CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch)
CHECK_SIMPLE_CLAUSE(Partial, OMPC_partial)
CHECK_SIMPLE_CLAUSE(ProcBind, OMPC_proc_bind)
CHECK_SIMPLE_CLAUSE(Simd, OMPC_simd)
-CHECK_SIMPLE_CLAUSE(Sizes, OMPC_sizes)
CHECK_SIMPLE_CLAUSE(Permutation, OMPC_permutation)
CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform)
CHECK_SIMPLE_CLAUSE(Unknown, OMPC_unknown)
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 5f2c9f676099c..2fa6cbc5d8167 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -2392,10 +2392,18 @@ void OmpAttributeVisitor::PrivatizeAssociatedLoopIndexAndCheckLoopLevel(
void OmpAttributeVisitor::CheckAssocLoopLevel(
std::int64_t level, const parser::OmpClause *clause) {
if (clause && level != 0) {
- context_.Say(clause->source,
- "The value of the parameter in the COLLAPSE or ORDERED clause must"
- " not be larger than the number of nested loops"
- " following the construct."_err_en_US);
+ switch (clause->Id()) {
+ case llvm::omp::OMPC_sizes:
+ context_.Say(clause->source,
+ "The SIZES clause has more entries than there are nested canonical loops."_err_en_US);
+ break;
+ default:
+ context_.Say(clause->source,
+ "The value of the parameter in the COLLAPSE or ORDERED clause must"
+ " not be larger than the number of nested loops"
+ " following the construct."_err_en_US);
+ break;
+ }
}
}
diff --git a/flang/test/Lower/OpenMP/tile01.f90 b/flang/test/Lower/OpenMP/tile01.f90
new file mode 100644
index 0000000000000..7603eee4b18d8
--- /dev/null
+++ b/flang/test/Lower/OpenMP/tile01.f90
@@ -0,0 +1,58 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s
+
+
+subroutine omp_tile01(lb, ub, inc)
+ integer res, i, lb, ub, inc
+
+ !$omp tile sizes(4)
+ do i = lb, ub, inc
+ res = i
+ end do
+ !$omp end tile
+
+end subroutine omp_tile01
+
+
+! CHECK: func.func @_QPomp_tile01(
+! CHECK: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile01Ei"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile01Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile01Eres"}
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFomp_tile01Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile01Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 4 : i32
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_13:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_14:.*]] = arith.cmpi slt, %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK: %[[VAL_15:.*]] = arith.subi %[[VAL_12]], %[[VAL_11]] : i32
+! CHECK: %[[VAL_16:.*]] = arith.select %[[VAL_14]], %[[VAL_15]], %[[VAL_11]] : i32
+! CHECK: %[[VAL_17:.*]] = arith.select %[[VAL_14]], %[[VAL_10]], %[[VAL_9]] : i32
+! CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_14]], %[[VAL_9]], %[[VAL_10]] : i32
+! CHECK: %[[VAL_19:.*]] = arith.subi %[[VAL_18]], %[[VAL_17]] overflow<nuw> : i32
+! CHECK: %[[VAL_20:.*]] = arith.divui %[[VAL_19]], %[[VAL_16]] : i32
+! CHECK: %[[VAL_21:.*]] = arith.addi %[[VAL_20]], %[[VAL_13]] overflow<nuw> : i32
+! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_17]] : i32
+! CHECK: %[[VAL_23:.*]] = arith.select %[[VAL_22]], %[[VAL_12]], %[[VAL_21]] : i32
+! CHECK: %[[VAL_24:.*]] = omp.new_cli
+! CHECK: omp.canonical_loop(%[[VAL_24]]) %[[VAL_25:.*]] : i32 in range(%[[VAL_23]]) {
+! CHECK: %[[VAL_26:.*]] = arith.muli %[[VAL_25]], %[[VAL_11]] : i32
+! CHECK: %[[VAL_27:.*]] = arith.addi %[[VAL_9]], %[[VAL_26]] : i32
+! CHECK: hlfir.assign %[[VAL_27]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_6]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: %[[VAL_29:.*]] = omp.new_cli
+! CHECK: %[[VAL_30:.*]] = omp.new_cli
+! CHECK: omp.tile (%[[VAL_29]], %[[VAL_30]]) <- (%[[VAL_24]]) sizes(%[[VAL_8]] : i32)
+! CHECK: return
+! CHECK: }
+
diff --git a/flang/test/Lower/OpenMP/tile02.f90 b/flang/test/Lower/OpenMP/tile02.f90
new file mode 100644
index 0000000000000..5df506d17ed05
--- /dev/null
+++ b/flang/test/Lower/OpenMP/tile02.f90
@@ -0,0 +1,88 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=51 -o - %s | FileCheck %s
+
+
+subroutine omp_tile02(lb, ub, inc)
+ integer res, i, lb, ub, inc
+
+ !$omp tile sizes(3,7)
+ do i = lb, ub, inc
+ do j = lb, ub, inc
+ res = i + j
+ end do
+ end do
+ !$omp end tile
+
+end subroutine omp_tile02
+
+
+! CHECK: func.func @_QPomp_tile02(
+! CHECK: %[[ARG0:.*]]: !fir.ref<i32> {fir.bindc_name = "lb"},
+! CHECK: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "ub"},
+! CHECK: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "inc"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_tile02Ei"}
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFomp_tile02Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Einc"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFomp_tile02Ej"}
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFomp_tile02Ej"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Elb"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "res", uniq_name = "_QFomp_tile02Eres"}
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFomp_tile02Eres"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QFomp_tile02Eub"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_10:.*]] = arith.constant 3 : i32
+! CHECK: %[[VAL_11:.*]] = arith.constant 7 : i32
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_17:.*]] = arith.cmpi slt, %[[VAL_14]], %[[VAL_15]] : i32
+! CHECK: %[[VAL_18:.*]] = arith.subi %[[VAL_15]], %[[VAL_14]] : i32
+! CHECK: %[[VAL_19:.*]] = arith.select %[[VAL_17]], %[[VAL_18]], %[[VAL_14]] : i32
+! CHECK: %[[VAL_20:.*]] = arith.select %[[VAL_17]], %[[VAL_13]], %[[VAL_12]] : i32
+! CHECK: %[[VAL_21:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_13]] : i32
+! CHECK: %[[VAL_22:.*]] = arith.subi %[[VAL_21]], %[[VAL_20]] overflow<nuw> : i32
+! CHECK: %[[VAL_23:.*]] = arith.divui %[[VAL_22]], %[[VAL_19]] : i32
+! CHECK: %[[VAL_24:.*]] = arith.addi %[[VAL_23]], %[[VAL_16]] overflow<nuw> : i32
+! CHECK: %[[VAL_25:.*]] = arith.cmpi slt, %[[VAL_21]], %[[VAL_20]] : i32
+! CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_25]], %[[VAL_15]], %[[VAL_24]] : i32
+! CHECK: %[[VAL_27:.*]] = omp.new_cli
+! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_29:.*]] = fir.load %[[VAL_9]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_31:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_32:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_33:.*]] = arith.cmpi slt, %[[VAL_30]], %[[VAL_31]] : i32
+! CHECK: %[[VAL_34:.*]] = arith.subi %[[VAL_31]], %[[VAL_30]] : i32
+! CHECK: %[[VAL_35:.*]] = arith.select %[[VAL_33]], %[[VAL_34]], %[[VAL_30]] : i32
+! CHECK: %[[VAL_36:.*]] = arith.select %[[VAL_33]], %[[VAL_29]], %[[VAL_28]] : i32
+! CHECK: %[[VAL_37:.*]] = arith.select %[[VAL_33]], %[[VAL_28]], %[[VAL_29]] : i32
+! CHECK: %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_36]] overflow<nuw> : i32
+! CHECK: %[[VAL_39:.*]] = arith.divui %[[VAL_38]], %[[VAL_35]] : i32
+! CHECK: %[[VAL_40:.*]] = arith.addi %[[VAL_39]], %[[VAL_32]] overflow<nuw> : i32
+! CHECK: %[[VAL_41:.*]] = arith.cmpi slt, %[[VAL_37]], %[[VAL_36]] : i32
+! CHECK: %[[VAL_42:.*]] = arith.select %[[VAL_41]], %[[VAL_31]], %[[VAL_40]] : i32
+! CHECK: %[[VAL_43:.*]] = omp.new_cli
+! CHECK: omp.canonical_loop(%[[VAL_27]]) %[[VAL_44:.*]] : i32 in range(%[[VAL_26]]) {
+! CHECK: omp.canonical_loop(%[[VAL_43]]) %[[VAL_45:.*]] : i32 in range(%[[VAL_42]]) {
+! CHECK: %[[VAL_46:.*]] = arith.muli %[[VAL_44]], %[[VAL_14]] : i32
+! CHECK: %[[VAL_47:.*]] = arith.addi %[[VAL_12]], %[[VAL_46]] : i32
+! CHECK: hlfir.assign %[[VAL_47]] to %[[VAL_2]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_48:.*]] = arith.muli %[[VAL_45]], %[[VAL_30]] : i32
+! CHECK: %[[VAL_49:.*]] = arith.addi %[[VAL_28]], %[[VAL_48]] : i32
+! CHECK: hlfir.assign %[[VAL_49]] to %[[VAL_5]]#0 : i32, !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_51:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_52:.*]] = arith.addi %[[VAL_50]], %[[VAL_51]] : i32
+! CHECK: hlfir.assign %[[VAL_52]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: %[[VAL_53:.*]] = omp.new_cli
+! CHECK: %[[VAL_54:.*]] = omp.new_cli
+! CHECK: %[[VAL_55:.*]] = omp.new_cli
+! CHECK: %[[VAL_56:.*]] = omp.new_cli
+! CHECK: omp.tile (%[[VAL_53]], %[[VAL_55]], %[[VAL_54]], %[[VAL_56]]) <- (%[[VAL_27]], %[[VAL_43]]) sizes(%[[VAL_10]], %[[VAL_11]] : i32, i32)
+! CHECK: return
+! CHECK: }
diff --git a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90 b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
index a6af35a0111a3..a876c77a274b5 100644
--- a/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
+++ b/flang/test/Parser/OpenMP/loop-transformation-construct02.f90
@@ -11,7 +11,7 @@ subroutine loop_transformation_construct
!$omp do
!$omp unroll
- !$omp tile
+ !$omp tile sizes(2)
do i = 1, I
y(i) = y(i) * 5
end do
@@ -34,7 +34,8 @@ subroutine loop_transformation_construct
!CHECK-PARSE-NEXT: | | | | OpenMPLoopConstruct
!CHECK-PARSE-NEXT: | | | | | OmpBeginLoopDirective
!CHECK-PARSE-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = tile
-!CHECK-PARSE-NEXT: | | | | | | OmpClauseList ->
+!CHECK-PARSE-NEXT: | | | | | | OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!CHECK-PARSE-NEXT: | | | | | | | LiteralConstant -> IntLiteralConstant = '2'
!CHECK-PARSE-NEXT: | | | | | | Flags = None
!CHECK-PARSE-NEXT: | | | | | DoConstruct
!CHECK-PARSE-NEXT: | | | | | | NonLabelDoStmt
diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 b/flang/test/Parser/OpenMP/tile-fail.f90
new file mode 100644
index 0000000000000..267ed0ad48437
--- /dev/null
+++ b/flang/test/Parser/OpenMP/tile-fail.f90
@@ -0,0 +1,32 @@
+! RUN: split-file %s %t
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90
+
+
+!--- stray_end1.f90
+! Parser error
+
+subroutine stray_end1
+ !CHECK: error: expected OpenMP construct
+ !$omp end tile
+end subroutine
+
+
+!--- stray_end2.f90
+! Semantic error
+
+subroutine stray_end2
+ print *
+ !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct
+ !$omp end tile
+end subroutine
+
+
+!--- stray_begin.f90
+
+subroutine stray_begin
+ !CHECK: error: A DO loop must follow the TILE directive
+ !$omp tile sizes(2)
+end subroutine
+
diff --git a/flang/test/Parser/OpenMP/tile.f90 b/flang/test/Parser/OpenMP/tile.f90
index 2ea17471866a4..82004fd37a0f2 100644
--- a/flang/test/Parser/OpenMP/tile.f90
+++ b/flang/test/Parser/OpenMP/tile.f90
@@ -1,12 +1,12 @@
-! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s
-! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s
subroutine openmp_tiles(x)
integer, intent(inout)::x
-!CHECK: !$omp tile
-!$omp tile
+!CHECK: !$omp tile sizes(2_4)
+!$omp tile sizes(2)
!CHECK: do
do x = 1, 100
call F1()
@@ -17,7 +17,12 @@ subroutine openmp_tiles(x)
!PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct
!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE: OmpClauseList -> OmpClause -> Sizes -> Scalar -> Integer -> Expr = '2_4'
+!PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2'
+!PARSE-TREE: Flags = None
+!PARSE-TREE: DoConstruct
+!PARSE-TREE: EndDoStmt
+!PARSE-TREE: OmpEndLoopDirective
!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = tile
END subroutine openmp_tiles
-
diff --git a/flang/test/Semantics/OpenMP/tile01.f90 b/flang/test/Semantics/OpenMP/tile01.f90
new file mode 100644
index 0000000000000..3d7b3f4f42e92
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile01.f90
@@ -0,0 +1,26 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine missing_sizes
+ implicit none
+ integer i
+
+ !ERROR: At least one of SIZES clause must appear on the TILE directive
+ !$omp tile
+ do i = 1, 42
+ print *, i
+ end do
+end subroutine
+
+
+subroutine double_sizes
+ implicit none
+ integer i
+
+ !ERROR: At most one SIZES clause can appear on the TILE directive
+ !$omp tile sizes(2) sizes(2)
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile02.f90 b/flang/test/Semantics/OpenMP/tile02.f90
new file mode 100644
index 0000000000000..676796375353f
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile02.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine on_unroll
+ implicit none
+ integer i
+
+ !ERROR: If a loop construct has been fully unrolled, it cannot then be tiled
+ !$omp tile sizes(2)
+ !$omp unroll
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile03.f90 b/flang/test/Semantics/OpenMP/tile03.f90
new file mode 100644
index 0000000000000..e5c134638ac8d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile03.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine loop_assoc
+ implicit none
+ integer :: i = 0
+
+ !$omp tile sizes(2)
+ !ERROR: The associated loop of a loop-associated directive cannot be a DO WHILE.
+ do while (i <= 10)
+ i = i + 1
+ print *, i
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile04.f90 b/flang/test/Semantics/OpenMP/tile04.f90
new file mode 100644
index 0000000000000..2b503efbcf52b
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile04.f90
@@ -0,0 +1,38 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine threads_zero
+ implicit none
+ integer i
+
+ !ERROR: The parameter of the NUM_THREADS clause must be a positive integer expression
+ !$omp parallel do num_threads(-1)
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
+
+
+subroutine sizes_zero
+ implicit none
+ integer i
+
+ !ERROR: The parameter of the SIZES clause must be a positive integer expression
+ !$omp tile sizes(0)
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
+
+
+subroutine sizes_negative
+ implicit none
+ integer i
+
+ !ERROR: The parameter of the SIZES clause must be a positive integer expression
+ !$omp tile sizes(-1)
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile05.f90 b/flang/test/Semantics/OpenMP/tile05.f90
new file mode 100644
index 0000000000000..70c43811a5832
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile05.f90
@@ -0,0 +1,14 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine insufficient_loops
+ implicit none
+ integer i
+
+ !ERROR: The SIZES clause has more entries than there are nested canonical loops.
+ !$omp tile sizes(2, 2)
+ do i = 1, 5
+ print *, i
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile06.f90 b/flang/test/Semantics/OpenMP/tile06.f90
new file mode 100644
index 0000000000000..52518d43f0554
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile06.f90
@@ -0,0 +1,44 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine nonrectangular_loop_lb
+ implicit none
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp tile sizes(2,2)
+ do i = 1, 5
+ do j = i, 42 ! lower bound depends on the outer loop's induction variable
+ print *, i, j
+ end do
+ end do
+end subroutine
+
+
+subroutine nonrectangular_loop_ub
+ implicit none
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp tile sizes(2,2)
+ do i = 1, 5
+ do j = 1, i
+ print *, i, j
+ end do
+ end do
+end subroutine
+
+
+subroutine nonrectangular_loop_step
+ implicit none
+ integer i, j
+
+ !ERROR: Trip count must be computable and invariant
+ !$omp tile sizes(2,2)
+ do i = 1, 5
+ do j = 1, 42, i
+ print *, i, j
+ end do
+ end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/tile07.f90 b/flang/test/Semantics/OpenMP/tile07.f90
new file mode 100644
index 0000000000000..70a6f5fc529a4
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile07.f90
@@ -0,0 +1,35 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine non_perfectly_nested_loop_behind
+ implicit none
+ integer i, j
+
+ !ERROR: Canonical loop nest must be perfectly nested.
+ !$omp tile sizes(2,2)
+ do i = 1, 5
+ do j = 1, 42
+ print *, j
+ end do
+ print *, i
+ end do
+end subroutine
+
+
+subroutine non_perfectly_nested_loop_before
+ implicit none
+ integer i, j
+
+ !ERROR: The SIZES clause has more entries than there are nested canonical loops.
+ !$omp tile sizes(2,2)
+ do i = 1, 5
+ print *, i
+ do j = 1, 42
+ print *, j
+ end do
+ end do
+end subroutine
+
+
+
diff --git a/flang/test/Semantics/OpenMP/tile08.f90 b/flang/test/Semantics/OpenMP/tile08.f90
new file mode 100644
index 0000000000000..f42805cb81b7d
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/tile08.f90
@@ -0,0 +1,15 @@
+! Testing the Semantics of tile
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+
+
+subroutine do_concurrent
+ implicit none
+ integer i, j
+
+
+ !$omp tile sizes(2,2)
+ !ERROR: DO CONCURRENT loops cannot form part of a loop nest.
+ do concurrent (i = 1:42, j = 1:42)
+ print *, i, j
+ end do
+end subroutine
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 4d9b8f8a6c51e..2911b4c8df1b1 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -1324,6 +1324,9 @@ def OMP_Tile : Directive<[Spelling<"tile">]> {
let allowedOnceClauses = [
VersionedClause<OMPC_Sizes, 51>,
];
+ let requiredClauses = [
+ VersionedClause<OMPC_Sizes, 51>,
+ ];
let association = AS_Loop;
let category = CA_Executable;
}
diff --git a/openmp/runtime/test/transform/tile/intfor.f90 b/openmp/runtime/test/transform/tile/intfor.f90
new file mode 100644
index 0000000000000..dac0de6a99021
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor.f90
@@ -0,0 +1,31 @@
+! This test checks lowering of the OpenMP tile directive
+! It is done 3 times corresponding to every possible fraction of the last
+! iteration before passing beyond UB.
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=16 %s -o %t-ub16.exe
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=17 %s -o %t-ub17.exe
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 -DUB=18 %s -o %t-ub18.exe
+! RUN: %t-ub16.exe | FileCheck %s --match-full-lines
+! RUN: %t-ub17.exe | FileCheck %s --match-full-lines
+! RUN: %t-ub18.exe | FileCheck %s --match-full-lines
+
+program tile_intfor_1d
+ integer i
+ print *, 'do'
+
+ !$OMP TILE SIZES(2)
+ do i=7, UB, 3
+ print '("i=", I0)', i
+ end do
+ !$OMP END TILE
+
+ print *, 'done'
+end program
+
+
+! CHECK: do
+! CHECK-NEXT: i=7
+! CHECK-NEXT: i=10
+! CHECK-NEXT: i=13
+! CHECK-NEXT: i=16
+! CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/intfor_2d.f90 b/openmp/runtime/test/transform/tile/intfor_2d.f90
new file mode 100644
index 0000000000000..6bc90c768b8d3
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor_2d.f90
@@ -0,0 +1,53 @@
+! This test checks lowering of OpenMP tile directive
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+
+program tile_intfor_2d
+ integer i, j
+ print *, 'do'
+
+ !$OMP TILE SIZES(2,3)
+ do i = 7, 16, 3
+ do j = 0, 4
+ print '("i=", I0," j=", I0)', i, j
+ end do
+ end do
+ !$OMP END TILE
+
+ print *, 'done'
+end program
+
+
+! CHECK: do
+
+! complete tile
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=10 j=2
+
+! partial tile
+! CHECK-NEXT: i=7 j=3
+! CHECK-NEXT: i=7 j=4
+! CHECK-NEXT: i=10 j=3
+! CHECK-NEXT: i=10 j=4
+
+! complete tile
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=16 j=2
+
+! partial tile
+! CHECK-NEXT: i=13 j=3
+! CHECK-NEXT: i=13 j=4
+! CHECK-NEXT: i=16 j=3
+! CHECK-NEXT: i=16 j=4
+
+! CHECK-NEXT: done
diff --git a/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90 b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90
new file mode 100644
index 0000000000000..4cb5adf606dd2
--- /dev/null
+++ b/openmp/runtime/test/transform/tile/intfor_2d_varsizes.F90
@@ -0,0 +1,60 @@
+! This test checks lowering of OpenMP tile directive
+
+! RUN: %flang %flags %openmp_flags -fopenmp-version=51 %s -o %t.exe
+! RUN: %t.exe | FileCheck %s --match-full-lines
+
+program tile_intfor_varsizes
+ integer i
+
+ call kernel(7,17,3,2)
+ call kernel(7,17,3,3)
+
+end program
+
+
+subroutine kernel(lb, ub, step, ts)
+ integer i, j, lb, ub, step, ts
+
+ print *, 'do'
+
+ !$OMP TILE SIZES(ts,ts)
+ do i = lb, ub, step
+ do j = 0, 2
+ print '("i=", I0," j=", I0)', i, j
+ end do
+ end do
+ !$OMP END TILE
+
+ print *, 'done'
+
+end subroutine
+
+! CHECK: do
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=2
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=2
+! CHECK-NEXT: done
+
+! CHECK: do
+! CHECK-NEXT: i=7 j=0
+! CHECK-NEXT: i=7 j=1
+! CHECK-NEXT: i=7 j=2
+! CHECK-NEXT: i=10 j=0
+! CHECK-NEXT: i=10 j=1
+! CHECK-NEXT: i=10 j=2
+! CHECK-NEXT: i=13 j=0
+! CHECK-NEXT: i=13 j=1
+! CHECK-NEXT: i=13 j=2
+! CHECK-NEXT: i=16 j=0
+! CHECK-NEXT: i=16 j=1
+! CHECK-NEXT: i=16 j=2
+! CHECK-NEXT: done
>From e7c0c5a1d64797acaddbbca50927545868f7256c Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 16:03:43 +0200
Subject: [PATCH 06/12] dos2unix
---
flang/test/Parser/OpenMP/tile-fail.f90 | 64 +++++++++++++-------------
1 file changed, 32 insertions(+), 32 deletions(-)
diff --git a/flang/test/Parser/OpenMP/tile-fail.f90 b/flang/test/Parser/OpenMP/tile-fail.f90
index 267ed0ad48437..0a92e5bcb6570 100644
--- a/flang/test/Parser/OpenMP/tile-fail.f90
+++ b/flang/test/Parser/OpenMP/tile-fail.f90
@@ -1,32 +1,32 @@
-! RUN: split-file %s %t
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90
-! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90
-
-
-!--- stray_end1.f90
-! Parser error
-
-subroutine stray_end1
- !CHECK: error: expected OpenMP construct
- !$omp end tile
-end subroutine
-
-
-!--- stray_end2.f90
-! Semantic error
-
-subroutine stray_end2
- print *
- !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct
- !$omp end tile
-end subroutine
-
-
-!--- stray_begin.f90
-
-subroutine stray_begin
- !CHECK: error: A DO loop must follow the TILE directive
- !$omp tile sizes(2)
-end subroutine
-
+! RUN: split-file %s %t
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end1.f90 2>&1 | FileCheck %t/stray_end1.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_end2.f90 2>&1 | FileCheck %t/stray_end2.f90
+! RUN: not %flang_fc1 -fsyntax-only -fopenmp %t/stray_begin.f90 2>&1 | FileCheck %t/stray_begin.f90
+
+
+!--- stray_end1.f90
+! Parser error
+
+subroutine stray_end1
+ !CHECK: error: expected OpenMP construct
+ !$omp end tile
+end subroutine
+
+
+!--- stray_end2.f90
+! Semantic error
+
+subroutine stray_end2
+ print *
+ !CHECK: error: The END TILE directive must follow the DO loop associated with the loop construct
+ !$omp end tile
+end subroutine
+
+
+!--- stray_begin.f90
+
+subroutine stray_begin
+ !CHECK: error: A DO loop must follow the TILE directive
+ !$omp tile sizes(2)
+end subroutine
+
>From 493442453cbac2366aa89abde361f7badaad4948 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 23 Sep 2025 16:39:44 +0200
Subject: [PATCH 07/12] Fix symbol resolution
---
flang/lib/Semantics/resolve-directives.cpp | 48 ++++++++++------------
1 file changed, 22 insertions(+), 26 deletions(-)
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 5f2c9f676099c..a0109324e546c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1975,7 +1975,10 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
}
}
}
+
+ // Must be done before iv privatization
CheckPerfectNestAndRectangularLoop(x);
+
PrivatizeAssociatedLoopIndexAndCheckLoopLevel(x);
ordCollapseLevel = GetNumAffectedLoopsFromLoopConstruct(x) + 1;
return true;
@@ -2172,37 +2175,29 @@ void OmpAttributeVisitor::CollectNumAffectedLoopsFromClauses(
}
void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
- const parser::OpenMPLoopConstruct
- &x) { // GetAssociatedLoopLevelFromClauses(clauseList);
- auto &&dirContext = GetContext();
+ const parser::OpenMPLoopConstruct &x) {
+ auto &dirContext = GetContext();
std::int64_t dirDepth{dirContext.associatedLoopLevel};
if (dirDepth <= 0)
return;
- Symbol::Flag ivDSA;
- if (!llvm::omp::allSimdSet.test(GetContext().directive)) {
- ivDSA = Symbol::Flag::OmpPrivate;
- } else if (dirDepth == 1) {
- ivDSA = Symbol::Flag::OmpLinear;
- } else {
- ivDSA = Symbol::Flag::OmpLastPrivate;
- }
-
auto checkExprHasSymbols = [&](llvm::SmallVector<Symbol *> &ivs,
const parser::ScalarExpr *bound) {
if (ivs.empty())
return;
-
- if (auto boundExpr{semantics::AnalyzeExpr(context_, *bound)}) {
- semantics::UnorderedSymbolSet boundSyms =
- evaluate::CollectSymbols(*boundExpr);
- for (auto iv : ivs) {
- if (boundSyms.count(*iv) != 0) {
- // TODO: Point to occurence of iv in boundExpr, directiveSource as a
- // note
- context_.Say(dirContext.directiveSource,
- "Trip count must be computable and invariant"_err_en_US);
- }
+ auto boundExpr{semantics::AnalyzeExpr(context_, *bound)};
+ if (!boundExpr)
+ return;
+ semantics::UnorderedSymbolSet boundSyms =
+ evaluate::CollectSymbols(*boundExpr);
+ if (boundSyms.empty())
+ return;
+ for (Symbol *iv : ivs) {
+ if (boundSyms.count(*iv) != 0) {
+ // TODO: Point to occurrence of iv in boundExpr, directiveSource as a
+ // note
+ context_.Say(dirContext.directiveSource,
+ "Trip count must be computable and invariant"_err_en_US);
}
}
};
@@ -2217,8 +2212,9 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
std::get_if<common::Indirection<parser::OpenMPLoopConstruct>>(
innerMostNest)}) {
innerMostLoop = &(innerLoop->value());
- } else
+ } else {
break;
+ }
}
if (!innerMostNest)
@@ -2228,7 +2224,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
return;
llvm::SmallVector<Symbol *> ivs;
- int curLevel = 0;
+ int curLevel{0};
const parser::DoConstruct *loop{outer};
while (true) {
auto [iv, lb, ub, step] = GetLoopBounds(*loop);
@@ -2240,7 +2236,7 @@ void OmpAttributeVisitor::CheckPerfectNestAndRectangularLoop(
if (step)
checkExprHasSymbols(ivs, step);
if (iv) {
- if (auto *symbol{ResolveOmp(*iv, ivDSA, currScope())})
+ if (auto *symbol{currScope().FindSymbol(iv->source)})
ivs.push_back(symbol);
}
>From 01a056f711c139ee0f1e26b19cf7513701f52992 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Wed, 24 Sep 2025 12:01:39 +0200
Subject: [PATCH 08/12] avoid compiler warning
---
flang/lib/Lower/OpenMP/Utils.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 4a392d4944fc8..29cccbd1bfe5a 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -652,7 +652,6 @@ int64_t collectLoopRelatedInfo(
mlir::omp::LoopRelatedClauseOps &result,
llvm::SmallVectorImpl<const semantics::Symbol *> &iv) {
int64_t numCollapse = 1;
- fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
// Collect the loops to collapse.
lower::pft::Evaluation *doConstructEval = &eval.getFirstNestedEvaluation();
>From 7655a11ac10f0cca877e56a73e2948a265db00ab Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Wed, 24 Sep 2025 12:45:18 +0200
Subject: [PATCH 09/12] Avoid structured binding capture to appease compiler
---
mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index d3cc7e55ae155..f681b0346f489 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -3300,6 +3300,9 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
Value result = getResult();
auto [newCli, gen, cons] = decodeCli(result);
+ // Structured binding `gen` cannot be captured in lambdas before C++20
+ OpOperand *generator = gen;
+
// Derive the CLI variable name from its generator:
// * "canonloop" for omp.canonical_loop
// * custom name for loop transformation generatees
@@ -3324,7 +3327,7 @@ void NewCliOp::getAsmResultNames(OpAsmSetValueNameFn setNameFn) {
unsigned firstGrid = generateesFirst;
unsigned firstIntratile = generateesFirst + generateesCount / 2;
unsigned end = generateesFirst + generateesCount;
- unsigned opnum = gen->getOperandNumber();
+ unsigned opnum = generator->getOperandNumber();
// In the OpenMP apply and looprange clauses, indices are 1-based
if (firstGrid <= opnum && opnum < firstIntratile) {
unsigned gridnum = opnum - firstGrid + 1;
>From f078a8e81f28f88083d0156b91414fddd5a13e8a Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Wed, 1 Oct 2025 11:23:52 +0200
Subject: [PATCH 10/12] regexify varnames
---
.../test/Target/LLVMIR/openmp-cli-tile01.mlir | 127 ++++-----
.../test/Target/LLVMIR/openmp-cli-tile02.mlir | 262 +++++++++---------
2 files changed, 188 insertions(+), 201 deletions(-)
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
index 4ac4f02103e8c..0d559b69a3ad1 100644
--- a/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir
@@ -1,5 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
-
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () {
@@ -15,87 +14,81 @@ llvm.func @tile_trivial_loop(%baseptr: !llvm.ptr, %tc: i32, %ts: i32) -> () {
}
-// CHECK: ; ModuleID = 'LLVMDialectModule'
-// CHECK-NEXT: source_filename = "LLVMDialectModule"
-// CHECK-EMPTY:
-// CHECK-NEXT: define void @tile_trivial_loop(ptr %0, i32 %1, i32 %2) {
-// CHECK-NEXT: br label %omp_omp.loop.preheader
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %3
-// CHECK-NEXT: %4 = udiv i32 %1, %2
-// CHECK-NEXT: %5 = urem i32 %1, %2
-// CHECK-NEXT: %6 = icmp ne i32 %5, 0
-// CHECK-NEXT: %7 = zext i1 %6 to i32
-// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %4, %7
-// CHECK-NEXT: br label %omp_floor0.preheader
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
-// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-LABEL: define void @tile_trivial_loop(
+// CHECK-SAME: ptr %[[TMP0:.+]], i32 %[[TMP1:.+]], i32 %[[TMP2:.+]]) {
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
-// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
-// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]:
+// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[TMP1]], %[[TMP2]]
+// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[TMP1]], %[[TMP2]]
+// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0
+// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32
+// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]]
+// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
-// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
-// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
-// CHECK-NEXT: %8 = icmp eq i32 %omp_floor0.iv, %4
-// CHECK-NEXT: %9 = select i1 %8, i32 %5, i32 %2
-// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor0.body
-// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]]
+// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
-// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
-// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
+// CHECK-NEXT: %[[TMP8:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV]], %[[TMP4]]
+// CHECK-NEXT: %[[TMP9:.+]] = select i1 %[[TMP8]], i32 %[[TMP5]], i32 %[[TMP2]]
+// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
-// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %9
-// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
-// CHECK-NEXT: %10 = mul nuw i32 %2, %omp_floor0.iv
-// CHECK-NEXT: %11 = add nuw i32 %10, %omp_tile0.iv
-// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
+// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile0.body
-// CHECK-NEXT: br label %omp.loop.region
+// CHECK-NEXT: [[OMP_TILE0_COND]]:
+// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP9]]
+// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
-// CHECK-NEXT: %12 = getelementptr inbounds float, ptr %0, i32 %11
-// CHECK-NEXT: store float 4.200000e+01, ptr %12, align 4
-// CHECK-NEXT: br label %omp.region.cont
+// CHECK-NEXT: [[OMP_TILE0_BODY]]:
+// CHECK-NEXT: %[[TMP10:.+]] = mul nuw i32 %[[TMP2]], %[[OMP_FLOOR0_IV]]
+// CHECK-NEXT: %[[TMP11:.+]] = add nuw i32 %[[TMP10]], %[[OMP_TILE0_IV]]
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp.region.cont: ; preds = %omp.loop.region
-// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]:
+// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.inc: ; preds = %omp.region.cont
-// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
-// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-NEXT: [[OMP_LOOP_REGION]]:
+// CHECK-NEXT: %[[TMP12:.+]] = getelementptr inbounds float, ptr %[[TMP0]], i32 %[[TMP11]]
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[TMP12]], align 4
+// CHECK-NEXT: br label %[[OMP_REGION_CONT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
-// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-NEXT: [[OMP_REGION_CONT]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_INC]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
-// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-NEXT: [[OMP_TILE0_INC]]:
+// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1
+// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_tile0.after
-// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
-// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
-// CHECK-NEXT: br label %omp_floor0.after
+// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
-// CHECK-NEXT: br label %omp_omp.loop.after
+// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1
+// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after
-// CHECK-NEXT: ret void
-// CHECK-NEXT: }
+// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: !llvm.module.flags = !{!0}
+// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
+// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
diff --git a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
index 6fad81cd0c299..22c2973164159 100644
--- a/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s --enable-var-scope
llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %ts2: i32) -> () {
@@ -19,172 +19,166 @@ llvm.func @tile_2d_loop(%baseptr: !llvm.ptr, %tc1: i32, %tc2: i32, %ts1: i32, %t
}
-// CHECK: ; ModuleID = 'LLVMDialectModule'
-// CHECK-NEXT: source_filename = "LLVMDialectModule"
+// CHECK-LABEL: define void @tile_2d_loop(
+// CHECK-SAME: ptr %[[TMP0:.+]], i32 %[[TMP1:.+]], i32 %[[TMP2:.+]], i32 %[[TMP3:.+]], i32 %[[TMP4:.+]]) {
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER]]:
+// CHECK-NEXT: %[[TMP6:.+]] = udiv i32 %[[TMP1:.+]], %[[TMP3:.+]]
+// CHECK-NEXT: %[[TMP7:.+]] = urem i32 %[[TMP1:.+]], %[[TMP3:.+]]
+// CHECK-NEXT: %[[TMP8:.+]] = icmp ne i32 %[[TMP7:.+]], 0
+// CHECK-NEXT: %[[TMP9:.+]] = zext i1 %[[TMP8:.+]] to i32
+// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP6:.+]], %[[TMP9:.+]]
+// CHECK-NEXT: %[[TMP10:.+]] = udiv i32 %[[TMP2:.+]], %[[TMP4:.+]]
+// CHECK-NEXT: %[[TMP11:.+]] = urem i32 %[[TMP2:.+]], %[[TMP4:.+]]
+// CHECK-NEXT: %[[TMP12:.+]] = icmp ne i32 %[[TMP11:.+]], 0
+// CHECK-NEXT: %[[TMP13:.+]] = zext i1 %[[TMP12:.+]] to i32
+// CHECK-NEXT: %[[OMP_FLOOR1_TRIPCOUNT:.+]] = add nuw i32 %[[TMP10:.+]], %[[TMP13:.+]]
+// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: [[OMP_OMP_LOOP_HEADER:.+]]:
+// CHECK-NEXT: %[[OMP_OMP_LOOP_IV:.+]] = phi i32 [ %[[OMP_OMP_LOOP_NEXT:.+]], %[[OMP_OMP_LOOP_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_COND:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: [[OMP_OMP_LOOP_COND]]:
+// CHECK-NEXT: %[[OMP_OMP_LOOP_CMP:.+]] = icmp ult i32 %[[TMP19:.+]], %[[TMP1:.+]]
+// CHECK-NEXT: br i1 %[[OMP_OMP_LOOP_CMP:.+]], label %[[OMP_OMP_LOOP_BODY:.+]], label %[[OMP_OMP_LOOP_EXIT:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: [[OMP_OMP_LOOP_BODY]]:
+// CHECK-NEXT: br label %[[OMP_LOOP_REGION:.+]]
+// CHECK-EMPTY:
+// CHECK-NEXT: [[OMP_LOOP_REGION]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_PREHEADER1:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: define void @tile_2d_loop(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4) {
-// CHECK-NEXT: br label %omp_omp.loop.preheader
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.preheader: ; preds = %5
-// CHECK-NEXT: %6 = udiv i32 %1, %3
-// CHECK-NEXT: %7 = urem i32 %1, %3
-// CHECK-NEXT: %8 = icmp ne i32 %7, 0
-// CHECK-NEXT: %9 = zext i1 %8 to i32
-// CHECK-NEXT: %omp_floor0.tripcount = add nuw i32 %6, %9
-// CHECK-NEXT: %10 = udiv i32 %2, %4
-// CHECK-NEXT: %11 = urem i32 %2, %4
-// CHECK-NEXT: %12 = icmp ne i32 %11, 0
-// CHECK-NEXT: %13 = zext i1 %12 to i32
-// CHECK-NEXT: %omp_floor1.tripcount = add nuw i32 %10, %13
-// CHECK-NEXT: br label %omp_floor0.preheader
+// CHECK-NEXT: [[OMP_OMP_LOOP_PREHEADER1]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY4:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.header: ; preds = %omp_omp.loop.inc
-// CHECK-NEXT: %omp_omp.loop.iv = phi i32 [ %omp_omp.loop.next, %omp_omp.loop.inc ]
-// CHECK-NEXT: br label %omp_omp.loop.cond
+// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.cond: ; preds = %omp_omp.loop.header
-// CHECK-NEXT: %omp_omp.loop.cmp = icmp ult i32 %19, %1
-// CHECK-NEXT: br i1 %omp_omp.loop.cmp, label %omp_omp.loop.body, label %omp_omp.loop.exit
+// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER:.+]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.body: ; preds = %omp_tile1.body, %omp_omp.loop.cond
-// CHECK-NEXT: br label %omp.loop.region
+// CHECK-NEXT: [[OMP_FLOOR0_COND]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV:.+]], %[[OMP_FLOOR0_TRIPCOUNT:.+]]
+// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP:.+]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp.loop.region: ; preds = %omp_omp.loop.body
-// CHECK-NEXT: br label %omp_omp.loop.preheader1
+// CHECK-NEXT: [[OMP_FLOOR0_BODY]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR1_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.preheader1: ; preds = %omp.loop.region
-// CHECK-NEXT: br label %omp_omp.loop.body4
+// CHECK-NEXT: [[OMP_FLOOR1_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR1_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.preheader: ; preds = %omp_omp.loop.preheader
-// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-NEXT: [[OMP_FLOOR1_HEADER]]:
+// CHECK-NEXT: %[[OMP_FLOOR1_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR1_PREHEADER:.+]] ], [ %[[OMP_FLOOR1_NEXT:.+]], %[[OMP_FLOOR1_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_FLOOR1_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.header: ; preds = %omp_floor0.inc, %omp_floor0.preheader
-// CHECK-NEXT: %omp_floor0.iv = phi i32 [ 0, %omp_floor0.preheader ], [ %omp_floor0.next, %omp_floor0.inc ]
-// CHECK-NEXT: br label %omp_floor0.cond
+// CHECK-NEXT: [[OMP_FLOOR1_COND]]:
+// CHECK-NEXT: %[[OMP_FLOOR1_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR1_IV:.+]], %[[OMP_FLOOR1_TRIPCOUNT:.+]]
+// CHECK-NEXT: br i1 %[[OMP_FLOOR1_CMP:.+]], label %[[OMP_FLOOR1_BODY:.+]], label %[[OMP_FLOOR1_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.cond: ; preds = %omp_floor0.header
-// CHECK-NEXT: %omp_floor0.cmp = icmp ult i32 %omp_floor0.iv, %omp_floor0.tripcount
-// CHECK-NEXT: br i1 %omp_floor0.cmp, label %omp_floor0.body, label %omp_floor0.exit
+// CHECK-NEXT: [[OMP_FLOOR1_BODY]]:
+// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV:.+]], %[[TMP6:.+]]
+// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14:.+]], i32 %[[TMP7:.+]], i32 %[[TMP3:.+]]
+// CHECK-NEXT: %[[TMP16:.+]] = icmp eq i32 %[[OMP_FLOOR1_IV:.+]], %[[TMP10:.+]]
+// CHECK-NEXT: %[[TMP17:.+]] = select i1 %[[TMP16:.+]], i32 %[[TMP11:.+]], i32 %[[TMP4:.+]]
+// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.body: ; preds = %omp_floor0.cond
-// CHECK-NEXT: br label %omp_floor1.preheader
+// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.preheader: ; preds = %omp_floor0.body
-// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-NEXT: [[OMP_TILE0_HEADER]]:
+// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER:.+]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.header: ; preds = %omp_floor1.inc, %omp_floor1.preheader
-// CHECK-NEXT: %omp_floor1.iv = phi i32 [ 0, %omp_floor1.preheader ], [ %omp_floor1.next, %omp_floor1.inc ]
-// CHECK-NEXT: br label %omp_floor1.cond
+// CHECK-NEXT: [[OMP_TILE0_COND]]:
+// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV:.+]], %[[TMP15:.+]]
+// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP:.+]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.cond: ; preds = %omp_floor1.header
-// CHECK-NEXT: %omp_floor1.cmp = icmp ult i32 %omp_floor1.iv, %omp_floor1.tripcount
-// CHECK-NEXT: br i1 %omp_floor1.cmp, label %omp_floor1.body, label %omp_floor1.exit
+// CHECK-NEXT: [[OMP_TILE0_BODY]]:
+// CHECK-NEXT: br label %[[OMP_TILE1_PREHEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.body: ; preds = %omp_floor1.cond
-// CHECK-NEXT: %14 = icmp eq i32 %omp_floor0.iv, %6
-// CHECK-NEXT: %15 = select i1 %14, i32 %7, i32 %3
-// CHECK-NEXT: %16 = icmp eq i32 %omp_floor1.iv, %10
-// CHECK-NEXT: %17 = select i1 %16, i32 %11, i32 %4
-// CHECK-NEXT: br label %omp_tile0.preheader
+// CHECK-NEXT: [[OMP_TILE1_PREHEADER]]:
+// CHECK-NEXT: br label %[[OMP_TILE1_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.preheader: ; preds = %omp_floor1.body
-// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-NEXT: [[OMP_TILE1_HEADER]]:
+// CHECK-NEXT: %[[OMP_TILE1_IV:.+]] = phi i32 [ 0, %[[OMP_TILE1_PREHEADER:.+]] ], [ %[[OMP_TILE1_NEXT:.+]], %[[OMP_TILE1_INC:.+]] ]
+// CHECK-NEXT: br label %[[OMP_TILE1_COND:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.header: ; preds = %omp_tile0.inc, %omp_tile0.preheader
-// CHECK-NEXT: %omp_tile0.iv = phi i32 [ 0, %omp_tile0.preheader ], [ %omp_tile0.next, %omp_tile0.inc ]
-// CHECK-NEXT: br label %omp_tile0.cond
+// CHECK-NEXT: [[OMP_TILE1_COND]]:
+// CHECK-NEXT: %[[OMP_TILE1_CMP:.+]] = icmp ult i32 %[[OMP_TILE1_IV:.+]], %[[TMP17:.+]]
+// CHECK-NEXT: br i1 %[[OMP_TILE1_CMP:.+]], label %[[OMP_TILE1_BODY:.+]], label %[[OMP_TILE1_EXIT:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.cond: ; preds = %omp_tile0.header
-// CHECK-NEXT: %omp_tile0.cmp = icmp ult i32 %omp_tile0.iv, %15
-// CHECK-NEXT: br i1 %omp_tile0.cmp, label %omp_tile0.body, label %omp_tile0.exit
+// CHECK-NEXT: [[OMP_TILE1_BODY]]:
+// CHECK-NEXT: %[[TMP18:.+]] = mul nuw i32 %[[TMP3:.+]], %[[OMP_FLOOR0_IV:.+]]
+// CHECK-NEXT: %[[TMP19:.+]] = add nuw i32 %[[TMP18:.+]], %[[OMP_TILE0_IV:.+]]
+// CHECK-NEXT: %[[TMP20:.+]] = mul nuw i32 %[[TMP4:.+]], %[[OMP_FLOOR1_IV:.+]]
+// CHECK-NEXT: %[[TMP21:.+]] = add nuw i32 %[[TMP20:.+]], %[[OMP_TILE1_IV:.+]]
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_BODY:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.body: ; preds = %omp_tile0.cond
-// CHECK-NEXT: br label %omp_tile1.preheader
+// CHECK-NEXT: [[OMP_OMP_LOOP_BODY4]]:
+// CHECK-NEXT: br label %[[OMP_LOOP_REGION12:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.preheader: ; preds = %omp_tile0.body
-// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-NEXT: [[OMP_LOOP_REGION12]]:
+// CHECK-NEXT: %[[TMP22:.+]] = add i32 %[[TMP19:.+]], %[[TMP21:.+]]
+// CHECK-NEXT: %[[TMP23:.+]] = getelementptr inbounds float, ptr %[[TMP0:.+]], i32 %[[TMP22:.+]]
+// CHECK-NEXT: store float 4.200000e+01, ptr %[[TMP23:.+]], align 4
+// CHECK-NEXT: br label %[[OMP_REGION_CONT11:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.header: ; preds = %omp_tile1.inc, %omp_tile1.preheader
-// CHECK-NEXT: %omp_tile1.iv = phi i32 [ 0, %omp_tile1.preheader ], [ %omp_tile1.next, %omp_tile1.inc ]
-// CHECK-NEXT: br label %omp_tile1.cond
+// CHECK-NEXT: [[OMP_REGION_CONT11]]:
+// CHECK-NEXT: br label %[[OMP_TILE1_INC:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.cond: ; preds = %omp_tile1.header
-// CHECK-NEXT: %omp_tile1.cmp = icmp ult i32 %omp_tile1.iv, %17
-// CHECK-NEXT: br i1 %omp_tile1.cmp, label %omp_tile1.body, label %omp_tile1.exit
+// CHECK-NEXT: [[OMP_TILE1_INC]]:
+// CHECK-NEXT: %[[OMP_TILE1_NEXT:.+]] = add nuw i32 %[[OMP_TILE1_IV:.+]], 1
+// CHECK-NEXT: br label %[[OMP_TILE1_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.body: ; preds = %omp_tile1.cond
-// CHECK-NEXT: %18 = mul nuw i32 %3, %omp_floor0.iv
-// CHECK-NEXT: %19 = add nuw i32 %18, %omp_tile0.iv
-// CHECK-NEXT: %20 = mul nuw i32 %4, %omp_floor1.iv
-// CHECK-NEXT: %21 = add nuw i32 %20, %omp_tile1.iv
-// CHECK-NEXT: br label %omp_omp.loop.body
+// CHECK-NEXT: [[OMP_TILE1_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_TILE1_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.body4: ; preds = %omp_omp.loop.preheader1
-// CHECK-NEXT: br label %omp.loop.region12
+// CHECK-NEXT: [[OMP_TILE1_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_INC:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp.loop.region12: ; preds = %omp_omp.loop.body4
-// CHECK-NEXT: %22 = add i32 %19, %21
-// CHECK-NEXT: %23 = getelementptr inbounds float, ptr %0, i32 %22
-// CHECK-NEXT: store float 4.200000e+01, ptr %23, align 4
-// CHECK-NEXT: br label %omp.region.cont11
+// CHECK-NEXT: [[OMP_TILE0_INC]]:
+// CHECK-NEXT: %[[OMP_TILE0_NEXT:.+]] = add nuw i32 %[[OMP_TILE0_IV:.+]], 1
+// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp.region.cont11: ; preds = %omp.loop.region12
-// CHECK-NEXT: br label %omp_tile1.inc
+// CHECK-NEXT: [[OMP_TILE0_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.inc: ; preds = %omp.region.cont11
-// CHECK-NEXT: %omp_tile1.next = add nuw i32 %omp_tile1.iv, 1
-// CHECK-NEXT: br label %omp_tile1.header
+// CHECK-NEXT: [[OMP_TILE0_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR1_INC:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.exit: ; preds = %omp_tile1.cond
-// CHECK-NEXT: br label %omp_tile1.after
+// CHECK-NEXT: [[OMP_FLOOR1_INC]]:
+// CHECK-NEXT: %[[OMP_FLOOR1_NEXT:.+]] = add nuw i32 %[[OMP_FLOOR1_IV:.+]], 1
+// CHECK-NEXT: br label %[[OMP_FLOOR1_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile1.after: ; preds = %omp_tile1.exit
-// CHECK-NEXT: br label %omp_tile0.inc
+// CHECK-NEXT: [[OMP_FLOOR1_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR1_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.inc: ; preds = %omp_tile1.after
-// CHECK-NEXT: %omp_tile0.next = add nuw i32 %omp_tile0.iv, 1
-// CHECK-NEXT: br label %omp_tile0.header
+// CHECK-NEXT: [[OMP_FLOOR1_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_INC:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.exit: ; preds = %omp_tile0.cond
-// CHECK-NEXT: br label %omp_tile0.after
+// CHECK-NEXT: [[OMP_FLOOR0_INC]]:
+// CHECK-NEXT: %[[OMP_FLOOR0_NEXT:.+]] = add nuw i32 %[[OMP_FLOOR0_IV:.+]], 1
+// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_tile0.after: ; preds = %omp_tile0.exit
-// CHECK-NEXT: br label %omp_floor1.inc
+// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.inc: ; preds = %omp_tile0.after
-// CHECK-NEXT: %omp_floor1.next = add nuw i32 %omp_floor1.iv, 1
-// CHECK-NEXT: br label %omp_floor1.header
+// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.exit: ; preds = %omp_floor1.cond
-// CHECK-NEXT: br label %omp_floor1.after
+// CHECK-NEXT: [[OMP_REGION_CONT:.+]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_INC:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor1.after: ; preds = %omp_floor1.exit
-// CHECK-NEXT: br label %omp_floor0.inc
+// CHECK-NEXT: [[OMP_OMP_LOOP_INC]]:
+// CHECK-NEXT: %[[OMP_OMP_LOOP_NEXT:.+]] = add nuw i32 %[[TMP19:.+]], 1
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_HEADER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.inc: ; preds = %omp_floor1.after
-// CHECK-NEXT: %omp_floor0.next = add nuw i32 %omp_floor0.iv, 1
-// CHECK-NEXT: br label %omp_floor0.header
+// CHECK-NEXT: [[OMP_OMP_LOOP_EXIT]]:
+// CHECK-NEXT: br label %[[OMP_OMP_LOOP_AFTER:.+]]
// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.exit: ; preds = %omp_floor0.cond
-// CHECK-NEXT: br label %omp_floor0.after
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_floor0.after: ; preds = %omp_floor0.exit
-// CHECK-NEXT: br label %omp_omp.loop.after
-// CHECK-EMPTY:
-// CHECK-NEXT: omp.region.cont: ; No predecessors!
-// CHECK-NEXT: br label %omp_omp.loop.inc
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.inc: ; preds = %omp.region.cont
-// CHECK-NEXT: %omp_omp.loop.next = add nuw i32 %19, 1
-// CHECK-NEXT: br label %omp_omp.loop.header
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.exit: ; preds = %omp_omp.loop.cond
-// CHECK-NEXT: br label %omp_omp.loop.after
-// CHECK-EMPTY:
-// CHECK-NEXT: omp_omp.loop.after: ; preds = %omp_floor0.after, %omp_omp.loop.exit
-// CHECK-NEXT: ret void
-// CHECK-NEXT: }
-// CHECK-EMPTY:
-// CHECK-NEXT: !llvm.module.flags = !{!0}
-// CHECK-EMPTY:
-// CHECK-NEXT: !0 = !{i32 2, !"Debug Info Version", i32 3}
+// CHECK-NEXT: [[OMP_OMP_LOOP_AFTER]]:
+// CHECK-NEXT: ret void
+// CHECK-NEXT: }
>From 44ea4603871557d460b1a147245bd9fc6241de95 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 2 Oct 2025 17:45:03 +0200
Subject: [PATCH 11/12] remove merge conflict leftovers
---
flang/test/Semantics/OpenMP/do08.f90 | 1 -
flang/test/Semantics/OpenMP/do13.f90 | 1 -
2 files changed, 2 deletions(-)
diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90
index bb3c1d0cd3855..5143dff0dd315 100644
--- a/flang/test/Semantics/OpenMP/do08.f90
+++ b/flang/test/Semantics/OpenMP/do08.f90
@@ -61,7 +61,6 @@ program omp
!$omp end do
- !ERROR: Canonical loop nest must be perfectly nested.
!ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!$omp do collapse(3)
do 60 i=2,200,2
diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90
index 8f7844f4136f9..6e9d1dddade4c 100644
--- a/flang/test/Semantics/OpenMP/do13.f90
+++ b/flang/test/Semantics/OpenMP/do13.f90
@@ -59,7 +59,6 @@ program omp
!$omp end do
- !ERROR: Canonical loop nest must be perfectly nested.
!ERROR: The value of the parameter in the COLLAPSE or ORDERED clause must not be larger than the number of nested loops following the construct.
!$omp do collapse(3)
do 60 i=1,10
>From bcc5cd5ac3586d72574630d39ec59a81e272a3f3 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Thu, 2 Oct 2025 17:46:51 +0200
Subject: [PATCH 12/12] Add - token to format test
---
mlir/test/mlir-tblgen/op-format-spec.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/test/mlir-tblgen/op-format-spec.td b/mlir/test/mlir-tblgen/op-format-spec.td
index 1541cd09f53e0..1ac231116454b 100644
--- a/mlir/test/mlir-tblgen/op-format-spec.td
+++ b/mlir/test/mlir-tblgen/op-format-spec.td
@@ -123,7 +123,7 @@ def DirectiveTypeValid : TestFormat_Op<[{
// CHECK-NOT: error
def LiteralValid : TestFormat_Op<[{
- `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `*` ` ` `` `->` `\n` `abc$._`
+ `_` `:` `,` `=` `<` `>` `(` `)` `[` `]` `?` `+` `-` `*` ` ` `` `->` `\n` `abc$._`
attr-dict
}]>;
More information about the Openmp-commits
mailing list