[Mlir-commits] [mlir] [mlir][acc] Improve acc.loop support as a container (PR #137887)
Razvan Lupusoru
llvmlistbot at llvm.org
Tue Apr 29 16:28:50 PDT 2025
https://github.com/razvanlupusoru updated https://github.com/llvm/llvm-project/pull/137887
>From dac2419d76dd40f61479acee829d500d75f2945f Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 29 Apr 2025 15:18:06 -0700
Subject: [PATCH 1/2] [mlir][acc] Improve acc.loop support as a container
Dialects which have their own loop representation not representable
with numeric bounds + steps cannot be represented cleanly with
acc.loop. In such a case, we permit the dialects representation with
acc.loop merely encompasing its loop representation. This limitation
became obvious when looking at range / random iterator C++ loops.
The API of acc.loop was updated to test for this differentiation.
Additionally, the verifier was updated to check for consistent bounds
and whether inner-loops are contained when it works as a container.
---
mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td | 5 +++++
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 17 +++++++++++++++++
2 files changed, 22 insertions(+)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index df7ff28ca5926..3ad8e4f9ccbeb 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -2127,6 +2127,11 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// Used to retrieve the block inside the op's region.
Block &getBody() { return getLoopRegions().front()->front(); }
+ /// Used to determine if this operation is merely a container for a loop
+ /// operation instead of being loop-like itself.
+ bool isLoopLike() { return !getLowerbound().empty(); }
+ bool isContainerLike() { return !isLoopLike(); }
+
/// Return true if the op has the auto attribute for the
/// mlir::acc::DeviceType::None device_type.
bool hasAuto();
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 91025e90b8e76..a7b01abe34e03 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2298,6 +2298,14 @@ LogicalResult checkDeviceTypes(mlir::ArrayAttr deviceTypes) {
}
LogicalResult acc::LoopOp::verify() {
+ if (getUpperbound().size() != getStep().size())
+ return emitError() << "number of upperbounds expected to be the same as "
+ "number of steps";
+
+ if (getUpperbound().size() != getLowerbound().size())
+ return emitError() << "number of upperbounds expected to be the same as "
+ "number of lowerbounds";
+
if (!getUpperbound().empty() && getInclusiveUpperbound() &&
(getUpperbound().size() != getInclusiveUpperbound()->size()))
return emitError() << "inclusiveUpperbound size is expected to be the same"
@@ -2415,6 +2423,15 @@ LogicalResult acc::LoopOp::verify() {
if (getRegion().empty())
return emitError("expected non-empty body.");
+ // When it is container-like - it is expected to hold a loop-like operation.
+ // TODO: Get the collapse attribute into account.
+ if (isContainerLike()) {
+ // TODO: Ensure there is a single loop-like operation at any one level.
+ auto loopLikeOps = getRegion().getOps<LoopLikeOpInterface>();
+ if (loopLikeOps.empty())
+ return emitError("expected to hold a loop-like operation.");
+ }
+
return success();
}
>From 7119f547c0903a81abe38ac8001614fe60a2049d Mon Sep 17 00:00:00 2001
From: Razvan Lupusoru <rlupusoru at nvidia.com>
Date: Tue, 29 Apr 2025 16:28:28 -0700
Subject: [PATCH 2/2] Update acc.loop verification when it is container-like
---
mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 44 ++++++++++++++++++---
mlir/test/Dialect/OpenACC/invalid.mlir | 40 +++++++++++++++++--
mlir/test/Dialect/OpenACC/ops.mlir | 52 +++++++++++++++++++++++--
3 files changed, 125 insertions(+), 11 deletions(-)
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index a7b01abe34e03..d23563f1f0fb0 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -2424,12 +2424,46 @@ LogicalResult acc::LoopOp::verify() {
return emitError("expected non-empty body.");
// When it is container-like - it is expected to hold a loop-like operation.
- // TODO: Get the collapse attribute into account.
if (isContainerLike()) {
- // TODO: Ensure there is a single loop-like operation at any one level.
- auto loopLikeOps = getRegion().getOps<LoopLikeOpInterface>();
- if (loopLikeOps.empty())
- return emitError("expected to hold a loop-like operation.");
+ // Obtain the maximum collapse count - we use this to check that there
+ // are enough loops contained.
+ uint64_t collapseCount = getCollapseValue().value_or(1);
+ if (getCollapseAttr()) {
+ for (auto collapseEntry : getCollapseAttr()) {
+ auto intAttr = mlir::dyn_cast<IntegerAttr>(collapseEntry);
+ if (intAttr.getValue().getZExtValue() > collapseCount)
+ collapseCount = intAttr.getValue().getZExtValue();
+ }
+ }
+
+ // We want to check that we find enough loop-like operations inside.
+ // PreOrder walk allows us to walk in a breadth-first manner at each nesting
+ // level.
+ mlir::Operation *expectedParent = this->getOperation();
+ bool foundSibling = false;
+ getRegion().walk<WalkOrder::PreOrder>([&](mlir::Operation *op) {
+ if (mlir::isa<mlir::LoopLikeOpInterface>(op)) {
+ // This effectively checks that we are not looking at a sibling loop.
+ if (op->getParentOfType<mlir::LoopLikeOpInterface>() !=
+ expectedParent) {
+ foundSibling = true;
+ return mlir::WalkResult::interrupt();
+ }
+
+ collapseCount--;
+ expectedParent = op;
+ }
+ // We found enough contained loops.
+ if (collapseCount == 0)
+ return mlir::WalkResult::interrupt();
+ return mlir::WalkResult::advance();
+ });
+
+ if (foundSibling)
+ return emitError("found sibling loops inside container-like acc.loop");
+ if (collapseCount != 0)
+ return emitError("failed to find enough loop-like operations inside "
+ "container-like acc.loop");
}
return success();
diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir
index 0cc65f7b30d98..c8d7a87112917 100644
--- a/mlir/test/Dialect/OpenACC/invalid.mlir
+++ b/mlir/test/Dialect/OpenACC/invalid.mlir
@@ -752,7 +752,6 @@ func.func @acc_combined() {
// expected-error @below {{expected 'loop'}}
acc.parallel combined() {
}
-
return
}
@@ -762,7 +761,6 @@ func.func @acc_combined() {
// expected-error @below {{expected compute construct name}}
acc.loop combined(loop) {
}
-
return
}
@@ -772,7 +770,6 @@ func.func @acc_combined() {
// expected-error @below {{expected 'loop'}}
acc.parallel combined(parallel loop) {
}
-
return
}
@@ -782,6 +779,43 @@ func.func @acc_combined() {
// expected-error @below {{expected ')'}}
acc.loop combined(parallel loop) {
}
+ return
+}
+
+// -----
+func.func @acc_loop_container() {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ // expected-error @below {{found sibling loops inside container-like acc.loop}}
+ acc.loop {
+ scf.for %arg4 = %c0 to %c10 step %c1 {
+ scf.yield
+ }
+ scf.for %arg5 = %c0 to %c10 step %c1 {
+ scf.yield
+ }
+ acc.yield
+ } attributes { collapse = [2], collapseDeviceType = [#acc.device_type<none>] }
+ return
+}
+
+// -----
+
+func.func @acc_loop_container() {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ // expected-error @below {{failed to find enough loop-like operations inside container-like acc.loop}}
+ acc.loop {
+ scf.for %arg4 = %c0 to %c10 step %c1 {
+ scf.for %arg5 = %c0 to %c10 step %c1 {
+ scf.yield
+ }
+ scf.yield
+ }
+ acc.yield
+ } attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>] }
return
}
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index 28ab6f9fcfb4c..c49e649acc87d 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -1862,22 +1862,26 @@ func.func @acc_num_gangs() {
// CHECK-LABEL: func.func @acc_combined
func.func @acc_combined() {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+
acc.parallel combined(loop) {
- acc.loop combined(parallel) {
+ acc.loop combined(parallel) control(%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
acc.yield
}
acc.terminator
}
acc.kernels combined(loop) {
- acc.loop combined(kernels) {
+ acc.loop combined(kernels) control(%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
acc.yield
}
acc.terminator
}
acc.serial combined(loop) {
- acc.loop combined(serial) {
+ acc.loop combined(serial) control(%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
acc.yield
}
acc.terminator
@@ -1933,3 +1937,45 @@ acc.private.recipe @privatization_memref_i32 : memref<i32> init {
// CHECK-LABEL: acc.private.recipe @privatization_memref_i32
// CHECK: memref.alloca
+
+// -----
+
+func.func @acc_loop_container() {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ acc.loop {
+ scf.for %arg4 = %c0 to %c10 step %c1 {
+ scf.yield
+ }
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @acc_loop_container
+// CHECK: acc.loop
+// CHECK: scf.for
+
+// -----
+
+func.func @acc_loop_container() {
+ %c0 = arith.constant 0 : index
+ %c10 = arith.constant 10 : index
+ %c1 = arith.constant 1 : index
+ acc.loop {
+ scf.for %arg4 = %c0 to %c10 step %c1 {
+ scf.for %arg5 = %c0 to %c10 step %c1 {
+ scf.yield
+ }
+ scf.yield
+ }
+ acc.yield
+ } attributes { collapse = [2], collapseDeviceType = [#acc.device_type<none>] }
+ return
+}
+
+// CHECK-LABEL: func.func @acc_loop_container
+// CHECK: acc.loop
+// CHECK: scf.for
+// CHECK: scf.for
More information about the Mlir-commits
mailing list