[Mlir-commits] [mlir] [mlir] fix memory effects in GPU barrier elimination (PR #117432)
Oleksandr Alex Zinenko
llvmlistbot at llvm.org
Sun Nov 24 14:24:39 PST 2024
https://github.com/ftynse updated https://github.com/llvm/llvm-project/pull/117432
>From d8803271b84b4dfb61c4d79f0df93a9f6f3e55c3 Mon Sep 17 00:00:00 2001
From: Alex Zinenko <git at ozinenko.com>
Date: Sat, 23 Nov 2024 13:04:28 +0100
Subject: [PATCH 1/2] [mlir] fix memory effects in GPU barrier elimination
Existing implementation may trigger infinite cycles when collecting effects
above or below the current block after wrapping around a loop-like construct.
Limit this case to only looking at the immediate block (loop body). This is
correct because wrap around is intended to consider effects of different
iterations of the same loop and shouldn't be existing the loop block.
Reported-by: Fabian Mora <fmora.dev at gmail.com>
---
.../GPU/Transforms/EliminateBarriers.cpp | 79 ++++++++++++-------
.../test/Dialect/GPU/barrier-elimination.mlir | 15 ++++
2 files changed, 66 insertions(+), 28 deletions(-)
diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp
index 1adc381092bf3a..0ffd8131b89348 100644
--- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp
@@ -132,6 +132,29 @@ collectEffects(Operation *op,
return false;
}
+/// Get all effects before the given operation caused by other operations in the
+/// same block. That is, this will not consider operations beyond the block.
+static bool
+getEffectsBeforeInBlock(Operation *op,
+ SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
+ bool stopAtBarrier) {
+ if (op == &op->getBlock()->front())
+ return true;
+
+ for (Operation *it = op->getPrevNode(); it != nullptr;
+ it = it->getPrevNode()) {
+ if (isa<BarrierOp>(it)) {
+ if (stopAtBarrier)
+ return true;
+ continue;
+ }
+
+ if (!collectEffects(it, effects))
+ return false;
+ }
+ return true;
+}
+
/// Collects memory effects from operations that may be executed before `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
@@ -153,19 +176,7 @@ getEffectsBefore(Operation *op,
}
// Collect all effects before the op.
- if (op != &op->getBlock()->front()) {
- for (Operation *it = op->getPrevNode(); it != nullptr;
- it = it->getPrevNode()) {
- if (isa<BarrierOp>(it)) {
- if (stopAtBarrier)
- return true;
- else
- continue;
- }
- if (!collectEffects(it, effects))
- return false;
- }
- }
+ getEffectsBeforeInBlock(op, effects, stopAtBarrier);
// Stop if reached the parallel region boundary.
if (isParallelRegionBoundary(op->getParentOp()))
@@ -191,8 +202,8 @@ getEffectsBefore(Operation *op,
// appropriately.
if (isSequentialLoopLike(op->getParentOp())) {
// Assuming loop terminators have no side effects.
- return getEffectsBefore(op->getBlock()->getTerminator(), effects,
- /*stopAtBarrier=*/true);
+ return getEffectsBeforeInBlock(op->getBlock()->getTerminator(), effects,
+ /*stopAtBarrier=*/true);
}
// If the parent operation is not guaranteed to execute its (single-block)
@@ -212,6 +223,28 @@ getEffectsBefore(Operation *op,
return !conservative;
}
+/// Get all effects after the given operation caused by other operations in the
+/// same block. That is, this will not consider operations beyond the block.
+static bool
+getEffectsAfterInBlock(Operation *op,
+ SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
+ bool stopAtBarrier) {
+ if (op == &op->getBlock()->back())
+ return true;
+
+ for (Operation *it = op->getNextNode(); it != nullptr;
+ it = it->getNextNode()) {
+ if (isa<BarrierOp>(it)) {
+ if (stopAtBarrier)
+ return true;
+ continue;
+ }
+ if (!collectEffects(it, effects))
+ return false;
+ }
+ return true;
+}
+
/// Collects memory effects from operations that may be executed after `op` in
/// a trivial structured control flow, e.g., without branches. Stops at the
/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
@@ -233,17 +266,7 @@ getEffectsAfter(Operation *op,
}
// Collect all effects after the op.
- if (op != &op->getBlock()->back())
- for (Operation *it = op->getNextNode(); it != nullptr;
- it = it->getNextNode()) {
- if (isa<BarrierOp>(it)) {
- if (stopAtBarrier)
- return true;
- continue;
- }
- if (!collectEffects(it, effects))
- return false;
- }
+ getEffectsAfterInBlock(op, effects, stopAtBarrier);
// Stop if reached the parallel region boundary.
if (isParallelRegionBoundary(op->getParentOp()))
@@ -272,8 +295,8 @@ getEffectsAfter(Operation *op,
return true;
bool exact = collectEffects(&op->getBlock()->front(), effects);
- return getEffectsAfter(&op->getBlock()->front(), effects,
- /*stopAtBarrier=*/true) &&
+ return getEffectsAfterInBlock(&op->getBlock()->front(), effects,
+ /*stopAtBarrier=*/true) &&
exact;
}
diff --git a/mlir/test/Dialect/GPU/barrier-elimination.mlir b/mlir/test/Dialect/GPU/barrier-elimination.mlir
index 1f5b84937deb05..3595b600143a9d 100644
--- a/mlir/test/Dialect/GPU/barrier-elimination.mlir
+++ b/mlir/test/Dialect/GPU/barrier-elimination.mlir
@@ -182,3 +182,18 @@ attributes {__parallel_region_boundary_for_test} {
%4 = memref.load %C[] : memref<f32>
return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32
}
+
+// CHECK-LABEL: @nested_loop_barrier_only
+func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for_test} {
+ %c0 = arith.constant 0 : index
+ %c42 = arith.constant 42 : index
+ %c1 = arith.constant 1 : index
+ // CHECK-NOT: scf.for
+ // CHECK-NOT: gpu.barrier
+ scf.for %j = %c0 to %c42 step %c1 {
+ scf.for %i = %c0 to %c42 step %c1 {
+ gpu.barrier
+ }
+ }
+ return
+}
>From 0c2ad58338322ce811316c317f817e81a9f83ebf Mon Sep 17 00:00:00 2001
From: "Oleksandr \"Alex\" Zinenko" <ftynse at gmail.com>
Date: Sun, 24 Nov 2024 23:24:32 +0100
Subject: [PATCH 2/2] Update mlir/test/Dialect/GPU/barrier-elimination.mlir
Co-authored-by: Fabian Mora <6982088+fabianmcg at users.noreply.github.com>
---
mlir/test/Dialect/GPU/barrier-elimination.mlir | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mlir/test/Dialect/GPU/barrier-elimination.mlir b/mlir/test/Dialect/GPU/barrier-elimination.mlir
index 3595b600143a9d..7f6619adcd78f9 100644
--- a/mlir/test/Dialect/GPU/barrier-elimination.mlir
+++ b/mlir/test/Dialect/GPU/barrier-elimination.mlir
@@ -188,6 +188,8 @@ func.func @nested_loop_barrier_only() attributes {__parallel_region_boundary_for
%c0 = arith.constant 0 : index
%c42 = arith.constant 42 : index
%c1 = arith.constant 1 : index
+ // Note: the barrier can be removed and as consequence the loops get folded
+ // by the greedy rewriter.
// CHECK-NOT: scf.for
// CHECK-NOT: gpu.barrier
scf.for %j = %c0 to %c42 step %c1 {
More information about the Mlir-commits
mailing list