[Mlir-commits] [mlir] [MLIR][Affine] Fix affine data copy generation copy placement for missing memref definition check (PR #130750)
Uday Bondhugula
llvmlistbot at llvm.org
Tue Mar 11 03:52:27 PDT 2025
https://github.com/bondhugula created https://github.com/llvm/llvm-project/pull/130750
This was exposed with the test case previously added but when performing
generation with limited memory capacity.
>From aaf864b683bfa805b0f74a3295c03bf37eb7c4f2 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday at polymagelabs.com>
Date: Tue, 11 Mar 2025 16:19:39 +0530
Subject: [PATCH] [MLIR][Affine] Fix affine data copy generation copy placement
for missing memref definition check
This was exposed with the test case previously added but when performing
generation with limited memory capacity.
---
mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 34 +++++++++++++------
.../test/Dialect/Affine/affine-data-copy.mlir | 16 +++++++++
.../lib/Dialect/Affine/TestAffineDataCopy.cpp | 19 ++++++++---
3 files changed, 54 insertions(+), 15 deletions(-)
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 5c94ec2985c3d..b58bf3f271d47 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1774,23 +1774,37 @@ findHighestBlockForPlacement(const MemRefRegion ®ion, Block &block,
SmallVector<Value, 4> symbols;
cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
- SmallVector<AffineForOp, 4> enclosingFors;
- getAffineForIVs(*block.begin(), &enclosingFors);
+ SmallVector<Operation *, 4> enclosingAffineOps;
+ getEnclosingAffineOps(*block.begin(), &enclosingAffineOps);
// Walk up loop parents till we find an IV on which this region is
- // symbolic/variant.
- auto it = enclosingFors.rbegin();
- for (auto e = enclosingFors.rend(); it != e; ++it) {
+ // symbolic/variant or we hit `hoistGuard`.
+ auto it = enclosingAffineOps.rbegin();
+ AffineForOp lastInvariantFor;
+ for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
+ Operation *enclosingOp = *it;
+ // We can't hoist past the definition of the memref being copied.
+ Value memref = region.memref;
+ if (!memref.getParentRegion()->isAncestor(enclosingOp->getParentRegion())) {
+ LLVM_DEBUG(
+ llvm::dbgs()
+ << "memref definition will end up not dominating hoist location\n");
+ break;
+ }
+
+ auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
+ if (!affineFor)
+ break;
// TODO: also need to be checking this for regions symbols that
// aren't loop IVs, whether we are within their resp. defs' dominance scope.
- if (llvm::is_contained(symbols, it->getInductionVar()))
+ if (llvm::is_contained(symbols, affineFor.getInductionVar()))
break;
+ lastInvariantFor = affineFor;
}
- if (it != enclosingFors.rbegin()) {
- auto lastInvariantIV = *std::prev(it);
- *copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation());
+ if (it != enclosingAffineOps.rbegin()) {
+ *copyInPlacementStart = Block::iterator(lastInvariantFor);
*copyOutPlacementStart = std::next(*copyInPlacementStart);
- *copyPlacementBlock = lastInvariantIV->getBlock();
+ *copyPlacementBlock = lastInvariantFor->getBlock();
} else {
*copyInPlacementStart = begin;
*copyOutPlacementStart = end;
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index 1f3aac3401bea..a1f0d952e7c63 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -8,6 +8,7 @@
// affine.load op in the innermost loop as a filter.
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
+// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='capacity-kib=32' | FileCheck %s --check-prefix=LIMITED-MEM
// -copy-skip-non-stride-loops forces the copies to be placed right inside the
// tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -23,6 +24,7 @@
// CHECK-LABEL: func @matmul
// FILTER-LABEL: func @matmul
+// LIMITED-MEM-LABEL: func @matmul
func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<4096x4096xf32>) -> memref<4096x4096xf32> {
affine.for %i = 0 to 4096 step 128 {
affine.for %j = 0 to 4096 step 128 {
@@ -43,6 +45,7 @@ func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memr
}
}
return %C : memref<4096x4096xf32>
+ // LIMITED-MEM: return
}
// Buffers of size 128x128 get created here for all three matrices.
@@ -421,6 +424,7 @@ func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
}
// CHECK-LABEL: func @memref_def_inside
+// LIMITED-MEM-LABEL: func @memref_def_inside
func.func @memref_def_inside(%arg0: index) {
%0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
// No copy generation can happen at this depth given the definition inside.
@@ -429,5 +433,17 @@ func.func @memref_def_inside(%arg0: index) {
// CHECK: affine.store {{.*}} : memref<1xf32>
affine.store %0, %alloc_7[0] : memref<1xf32>
}
+
+ // With the limited capacity specified, buffer generation happens at the
+ // innermost depth. Tests that copy-placement is proper and respects the
+ // memref definition.
+
+ // LIMITED-MEM: affine.for %{{.*}} = 0 to 29
+ // LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
+ // LIMITED-MEM-NEXT: memref.alloc() : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.load %{{.*}}[%c0{{.*}}] : memref<1xf32>
+ // LIMITED-MEM-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+ // LIMITED-MEM-NEXT: memref.dealloc %{{.*}} : memref<1xf32>
return
}
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 404f34ebee17a..d6aaa6faf94cb 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -53,6 +53,11 @@ struct TestAffineDataCopy
*this, "for-memref-region",
llvm::cl::desc("Test copy generation for a single memref region"),
llvm::cl::init(false)};
+ Option<uint64_t> clCapacityLimit{
+ *this, "capacity-kib",
+ llvm::cl::desc("Test copy generation enforcing a limit of capacity "
+ "(default: unlimited)"),
+ llvm::cl::init(UINT64_MAX)};
};
} // namespace
@@ -73,24 +78,24 @@ void TestAffineDataCopy::runOnOperation() {
auto loopNest = depthToLoops[0][0];
auto innermostLoop = depthToLoops[innermostLoopIdx][0];
AffineLoadOp load;
+ // For simplicity, these options are tested on the first memref being loaded
+ // from in the innermost loop.
if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
- // Gather MemRef filter. For simplicity, we use the first loaded memref
- // found in the innermost loop.
for (auto &op : *innermostLoop.getBody()) {
if (auto ld = dyn_cast<AffineLoadOp>(op)) {
load = ld;
break;
}
}
+ if (!load)
+ return;
}
- if (!load)
- return;
AffineCopyOptions copyOptions = {/*generateDma=*/false,
/*slowMemorySpace=*/0,
/*fastMemorySpace=*/0,
/*tagMemorySpace=*/0,
- /*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
+ /*fastMemCapacityBytes=*/clCapacityLimit};
DenseSet<Operation *> copyNests;
if (clMemRefFilter) {
if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(),
@@ -103,6 +108,10 @@ void TestAffineDataCopy::runOnOperation() {
return;
if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result)))
return;
+ } else if (failed(affineDataCopyGenerate(loopNest, copyOptions,
+ /*filterMemref=*/std::nullopt,
+ copyNests))) {
+ return;
}
// Promote any single iteration loops in the copy nests and simplify
More information about the Mlir-commits
mailing list