[Mlir-commits] [mlir] [MLIR][Affine] Fix affine data copy generation copy placement for missing memref definition check (PR #130750)

Uday Bondhugula llvmlistbot at llvm.org
Tue Mar 11 03:52:27 PDT 2025


https://github.com/bondhugula created https://github.com/llvm/llvm-project/pull/130750

This was exposed with the test case previously added but when performing
generation with limited memory capacity.


>From aaf864b683bfa805b0f74a3295c03bf37eb7c4f2 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday at polymagelabs.com>
Date: Tue, 11 Mar 2025 16:19:39 +0530
Subject: [PATCH] [MLIR][Affine] Fix affine data copy generation copy placement
 for missing memref definition check

This was exposed with the test case previously added but when performing
generation with limited memory capacity.
---
 mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp   | 34 +++++++++++++------
 .../test/Dialect/Affine/affine-data-copy.mlir | 16 +++++++++
 .../lib/Dialect/Affine/TestAffineDataCopy.cpp | 19 ++++++++---
 3 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 5c94ec2985c3d..b58bf3f271d47 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1774,23 +1774,37 @@ findHighestBlockForPlacement(const MemRefRegion &region, Block &block,
   SmallVector<Value, 4> symbols;
   cst->getValues(cst->getNumDimVars(), cst->getNumDimAndSymbolVars(), &symbols);
 
-  SmallVector<AffineForOp, 4> enclosingFors;
-  getAffineForIVs(*block.begin(), &enclosingFors);
+  SmallVector<Operation *, 4> enclosingAffineOps;
+  getEnclosingAffineOps(*block.begin(), &enclosingAffineOps);
   // Walk up loop parents till we find an IV on which this region is
-  // symbolic/variant.
-  auto it = enclosingFors.rbegin();
-  for (auto e = enclosingFors.rend(); it != e; ++it) {
+  // symbolic/variant or we hit `hoistGuard`.
+  auto it = enclosingAffineOps.rbegin();
+  AffineForOp lastInvariantFor;
+  for (auto e = enclosingAffineOps.rend(); it != e; ++it) {
+    Operation *enclosingOp = *it;
+    // We can't hoist past the definition of the memref being copied.
+    Value memref = region.memref;
+    if (!memref.getParentRegion()->isAncestor(enclosingOp->getParentRegion())) {
+      LLVM_DEBUG(
+          llvm::dbgs()
+          << "memref definition will end up not dominating hoist location\n");
+      break;
+    }
+
+    auto affineFor = dyn_cast<AffineForOp>(enclosingOp);
+    if (!affineFor)
+      break;
     // TODO: also need to be checking this for regions symbols that
     // aren't loop IVs, whether we are within their resp. defs' dominance scope.
-    if (llvm::is_contained(symbols, it->getInductionVar()))
+    if (llvm::is_contained(symbols, affineFor.getInductionVar()))
       break;
+    lastInvariantFor = affineFor;
   }
 
-  if (it != enclosingFors.rbegin()) {
-    auto lastInvariantIV = *std::prev(it);
-    *copyInPlacementStart = Block::iterator(lastInvariantIV.getOperation());
+  if (it != enclosingAffineOps.rbegin()) {
+    *copyInPlacementStart = Block::iterator(lastInvariantFor);
     *copyOutPlacementStart = std::next(*copyInPlacementStart);
-    *copyPlacementBlock = lastInvariantIV->getBlock();
+    *copyPlacementBlock = lastInvariantFor->getBlock();
   } else {
     *copyInPlacementStart = begin;
     *copyOutPlacementStart = end;
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index 1f3aac3401bea..a1f0d952e7c63 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -8,6 +8,7 @@
 // affine.load op in the innermost loop as a filter.
 // RUN: mlir-opt %s -split-input-file -test-affine-data-copy='memref-filter' | FileCheck %s --check-prefix=FILTER
 // RUN: mlir-opt %s -split-input-file -test-affine-data-copy='for-memref-region' | FileCheck %s --check-prefix=MEMREF_REGION
+// RUN: mlir-opt %s -split-input-file -test-affine-data-copy='capacity-kib=32' | FileCheck %s --check-prefix=LIMITED-MEM
 
 // -copy-skip-non-stride-loops forces the copies to be placed right inside the
 // tile space loops, avoiding the sensitivity of copy placement depth to memory
@@ -23,6 +24,7 @@
 
 // CHECK-LABEL: func @matmul
 // FILTER-LABEL: func @matmul
+// LIMITED-MEM-LABEL: func @matmul
 func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memref<4096x4096xf32>) -> memref<4096x4096xf32> {
   affine.for %i = 0 to 4096 step 128 {
     affine.for %j = 0 to 4096 step 128 {
@@ -43,6 +45,7 @@ func.func @matmul(%A: memref<4096x4096xf32>, %B: memref<4096x4096xf32>, %C: memr
     }
   }
   return %C : memref<4096x4096xf32>
+  // LIMITED-MEM: return
 }
 
 // Buffers of size 128x128 get created here for all three matrices.
@@ -421,6 +424,7 @@ func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
 }
 
 // CHECK-LABEL: func @memref_def_inside
+// LIMITED-MEM-LABEL: func @memref_def_inside
 func.func @memref_def_inside(%arg0: index) {
   %0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
   // No copy generation can happen at this depth given the definition inside.
@@ -429,5 +433,17 @@ func.func @memref_def_inside(%arg0: index) {
     // CHECK: affine.store {{.*}} : memref<1xf32>
     affine.store %0, %alloc_7[0] : memref<1xf32>
   }
+
+  // With the limited capacity specified, buffer generation happens at the
+  // innermost depth. Tests that copy-placement is proper and respects the
+  // memref definition.
+
+  // LIMITED-MEM:      affine.for %{{.*}} = 0 to 29
+  // LIMITED-MEM-NEXT:   memref.alloc() : memref<1xf32>
+  // LIMITED-MEM-NEXT:   memref.alloc() : memref<1xf32>
+  // LIMITED-MEM-NEXT:   affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+  // LIMITED-MEM-NEXT:   affine.load %{{.*}}[%c0{{.*}}] : memref<1xf32>
+  // LIMITED-MEM-NEXT:   affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
+  // LIMITED-MEM-NEXT:   memref.dealloc %{{.*}} : memref<1xf32>
   return
 }
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 404f34ebee17a..d6aaa6faf94cb 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -53,6 +53,11 @@ struct TestAffineDataCopy
       *this, "for-memref-region",
       llvm::cl::desc("Test copy generation for a single memref region"),
       llvm::cl::init(false)};
+  Option<uint64_t> clCapacityLimit{
+      *this, "capacity-kib",
+      llvm::cl::desc("Test copy generation enforcing a limit of capacity "
+                     "(default: unlimited)"),
+      llvm::cl::init(UINT64_MAX)};
 };
 
 } // namespace
@@ -73,24 +78,24 @@ void TestAffineDataCopy::runOnOperation() {
   auto loopNest = depthToLoops[0][0];
   auto innermostLoop = depthToLoops[innermostLoopIdx][0];
   AffineLoadOp load;
+  // For simplicity, these options are tested on the first memref being loaded
+  // from in the innermost loop.
   if (clMemRefFilter || clTestGenerateCopyForMemRegion) {
-    // Gather MemRef filter. For simplicity, we use the first loaded memref
-    // found in the innermost loop.
     for (auto &op : *innermostLoop.getBody()) {
       if (auto ld = dyn_cast<AffineLoadOp>(op)) {
         load = ld;
         break;
       }
     }
+    if (!load)
+      return;
   }
-  if (!load)
-    return;
 
   AffineCopyOptions copyOptions = {/*generateDma=*/false,
                                    /*slowMemorySpace=*/0,
                                    /*fastMemorySpace=*/0,
                                    /*tagMemorySpace=*/0,
-                                   /*fastMemCapacityBytes=*/32 * 1024 * 1024UL};
+                                   /*fastMemCapacityBytes=*/clCapacityLimit};
   DenseSet<Operation *> copyNests;
   if (clMemRefFilter) {
     if (failed(affineDataCopyGenerate(loopNest, copyOptions, load.getMemRef(),
@@ -103,6 +108,10 @@ void TestAffineDataCopy::runOnOperation() {
       return;
     if (failed(generateCopyForMemRegion(region, loopNest, copyOptions, result)))
       return;
+  } else if (failed(affineDataCopyGenerate(loopNest, copyOptions,
+                                           /*filterMemref=*/std::nullopt,
+                                           copyNests))) {
+    return;
   }
 
   // Promote any single iteration loops in the copy nests and simplify



More information about the Mlir-commits mailing list