[Mlir-commits] [mlir] [MLIR][Affine] Fix copy generation for missing memref definition depth check (PR #129187)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Feb 27 21:12:29 PST 2025


llvmbot wrote:


@llvm/pr-subscribers-mlir

Author: Uday Bondhugula (bondhugula)

<details>
<summary>Changes</summary>

Fixes: https://github.com/llvm/llvm-project/issues/122210
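
For context, here is a minimal sketch of the pattern from the linked issue that the new check guards against (names are illustrative; it mirrors the `@memref_def_inside` test added below): the memref is allocated inside the loop at which copy-in/copy-out would be placed, so `affineDataCopyGenerate` now skips it at that depth. The PR also bails out gracefully on 0-ranked memrefs (see the new `@zero_ranked` test), since a point-wise copy nest cannot be generated for them.

```mlir
func.func @repro() {
  %cst = arith.constant 1.000000e+00 : f32
  // Copy generation is requested at the depth of this loop, but the memref
  // below is defined inside it, so no copy can be placed around the loop.
  affine.for %i = 0 to 29 {
    %buf = memref.alloc() : memref<1xf32>
    affine.store %cst, %buf[0] : memref<1xf32>
  }
  return
}
```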


---
Full diff: https://github.com/llvm/llvm-project/pull/129187.diff


2 Files Affected:

- (modified) mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp (+37-25) 
- (modified) mlir/test/Dialect/Affine/affine-data-copy.mlir (+77) 


``````````diff
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 82b96e9876a6f..f4a41343e9a4d 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1828,14 +1828,14 @@ static void getMultiLevelStrides(const MemRefRegion &region,
   }
 }
 
-/// Generates a point-wise copy from/to `memref' to/from `fastMemRef' and
-/// returns the outermost AffineForOp of the copy loop nest. `lbMaps` and
-/// `ubMaps` along with `lbOperands` and `ubOperands` hold the lower and upper
-/// bound information for the copy loop nest. `fastBufOffsets` contain the
-/// expressions to be subtracted out from the respective copy loop iterators in
-/// order to index the fast buffer. If `copyOut' is true, generates a copy-out;
-/// otherwise a copy-in. Builder `b` should be set to the point the copy nest is
-/// inserted.
+/// Generates a point-wise copy from/to a non-zero ranked `memref' to/from
+/// `fastMemRef' and returns the outermost AffineForOp of the copy loop nest.
+/// `lbMaps` and `ubMaps` along with `lbOperands` and `ubOperands` hold the
+/// lower and upper bound information for the copy loop nest. `fastBufOffsets`
+/// contain the expressions to be subtracted out from the respective copy loop
+/// iterators in order to index the fast buffer. If `copyOut' is true, generates
+/// a copy-out; otherwise a copy-in. Builder `b` should be set to the point the
+/// copy nest is inserted.
 //
 /// The copy-in nest is generated as follows as an example for a 2-d region:
 /// for x = ...
@@ -1856,6 +1856,8 @@ generatePointWiseCopy(Location loc, Value memref, Value fastMemRef,
   }));
 
   unsigned rank = cast<MemRefType>(memref.getType()).getRank();
+  // A copy nest can't be generated for 0-ranked memrefs.
+  assert(rank != 0 && "non-zero rank memref expected");
   assert(lbMaps.size() == rank && "wrong number of lb maps");
   assert(ubMaps.size() == rank && "wrong number of ub maps");
 
@@ -1919,19 +1921,20 @@ emitRemarkForBlock(Block &block) {
   return block.getParentOp()->emitRemark();
 }
 
-/// Creates a buffer in the faster memory space for the specified memref region;
-/// generates a copy from the lower memory space to this one, and replaces all
-/// loads/stores in the block range [`begin', `end') of `block' to load/store
-/// from that buffer. Returns failure if copies could not be generated due to
-/// yet unimplemented cases. `copyInPlacementStart` and `copyOutPlacementStart`
-/// in copyPlacementBlock specify the insertion points where the incoming copies
-/// and outgoing copies, respectively, should be inserted (the insertion happens
-/// right before the insertion point). Since `begin` can itself be invalidated
-/// due to the memref rewriting done from this method, the output argument
-/// `nBegin` is set to its replacement (set to `begin` if no invalidation
-/// happens). Since outgoing copies could have  been inserted at `end`, the
-/// output argument `nEnd` is set to the new end. `sizeInBytes` is set to the
-/// size of the fast buffer allocated.
+/// Creates a buffer in the faster memory space for the specified memref region
+/// (memref has to be non-zero ranked); generates a copy from the lower memory
+/// space to this one, and replaces all loads/stores in the block range
+/// [`begin', `end') of `block' to load/store from that buffer. Returns failure
+/// if copies could not be generated due to yet unimplemented cases.
+/// `copyInPlacementStart` and `copyOutPlacementStart` in copyPlacementBlock
+/// specify the insertion points where the incoming copies and outgoing copies,
+/// respectively, should be inserted (the insertion happens right before the
+/// insertion point). Since `begin` can itself be invalidated due to the memref
+/// rewriting done from this method, the output argument `nBegin` is set to its
+/// replacement (set to `begin` if no invalidation happens). Since outgoing
+/// copies could have been inserted at `end`, the output argument `nEnd` is set
+/// to the new end. `sizeInBytes` is set to the size of the fast buffer
+/// allocated.
 static LogicalResult generateCopy(
     const MemRefRegion &region, Block *block, Block::iterator begin,
     Block::iterator end, Block *copyPlacementBlock,
@@ -1982,6 +1985,11 @@ static LogicalResult generateCopy(
   SmallVector<Value, 4> bufIndices;
 
   unsigned rank = memRefType.getRank();
+  if (rank == 0) {
+    LLVM_DEBUG(llvm::dbgs() << "Only non-zero ranked memrefs are supported\n");
+    return failure();
+  }
+
   SmallVector<int64_t, 4> fastBufferShape;
 
   // Compute the extents of the buffer.
@@ -2322,17 +2330,21 @@ mlir::affine::affineDataCopyGenerate(Block::iterator begin, Block::iterator end,
       memref = storeOp.getMemRef();
       memrefType = storeOp.getMemRefType();
     }
-    // Neither load nor a store op.
+    // Not an affine.load/store op.
     if (!memref)
       return;
 
-    auto memorySpaceAttr =
-        dyn_cast_or_null<IntegerAttr>(memrefType.getMemorySpace());
     if ((filterMemRef.has_value() && filterMemRef != memref) ||
-        (memorySpaceAttr &&
+        (isa_and_nonnull<IntegerAttr>(memrefType.getMemorySpace()) &&
          memrefType.getMemorySpaceAsInt() != copyOptions.slowMemorySpace))
       return;
 
+    if (!memref.getParentRegion()->isAncestor(block->getParent())) {
+      LLVM_DEBUG(llvm::dbgs() << "memref is defined at or inside the depth at "
+                                 "which copy-in/copy-out would happen\n");
+      return;
+    }
+
     // Compute the MemRefRegion accessed.
     auto region = std::make_unique<MemRefRegion>(opInst->getLoc());
     if (failed(region->compute(opInst, copyDepth, /*sliceState=*/nullptr,
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir
index 5615acae5ecc4..453a0eabc4fdd 100644
--- a/mlir/test/Dialect/Affine/affine-data-copy.mlir
+++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir
@@ -354,3 +354,80 @@ func.func @arbitrary_memory_space() {
   }
   return
 }
+
+// CHECK-LABEL: zero_ranked
+func.func @zero_ranked(%3:memref<480xi1>) {
+  %false = arith.constant false
+  %4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  affine.store %false, %4[] : memref<i1>
+  %5 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  memref.copy %4, %5 : memref<i1> to memref<i1>
+  affine.for %arg0 = 0 to 480 {
+    %11 = affine.load %3[%arg0] : memref<480xi1>
+    %12 = affine.load %5[] : memref<i1>
+    %13 = arith.cmpi slt, %11, %12 : i1
+    %14 = arith.select %13, %11, %12 : i1
+    affine.store %14, %5[] : memref<i1>
+  }
+  return
+}
+
+// CHECK-LABEL: func @scalar_memref_copy_without_dma
+func.func @scalar_memref_copy_without_dma() {
+    %false = arith.constant false
+    %4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+    affine.store %false, %4[] : memref<i1>
+
+    // CHECK: %[[FALSE:.*]] = arith.constant false
+    // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+    // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
+    return
+}
+
+// CHECK-LABEL: func @scalar_memref_copy_in_loop
+func.func @scalar_memref_copy_in_loop(%3:memref<480xi1>) {
+  %false = arith.constant false
+  %4 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  affine.store %false, %4[] : memref<i1>
+  %5 = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  memref.copy %4, %5 : memref<i1> to memref<i1>
+  affine.for %arg0 = 0 to 480 {
+    %11 = affine.load %3[%arg0] : memref<480xi1>
+    %12 = affine.load %5[] : memref<i1>
+    %13 = arith.cmpi slt, %11, %12 : i1
+    %14 = arith.select %13, %11, %12 : i1
+    affine.store %14, %5[] : memref<i1>
+  }
+
+  // CHECK: %[[FALSE:.*]] = arith.constant false
+  // CHECK: %[[MEMREF:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  // CHECK: affine.store %[[FALSE]], %[[MEMREF]][] : memref<i1>
+  // CHECK: %[[TARGET:.*]] = memref.alloc() {alignment = 128 : i64} : memref<i1>
+  // CHECK: memref.copy %alloc, %[[TARGET]] : memref<i1> to memref<i1>
+  // CHECK: %[[FAST_MEMREF:.*]] = memref.alloc() : memref<480xi1>
+  // CHECK: affine.for %{{.*}} = 0 to 480 {
+  // CHECK:   %{{.*}} = affine.load %arg0[%{{.*}}] : memref<480xi1>
+  // CHECK:   affine.store %{{.*}}, %[[FAST_MEMREF]][%{{.*}}] : memref<480xi1>
+  // CHECK: }
+  // CHECK: affine.for %arg1 = 0 to 480 {
+  // CHECK:   %[[L0:.*]] = affine.load %[[FAST_MEMREF]][%arg1] : memref<480xi1>
+  // CHECK:   %[[L1:.*]] = affine.load %[[TARGET]][] : memref<i1>
+  // CHECK:   %[[CMPI:.*]] = arith.cmpi slt, %[[L0]], %[[L1]] : i1
+  // CHECK:   %[[SELECT:.*]] = arith.select %[[CMPI]], %[[L0]], %[[L1]] : i1
+  // CHECK:   affine.store %[[SELECT]], %[[TARGET]][] : memref<i1>
+  // CHECK: }
+  // CHECK: memref.dealloc %[[FAST_MEMREF]] : memref<480xi1>
+  return
+}
+
+// CHECK-LABEL: func @memref_def_inside
+func.func @memref_def_inside(%arg0: index) {
+  %0 = llvm.mlir.constant(1.000000e+00 : f32) : f32
+  // No copy can be generated at this depth: the memref is defined inside.
+  affine.for %arg1 = 0 to 29 {
+    %alloc_7 = memref.alloc() : memref<1xf32>
+    // CHECK: affine.store {{.*}} : memref<1xf32>
+    affine.store %0, %alloc_7[0] : memref<1xf32>
+  }
+  return
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/129187

