[Mlir-commits] [mlir] a8aeb65 - [mlir][memref] Extend multi-buffering transform

Mon Oct 3 11:46:14 PDT 2022

Author: Kirsten Lee
Date: 2022-10-03T18:45:38Z
New Revision: a8aeb651cdae4e687500575108e12c89e540f59c

URL: https://github.com/llvm/llvm-project/commit/a8aeb651cdae4e687500575108e12c89e540f59c
DIFF: https://github.com/llvm/llvm-project/commit/a8aeb651cdae4e687500575108e12c89e540f59c.diff

LOG: [mlir][memref] Extend multi-buffering transform

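Extend the multi-buffering transform to simplify the affine map it creates: when the loop's lower bound or step is a constant, it is folded into the map as a constant expression instead of being passed as an extra operand to affine.apply. This avoids downstream problems where more complex affine.apply operations cannot be expanded.
Also transfer the attributes from the old allocation to the new allocation.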

Reviewed By: ThomasRaoux

Differential Revision: https://reviews.llvm.org/D134894

Added: 
    

Modified: 
    mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp
    mlir/test/Dialect/MemRef/multibuffer.mlir
    mlir/test/Dialect/MemRef/transform-ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp
index 75e3746dac8b7..d28a603c9c2b9 100644

--- a/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp
@@ -104,11 +104,11 @@ FailureOr<memref::AllocOp> mlir::memref::multiBuffer(memref::AllocOp allocOp,
   llvm::Optional<OpFoldResult> singleStep = candidateLoop.getSingleStep();
   if (!inductionVar || !lowerBound || !singleStep)
     return failure();
+
+  if (!dom.dominates(allocOp.getOperation(), candidateLoop))
+    return failure();
+
   OpBuilder builder(candidateLoop);
-  Value stepValue =
-      getOrCreateValue(*singleStep, builder, candidateLoop->getLoc());
-  Value lowerBoundValue =
-      getOrCreateValue(*lowerBound, builder, candidateLoop->getLoc());
   SmallVector<int64_t, 4> newShape(1, multiplier);
   ArrayRef<int64_t> oldShape = allocOp.getType().getShape();
   newShape.append(oldShape.begin(), oldShape.end());
@@ -117,15 +117,28 @@ FailureOr<memref::AllocOp> mlir::memref::multiBuffer(memref::AllocOp allocOp,
                                    allocOp.getType().getMemorySpace());
   builder.setInsertionPoint(allocOp);
   Location loc = allocOp->getLoc();
-  auto newAlloc = builder.create<memref::AllocOp>(loc, newMemref);
+  auto newAlloc = builder.create<memref::AllocOp>(loc, newMemref, ValueRange{},
+                                                  allocOp->getAttrs());
   builder.setInsertionPoint(&candidateLoop.getLoopBody().front(),
                             candidateLoop.getLoopBody().front().begin());
+
+  SmallVector<Value> operands = {*inductionVar};
   AffineExpr induc = getAffineDimExpr(0, allocOp.getContext());
-  AffineExpr init = getAffineDimExpr(1, allocOp.getContext());
-  AffineExpr step = getAffineDimExpr(2, allocOp.getContext());
+  unsigned dimCount = 1;
+  auto getAffineExpr = [&](OpFoldResult e) -> AffineExpr {
+    if (Optional<int64_t> constValue = getConstantIntValue(e)) {
+      return getAffineConstantExpr(*constValue, allocOp.getContext());
+    } else {
+      auto value = getOrCreateValue(e, builder, candidateLoop->getLoc());
+      operands.push_back(value);
+      return getAffineDimExpr(dimCount++, allocOp.getContext());
+    }
+  };
+  auto init = getAffineExpr(*lowerBound);
+  auto step = getAffineExpr(*singleStep);
+
   AffineExpr expr = ((induc - init).floorDiv(step)) % multiplier;
-  auto map = AffineMap::get(3, 0, expr);
-  std::array<Value, 3> operands = {*inductionVar, lowerBoundValue, stepValue};
+  auto map = AffineMap::get(dimCount, 0, expr);
   Value bufferIndex = builder.create<AffineApplyOp>(loc, map, operands);
   SmallVector<OpFoldResult> offsets, sizes, strides;
   offsets.push_back(bufferIndex);

diff  --git a/mlir/test/Dialect/MemRef/multibuffer.mlir b/mlir/test/Dialect/MemRef/multibuffer.mlir
index b70b51e6aa622..4ab7d993e6fd1 100644
--- a/mlir/test/Dialect/MemRef/multibuffer.mlir
+++ b/mlir/test/Dialect/MemRef/multibuffer.mlir
@@ -1,19 +1,19 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -test-multi-buffering=multiplier=5 -cse -split-input-file | FileCheck %s
 
-// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)>
 
 // CHECK-LABEL: func @multi_buffer
 func.func @multi_buffer(%a: memref<1024x1024xf32>) {
-// CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32>
+// CHECK-DAG: %[[A:.*]] = memref.alloc() {someAttribute} : memref<5x4x128xf32>
 // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
-  %0 = memref.alloc() : memref<4x128xf32>
+  %0 = memref.alloc() {someAttribute} : memref<4x128xf32>
   %c1024 = arith.constant 1024 : index
   %c1 = arith.constant 1 : index
   %c3 = arith.constant 3 : index
 // CHECK: scf.for %[[IV:.*]] = %[[C1]]
   scf.for %arg2 = %c1 to %c1024 step %c3 {
-// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]])
+// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]])
 // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>>
    %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] :
     memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
@@ -32,15 +32,13 @@ func.func @multi_buffer(%a: memref<1024x1024xf32>) {
 // CHECK-LABEL: func @multi_buffer_affine
 func.func @multi_buffer_affine(%a: memref<1024x1024xf32>) {
 // CHECK-DAG: %[[A:.*]] = memref.alloc() : memref<5x4x128xf32>
-// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
   %0 = memref.alloc() : memref<4x128xf32>
   %c1024 = arith.constant 1024 : index
   %c1 = arith.constant 1 : index
   %c3 = arith.constant 3 : index
 // CHECK: affine.for %[[IV:.*]] = 1
   affine.for %arg2 = 1 to 1024 step 3 {
-// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]])
+// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]])
 // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>>
    %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] :
     memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>
@@ -56,7 +54,7 @@ func.func @multi_buffer_affine(%a: memref<1024x1024xf32>) {
 
 // -----
 
-// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 5)>
+// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (((d0 - 1) floordiv 3) mod 5)>
 
 // CHECK-LABEL: func @multi_buffer_subview_use
 func.func @multi_buffer_subview_use(%a: memref<1024x1024xf32>) {
@@ -69,7 +67,7 @@ func.func @multi_buffer_subview_use(%a: memref<1024x1024xf32>) {
   %c3 = arith.constant 3 : index
 // CHECK: scf.for %[[IV:.*]] = %[[C1]]
   scf.for %arg2 = %c1 to %c1024 step %c3 {
-// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]], %[[C1]], %[[C3]])
+// CHECK: %[[I:.*]] = affine.apply #[[$MAP1]](%[[IV]])
 // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0, 0] [1, 4, 128] [1, 1, 1] : memref<5x4x128xf32> to memref<4x128xf32, strided<[128, 1], offset: ?>>
    %1 = memref.subview %a[%arg2, 0] [4, 128] [1, 1] :
     memref<1024x1024xf32> to memref<4x128xf32, affine_map<(d0, d1)[s0] -> (d0 * 1024 + s0 + d1)>>

diff  --git a/mlir/test/Dialect/MemRef/transform-ops.mlir b/mlir/test/Dialect/MemRef/transform-ops.mlir
index 5b6f70c7be8ec..9216ebcc554be 100644
--- a/mlir/test/Dialect/MemRef/transform-ops.mlir
+++ b/mlir/test/Dialect/MemRef/transform-ops.mlir
@@ -1,6 +1,6 @@
 // RUN: mlir-opt %s -test-transform-dialect-interpreter -verify-diagnostics -allow-unregistered-dialect | FileCheck %s
 
-// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2) -> (((d0 - d1) floordiv d2) mod 2)>
+// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> ((d0 floordiv 4) mod 2)>
 // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0)[s0] -> (d0 + s0)>
 
 // CHECK-LABEL: func @multi_buffer
@@ -17,7 +17,7 @@ func.func @multi_buffer(%in: memref<16xf32>) {
 
   // CHECK: scf.for %[[IV:.*]] = %[[C0]]
   scf.for %i0 = %c0 to %c16 step %c4 {
-    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]], %[[C0]], %[[C4]])
+    // CHECK: %[[I:.*]] = affine.apply #[[$MAP0]](%[[IV]])
     // CHECK: %[[SV:.*]] = memref.subview %[[A]][%[[I]], 0] [1, 4] [1, 1] : memref<2x4xf32> to memref<4xf32, strided<[1], offset: ?>>
     %1 = memref.subview %in[%i0] [4] [1] : memref<16xf32> to memref<4xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
     // CHECK: memref.copy %{{.*}}, %[[SV]] : memref<4xf32, #[[$MAP1]]> to memref<4xf32, strided<[1], offset: ?>>