[PATCH] D147790: [mlir] [bufferization] Fix dealloc errors.

Fri Apr 7 08:09:07 PDT 2023

cxy-1993 created this revision.
cxy-1993 added reviewers: springerm, mehdi_amini, rriddle.
cxy-1993 added a project: MLIR.
Herald added subscribers: bviyer, Moerafaat, zero9178, bzcheeseman, sdasgup3, Groverkss, wenzhicui, wrengr, cota, teijeong, rdzhabarov, tatianashp, msifontes, jurahul, Kayjukh, grosul1, Joonsoo, liufengdb, aartbik, mgester, arpith-jacob, antiagainst, shauheen, thopre.
Herald added a project: All.
cxy-1993 requested review of this revision.
Herald added a reviewer: nicolasvasilache.
Herald added subscribers: stephenneuendorffer, nicolasvasilache.

This patch fixes a problem where the bufferization pass generates deallocs at inappropriate locations.

Let me know if you need an RFC for this patch. Thanks for your time.

In fact, this patch does not pass the tests yet. If the input IR is:

   mlir
  #map = affine_map<(d0) -> (d0 * 5)>                                                
  func.func @ternimator_use_not_deallocated(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
    %0 = scf.forall (%arg1, %arg2) in (2, 2) shared_outs(%arg3 = %arg0) -> (tensor<10x10xf32>) {
      %1 = bufferization.alloc_tensor() : tensor<5x5xf32>                            
      %2 = affine.apply #map(%arg1)                                                  
      %3 = affine.apply #map(%arg2)                                                  
      scf.forall.in_parallel {                                                       
        tensor.parallel_insert_slice %1 into %arg3[%2, %3] [5, 5] [1, 1] : tensor<5x5xf32> into tensor<10x10xf32>
      }                                                                              
    }                                                                                
    return %0 : tensor<10x10xf32>                                                    
  }

and it is bufferized with the option:

  -one-shot-bufferize="allow-unknown-ops copy-before-write"

bufferization will add a copy before tensor.parallel_insert_slice and cause a failure. Please kindly show me how to solve this problem, thanks.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D147790

Files:
  mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
  mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir


Index: mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
===================================================================

--- mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
+++ mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir
@@ -199,3 +199,26 @@
   %3 = tensor.extract %0[%pos3] : tensor<100xf32>
   return %2, %3 : f32, f32
 }
+
+// -----
+
+// CHECK-LABEL: func @ternimator_use_not_deallocated
+#map = affine_map<(d0) -> (d0 * 5)>
+func.func @ternimator_use_not_deallocated(%arg0: tensor<10x10xf32>) -> tensor<10x10xf32> {
+  // CHECK: %[[alloc:.*]] = memref.alloc
+  // CHECK: memref.copy {{.*}} %[[alloc]]
+  // CHECK: scf.forall ({{.*}}) in (2, 2) {
+  %0 = scf.forall (%arg1, %arg2) in (2, 2) shared_outs(%arg3 = %arg0) -> (tensor<10x10xf32>) {
+    // CHECK: %[[local_alloc:.*]] = memref.alloc
+    // CHECK-NOT: memref.dealloc
+    // CHECK: %[[subview:.*]] = memref.subview %[[alloc]]
+    // CHECK: memref.copy %[[local_alloc]], %[[subview]]
+    %1 = bufferization.alloc_tensor() : tensor<5x5xf32>
+    %2 = affine.apply #map(%arg1)
+    %3 = affine.apply #map(%arg2)
+    scf.forall.in_parallel {
+      tensor.parallel_insert_slice %1 into %arg3[%2, %3] [5, 5] [1, 1] : tensor<5x5xf32> into tensor<10x10xf32>
+    }
+  }
+  return %0 : tensor<10x10xf32>
+}
Index: mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
===================================================================
--- mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -279,6 +279,9 @@
          "expected that op allocates");
 
   AnalysisState analysisState(options);
+  if (analysisState.isTensorYielded(opResult))
+    return false;
+
   if (op->hasAttr(BufferizationDialect::kEscapeAttrName)) {
     // AllocTensorOp has one result.
     ArrayAttr escapeAttr =
@@ -287,12 +290,7 @@
   }
 
   // No "escape" annotation found.
-  if (options.createDeallocs) {
-    // Perform an ad-hoc analysis.
-    return !analysisState.isTensorYielded(opResult);
-  }
-
-  return false;
+  return options.createDeallocs;
 }
 
 //===----------------------------------------------------------------------===//
@@ -611,9 +609,13 @@
     if (isa<ToMemrefOp>(op))
       return true;
 
-    // Check if the op is returning/yielding.
-    if (isRegionReturnLike(op))
-      return true;
+    // Check if the op is recursively returning/yielding.
+    Operation *parentOp = op;
+    do {
+      if (isRegionReturnLike(parentOp) ||
+          parentOp->hasTrait<OpTrait::IsTerminator>())
+        return true;
+    } while ((parentOp = parentOp->getParentOp()));
 
     // Add all aliasing OpResults to the worklist.
     // Note: In the absence of detailed analysis information (e.g., there may be


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D147790.511696.patch
Type: text/x-patch
Size: 2813 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230407/e89b8ac8/attachment.bin>