[Mlir-commits] [mlir] ce9ce66 - [mlir][sparse] fix a memory leak when converting from a tensor slice

Peiming Liu llvmlistbot at llvm.org
Mon Feb 13 14:44:18 PST 2023


Author: Peiming Liu
Date: 2023-02-13T22:44:12Z
New Revision: ce9ce66b8deffef133b6ce11115fd88471544a5b

URL: https://github.com/llvm/llvm-project/commit/ce9ce66b8deffef133b6ce11115fd88471544a5b
DIFF: https://github.com/llvm/llvm-project/commit/ce9ce66b8deffef133b6ce11115fd88471544a5b.diff

LOG: [mlir][sparse] fix a memory leak when converting from a tensor slice

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D143929

Added: 
    

Modified: 
    mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir

Removed: 
    


################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
index df19b61dbede1..e9724bff11e4d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -15,6 +15,7 @@
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
 
 using namespace mlir;
 using namespace mlir::sparse_tensor;
@@ -206,7 +207,14 @@ void LoopEmitter::initializeLoopEmit(OpBuilder &builder, Location loc,
     Type elementType = rtp.getElementType();
     if (!enc) {
       // Non-annotated dense tensors.
-      auto denseTp = MemRefType::get(shape, elementType);
+      BaseMemRefType denseTp = MemRefType::get(shape, elementType);
+
+      // TODO: if we unconditionally use a fully dynamic layout here, it breaks
+      // some vectorization passes which require static stride = 1.
+      // Is it possible to run the vectorization pass after bufferization?
+      if (llvm::isa_and_nonnull<tensor::ExtractSliceOp>(tensor.getDefiningOp()))
+        denseTp = bufferization::getMemRefTypeWithFullyDynamicLayout(rtp);
+
       Value denseVal =
           builder.create<bufferization::ToMemrefOp>(loc, denseTp, tensor);
       // Dense outputs need special handling.

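A note on the hunk above: a strided slice of a dense tensor is in general not
contiguous, so bufferizing it to an identity-layout memref needs an extra,
contiguous buffer, and that extra copy appears to be what leaked. Mapping the
slice to a memref with a fully dynamic layout lets it be used in place. The
snippet below is only an illustrative sketch of that difference, not IR taken
from this commit; the value names and types are borrowed from the test case.

    // Illustrative sketch only (not produced by this patch): a 4x4 slice
    // taken with stride 2 along the second dimension is not contiguous.
    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2]
        : tensor<8x8xf64> to tensor<4x4xf64>
    // With a static identity layout, bufferization would have to materialize
    // a fresh contiguous buffer (the copy that previously leaked):
    //   %m0 = bufferization.to_memref %dense : memref<4x4xf64>
    // With a fully dynamic strided layout, the slice can alias the buffer
    // of %sa directly:
    %m1 = bufferization.to_memref %dense
        : memref<4x4xf64, strided<[?, ?], offset: ?>>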
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
index d9be6ab8b127c..548818ccb02db 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_foreach_slices.mlir
@@ -69,25 +69,24 @@ module {
     //
     call @foreach_print_slice(%a) : (tensor<4x4xf64, #CSR_SLICE>) -> ()
 
-    // FIXME: investigate why a tensor copy is inserted for this slice
-//    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2] : tensor<8x8xf64> to
-//                                                          tensor<4x4xf64>
-//    %b = sparse_tensor.convert %dense : tensor<4x4xf64> to tensor<4x4xf64, #CSR>
-//    // Foreach on sparse tensor instead of slice they should yield the same result.
-//    //
-//    // C_HECK-NEXT: 1
-//    // C_HECK-NEXT: 0
-//    // C_HECK-NEXT: 2.3
-//    // C_HECK-NEXT: 2
-//    // C_HECK-NEXT: 3
-//    // C_HECK-NEXT: 1
-//    // C_HECK-NEXT: 3
-//    // C_HECK-NEXT: 2
-//    // C_HECK-NEXT: 2.1
-//    //
-//    call @foreach_print_non_slice(%b) : (tensor<4x4xf64, #CSR>) -> ()
-//    bufferization.dealloc_tensor %b : tensor<4x4xf64, #CSR>
+    %dense = tensor.extract_slice %sa[1, 1][4, 4][1, 2] : tensor<8x8xf64> to
+                                                          tensor<4x4xf64>
+    %b = sparse_tensor.convert %dense : tensor<4x4xf64> to tensor<4x4xf64, #CSR>
+    // Foreach on sparse tensor instead of slice should yield the same result.
+    //
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 0
+    // CHECK-NEXT: 2.3
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 3
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: 3
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: 2.1
+    //
+    call @foreach_print_non_slice(%b) : (tensor<4x4xf64, #CSR>) -> ()
 
+    bufferization.dealloc_tensor %b : tensor<4x4xf64, #CSR>
     bufferization.dealloc_tensor %tmp : tensor<8x8xf64, #CSR>
     return
   }

More information about the Mlir-commits mailing list