[Mlir-commits] [mlir] b1752dd - [mlir][sparse] Fix memory leaks (part 4) (#85729)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Mar 18 23:38:19 PDT 2024
Author: Matthias Springer
Date: 2024-03-19T15:38:16+09:00
New Revision: b1752ddf0a97969cdd931e6431c953cfd4079e50
URL: https://github.com/llvm/llvm-project/commit/b1752ddf0a97969cdd931e6431c953cfd4079e50
DIFF: https://github.com/llvm/llvm-project/commit/b1752ddf0a97969cdd931e6431c953cfd4079e50.diff
LOG: [mlir][sparse] Fix memory leaks (part 4) (#85729)
This commit fixes memory leaks in sparse tensor integration tests by
adding `bufferization.dealloc_tensor` ops.
Note: Buffer deallocation will be automated in the future with the
ownership-based buffer deallocation pass, making `dealloc_tensor`
obsolete (only codegen path, not when using the runtime library).
This commit fixes the remaining memory leaks in the MLIR test suite.
`check-mlir` now passes when built with ASAN.
Added:
Modified:
mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block_matmul.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
Removed:
################################################################################
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
index ebf9f4392d859b..f7975e0738fa81 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
@@ -35,8 +35,8 @@
#COO_3D = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique), d2 : singleton), posWidth = 32, crdWidth = 32 }>
module {
- func.func private @printMemref3dF32(%ptr : tensor<?x?x?xf32>) attributes { llvm.emit_c_interface }
- func.func private @printMemref2dF32(%ptr : tensor<?x?xf32>) attributes { llvm.emit_c_interface }
+ func.func private @printMemref3dF32(%ptr : tensor<?x?x?xf32> {bufferization.access = "read"}) attributes { llvm.emit_c_interface }
+ func.func private @printMemref2dF32(%ptr : tensor<?x?xf32> {bufferization.access = "read"}) attributes { llvm.emit_c_interface }
func.func @test_sparse_rhs(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor<?x?x?xf32> {
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
@@ -46,6 +46,11 @@ module {
%2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+ // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+ // but requires reallocation on sparse tensors.
+ bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
return %ret1 : tensor<?x?x?xf32>
}
@@ -57,6 +62,11 @@ module {
%2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+ // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+ // but requires reallocation on sparse tensors.
+ bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
return %ret1 : tensor<?x?x?xf32>
}
@@ -80,6 +90,11 @@ module {
%2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+ // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+ // but requires reallocation on sparse tensors.
+ bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
return %ret1 : tensor<?x?x?xf32>
}
@@ -192,6 +207,7 @@ module {
bufferization.dealloc_tensor %so1 : tensor<?x?x?xf32>
bufferization.dealloc_tensor %so2 : tensor<?x?x?xf32>
bufferization.dealloc_tensor %so3 : tensor<?x?x?xf32>
+
return
}
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block_matmul.mlir
index 464de9c8a2c3a6..efef01155cc784 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_block_matmul.mlir
@@ -161,6 +161,14 @@ module {
call @dump_dense_f64(%s24) : (tensor<4x4xf64>) -> ()
call @dump_dense_f64(%scsr) : (tensor<4x4xf64>) -> ()
+ bufferization.dealloc_tensor %a : tensor<4x8xf64, #BSR>
+ bufferization.dealloc_tensor %b : tensor<4x8xf64, #NV_24>
+ bufferization.dealloc_tensor %c : tensor<4x8xf64, #CSR>
+ bufferization.dealloc_tensor %d : tensor<4x4xf64>
+ bufferization.dealloc_tensor %s : tensor<4x4xf64>
+ bufferization.dealloc_tensor %s24 : tensor<4x4xf64>
+ bufferization.dealloc_tensor %scsr : tensor<4x4xf64>
+
return
}
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
index 7cde6b93d3250c..34d450c2403f61 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
@@ -279,6 +279,31 @@ module {
%si = tensor.extract %li[] : tensor<i64>
vector.print %si : i64
+ // TODO: This check is no longer needed once the codegen path uses the
+ // buffer deallocation pass. "dealloc_tensor" turns into a no-op in the
+ // codegen path.
+ %has_runtime = sparse_tensor.has_runtime_library
+ scf.if %has_runtime {
+ // sparse_tensor.assemble copies buffers when running with the runtime
+ // library. Deallocations are not needed when running in codegen
+ // mode.
+ bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
+ bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
+ bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
+ bufferization.dealloc_tensor %bs : tensor<2x10x10xf64, #BCOO>
+ }
+
+ bufferization.dealloc_tensor %li : tensor<i64>
+ bufferization.dealloc_tensor %od : tensor<3xf64>
+ bufferization.dealloc_tensor %op : tensor<2xi32>
+ bufferization.dealloc_tensor %oi : tensor<3x2xi32>
+ bufferization.dealloc_tensor %d_csr : tensor<4xf64>
+ bufferization.dealloc_tensor %p_csr : tensor<3xi32>
+ bufferization.dealloc_tensor %i_csr : tensor<3xi32>
+ bufferization.dealloc_tensor %bod : tensor<6xf64>
+ bufferization.dealloc_tensor %bop : tensor<4xindex>
+ bufferization.dealloc_tensor %boi : tensor<6x2xindex>
+
return
}
}
More information about the Mlir-commits
mailing list