[Mlir-commits] [mlir] [mlir][sparse][gpu] free all buffers allocated for spGEMM (PR #66813)
Aart Bik
llvmlistbot at llvm.org
Tue Sep 19 13:13:25 PDT 2023
https://github.com/aartbik created https://github.com/llvm/llvm-project/pull/66813
Yup, a bit of an oversight ;-)
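Concretely: rewriteSpGEMM gpu-allocates the row/col/val buffers for A, B, and the output C, plus the two cuSPARSE work buffers, but the original lowering only copied the results back to the host and never freed any of them. The patch chains a gpu.dealloc async for each of those eleven device buffers onto the async token right before the final blocking wait, and updates the FileCheck test to match.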
From dfd292784f37e684c9224a99d98d5c8a61020cd1 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 19 Sep 2023 13:06:06 -0700
Subject: [PATCH] [mlir][sparse][gpu] free all buffers allocated for spGEMM
Yup, a bit of an oversight ;-)
---
.../Transforms/SparseGPUCodegen.cpp | 15 +++++++++--
.../SparseTensor/GPU/gpu_spgemm_lib.mlir | 26 ++++++++++++++-----
2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
index efdd3347558b44b..91b346c8a9b4c4d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseGPUCodegen.cpp
@@ -795,10 +795,10 @@ rewriteSpGEMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
Value rowC = e1.getResult(0);
token = e1.getAsyncToken();
auto e2 = genAllocBuffer(rewriter, loc, cTp.getCrdType(), zero, token);
- Value colC = e2.getResult(0);
+ Value colC = e2.getResult(0); // no free needed
token = e2.getAsyncToken();
auto e3 = genAllocBuffer(rewriter, loc, dnCType, zero, token);
- Value valC = e3.getResult(0);
+ Value valC = e3.getResult(0); // no free needed
token = e3.getAsyncToken();
Operation *spGenC =
genSpMat(rewriter, loc, spmatHandleTp, tokenTp, token, szm, szn, zero,
@@ -881,6 +881,17 @@ rewriteSpGEMM(PatternRewriter &rewriter, linalg::GenericOp op, bool enableRT,
token = genCopyMemRef(rewriter, loc, rowH, rowC, token);
token = genCopyMemRef(rewriter, loc, colH, colC, token);
token = genCopyMemRef(rewriter, loc, valH, valC, token);
+ token = genDeallocMemRef(rewriter, loc, rowA, token);
+ token = genDeallocMemRef(rewriter, loc, colA, token);
+ token = genDeallocMemRef(rewriter, loc, valA, token);
+ token = genDeallocMemRef(rewriter, loc, rowB, token);
+ token = genDeallocMemRef(rewriter, loc, colB, token);
+ token = genDeallocMemRef(rewriter, loc, valB, token);
+ token = genDeallocMemRef(rewriter, loc, rowC, token);
+ token = genDeallocMemRef(rewriter, loc, colC, token);
+ token = genDeallocMemRef(rewriter, loc, valC, token);
+ token = genDeallocMemRef(rewriter, loc, buffer1, token);
+ token = genDeallocMemRef(rewriter, loc, buffer2, token);
tokens.push_back(token);
genBlockingWait(rewriter, loc, tokens);
tokens.clear();
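For context, every GPU op in this rewrite is threaded through a single async token, so the deallocations added above are ordered after the host copies that still read those buffers, and the one blocking gpu.wait at the end covers the entire chain. A minimal sketch of what a helper like genDeallocMemRef boils down to (assuming the generated gpu::DeallocOp builder; not the verbatim upstream code):

  #include "mlir/Dialect/GPU/IR/GPUDialect.h"

  using namespace mlir;

  // Sketch only: emits `gpu.dealloc async [%token] %mem` and returns the
  // fresh async token so the caller can keep threading the chain.
  static Value genDeallocMemRef(OpBuilder &builder, Location loc, Value mem,
                                Value token) {
    Type tokenType = builder.getType<gpu::AsyncTokenType>();
    return builder
        .create<gpu::DeallocOp>(loc, tokenType, ValueRange{token}, mem)
        .getAsyncToken();
  }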
diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
index 7b4c48dc34105d0..1bb51f4fcf51805 100644
--- a/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
+++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir
@@ -5,7 +5,7 @@
// CHECK-LABEL: func.func @matmulCSR(
// CHECK-SAME: %[[VAL_0:.*0]]: tensor<8x8xf32, #{{.*}}>,
-// CHECK-SAME: %[[VAL_1:.*1]]: tensor<8x8xf32, #{{.*}}>
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<8x8xf32, #{{.*}}>) -> tensor<8x8xf32, #{{.*}}> {
// CHECK: %[[VAL_2:.*]] = arith.constant 8 : index
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: %[[VAL_4:.*]] = arith.constant 9 : index
@@ -72,12 +72,24 @@
// CHECK: %[[VAL_88:.*]] = gpu.memcpy async {{\[}}%[[VAL_87]]] %[[VAL_81]], %[[VAL_49]] : memref<?xindex>, memref<?xindex>
// CHECK: %[[VAL_89:.*]] = gpu.memcpy async {{\[}}%[[VAL_88]]] %[[VAL_82]], %[[VAL_75]] : memref<?xindex>, memref<?xindex>
// CHECK: %[[VAL_90:.*]] = gpu.memcpy async {{\[}}%[[VAL_89]]] %[[VAL_83]], %[[VAL_77]] : memref<?xf32>, memref<?xf32>
-// CHECK: gpu.wait {{\[}}%[[VAL_90]]]
-// CHECK: %[[VAL_91:.*]] = bufferization.to_tensor %[[VAL_83]] : memref<?xf32>
-// CHECK: %[[VAL_92:.*]] = bufferization.to_tensor %[[VAL_81]] : memref<?xindex>
-// CHECK: %[[VAL_93:.*]] = bufferization.to_tensor %[[VAL_82]] : memref<?xindex>
-// CHECK: %[[VAL_94:.*]] = sparse_tensor.pack %[[VAL_91]], %[[VAL_92]], %[[VAL_93]] : tensor<?xf32>, tensor<?xindex>, tensor<?xindex> to tensor<8x8xf32, #{{.*}}>
-// CHECK: return %[[VAL_94]] : tensor<8x8xf32, #{{.*}}>
+// CHECK: %[[VAL_91:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_92:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_93:.*]] = gpu.dealloc async {{.*}} : memref<?xf32>
+// CHECK: %[[VAL_94:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_95:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_96:.*]] = gpu.dealloc async {{.*}} : memref<?xf32>
+// CHECK: %[[VAL_97:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_98:.*]] = gpu.dealloc async {{.*}} : memref<?xindex>
+// CHECK: %[[VAL_99:.*]] = gpu.dealloc async {{.*}} : memref<?xf32>
+// CHECK: %[[VAL_a0:.*]] = gpu.dealloc async {{.*}} : memref<?xi8>
+// CHECK: %[[VAL_a1:.*]] = gpu.dealloc async {{.*}} : memref<?xi8>
+// CHECK: gpu.wait [%[[VAL_a1]]]
+// CHECK: %[[VAL_a2:.*]] = bufferization.to_tensor %[[VAL_83]] : memref<?xf32>
+// CHECK: %[[VAL_a3:.*]] = bufferization.to_tensor %[[VAL_81]] : memref<?xindex>
+// CHECK: %[[VAL_a4:.*]] = bufferization.to_tensor %[[VAL_82]] : memref<?xindex>
+// CHECK: %[[VAL_a5:.*]] = sparse_tensor.pack %[[VAL_a2]], %[[VAL_a3]], %[[VAL_a4]] : tensor<?xf32>, tensor<?xindex>, tensor<?xindex> to tensor<8x8xf32, #{{.*}}>
+// CHECK: return %[[VAL_a5]] : tensor<8x8xf32, #{{.*}}>
+// CHECK: }
func.func @matmulCSR(%A: tensor<8x8xf32, #CSR>,
%B: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> {
%init = bufferization.alloc_tensor() : tensor<8x8xf32, #CSR>
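The updated CHECK lines mirror the code change: eleven gpu.dealloc async ops (the row/col/val triples for A, B, and C, plus the two ?xi8 work buffers) now precede the blocking wait, and gpu.wait consumes the token of the last dealloc rather than the last memcpy.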