[Mlir-commits] [mlir] 3bd82f3 - [mlir][sparse] compute allocation size_hint
Aart Bik
llvmlistbot at llvm.org
Mon Feb 6 14:09:01 PST 2023
Author: Aart Bik
Date: 2023-02-06T14:08:53-08:00
New Revision: 3bd82f30dcc25533de1ff900a704efa77a6951da
URL: https://github.com/llvm/llvm-project/commit/3bd82f30dcc25533de1ff900a704efa77a6951da
DIFF: https://github.com/llvm/llvm-project/commit/3bd82f30dcc25533de1ff900a704efa77a6951da.diff
LOG: [mlir][sparse] compute allocation size_hint
This adds the size hint to a number of tensor allocations in the codegen paths,
shaving quite some time off e.g. reading in sparse matrices
thanks to the resulting zero-reallocation scheme. Note that we can probably
provide hints on all allocations, and refine the heuristics that use them
for general tensors.
Reviewed By: bixia
Differential Revision: https://reviews.llvm.org/D143309
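For context, the effect of the hint on the rewritten IR is roughly the following
(an illustrative sketch based on the updated test CHECK lines below, not verbatim
compiler output; %reader, %d0, %d1 and the #CSR encoding are hypothetical names):

    // The number of stored entries is queried from the reader first ...
    %nnz = call @getSparseTensorReaderNNZ(%reader) : (!llvm.ptr<i8>) -> index
    // ... and passed as a size hint, so the buffers backing the sparse tensor
    // can be sized once up front instead of growing through repeated reallocation.
    %t = bufferization.alloc_tensor(%d0, %d1) size_hint=%nnz : tensor<?x?xf32, #CSR>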
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 9f3388d61f3f2..b17305e104576 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -229,6 +229,10 @@ static void createAllocFields(OpBuilder &builder, Location loc, Type type,
ptrHeuristic = constantIndex(builder, loc, 2);
idxHeuristic = builder.create<arith::MulIOp>(
loc, constantIndex(builder, loc, rank), sizeHint); // AOS
+ } else if (rank == 2 && isDenseDim(rtp, 0) && isCompressedDim(rtp, 1)) {
+ ptrHeuristic = builder.create<arith::AddIOp>(
+ loc, sizeHint, constantIndex(builder, loc, 1));
+ idxHeuristic = sizeHint;
} else {
ptrHeuristic = idxHeuristic = constantIndex(builder, loc, 16);
}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 27fef5cf0c8d4..76745a5731616 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -410,9 +410,14 @@ struct Sparse2SparseReshapeRewriter : public OpRewritePattern<ReshapeOp> {
// foreach srcCoords %srcTensor
// insert translateIndicesArray(srcCoords), %tmp
// %t = sparse_tensor.cast %tmp
+ Value nnz = rewriter.create<NumberOfEntriesOp>(loc, srcTensor);
RankedTensorType cooTp = getUnorderedCOOFromType(dstTp);
- auto cooBuffer =
- rewriter.create<AllocTensorOp>(loc, cooTp, dstDynSizes).getResult();
+ Value cooBuffer =
+ rewriter
+ .create<AllocTensorOp>(loc, cooTp, dstDynSizes, Value(),
+ /*sizeHint=*/nnz, Attribute())
+ .getResult();
+
ForeachOp foreachOp = rewriter.create<ForeachOp>(
loc, srcTensor, cooBuffer,
[&](OpBuilder &builder, Location loc, ValueRange args, Value v,
@@ -787,6 +792,7 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
SmallVector<Value> srcSizes;
sizesForTensor(rewriter, srcSizes, loc, srcTp, src);
Value tmpCoo = Value();
+ Value nnz = rewriter.create<NumberOfEntriesOp>(loc, src);
// We need a tmp COO buffer if and only if
// 1. the src tensor is not a COO and
// 2. the src tensor is not ordered in the same way as the target
@@ -802,8 +808,10 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
getDynamicSizes(srcTp, srcSizes, dynSrcSizes);
srcTp =
getUnorderedCOOFromTypeWithOrdering(srcTp, encDst.getDimOrdering());
- tmpCoo =
- rewriter.create<AllocTensorOp>(loc, srcTp, dynSrcSizes).getResult();
+ tmpCoo = rewriter
+ .create<AllocTensorOp>(loc, srcTp, dynSrcSizes, Value(),
+ /*sizeHint=*/nnz, Attribute())
+ .getResult();
auto foreachOp = rewriter.create<ForeachOp>(
loc, src, tmpCoo,
[&](OpBuilder &builder, Location loc, ValueRange args, Value v,
@@ -823,11 +831,6 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
// Only need to sort if the srcTp is not already sorted (we faithfully take
// the guarantee from the sparse tensor encoding).
if (!isAllDimOrdered(srcTp)) {
- // Retrieve NNZ.
- Value nnz = rewriter.create<NumberOfEntriesOp>(loc, src);
- nnz = rewriter.create<arith::IndexCastOp>(loc, rewriter.getIndexType(),
- nnz);
-
// Retrieve the values-array.
Value y = genToValues(rewriter, loc, src);
SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(srcTp);
@@ -858,8 +861,10 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
// For each element in the COO tensor, insert the element to the dst tensor.
SmallVector<Value> dynDstSizes;
getDynamicSizes(dstTp, srcSizes, dynDstSizes);
- Value dst =
- rewriter.create<AllocTensorOp>(loc, dstTp, dynDstSizes).getResult();
+ Value dst = rewriter
+ .create<AllocTensorOp>(loc, dstTp, dynDstSizes, Value(),
+ /*sizeHint=*/nnz, Attribute())
+ .getResult();
SmallVector<Value> indices(srcTp.getRank(), Value());
auto foreachOp = rewriter.create<ForeachOp>(
loc, src, dst,
@@ -1027,18 +1032,21 @@ struct NewRewriter : public OpRewritePattern<NewOp> {
// get the next element from the input file
// insert the element to %tmp
// %t = sparse_tensor.ConvertOp %tmp
- RankedTensorType cooTp =
- getUnorderedCOOFromTypeWithOrdering(dstTp, encDst.getDimOrdering());
- Value cooBuffer =
- rewriter.create<AllocTensorOp>(loc, cooTp, dynSizesArray).getResult();
-
Value c0 = constantIndex(rewriter, loc, 0);
Value c1 = constantIndex(rewriter, loc, 1);
Value nnz = createFuncCall(rewriter, loc, "getSparseTensorReaderNNZ",
{indexTp}, {reader}, EmitCInterface::Off)
.getResult(0);
- Value symmetric;
+ RankedTensorType cooTp =
+ getUnorderedCOOFromTypeWithOrdering(dstTp, encDst.getDimOrdering());
+ Value cooBuffer =
+ rewriter
+ .create<AllocTensorOp>(loc, cooTp, dynSizesArray, Value(),
+ /*sizeHint=*/nnz, Attribute())
+ .getResult();
+
// The verifier ensures only 2D tensors can have the expandSymmetry flag.
+ Value symmetric;
if (rank == 2 && op.getExpandSymmetry()) {
symmetric =
createFuncCall(rewriter, loc, "getSparseTensorReaderIsSymmetric",
diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
index 00811d4a0a892..df958480d5db8 100644
--- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir
@@ -20,8 +20,8 @@
// CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
// CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
// CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
// CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
// CHECK: %[[S:.*]] = call @getSparseTensorReaderIsSymmetric(%[[R]])
// CHECK: %[[VB:.*]] = memref.alloca()
// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
@@ -59,8 +59,8 @@ func.func @sparse_new_symmetry(%arg0: !llvm.ptr<i8>) -> tensor<?x?xf32, #CSR> {
// CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
// CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
// CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
// CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
// CHECK: %[[VB:.*]] = memref.alloca()
// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
// CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]])
@@ -90,8 +90,8 @@ func.func @sparse_new(%arg0: !llvm.ptr<i8>) -> tensor<?x?xf32, #CSR> {
// CHECK: call @copySparseTensorReaderDimSizes(%[[R]], %[[DS]])
// CHECK: %[[D0:.*]] = memref.load %[[DS]]{{\[}}%[[C0]]]
// CHECK: %[[D1:.*]] = memref.load %[[DS]]{{\[}}%[[C1]]]
-// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]])
// CHECK: %[[N:.*]] = call @getSparseTensorReaderNNZ(%[[R]])
+// CHECK: %[[T:.*]] = bufferization.alloc_tensor(%[[D0]], %[[D1]]) size_hint=%[[N]]
// CHECK: %[[VB:.*]] = memref.alloca()
// CHECK: %[[T2:.*]] = scf.for %{{.*}} = %[[C0]] to %[[N]] step %[[C1]] iter_args(%[[A2:.*]] = %[[T]])
// CHECK: func.call @getSparseTensorReaderNextF32(%[[R]], %[[DS]], %[[VB]])