[Mlir-commits] [mlir] 9e8d931 - [mlir][sparse] allow foreach operation to generate out-of-order loop on non-annotated tensor.
Peiming Liu
llvmlistbot at llvm.org
Thu Feb 16 15:23:26 PST 2023
Author: Peiming Liu
Date: 2023-02-16T23:23:20Z
New Revision: 9e8d9316ceaf771fdb5507fa2adebcf0605efd47
URL: https://github.com/llvm/llvm-project/commit/9e8d9316ceaf771fdb5507fa2adebcf0605efd47
DIFF: https://github.com/llvm/llvm-project/commit/9e8d9316ceaf771fdb5507fa2adebcf0605efd47.diff
LOG: [mlir][sparse] allow foreach operation to generate out-of-order loop on non-annotated tensor.
There is no need for a temporary COO tensor and a sort even when converting dense -> CSC; we can instead rotate the loop nest so that it yields ordered coordinates from the start.
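For illustration, a rough sketch (hand-written, not verbatim compiler output; the foreach result-type syntax is abbreviated) of the rewritten dense -> CSC path, where a single column-first foreach inserts directly into the destination and the old temporary-COO-plus-sort step disappears:

```mlir
#CSC = #sparse_tensor.encoding<{
  dimLevelType = [ "dense", "compressed" ],
  dimOrdering = affine_map<(i, j) -> (j, i)>
}>
// %dense : tensor<8x7xf32>
%buf = bufferization.alloc_tensor() : tensor<8x7xf32, #CSC>
%filled = sparse_tensor.foreach in %dense init(%buf)
    {order = affine_map<(i, j) -> (j, i)>}
    : tensor<8x7xf32>, tensor<8x7xf32, #CSC> -> tensor<8x7xf32, #CSC> do {
^bb0(%i: index, %j: index, %v: f32, %t: tensor<8x7xf32, #CSC>):
  // Visiting column-first means the level coordinates [%j, %i] arrive
  // already sorted, so direct insertion stays in order.
  %ins = sparse_tensor.insert %v into %t[%j, %i] : tensor<8x7xf32, #CSC>
  sparse_tensor.yield %ins : tensor<8x7xf32, #CSC>
}
%res = sparse_tensor.load %filled hasInserts : tensor<8x7xf32, #CSC>
```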
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144213
Added:
Modified:
mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 20c69c67d2b76..b719d29be3c4d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -1147,13 +1147,20 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>,
def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
[SingleBlockImplicitTerminator<"YieldOp">]>,
Arguments<(ins AnyTensor:$tensor,
- Variadic<AnyType>:$initArgs)>,
+ Variadic<AnyType>:$initArgs,
+ OptionalAttr<AffineMapAttr>:$order)>,
Results<(outs Variadic<AnyType>:$results)> {
let summary = "Iterates over elements in a tensor";
let description = [{
Iterates over stored elements in a tensor (which are typically, but not always,
non-zero for sparse tensors) and executes the block.
+ `tensor`: the input tensor to iterate over.
+ `initArgs`: the initial loop arguments to carry and update during each iteration.
+ `order`: an optional permutation affine map that specifies the order in which
+ the dimensions are visited (e.g., row first or column first). This is only
+ applicable when the input tensor is a non-annotated dense tensor.
+
For an input tensor with rank n, the block must take n + 1 (and additional loop
carried variables as described below) arguments. The first n arguments must be
Index type, together indicating the current coordinates of the element being visited.
@@ -1208,15 +1215,33 @@ def SparseTensor_ForeachOp : SparseTensor_Op<"foreach",
// [%row, %col] -> [0, 0], [0, 1], [1, 0], [1, 1], [2, 0], [2, 1]
}
+ // foreach on a row-major dense tensor, visiting its columns first
+ sparse_tensor.foreach in %0 {order=affine_map<(i,j)->(j,i)>}: tensor<2x3xf64> do {
+ ^bb0(%row: index, %col: index, %arg3: f64):
+ // [%row, %col] -> [0, 0], [1, 0], [2, 0], [0, 1], [1, 1], [2, 1]
+ }
+
```
}];
let builders = [
- OpBuilder<(ins "Value":$tensor,
+ OpBuilder<(ins "Value":$tensor, "ValueRange":$iterArgs, "AffineMapAttr":$order,
"function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>")>,
+ OpBuilder<(ins "Value":$tensor, "AffineMapAttr":$order,
+ "function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>":$bodyBuilder),
+ [{
+ build($_builder, $_state, tensor, ValueRange(), order, bodyBuilder);
+ }]>,
OpBuilder<(ins "Value":$tensor,
- "ValueRange":$iterArgs,
- "function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>")>
+ "function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>":$bodyBuilder),
+ [{
+ build($_builder, $_state, tensor, ValueRange(), nullptr, bodyBuilder);
+ }]>,
+ OpBuilder<(ins "Value":$tensor, "ValueRange":$iterArgs,
+ "function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>":$bodyBuilder),
+ [{
+ build($_builder, $_state, tensor, iterArgs, nullptr, bodyBuilder);
+ }]>
];
let regions = (region SizedRegion<1>:$region);
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 7f9111e24e785..207834bacf7e6 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -982,17 +982,10 @@ LogicalResult CompressOp::verify() {
void ForeachOp::build(
OpBuilder &builder, OperationState &result, Value tensor,
+ ValueRange initArgs, AffineMapAttr order,
function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>
bodyBuilder) {
- build(builder, result, tensor, std::nullopt, bodyBuilder);
-}
-
-void ForeachOp::build(
- OpBuilder &builder, OperationState &result, Value tensor,
- ValueRange initArgs,
- function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>
- bodyBuilder) {
- build(builder, result, initArgs.getTypes(), tensor, initArgs);
+ build(builder, result, initArgs.getTypes(), tensor, initArgs, order);
// Builds foreach body.
if (!bodyBuilder)
return;
@@ -1023,6 +1016,10 @@ LogicalResult ForeachOp::verify() {
const Dimension dimRank = t.getDimRank();
const auto args = getBody()->getArguments();
+ if (getOrder().has_value() &&
+ (t.getEncoding() || !getOrder()->isPermutation()))
+ return emitError("Only support permuted order on non encoded dense tensor");
+
if (static_cast<size_t>(dimRank) + 1 + getInitArgs().size() != args.size())
return emitError("Unmatched number of arguments in the block");
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 97ec7e26b8d3c..809e9712c752a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -500,17 +500,46 @@ Operation *mlir::sparse_tensor::getTop(Operation *op) {
void sparse_tensor::foreachInSparseConstant(
Location loc, RewriterBase &rewriter, SparseElementsAttr attr,
- function_ref<void(ArrayRef<Value>, Value)> callback) {
- int64_t rank = attr.getType().getRank();
+ AffineMap order, function_ref<void(ArrayRef<Value>, Value)> callback) {
+ Dimension dimRank = getSparseTensorType(attr).getDimRank();
// Foreach on constant.
DenseElementsAttr indicesAttr = attr.getIndices();
DenseElementsAttr valuesAttr = attr.getValues();
+ using CooValuePair = std::pair<SmallVector<IntegerAttr>, Attribute>;
+ SmallVector<CooValuePair> cooV;
+ for (size_t i = 0, nse = valuesAttr.size(); i < nse; i++) {
+ cooV.emplace_back();
+ for (Dimension j = 0; j < dimRank; j++) {
+ auto coordAttr = indicesAttr.getValues<IntegerAttr>()[i * dimRank + j];
+ cooV.back().first.push_back(coordAttr);
+ }
+ auto valAttr = valuesAttr.getValues<Attribute>()[i];
+ cooV.back().second = valAttr;
+ }
+
+ // Sorts the sparse element attribute based on coordinates.
+ std::sort(cooV.begin(), cooV.end(),
+ [order](const CooValuePair &lhs, const CooValuePair &rhs) {
+ const SmallVectorImpl<IntegerAttr> &lc = lhs.first;
+ const SmallVectorImpl<IntegerAttr> &rc = rhs.first;
+ for (size_t i = 0, e = lc.size(); i < e; i++) {
+ auto l =
+ order
+ ? order.getResult(i).cast<AffineDimExpr>().getPosition()
+ : i;
+ if (lc[l].getInt() == rc[l].getInt())
+ continue;
+ return lc[l].getInt() < rc[l].getInt();
+ }
+ llvm_unreachable("no equal coordinate in sparse element attr");
+ });
+
SmallVector<Value> coords;
- for (int i = 0, e = valuesAttr.size(); i < e; i++) {
+ for (size_t i = 0, nse = valuesAttr.size(); i < nse; i++) {
coords.clear();
- for (int j = 0; j < rank; j++) {
- auto coordAttr = indicesAttr.getValues<IntegerAttr>()[i * rank + j];
+ for (Dimension j = 0; j < dimRank; j++) {
+ auto coordAttr = cooV[i].first[j];
auto coord =
rewriter.create<arith::ConstantIndexOp>(loc, coordAttr.getInt());
// Remaps coordinates.
@@ -518,11 +547,11 @@ void sparse_tensor::foreachInSparseConstant(
}
Value val;
if (attr.getElementType().isa<ComplexType>()) {
- auto valAttr = valuesAttr.getValues<ArrayAttr>()[i];
+ auto valAttr = cooV[i].second.cast<ArrayAttr>();
val = rewriter.create<complex::ConstantOp>(loc, attr.getElementType(),
valAttr);
} else {
- auto valAttr = valuesAttr.getValues<TypedAttr>()[i];
+ auto valAttr = cooV[i].second.cast<TypedAttr>();
// Remaps value.
val = rewriter.create<arith::ConstantOp>(loc, valAttr);
}
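To make the effect of the pre-sorting concrete, a small hedged example (constant made up for illustration) of how the comparator above changes the unrolled visiting sequence for a sparse constant:

```mlir
%cst = arith.constant sparse<[[0, 1], [1, 0]], [1.0, 2.0]> : tensor<2x2xf64>
sparse_tensor.foreach in %cst {order = affine_map<(i, j) -> (j, i)>}
    : tensor<2x2xf64> do {
^bb0(%i: index, %j: index, %v: f64):
  // Column-first order sorts the COO list by (j, i), so the unrolled body
  // visits [1, 0] (2.0) before [0, 1] (1.0); without `order`, the
  // row-first sequence [0, 1] then [1, 0] is used.
}
```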
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index 9e98c9d052fad..2624d5c826f4a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -216,7 +216,7 @@ Operation *getTop(Operation *op);
/// callback({%c3}, %v3)
void foreachInSparseConstant(
Location loc, RewriterBase &rewriter, SparseElementsAttr attr,
- function_ref<void(ArrayRef<Value>, Value)> callback);
+ AffineMap order, function_ref<void(ArrayRef<Value>, Value)> callback);
/// Converts the vector indices and store it into the memory pointed by
/// `ind`, apply (optional) `offset` on `offsetDim`.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index dd98cdbf72b93..2051ac8415775 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -158,7 +158,7 @@ static LogicalResult genForeachOnSparseConstant(ForeachOp op,
// Foreach on constant.
foreachInSparseConstant(
- loc, rewriter, attr,
+ loc, rewriter, attr, op.getOrder().value_or(AffineMap()),
[&reduc, &rewriter, op](ArrayRef<Value> coords, Value v) mutable {
SmallVector<Value> args;
args.append(coords.begin(), coords.end());
@@ -669,19 +669,16 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
}
const auto encDst = dstTp.getEncoding();
- // We don't need a temporary COO tensor if the destination has an identity
- // ordering. Otherwise, we use the destination ordering for the temporary
- // COO tensor.
- // TODO: enhance foreachOp to take ordering to remove the need of a
- // temporary COO tensor here.
- const RankedTensorType bufferTp = dstTp.isIdentity()
- ? dstTp.getRankedTensorType()
- : getUnorderedCOOFromTypeWithOrdering(
- dstTp, dstTp.getDimToLvlMap());
+ // We don't need a temporary COO tensor for dense => sparse conversion.
+ const RankedTensorType bufferTp = dstTp.getRankedTensorType();
auto buffer =
rewriter.create<AllocTensorOp>(loc, bufferTp, dynSizes).getResult();
+ AffineMapAttr foreachOrder = nullptr;
+ if (encDst.getDimOrdering())
+ foreachOrder = AffineMapAttr::get(encDst.getDimOrdering());
+
auto foreachOp = rewriter.create<ForeachOp>(
- loc, src, buffer,
+ loc, src, buffer, foreachOrder,
[&](OpBuilder &builder, Location loc, ValueRange indices, Value v,
ValueRange reduc) {
Value input = reduc.front();
@@ -709,14 +706,8 @@ struct ConvertRewriter : public OpRewritePattern<ConvertOp> {
});
rewriter.setInsertionPointAfter(op);
src = rewriter.create<LoadOp>(loc, foreachOp.getResult(0), true);
- if (bufferTp != dstTp) {
- rewriter.replaceOpWithNewOp<ConvertOp>(op, dstTp.getRankedTensorType(),
- src);
- rewriter.create<DeallocTensorOp>(loc, src);
- } else {
- rewriter.replaceOp(op, src);
- }
+ rewriter.replaceOp(op, src);
return success();
}
@@ -928,16 +919,28 @@ struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
for (Dimension d = 0; d < dimRank; d++) {
// TODO: provide utility function for loop sequences that only contains
// one for loop?
- loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast<size_t>(d));
+ Dimension ld =
+ op.getOrder()
+ ? op.getOrder()->getResult(d).cast<AffineDimExpr>().getPosition()
+ : d;
+ loopEmitter.enterNewLoopSeq(rewriter, loc, 0, static_cast<size_t>(ld));
// Note that reduc will be taken care of by loop emitter and get updated
// in place.
- loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, d, reduc);
+
+ loopEmitter.enterLoopOverTensorAtDim(rewriter, loc, 0, ld, reduc);
}
SmallVector<Value> coords;
coords.reserve(dimRank);
loopEmitter.getCoordinateArray(coords);
+ if (op.getOrder()) {
+ SmallVector<Value> tmp = coords; // keep a copy
+ for (Dimension d = 0; d < dimRank; d++) {
+ auto l = op.getOrder()->getDimPosition(d);
+ coords[l] = tmp[d];
+ }
+ }
Value vals = loopEmitter.getValBuffer()[0];
Value pidx = loopEmitter.getPidxs()[0].back();
// Loads the value from sparse tensor using pointer index;
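Roughly, the rotation plus coordinate remap behaves like the following sketch (hand-written, not verbatim rewriter output; buffer name invented) for a column-first foreach over a dense tensor<2x3xf64>:

```mlir
// The outer loop now runs over dimension 1 (columns), yet the inlined body
// still receives coordinates in the original dimension order (row, col).
scf.for %j = %c0 to %c3 step %c1 {
  scf.for %i = %c0 to %c2 step %c1 {
    // The emitter yields coords as [outer, inner] = [%j, %i]; the remap
    // above restores [%i, %j] before inlining the user's block.
    %v = memref.load %buf[%i, %j] : memref<2x3xf64>
    // ... inlined foreach body with (%i, %j, %v) ...
  }
}
```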
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index 2646b2db71148..92f63767a436f 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -183,30 +183,17 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
return %1 : tensor<8x7xf32, #CSR>
}
-// CHECK-RWT-LABEL: func.func @sparse_constant_csc() -> tensor<8x7xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)> }>> {
-// CHECK-RWT: %[[F0:.*]] = arith.constant sparse<{{\[\[}}0, 0], [1, 6]], [1.000000e+00, 5.000000e+00]> : tensor<8x7xf32>
-// CHECK-RWT: %[[T0:.*]] = bufferization.alloc_tensor()
-// CHECK-RWT: %[[T1:.*]] = sparse_tensor.foreach in %[[F0]] init(%[[T0]])
-// CHECK-RWT: ^bb0(%[[L0I0:.*]]: index, %[[L0I1:.*]]: index, %[[L0V:.*]]: f32, %[[L0T:.*]]: tensor
-// CHECK-RWT: %[[L0T2:.*]] = sparse_tensor.insert %[[L0V]] into %[[L0T]]{{\[}}%[[L0I1]], %[[L0I0]]]
-// CHECK-RWT: sparse_tensor.yield %[[L0T2]]
-// CHECK-RWT: }
-// CHECK-RWT: %[[COO:.*]] = sparse_tensor.load %[[T1]] hasInserts
-// CHECK-RWT: %[[NNZ:.*]] = sparse_tensor.number_of_entries %[[COO]]
-// CHECK-RWT: %[[V:.*]] = sparse_tensor.values %[[COO]]
-// CHECK-RWT: %[[I:.*]] = sparse_tensor.indices_buffer %[[COO]]
-// CHECK-RWT: sparse_tensor.sort_coo hybrid_quick_sort %[[NNZ]], %[[I]] jointly %[[V]] {nx = 2 : index, ny = 0 : index}
-// CHECK-RWT: %[[T3:.*]] = bufferization.alloc_tensor()
-// CHECK-RWT: %[[T4:.*]] = sparse_tensor.foreach in %[[COO]] init(%[[T3]])
-// CHECK-RWT: ^bb0(%[[L1I0:.*]]: index, %[[L1I1:.*]]: index, %[[L1V:.*]]: f32, %[[L1T:.*]]: tensor
-// CHECK-RWT: %[[L1T2:.*]] = sparse_tensor.insert %[[L1V]] into %[[L1T]]{{\[}}%[[L1I1]], %[[L1I0]]]
-// CHECK-RWT: sparse_tensor.yield %[[L1T2]]
-// CHECK-RWT: }
-// CHECK-RWT: %[[T5:.*]] = sparse_tensor.load %[[T4]] hasInserts
-// CHECK-RWT: %[[T6:.*]] = sparse_tensor.convert %[[T5]]
-// CHECK-RWT: bufferization.dealloc_tensor %[[COO]]
-// CHECK-RWT: return %[[T6]]
-// CHECK-RWT: }
+// CHECK-RWT-LABEL: func.func @sparse_constant_csc() -> tensor<8x7xf32,
+// CHECK-RWT: %[[VAL_0:.*]] = arith.constant sparse<{{\[\[}}0, 0], [1, 6]], [1.000000e+00, 5.000000e+00]> : tensor<8x7xf32>
+// CHECK-RWT: %[[VAL_1:.*]] = bufferization.alloc_tensor() :
+// CHECK-RWT: %[[VAL_2:.*]] = sparse_tensor.foreach in %[[VAL_0]] init(%[[VAL_1]]) {order = #map} : tensor<8x7xf32>,
+// CHECK-RWT: ^bb0(%[[VAL_3:.*]]: index, %[[VAL_4:.*]]: index, %[[VAL_5:.*]]: f32, %[[VAL_6:.*]]: tensor
+// CHECK-RWT: %[[VAL_7:.*]] = sparse_tensor.insert %[[VAL_5]] into %[[VAL_6]]{{\[}}%[[VAL_4]], %[[VAL_3]]] :
+// CHECK-RWT: sparse_tensor.yield %[[VAL_7]] :
+// CHECK-RWT: }
+// CHECK-RWT: %[[VAL_8:.*]] = sparse_tensor.load %[[VAL_9:.*]] hasInserts :
+// CHECK-RWT: return %[[VAL_8]] :
+// CHECK-RWT: }
func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
// Initialize a tensor.
%0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
index 662cdf8907682..e05fcd40e472f 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_ptr.mlir
@@ -90,7 +90,7 @@ module {
%c1 = arith.constant 1 : index
%t1 = arith.constant sparse<
[ [0,0], [0,1], [0,63], [1,0], [1,1], [31,0], [31,63] ],
- [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 ]> : tensor<32x64xf64>
+ [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0 ]> : tensor<32x64xf64>
%t2 = tensor.cast %t1 : tensor<32x64xf64> to tensor<?x?xf64>
// Dense to sparse.