[Mlir-commits] [mlir] c248219 - [mlir][sparse] Implements concatenate operation for sparse tensor
Peiming Liu
llvmlistbot at llvm.org
Tue Aug 16 13:47:52 PDT 2022
Author: Peiming Liu
Date: 2022-08-16T20:47:47Z
New Revision: c248219b09c1e724468d4603f647466b3e282330
URL: https://github.com/llvm/llvm-project/commit/c248219b09c1e724468d4603f647466b3e282330
DIFF: https://github.com/llvm/llvm-project/commit/c248219b09c1e724468d4603f647466b3e282330.diff
LOG: [mlir][sparse] Implements concatenate operation for sparse tensor
This patch implements the conversion rule for the operation introduced in https://reviews.llvm.org/D131200.
It also contains an integration test for correctness.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D131200
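For reference, a minimal use of the operation being lowered, taken from the conversion test added below (a dense and a sparse matrix concatenated into a dense result):

  #SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>

  func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64> {
    %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index}
        : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64>
    return %0 : tensor<5x4xf64>
  }

When the result type is sparse instead, the same op lowers to building a COO buffer via addEltF64 calls followed by a final newSparseTensor call, as exercised by the other tests in this patch.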
Added:
mlir/test/Dialect/SparseTensor/sparse_concat.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index dca1c52413724..d949cf6006a0a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -117,6 +117,26 @@ static Value genNewCall(OpBuilder &builder, Operation *op,
.getResult(0);
}
+/// Compute the size from type (for static sizes) or from an already-converted
+/// opaque pointer source (for dynamic sizes) at the given dimension.
+static Value sizeFromPtrAtDim(OpBuilder &builder, Operation *op,
+ SparseTensorEncodingAttr &enc, ShapedType stp,
+ Value src, unsigned dim) {
+ auto shape = stp.getShape();
+ if (shape[dim] == ShapedType::kDynamicSize)
+ return genDimSizeCall(builder, op, enc, src, dim);
+ return constantIndex(builder, op->getLoc(), shape[dim]);
+}
+
+/// Populates given sizes array from type (for static sizes) and from
+/// an already-converted opaque pointer source (for dynamic sizes).
+static void sizesFromPtr(OpBuilder &builder, SmallVector<Value, 4> &sizes,
+ Operation *op, SparseTensorEncodingAttr &enc,
+ ShapedType stp, Value src) {
+ for (unsigned i = 0, rank = stp.getRank(); i < rank; i++)
+ sizes.push_back(sizeFromPtrAtDim(builder, op, enc, stp, src, i));
+}
+
/// Populates given sizes array from type.
static void sizesFromType(OpBuilder &builder, SmallVector<Value, 4> &sizes,
Location loc, ShapedType stp) {
@@ -135,18 +155,42 @@ static void sizesFromSrc(OpBuilder &builder, SmallVector<Value, 4> &sizes,
sizes.push_back(linalg::createOrFoldDimOp(builder, loc, src, i));
}
-/// Populates given sizes array from type (for static sizes) and from
-/// an already converted into opague pointer source (for dynamic sizes).
-static void sizesFromPtr(OpBuilder &builder, SmallVector<Value, 4> &sizes,
- Operation *op, SparseTensorEncodingAttr &enc,
- ShapedType stp, Value src) {
+/// Populates the given sizes array for concatenation from type (for static
+/// sizes) and from an already-converted opaque pointer source (for dynamic
+/// sizes).
+static void concatSizesFromInputs(OpBuilder &builder,
+ SmallVector<Value, 4> &sizes, Operation *op,
+ ShapedType dstTp, ValueRange srcs,
+ unsigned dim) {
Location loc = op->getLoc();
- auto shape = stp.getShape();
- for (unsigned i = 0, rank = stp.getRank(); i < rank; i++)
- if (shape[i] == ShapedType::kDynamicSize)
- sizes.push_back(genDimSizeCall(builder, op, enc, src, i));
- else
- sizes.push_back(constantIndex(builder, loc, shape[i]));
+ auto dstShape = dstTp.getShape();
+
+ auto srcTp = srcs[0].getType().cast<ShapedType>();
+ auto srcEnc = getSparseTensorEncoding(srcTp);
+ // We first fill the sizes from an input tensor, and then
+ // compute the size of the concatenation dimension if necessary.
+ if (srcEnc)
+ // Reusing the sizes from an arbitrary input tensor is fine.
+ sizesFromPtr(builder, sizes, op, srcEnc, srcTp, srcs[0]);
+ else
+ sizesFromSrc(builder, sizes, loc, srcs[0]);
+
+ // Sum up the sizes on `dim` if that dimension is dynamic.
+ if (dstShape[dim] != ShapedType::kDynamicSize) {
+ // Faithfully take the static size.
+ sizes[dim] = constantIndex(builder, loc, dstShape[dim]);
+ } else {
+ // Else, compute the shape dynamically.
+ for (size_t i = 1, sz = srcs.size(); i < sz; i++) {
+ auto srcTp = srcs[i].getType().cast<ShapedType>();
+ auto encSrc = getSparseTensorEncoding(srcTp);
+ Value srcSz =
+ encSrc ? sizeFromPtrAtDim(builder, op, encSrc, srcTp, srcs[i], dim)
+ : linalg::createOrFoldDimOp(builder, loc, srcs[i], dim);
+ // Sum up all the sizes.
+ sizes[dim] = builder.create<arith::AddIOp>(loc, sizes[dim], srcSz);
+ }
+ }
}
/// Generates an uninitialized temporary buffer of the given size and
@@ -234,6 +278,20 @@ static void newParams(OpBuilder &builder, SmallVector<Value, 8> &params,
params.push_back(ptr);
}
+/// Generates the code to read the value from tensor[ivs]. The generated code
+/// looks like the following and the insertion point after this routine is
+/// inside the if-then branch behind the assignment to ind.
+/// if (tensor[ivs] != 0)
+/// insert_point
+static Value genValueForDense(OpBuilder &builder, Location loc, Value tensor,
+ ValueRange ivs) {
+ Value val = builder.create<tensor::ExtractOp>(loc, tensor, ivs);
+ Value cond = genIsNonzero(builder, loc, val);
+ scf::IfOp ifOp = builder.create<scf::IfOp>(loc, cond, /*else*/ false);
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ return val;
+}
+
/// Generates the code to read the value from tensor[ivs], and conditionally
/// stores the indices ivs to the memory in ind. The generated code looks like
/// the following and the insertion point after this routine is inside the
@@ -243,10 +301,7 @@ static void newParams(OpBuilder &builder, SmallVector<Value, 8> &params,
/// ind = ivs
static Value genIndexAndValueForDense(OpBuilder &builder, Location loc,
Value tensor, Value ind, ValueRange ivs) {
- Value val = builder.create<tensor::ExtractOp>(loc, tensor, ivs);
- Value cond = genIsNonzero(builder, loc, val);
- scf::IfOp ifOp = builder.create<scf::IfOp>(loc, cond, /*else*/ false);
- builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ Value val = genValueForDense(builder, loc, tensor, ivs);
unsigned i = 0;
for (auto iv : ivs) {
Value idx = constantIndex(builder, loc, i++);
@@ -346,18 +401,43 @@ static void deallocDenseTensor(OpBuilder &builder, Location loc, Value buffer) {
builder.create<memref::DeallocOp>(loc, buffer);
}
-/// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into
-/// the tensor created by allocDenseTensor(). The `rank` is the rank
-/// of the `tensor` and the length of `ind`.
-static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc,
- Value elemPtr, Value tensor,
- unsigned rank, Value ind) {
+/// Loads the indices stored in `ind` (as written by calls to iter->next())
+/// into a vector of index values, applying the (optional) `offset` on `offsetDim`.
+static SmallVector<Value, 4> loadIndices(OpBuilder &builder, Location loc,
+ unsigned rank, Value ind,
+ unsigned offsetDim = 0,
+ Value offset = Value()) {
SmallVector<Value, 4> ivs;
ivs.reserve(rank);
for (unsigned i = 0; i < rank; i++) {
Value idx = constantIndex(builder, loc, i);
- ivs.push_back(builder.create<memref::LoadOp>(loc, ind, idx));
+ idx = builder.create<memref::LoadOp>(loc, ind, idx);
+ if (offsetDim == i && offset)
+ idx = builder.create<arith::AddIOp>(loc, idx, offset);
+ ivs.push_back(idx);
+ }
+ return ivs;
+}
+
+/// Stores the vector of indices into the memory pointed to by `ind`,
+/// applying the (optional) `offset` on `offsetDim`.
+static void storeIndices(OpBuilder &builder, Location loc, unsigned rank,
+ Value ind, ValueRange ivs, unsigned offsetDim = 0,
+ Value offset = Value()) {
+ for (unsigned i = 0; i < rank; i++) {
+ Value idx = ivs[i];
+ if (offsetDim == i && offset)
+ idx = builder.create<arith::AddIOp>(loc, idx, offset);
+ builder.create<memref::StoreOp>(loc, idx, ind,
+ constantIndex(builder, loc, i));
}
+}
+
+/// Inserts a value stored in `elemPtr` into a dense tensor created by
+/// allocDenseTensor().
+static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc,
+ Value elemPtr, Value tensor,
+ ValueRange ivs) {
Value elemV = builder.create<memref::LoadOp>(loc, elemPtr);
builder.create<memref::StoreOp>(loc, elemV, tensor, ivs);
}
@@ -510,6 +590,100 @@ genSparse2SparseReshape(Operation *op, ConversionPatternRewriter &rewriter,
return success();
}
+// Generates a while loop that iterates over the COO list extracted
+// from `t`, using `bodyBuilder` to build the loop body.
+// while (elem = coo->getNext()) {
+// bodyBuilder
+// }
+// TODO: Get rid of Operation *op in the parameter list! It seems
+// that we only use it for op->getLoc(); pass the loc directly instead!
+// TODO: This could be reused by the conversion of other operators
+// (ReshapeOp, ConvertOp) to reduce code repetition!
+static void genSparseCOOIterationLoop(
+ ConversionPatternRewriter &rewriter, Operation *op, Value t,
+ RankedTensorType tensorTp,
+ function_ref<void(OpBuilder &, Location, Value, Value)> bodyBuilder) {
+ Location loc = op->getLoc();
+ auto enc = getSparseTensorEncoding(tensorTp);
+ assert(enc && "Generating Sparse Tensor COO Loop on a Dense Tensor!");
+
+ unsigned rank = tensorTp.getRank();
+ Type elemTp = tensorTp.getElementType();
+
+ // Start an iterator over the tensor (in original index order).
+ auto noPerm = SparseTensorEncodingAttr::get(
+ rewriter.getContext(), enc.getDimLevelType(), AffineMap(),
+ enc.getPointerBitWidth(), enc.getIndexBitWidth());
+ SmallVector<Value, 4> sizes;
+ SmallVector<Value, 8> params;
+ sizesFromPtr(rewriter, sizes, op, noPerm, tensorTp, t);
+ newParams(rewriter, params, op, tensorTp, noPerm, Action::kToIterator, sizes,
+ t);
+ Value iter = genNewCall(rewriter, op, params);
+
+ // Construct a while loop over the iterator.
+ Value srcIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
+ Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+ SmallVector<Value> noArgs;
+ SmallVector<Type> noTypes;
+ auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
+ Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes);
+ rewriter.setInsertionPointToEnd(before);
+ Value cond = genGetNextCall(rewriter, op, iter, srcIdx, elemPtr);
+ rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
+ Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
+ rewriter.setInsertionPointToStart(after);
+ // Callback here to build loop body.
+ bodyBuilder(rewriter, loc, srcIdx, elemPtr);
+ rewriter.create<scf::YieldOp>(loc);
+ // Finish generating loop.
+ rewriter.setInsertionPointAfter(whileOp);
+
+ // Free memory for iterator.
+ genDelCOOCall(rewriter, op, elemTp, iter);
+}
+
+// Generate loop that iterates over a dense tensor.
+// for i1 in dim1
+// ..
+// for ik in dimk
+// val = a[i1,..,ik]
+// if val != 0
+// bodyBuilder(v, [i1, ..., ik])
+// TODO: This could be reused by the conversion of other operators
+// (ReshapeOp, ConvertOp) to reduce code repetition!
+static void genDenseTensorIterationLoop(
+ ConversionPatternRewriter &rewriter, Operation *op, Value t,
+ RankedTensorType tensorTp,
+ function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
+ Location loc = op->getLoc();
+ auto enc = getSparseTensorEncoding(tensorTp);
+ assert(!enc && "Generating Dense Tensor Loop on a Sparse Tensor!");
+
+ unsigned rank = tensorTp.getRank();
+ Value zero = constantIndex(rewriter, loc, 0);
+ Value one = constantIndex(rewriter, loc, 1);
+
+ SmallVector<Value> lo;
+ SmallVector<Value> hi;
+ SmallVector<Value> st;
+
+ // Fill out loop iteration information.
+ for (unsigned i = 0; i < rank; i++) {
+ lo.push_back(zero);
+ hi.push_back(linalg::createOrFoldDimOp(rewriter, loc, t, i));
+ st.push_back(one);
+ }
+
+ scf::buildLoopNest(rewriter, op->getLoc(), lo, hi, st, {},
+ [&](OpBuilder &builder, Location loc, ValueRange ivs,
+ ValueRange args) -> scf::ValueVector {
+ // Invoke callback to build the body of the loop.
+ bodyBuilder(builder, loc, ivs);
+ return {};
+ });
+}
+
//===----------------------------------------------------------------------===//
// Conversion rules.
//===----------------------------------------------------------------------===//
@@ -760,7 +934,8 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
rewriter.setInsertionPointToStart(after);
- insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, rank, ind);
+ SmallVector<Value, 4> ivs = loadIndices(rewriter, loc, rank, ind);
+ insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, ivs);
rewriter.create<scf::YieldOp>(loc);
rewriter.setInsertionPointAfter(whileOp);
genDelCOOCall(rewriter, op, elemTp, iter);
@@ -1043,6 +1218,139 @@ class SparseTensorCompressConverter : public OpConversionPattern<CompressOp> {
}
};
+/// Sparse conversion rule for the concatenate operator.
+class SparseTensorConcatConverter : public OpConversionPattern<ConcatenateOp> {
+public:
+ using OpConversionPattern::OpConversionPattern;
+ LogicalResult
+ matchAndRewrite(ConcatenateOp op, OpAdaptor adaptor,
+ ConversionPatternRewriter &rewriter) const override {
+ // The conversion works as follows:
+ // (1) When the output is sparse and the inputs are a mix of dense/sparse:
+ // a_sparse = concat (b_dense, c_sparse, ....)
+ // =>
+ // coo_for_a = newSparseCOO(shapeOf(a))
+ // for i, j, k // dense input
+ // coo->add(adjustForOffset(i,j,k), b[i,j,k])
+ //
+ // for elem in sparse_input
+ // coo->add(adjustForOffset(elem.indices), elem.value)
+ // ...
+ // a = newSparseTensor(coo_for_a)
+ // return a
+ //
+ // (2) When the output is dense and the inputs are a mix of dense/sparse:
+ // a_dense = concat (b_dense, c_sparse, ....)
+ // =>
+ // a = malloc(shapeOf(a))
+ // for i, j, k // dense input
+ // a[ adjustForOffset(i,j,k) ] = b[i,j,k]
+ //
+ // for elem in sparse_input
+ // a[ adjustForOffset(elem.indices) ] = elem.value
+ // return a
+ Location loc = op.getLoc();
+ auto dstTp = op.getType().cast<RankedTensorType>();
+ auto encDst = getSparseTensorEncoding(dstTp);
+ Type elemTp = dstTp.getElementType();
+ uint64_t concatDim = op.getDimension().getZExtValue();
+ unsigned rank = dstTp.getRank();
+
+ Value dst; // destination tensor
+ Value dstPerm; // destination tensor permutation (if sparse out)
+ // A pointer to the value being inserted (if dense => sparse)
+ Value elemPtr;
+ // Memory that holds the COO for destination tensor (if sparse out)
+ Value dstIdx;
+ // The offset applied to the dimension to be concatenated (starting from 0)
+ Value offset = constantIndex(rewriter, loc, 0);
+
+ SmallVector<Value, 4> sizes;
+ SmallVector<Value, 8> params;
+ concatSizesFromInputs(rewriter, sizes, op, dstTp, op.getInputs(),
+ concatDim);
+
+ if (encDst) {
+ // Start a new COO for the destination tensor.
+ newParams(rewriter, params, op, dstTp, encDst, Action::kEmptyCOO, sizes);
+ dst = genNewCall(rewriter, op, params);
+ dstPerm = params[2];
+ elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+ dstIdx = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
+ } else {
+ // TODO: Dense buffers should be allocated/deallocated via the callback
+ // in BufferizationOptions.
+ dst = allocDenseTensor(rewriter, loc, dstTp, sizes);
+ }
+ for (auto it : llvm::zip(op.getInputs(), adaptor.getInputs())) {
+ Value originalOp = std::get<0>(it); // Input (with encoding) from Op
+ Value adaptedOp = std::get<1>(it); // Input (type converted) from adaptor
+ RankedTensorType srcTp = originalOp.getType().cast<RankedTensorType>();
+ auto encSrc = getSparseTensorEncoding(srcTp);
+ if (encSrc) {
+ genSparseCOOIterationLoop(
+ rewriter, op, adaptedOp, srcTp,
+ [&](OpBuilder &builder, Location loc, Value idx,
+ Value elemPtr) -> void {
+ auto indVec =
+ loadIndices(builder, loc, rank, idx, concatDim, offset);
+ if (encDst) {
+ // Case: sparse => sparse
+ storeIndices(builder, loc, rank, dstIdx, indVec);
+ genAddEltCall(builder, op, elemTp, dst, elemPtr, dstIdx,
+ dstPerm);
+ } else {
+ // Case: sparse => dense
+ insertScalarIntoDenseTensor(builder, loc, elemPtr, dst, indVec);
+ }
+ });
+ } else {
+ genDenseTensorIterationLoop(
+ rewriter, op, adaptedOp, srcTp,
+ [&](OpBuilder &builder, Location loc, ValueRange idx) -> void {
+ if (encDst) {
+ // Case: dense => sparse
+ storeIndices(builder, loc, rank, dstIdx, idx, concatDim,
+ offset);
+ Value val = genValueForDense(builder, loc, adaptedOp, idx);
+ builder.create<memref::StoreOp>(loc, val, elemPtr);
+ genAddEltCall(builder, op, elemTp, dst, elemPtr, dstIdx,
+ dstPerm);
+ } else {
+ // Case: dense => dense
+ Value val = genValueForDense(builder, loc, adaptedOp, idx);
+ SmallVector<Value, 4> indVec(idx);
+ // Apply offset.
+ indVec[concatDim] = builder.create<arith::AddIOp>(
+ loc, indVec[concatDim], offset);
+ builder.create<memref::StoreOp>(loc, val, dst, indVec);
+ }
+ });
+ }
+ // Accumulate offset.
+ // TODO: avoid calling sparseDimSize multiple times by caching the result!
+ Value curDim = encSrc ? sizeFromPtrAtDim(rewriter, op, encSrc, srcTp,
+ adaptedOp, concatDim)
+ : linalg::createOrFoldDimOp(rewriter, loc,
+ adaptedOp, concatDim);
+
+ offset = rewriter.create<arith::AddIOp>(loc, offset, curDim);
+ }
+ if (encDst) {
+ params[6] = constantAction(rewriter, loc, Action::kFromCOO);
+ // In sparse output case, the destination holds the COO.
+ Value coo = dst;
+ params[7] = coo;
+ dst = genNewCall(rewriter, op, params);
+ // Release resources.
+ genDelCOOCall(rewriter, op, elemTp, coo);
+ rewriter.replaceOp(op, dst);
+ } else {
+ rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(op, dstTp, dst);
+ }
+ return success();
+ }
+};
/// Sparse conversion rule for the output operator.
class SparseTensorOutConverter : public OpConversionPattern<OutOp> {
public:
@@ -1099,12 +1407,13 @@ void mlir::populateSparseTensorConversionPatterns(
SparseCastConverter, SparseTensorNewConverter,
SparseReshapeConverter<tensor::ExpandShapeOp>,
SparseReshapeConverter<tensor::CollapseShapeOp>,
- SparseTensorAllocConverter, SparseTensorDeallocConverter,
- SparseTensorToPointersConverter, SparseTensorToIndicesConverter,
- SparseTensorToValuesConverter, SparseTensorLoadConverter,
- SparseTensorLexInsertConverter, SparseTensorExpandConverter,
- SparseTensorCompressConverter, SparseTensorOutConverter>(
- typeConverter, patterns.getContext());
+ SparseTensorConcatConverter, SparseTensorAllocConverter,
+ SparseTensorDeallocConverter, SparseTensorToPointersConverter,
+ SparseTensorToIndicesConverter, SparseTensorToValuesConverter,
+ SparseTensorLoadConverter, SparseTensorLexInsertConverter,
+ SparseTensorExpandConverter, SparseTensorCompressConverter,
+ SparseTensorOutConverter>(typeConverter, patterns.getContext());
+
patterns.add<SparseTensorConvertConverter>(typeConverter,
patterns.getContext(), options);
}
diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
new file mode 100644
index 0000000000000..4bdb5dd8a711f
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir
@@ -0,0 +1,360 @@
+// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s
+
+#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
+
+#SparseMatrix_P = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+// CHECK-LABEL: func.func @concat_mix_dense(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>,
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
+// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8
+// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index
+// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64>
+// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>)
+// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] {
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64>
+// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64
+// CHECK: scf.if %[[TMP_13]] {
+// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<5x4xf64>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref<f64>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[TMP_12]])
+// CHECK: } do {
+// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: %[[TMP_13:.*]] = arith.addi %[[TMP_12]], %[[TMP_c2]] : index
+// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref<f64>
+// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_13]], %[[TMP_14]]] : memref<5x4xf64>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<5x4xf64>
+// CHECK: return %[[TMP_11]] : tensor<5x4xf64>
+// CHECK: }
+func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64>
+ return %0 : tensor<5x4xf64>
+}
+
+// CHECK-LABEL: func.func @concat_mix_sparse(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64>,
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32
+// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
+// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32
+// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index
+// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8
+// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr<i8>
+// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<f64>
+// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref<?xindex>
+// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] {
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] {
+// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64>
+// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64
+// CHECK: scf.if %[[TMP_23]] {
+// CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref<f64>
+// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref<f64>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[TMP_22]])
+// CHECK: } do {
+// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: %[[TMP_23:.*]] = arith.addi %[[TMP_22]], %[[TMP_c2]] : index
+// CHECK: %[[TMP_24:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_23]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: return %[[TMP_21]] : !llvm.ptr<i8>
+// CHECK: }
+func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #SparseMatrix>) -> tensor<5x4xf64, #SparseMatrix> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #SparseMatrix> to tensor<5x4xf64, #SparseMatrix>
+ return %0 : tensor<5x4xf64, #SparseMatrix>
+}
+
+// CHECK-LABEL: func.func @concat_mix_sparse_perm_dim1(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>,
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[TMP_c2_i32:.*]] = arith.constant 2 : i32
+// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
+// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[TMP_c4_i32:.*]] = arith.constant 4 : i32
+// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index
+// CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index
+// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8
+// CHECK: %[[TMP_0:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_0]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c5]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_6:.*]] = llvm.mlir.null : !llvm.ptr<i8>
+// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c4_i32]], %[[TMP_6]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<f64>
+// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref<?xindex>
+// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] {
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] {
+// CHECK: memref.store %[[TMP_arg2]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_arg3]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_22:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64>
+// CHECK: %[[TMP_23:.*]] = arith.cmpf une, %[[TMP_22]], %[[TMP_cst]] : f64
+// CHECK: scf.if %[[TMP_23]] {
+// CHECK: memref.store %[[TMP_22]], %[[TMP_8]][] : memref<f64>
+// CHECK: %[[TMP_24:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_8]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_12:.*]] = memref.cast %[[TMP_11]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_11]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_13:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_14:.*]] = memref.cast %[[TMP_13]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_13]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_13]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_15:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_16:.*]] = memref.cast %[[TMP_15]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_15]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_15]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_17:.*]] = call @newSparseTensor(%[[TMP_12]], %[[TMP_14]], %[[TMP_16]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_18:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_19:.*]] = memref.cast %[[TMP_18]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[TMP_20:.*]] = memref.alloca() : memref<f64>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[TMP_22:.*]] = func.call @getNextF64(%[[TMP_17]], %[[TMP_19]], %[[TMP_20]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[TMP_22]])
+// CHECK: } do {
+// CHECK: %[[TMP_22:.*]] = memref.load %[[TMP_18]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: %[[TMP_23:.*]] = memref.load %[[TMP_18]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_24:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index
+// CHECK: memref.store %[[TMP_22]], %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_24]], %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_25:.*]] = func.call @addEltF64(%[[TMP_7]], %[[TMP_20]], %[[TMP_10]], %[[TMP_5]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_17]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: %[[TMP_21:.*]] = call @newSparseTensor(%[[TMP_1]], %[[TMP_3]], %[[TMP_5]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c2_i32]], %[[TMP_7]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: return %[[TMP_21]] : !llvm.ptr<i8>
+// CHECK: }
+func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64, #SparseMatrix_P> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64, #SparseMatrix_P>
+ return %0 : tensor<4x5xf64, #SparseMatrix_P>
+}
+
+// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<4x2xf64>,
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
+// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8
+// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index
+// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64>
+// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>)
+// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] {
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64>
+// CHECK: %[[TMP_13:.*]] = arith.cmpf une, %[[TMP_12]], %[[TMP_cst]] : f64
+// CHECK: scf.if %[[TMP_13]] {
+// CHECK: memref.store %[[TMP_12]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<4x5xf64>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_1:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_2:.*]] = memref.cast %[[TMP_1]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_1]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_3:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_4:.*]] = memref.cast %[[TMP_3]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c4]], %[[TMP_3]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_3]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_5:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_6:.*]] = memref.cast %[[TMP_5]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_5]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_5]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_7:.*]] = call @newSparseTensor(%[[TMP_2]], %[[TMP_4]], %[[TMP_6]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_8:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_9:.*]] = memref.cast %[[TMP_8]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[TMP_10:.*]] = memref.alloca() : memref<f64>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[TMP_12:.*]] = func.call @getNextF64(%[[TMP_7]], %[[TMP_9]], %[[TMP_10]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[TMP_12]])
+// CHECK: } do {
+// CHECK: %[[TMP_12:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_14:.*]] = arith.addi %[[TMP_13]], %[[TMP_c2]] : index
+// CHECK: %[[TMP_15:.*]] = memref.load %[[TMP_10]][] : memref<f64>
+// CHECK: memref.store %[[TMP_15]], %[[TMP_0]][%[[TMP_12]], %[[TMP_14]]] : memref<4x5xf64>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_7]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: %[[TMP_11:.*]] = bufferization.to_tensor %[[TMP_0]] : memref<4x5xf64>
+// CHECK: return %[[TMP_11]] : tensor<4x5xf64>
+// CHECK: }
+func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #SparseMatrix_P>) -> tensor<4x5xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #SparseMatrix_P> to tensor<4x5xf64>
+ return %0 : tensor<4x5xf64>
+}
+
+// CHECK-LABEL: func.func @concat_mix_dense_perm_dim1_dyn(
+// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<3x2xf64>,
+// CHECK-SAME: %[[TMP_arg1:.*]]: !llvm.ptr<i8>)
+// CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[TMP_c6_i32:.*]] = arith.constant 6 : i32
+// CHECK-DAG: %[[TMP_c1_i32:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[TMP_c0_i32:.*]] = arith.constant 0 : i32
+// CHECK-DAG: %[[TMP_c1_i8:.*]] = arith.constant 1 : i8
+// CHECK-DAG: %[[TMP_cst:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[TMP_c3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index
+// CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64>
+// CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref<?x?xf64>
+// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>)
+// CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] {
+// CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] {
+// CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64>
+// CHECK: %[[TMP_14:.*]] = arith.cmpf une, %[[TMP_13]], %[[TMP_cst]] : f64
+// CHECK: scf.if %[[TMP_14]] {
+// CHECK: memref.store %[[TMP_13]], %[[TMP_0]][%[[TMP_arg2]], %[[TMP_arg3]]] : memref<3x5xf64>
+// CHECK: }
+// CHECK: }
+// CHECK: }
+// CHECK: %[[TMP_2:.*]] = memref.alloca() : memref<2xi8>
+// CHECK: %[[TMP_3:.*]] = memref.cast %[[TMP_2]] : memref<2xi8> to memref<?xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c0]]] : memref<2xi8>
+// CHECK: memref.store %[[TMP_c1_i8]], %[[TMP_2]][%[[TMP_c1]]] : memref<2xi8>
+// CHECK: %[[TMP_4:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_5:.*]] = memref.cast %[[TMP_4]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c3]], %[[TMP_4]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_6:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_7:.*]] = memref.cast %[[TMP_6]] : memref<2xindex> to memref<?xindex>
+// CHECK: memref.store %[[TMP_c0]], %[[TMP_6]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: memref.store %[[TMP_c1]], %[[TMP_6]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_8:.*]] = call @newSparseTensor(%[[TMP_3]], %[[TMP_5]], %[[TMP_7]], %[[TMP_c0_i32]], %[[TMP_c0_i32]], %[[TMP_c1_i32]], %[[TMP_c6_i32]], %[[TMP_arg1]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: %[[TMP_9:.*]] = memref.alloca() : memref<2xindex>
+// CHECK: %[[TMP_10:.*]] = memref.cast %[[TMP_9]] : memref<2xindex> to memref<?xindex>
+// CHECK: %[[TMP_11:.*]] = memref.alloca() : memref<f64>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[TMP_13:.*]] = func.call @getNextF64(%[[TMP_8]], %[[TMP_10]], %[[TMP_11]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[TMP_13]])
+// CHECK: } do {
+// CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_9]][%[[TMP_c0]]] : memref<2xindex>
+// CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_9]][%[[TMP_c1]]] : memref<2xindex>
+// CHECK: %[[TMP_15:.*]] = arith.addi %[[TMP_14]], %[[TMP_c2]] : index
+// CHECK: %[[TMP_16:.*]] = memref.load %[[TMP_11]][] : memref<f64>
+// CHECK: memref.store %[[TMP_16]], %[[TMP_0]][%[[TMP_13]], %[[TMP_15]]] : memref<3x5xf64>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: call @delSparseTensorCOOF64(%[[TMP_8]]) : (!llvm.ptr<i8>) -> ()
+// CHECK: %[[TMP_12:.*]] = bufferization.to_tensor %[[TMP_1]] : memref<?x?xf64>
+// CHECK: return %[[TMP_12]] : tensor<?x?xf64>
+// CHECK: }
+// CHECK: }
+func.func @concat_mix_dense_perm_dim1_dyn(%arg0: tensor<3x2xf64>, %arg1: tensor<3x3xf64, #SparseMatrix>) -> tensor<?x?xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1 {dimension = 1 : index}
+ : tensor<3x2xf64>, tensor<3x3xf64, #SparseMatrix> to tensor<?x?xf64>
+ return %0 : tensor<?x?xf64>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
new file mode 100644
index 0000000000000..37f6f749d4dff
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
@@ -0,0 +1,430 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#MAT_C_C = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
+#MAT_D_C = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}>
+#MAT_C_D = #sparse_tensor.encoding<{dimLevelType = ["compressed", "dense"]}>
+
+#MAT_C_C_P = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+#MAT_C_D_P = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "dense" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+#MAT_D_C_P = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+module {
+ //
+ // Tests without permutation.
+ //
+
+ // Concats all sparse matrices (with different encodings) to a sparse matrix.
+ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C>
+ return %0 : tensor<9x4xf64, #MAT_C_C>
+ }
+
+ // Concats all sparse matrices (with different encodings) to a dense matrix.
+ func.func @concat_sparse_dense(%arg0: tensor<2x4xf64, #MAT_C_C>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64>
+ return %0 : tensor<9x4xf64>
+ }
+
+ // Concats a mix of sparse and dense matrices to a sparse matrix.
+ func.func @concat_mix_sparse(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C>
+ return %0 : tensor<9x4xf64, #MAT_C_C>
+ }
+
+ // Concats a mix of sparse and dense matrices to a dense matrix.
+ func.func @concat_mix_dense(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64>
+ return %0 : tensor<9x4xf64>
+ }
+
+ //
+ // Tests with permutation.
+ //
+
+ // Concats all sparse matrices (with different encodings) to a sparse matrix.
+ func.func @concat_sparse_sparse_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C_P>
+ return %0 : tensor<9x4xf64, #MAT_C_C_P>
+ }
+
+ // Concats all sparse matrices (with different encodings) to a dense matrix.
+ func.func @concat_sparse_dense_perm(%arg0: tensor<2x4xf64, #MAT_C_C_P>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64>
+ return %0 : tensor<9x4xf64>
+ }
+
+ // Concats a mix of sparse and dense matrices to a sparse matrix.
+ func.func @concat_mix_sparse_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64, #MAT_C_C>
+ return %0 : tensor<9x4xf64, #MAT_C_C>
+ }
+
+ // Concats a mix of sparse and dense matrices to a dense matrix.
+ func.func @concat_mix_dense_perm(%arg0: tensor<2x4xf64>, %arg1: tensor<3x4xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
+ : tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<9x4xf64>
+ return %0 : tensor<9x4xf64>
+ }
+
+ //
+ // Tests without permutation (concatenate on dimension 1)
+ //
+
+ // Concats all sparse matrices (with different encodings) to a sparse matrix.
+ func.func @concat_sparse_sparse_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C>
+ return %0 : tensor<4x9xf64, #MAT_C_C>
+ }
+
+ // Concats all sparse matrices (with different encodings) to a dense matrix.
+ func.func @concat_sparse_dense_dim1(%arg0: tensor<4x2xf64, #MAT_C_C>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64>
+ return %0 : tensor<4x9xf64>
+ }
+
+ // Concats a mix of sparse and dense matrices to a sparse matrix.
+ func.func @concat_mix_sparse_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C>
+ return %0 : tensor<4x9xf64, #MAT_C_C>
+ }
+
+ // Concats a mix of sparse and dense matrices to a dense matrix.
+ func.func @concat_mix_dense_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64>
+ return %0 : tensor<4x9xf64>
+ }
+
+ //
+ // Tests with permutation (concatenate on dimension 1)
+ //
+
+ // Concats all sparse matrices (with different encodings) to a sparse matrix.
+ func.func @concat_sparse_sparse_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C_P>
+ return %0 : tensor<4x9xf64, #MAT_C_C_P>
+ }
+
+ // Concats all sparse matrices (with different encodings) to a dense matrix.
+ func.func @concat_sparse_dense_perm_dim1(%arg0: tensor<4x2xf64, #MAT_C_C_P>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64>
+ return %0 : tensor<4x9xf64>
+ }
+
+ // Concats a mix of sparse and dense matrices to a sparse matrix.
+ func.func @concat_mix_sparse_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D_P>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<4x9xf64, #MAT_C_C>
+ return %0 : tensor<4x9xf64, #MAT_C_C>
+ }
+
+ // Concats a mix of sparse and dense matrices to a dense matrix.
+ func.func @concat_mix_dense_perm_dim1(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P> to tensor<4x9xf64>
+ return %0 : tensor<4x9xf64>
+ }
+
+ //
+ // Concats a mix of sparse and dense matrices to a sparse matrix (with dynamic sizes).
+ //
+ func.func @concat_mix_sparse_dyn(%arg0: tensor<4x2xf64>, %arg1: tensor<4x3xf64, #MAT_C_D>, %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<?x?xf64, #MAT_C_C> {
+ %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 1 : index}
+ : tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C> to tensor<?x?xf64, #MAT_C_C>
+ return %0 : tensor<?x?xf64, #MAT_C_C>
+ }
+
+ func.func @dump_mat_9x4(%A: tensor<9x4xf64, #MAT_C_C>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C> to tensor<9x4xf64>
+ %m = bufferization.to_memref %c : memref<9x4xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64>
+ vector.print %v : vector<9x4xf64>
+
+ %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
+ vector.print %2 : vector<36xf64>
+
+ return
+ }
+
+ func.func @dump_mat_perm_9x4(%A: tensor<9x4xf64, #MAT_C_C_P>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C_P> to tensor<9x4xf64>
+ %m = bufferization.to_memref %c : memref<9x4xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64>
+ vector.print %v : vector<9x4xf64>
+
+ %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C_P> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
+ vector.print %2 : vector<36xf64>
+
+ return
+ }
+
+ func.func @dump_mat_dense_9x4(%A: tensor<9x4xf64>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %m = bufferization.to_memref %A : memref<9x4xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<9x4xf64>, vector<9x4xf64>
+ vector.print %v : vector<9x4xf64>
+
+ return
+ }
+
+ func.func @dump_mat_4x9(%A: tensor<4x9xf64, #MAT_C_C>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C> to tensor<4x9xf64>
+ %m = bufferization.to_memref %c : memref<4x9xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64>
+ vector.print %v : vector<4x9xf64>
+
+ %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
+ vector.print %2 : vector<36xf64>
+
+ return
+ }
+
+ func.func @dump_mat_dyn(%A: tensor<?x?xf64, #MAT_C_C>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %c = sparse_tensor.convert %A : tensor<?x?xf64, #MAT_C_C> to tensor<?x?xf64>
+ %m = bufferization.to_memref %c : memref<?x?xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<?x?xf64>, vector<4x9xf64>
+ vector.print %v : vector<4x9xf64>
+
+ %1 = sparse_tensor.values %A : tensor<?x?xf64, #MAT_C_C> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
+ vector.print %2 : vector<36xf64>
+
+ return
+ }
+
+ func.func @dump_mat_perm_4x9(%A: tensor<4x9xf64, #MAT_C_C_P>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C_P> to tensor<4x9xf64>
+ %m = bufferization.to_memref %c : memref<4x9xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64>
+ vector.print %v : vector<4x9xf64>
+
+ %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C_P> to memref<?xf64>
+ %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
+ vector.print %2 : vector<36xf64>
+
+ return
+ }
+
+ func.func @dump_mat_dense_4x9(%A: tensor<4x9xf64>) {
+ %c0 = arith.constant 0 : index
+ %du = arith.constant -1.0 : f64
+
+ %m = bufferization.to_memref %A : memref<4x9xf64>
+ %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x9xf64>, vector<4x9xf64>
+ vector.print %v : vector<4x9xf64>
+
+ return
+ }
+
+ // Driver method to call and verify kernels.
+ func.func @entry() {
+ %m42 = arith.constant dense<
+ [ [ 1.0, 0.0 ],
+ [ 3.1, 0.0 ],
+ [ 0.0, 2.0 ],
+ [ 0.0, 0.0 ] ]> : tensor<4x2xf64>
+ %m43 = arith.constant dense<
+ [ [ 1.0, 0.0, 1.0 ],
+ [ 1.0, 0.0, 0.5 ],
+ [ 0.0, 0.0, 1.0 ],
+ [ 5.0, 2.0, 0.0 ] ]> : tensor<4x3xf64>
+ %m24 = arith.constant dense<
+ [ [ 1.0, 0.0, 3.0, 0.0],
+ [ 0.0, 2.0, 0.0, 0.0] ]> : tensor<2x4xf64>
+ %m34 = arith.constant dense<
+ [ [ 1.0, 0.0, 1.0, 1.0],
+ [ 0.0, 0.5, 0.0, 0.0],
+ [ 1.0, 5.0, 2.0, 0.0] ]> : tensor<3x4xf64>
+ %m44 = arith.constant dense<
+ [ [ 0.0, 0.0, 1.5, 1.0],
+ [ 0.0, 3.5, 0.0, 0.0],
+ [ 1.0, 5.0, 2.0, 0.0],
+ [ 1.0, 0.5, 0.0, 0.0] ]> : tensor<4x4xf64>
+
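+ // Sparse variants of the inputs, in several different encodings.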
+ %sm24cc = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C>
+ %sm34cd = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D>
+ %sm42cc = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C>
+ %sm43cd = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D>
+ %sm44dc = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C>
+
+ %sm24ccp = sparse_tensor.convert %m24 : tensor<2x4xf64> to tensor<2x4xf64, #MAT_C_C_P>
+ %sm34cdp = sparse_tensor.convert %m34 : tensor<3x4xf64> to tensor<3x4xf64, #MAT_C_D_P>
+ %sm42ccp = sparse_tensor.convert %m42 : tensor<4x2xf64> to tensor<4x2xf64, #MAT_C_C_P>
+ %sm43cdp = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<4x3xf64, #MAT_C_D_P>
+ %sm44dcp = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<4x4xf64, #MAT_D_C_P>
+
+ %sm43cd_dyn = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<?x?xf64, #MAT_C_D>
+ %sm44dc_dyn = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<?x?xf64, #MAT_D_C>
+
+ // CHECK: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %0 = call @concat_sparse_sparse(%sm24cc, %sm34cd, %sm44dc)
+ : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+ call @dump_mat_9x4(%0) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ %1 = call @concat_sparse_dense(%sm24cc, %sm34cd, %sm44dc)
+ : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+ call @dump_mat_dense_9x4(%1) : (tensor<9x4xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %2 = call @concat_mix_sparse(%m24, %sm34cd, %sm44dc)
+ : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+ call @dump_mat_9x4(%2) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ %3 = call @concat_mix_dense(%m24, %sm34cd, %sm44dc)
+ : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+ call @dump_mat_dense_9x4(%3) : (tensor<9x4xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 1, 0, 1, 1, 1, 2, 0, 0.5, 5, 3.5, 5, 0.5, 3, 1, 0, 2, 1.5, 2, 1, 0, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %4 = call @concat_sparse_sparse_perm(%sm24ccp, %sm34cd, %sm44dc)
+ : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P>
+ call @dump_mat_perm_9x4(%4) : (tensor<9x4xf64, #MAT_C_C_P>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ %5 = call @concat_sparse_dense_perm(%sm24ccp, %sm34cdp, %sm44dc)
+ : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
+ call @dump_mat_dense_9x4(%5) : (tensor<9x4xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 3, 2, 1, 0, 1, 1, 0, 0.5, 0, 0, 1, 5, 2, 0, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %6 = call @concat_mix_sparse_perm(%m24, %sm34cdp, %sm44dc)
+ : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
+ call @dump_mat_9x4(%6) : (tensor<9x4xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+ %7 = call @concat_mix_dense_perm(%m24, %sm34cd, %sm44dcp)
+ : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64>
+ call @dump_mat_dense_9x4(%7) : (tensor<9x4xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %8 = call @concat_sparse_sparse_dim1(%sm42cc, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+ call @dump_mat_4x9(%8) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ %9 = call @concat_sparse_dense_dim1(%sm42cc, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+ call @dump_mat_dense_4x9(%9) : (tensor<4x9xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %10 = call @concat_mix_sparse_dim1(%m42, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+ call @dump_mat_4x9(%10) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ %11 = call @concat_mix_dense_dim1(%m42, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+ call @dump_mat_dense_4x9(%11) : (tensor<4x9xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 3.1, 2, 1, 1, 0, 5, 0, 0, 0, 2, 1, 0.5, 1, 0, 1, 1, 3.5, 5, 0.5, 1.5, 2, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %12 = call @concat_sparse_sparse_perm_dim1(%sm42ccp, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P>
+ call @dump_mat_perm_4x9(%12) : (tensor<4x9xf64, #MAT_C_C_P>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ %13 = call @concat_sparse_dense_perm_dim1(%sm42ccp, %sm43cdp, %sm44dc)
+ : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
+ call @dump_mat_dense_4x9(%13) : (tensor<4x9xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %14 = call @concat_mix_sparse_perm_dim1(%m42, %sm43cdp, %sm44dc)
+ : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
+ call @dump_mat_4x9(%14) : (tensor<4x9xf64, #MAT_C_C>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ %15 = call @concat_mix_dense_perm_dim1(%m42, %sm43cd, %sm44dcp)
+ : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64>
+ call @dump_mat_dense_4x9(%15) : (tensor<4x9xf64>) -> ()
+
+ // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+ // CHECK-NEXT: ( 1, 1, 0, 1, 1.5, 1, 3.1, 1, 0, 0.5, 3.5, 2, 0, 0, 1, 1, 5, 2, 5, 2, 0, 1, 0.5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+ %16 = call @concat_mix_sparse_dyn(%m42, %sm43cd, %sm44dc)
+ : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<?x?xf64, #MAT_C_C>
+ call @dump_mat_dyn(%16) : (tensor<?x?xf64, #MAT_C_C>) -> ()
+
+ // Release resources.
+ bufferization.dealloc_tensor %sm24cc : tensor<2x4xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %sm34cd : tensor<3x4xf64, #MAT_C_D>
+ bufferization.dealloc_tensor %sm42cc : tensor<4x2xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %sm43cd : tensor<4x3xf64, #MAT_C_D>
+ bufferization.dealloc_tensor %sm44dc : tensor<4x4xf64, #MAT_D_C>
+ bufferization.dealloc_tensor %sm24ccp : tensor<2x4xf64, #MAT_C_C_P>
+ bufferization.dealloc_tensor %sm34cdp : tensor<3x4xf64, #MAT_C_D_P>
+ bufferization.dealloc_tensor %sm42ccp : tensor<4x2xf64, #MAT_C_C_P>
+ bufferization.dealloc_tensor %sm43cdp : tensor<4x3xf64, #MAT_C_D_P>
+ bufferization.dealloc_tensor %sm44dcp : tensor<4x4xf64, #MAT_D_C_P>
+ bufferization.dealloc_tensor %sm43cd_dyn : tensor<?x?xf64, #MAT_C_D>
+ bufferization.dealloc_tensor %sm44dc_dyn : tensor<?x?xf64, #MAT_D_C>
+ bufferization.dealloc_tensor %0 : tensor<9x4xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %1 : tensor<9x4xf64>
+ bufferization.dealloc_tensor %2 : tensor<9x4xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %3 : tensor<9x4xf64>
+ bufferization.dealloc_tensor %4 : tensor<9x4xf64, #MAT_C_C_P>
+ bufferization.dealloc_tensor %5 : tensor<9x4xf64>
+ bufferization.dealloc_tensor %6 : tensor<9x4xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %7 : tensor<9x4xf64>
+ bufferization.dealloc_tensor %8 : tensor<4x9xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %9 : tensor<4x9xf64>
+ bufferization.dealloc_tensor %10 : tensor<4x9xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %11 : tensor<4x9xf64>
+ bufferization.dealloc_tensor %12 : tensor<4x9xf64, #MAT_C_C_P>
+ bufferization.dealloc_tensor %13 : tensor<4x9xf64>
+ bufferization.dealloc_tensor %14 : tensor<4x9xf64, #MAT_C_C>
+ bufferization.dealloc_tensor %15 : tensor<4x9xf64>
+ bufferization.dealloc_tensor %16 : tensor<?x?xf64, #MAT_C_C>
+ return
+ }
+}