[Mlir-commits] [mlir] e5e4dec - [mlir][sparse] support affine expression on sparse dimensions (codegen implementation)
Peiming Liu
llvmlistbot at llvm.org
Tue Nov 22 16:05:01 PST 2022
Author: Peiming Liu
Date: 2022-11-23T00:04:55Z
New Revision: e5e4deca5ee8f0dcaeee58803ac397ae9d5a0a97
URL: https://github.com/llvm/llvm-project/commit/e5e4deca5ee8f0dcaeee58803ac397ae9d5a0a97
DIFF: https://github.com/llvm/llvm-project/commit/e5e4deca5ee8f0dcaeee58803ac397ae9d5a0a97.diff
LOG: [mlir][sparse] support affine expression on sparse dimensions (codegen implementation)
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D138172
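With this change, the loop emitter can generate filter loops: when a
compressed dimension is indexed by a non-trivial affine expression
(e.g., d0 + 2), it emits a scan over the stored entries that keeps only
the coordinates matching the expression. As a rough sketch (hypothetical
value names; the verbatim output is in the sparse_affine.mlir CHECK lines
below), the pattern for an affine index i + 2 on a compressed dimension is:

  scf.for %p = %lo to %hi step %c1 iter_args(%r = %init) -> (tensor<...>) {
    %crd = memref.load %indices[%p] : memref<?xindex>  // stored coordinate
    %expected = arith.addi %i, %c2 : index             // affine expression i + 2
    %hit = arith.cmpi eq, %crd, %expected : index
    %s = scf.if %hit -> (tensor<...>) {
      // user-generated code on a matching coordinate
      scf.yield %updated : tensor<...>
    } else {
      scf.yield %r : tensor<...>                       // pass the reduction through
    }
    scf.yield %s : tensor<...>
  }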
Added:
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
mlir/test/Dialect/SparseTensor/sparse_affine.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index a591bc3bd9b68..2ac3f3bb07298 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -332,7 +332,68 @@ Operation *SparseTensorLoopEmitter::enterLoopOverTensorAtDim(
Operation *SparseTensorLoopEmitter::enterFilterLoopOverTensorAtDim(
OpBuilder &builder, Location loc, size_t tid, size_t dim, AffineExpr affine,
MutableArrayRef<Value> reduc) {
- llvm_unreachable("need to be implemented");
+ assert(!affine.isa<AffineDimExpr>() && !isDenseDLT(dimTypes[tid][dim]));
+ assert(dimTypes[tid].size() > dim);
+ // We cannot re-enter the same level.
+ assert(!coord[tid][dim]);
+
+ Value step = constantIndex(builder, loc, 1);
+
+ Value lo = pidxs[tid][dim];
+ Value hi = highs[tid][dim];
+
+ // TODO: We should instead use a whileOp for the filter loop to allow
+ // breaking out early when the coordinate exceeds the expected value
+ // (for ordered dimensions).
+ // TODO: There are many other potential optimizations that we might apply in
+ // the future. E.g., we could use binary search to locate the pointer index.
+ scf::ForOp forOp = builder.create<scf::ForOp>(loc, lo, hi, step, reduc);
+
+ // In-place update on the reduction variable vector.
+ assert(forOp.getNumRegionIterArgs() == reduc.size());
+ for (int i = 0, e = reduc.size(); i < e; i++)
+ reduc[i] = forOp.getRegionIterArg(i);
+
+ builder.setInsertionPointToStart(forOp.getBody());
+ Value iv = forOp.getInductionVar();
+
+ pidxs[tid][dim] = iv;
+ // Generating a load on the indices array yields the coordinate.
+ Value ptr = idxBuffer[tid][dim];
+ coord[tid][dim] = genIndexLoad(builder, loc, ptr, iv);
+
+ // Generate an if condition to filter out indices that are not equal to the
+ // result of the affine expression.
+ Value expected = genAffine(builder, affine, loc);
+ auto pred = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
+ coord[tid][dim], expected);
+ SmallVector<Type> types;
+ for (Value red : reduc) {
+ types.push_back(red.getType());
+ }
+
+ bool hasReduc = !types.empty();
+ scf::IfOp ifOp =
+ builder.create<scf::IfOp>(loc, types, pred, /*else*/ hasReduc);
+ if (hasReduc) {
+ // scf.for (a) -> v
+ //   %s = scf.if (a) -> v
+ //     user-generated code.
+ //   else
+ //     yield a
+ //   yield %s
+ builder.create<scf::YieldOp>(loc, ifOp.getResults());
+ builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+ // On mismatch.
+ builder.create<scf::YieldOp>(loc, reduc);
+ }
+ // Set the insertion point to the matched branch.
+ builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+
+ // NOTE: we could also prepare for the next dim here in advance.
+ // Push the loop onto the stack.
+ loopStack.emplace_back(ArrayRef<size_t>(tid), ArrayRef<size_t>(dim), forOp,
+ coord[tid][dim], nullptr);
+ return forOp;
}
void SparseTensorLoopEmitter::genDenseAffineAddressAtCurLevel(
@@ -520,7 +581,6 @@ void SparseTensorLoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
if (forOp) {
if (!reduc.empty()) {
assert(reduc.size() == forOp.getNumResults());
- rewriter.setInsertionPointToEnd(forOp.getBody());
rewriter.create<scf::YieldOp>(loc, reduc);
}
// Exit the loop.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index b822a71e228ef..46cc2182c5c10 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -1116,7 +1116,9 @@ static Operation *genFor(Merger &merger, CodeGen &codegen, OpBuilder &builder,
Operation *loop =
genLoopBoundary(codegen, merger, [&](MutableArrayRef<Value> reduc) {
if (merger.isFilterLoop(idx)) {
- assert(isSparse);
+ // extraTids/extraDims must be empty because filter loops only
+ // correspond to the one and only sparse tensor level.
+ assert(isSparse && extraTids.empty() && extraDims.empty());
OpOperand *t = &op->getOpOperand(tid);
auto enc = getSparseTensorEncoding(t->get().getType());
// Retrieves the affine expression for the filter loop.
@@ -1410,7 +1412,8 @@ static void translateBitsToTidDimPairs(
// expression. This is also the best place we can do it to avoid
// putting it inside inner loops.
// NOTE: It assumes that the levels of the input tensor are
- // initialized in order, another more admissible approach might be
+ // initialized in order (which is currently guaranteed by
+ // computeIterationGraph); another, more admissible approach might be
// accepting out-of-order access between consecutive dense levels.
affineTids.push_back(tid);
affineDims.push_back(i);
diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
index 1fa2df56e175b..d1be0664e1e47 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir
@@ -4,6 +4,7 @@
#SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
#CSR = #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>
#Row = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>
+#EncDenseVec = #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>
#trait1 = {
indexing_maps = [
@@ -55,6 +56,94 @@ func.func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>,
return %0 : tensor<32xf32>
}
+// CHECK-LABEL: func.func @mul_inv_sparse1d(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_6]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_15]]] : memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = arith.cmpi eq, %[[VAL_17]], %[[VAL_4]] : index
+// CHECK: %[[VAL_19:.*]] = scf.if %[[VAL_18]] -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_15]]] : memref<?xf32>
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_23:.*]] = scf.for %[[VAL_24:.*]] = %[[VAL_21]] to %[[VAL_22]] step %[[VAL_3]] iter_args(%[[VAL_25:.*]] = %[[VAL_16]]) -> (tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_24]]] : memref<?xf32>
+// CHECK: %[[VAL_27:.*]] = arith.mulf %[[VAL_26]], %[[VAL_20]] : f32
+// CHECK: %[[VAL_28:.*]] = arith.addf %[[VAL_27]], %[[VAL_5]] : f32
+// CHECK: %[[VAL_29:.*]] = sparse_tensor.insert %[[VAL_28]] into %[[VAL_25]]{{\[}}%[[VAL_17]]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_29]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_30:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: } else {
+// CHECK: scf.yield %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: %[[VAL_32:.*]] = sparse_tensor.load %[[VAL_33:.*]] hasInserts : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[VAL_32]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+func.func @mul_inv_sparse1d(%arga: tensor<32xf32, #SpVec>,
+ %argb: tensor<4xf32, #SpVec>) -> tensor<32xf32, #SpVec> {
+ %argx = bufferization.alloc_tensor() : tensor<32xf32, #SpVec>
+ %0 = linalg.generic #trait1
+ ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32, #SpVec>)
+ outs(%argx: tensor<32xf32, #SpVec>) {
+ ^bb(%a: f32, %b: f32, %x: f32):
+ %0 = arith.mulf %a, %b : f32
+ %1 = arith.addf %x, %0 : f32
+ linalg.yield %1 : f32
+ } -> tensor<32xf32, #SpVec>
+ return %0 : tensor<32xf32, #SpVec>
+}
+
+
+// CHECK-LABEL: func.func @mul_inv_enc_dense1d(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_6:.*]] = bufferization.alloc_tensor() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf32>
+// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xf32>
+// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
+// CHECK: %[[VAL_14:.*]] = arith.mulf %[[VAL_13]], %[[VAL_10]] : f32
+// CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_12]], %[[VAL_14]] : f32
+// CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref<?xf32>
+// CHECK: }
+// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>,
+ %argb: tensor<4xf32, #EncDenseVec>) -> tensor<32xf32, #EncDenseVec> {
+ %argx = bufferization.alloc_tensor() : tensor<32xf32, #EncDenseVec>
+ %0 = linalg.generic #trait1
+ ins(%arga, %argb: tensor<32xf32, #EncDenseVec>, tensor<4xf32, #EncDenseVec>)
+ outs(%argx: tensor<32xf32, #EncDenseVec>) {
+ ^bb(%a: f32, %b: f32, %x: f32):
+ %0 = arith.mulf %a, %b : f32
+ %1 = arith.addf %x, %0 : f32
+ linalg.yield %1 : f32
+ } -> tensor<32xf32, #EncDenseVec>
+ return %0 : tensor<32xf32, #EncDenseVec>
+}
+
#trait2 = {
indexing_maps = [
affine_map<(i) -> (i)>, // a
@@ -105,6 +194,57 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>,
return %0 : tensor<32xi32>
}
+// CHECK-LABEL: func.func @and_affine_sparse1d(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>>)
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 2 : index
+// CHECK: %[[VAL_5:.*]] = bufferization.alloc_tensor() : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xi32>
+// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_5]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref<?xindex>
+// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_15]]] : memref<?xi32>
+// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref<?xindex>
+// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref<?xindex>
+// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_16]]) -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref<?xindex>
+// CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_17]], %[[VAL_4]] : index
+// CHECK: %[[VAL_26:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_25]] : index
+// CHECK: %[[VAL_27:.*]] = scf.if %[[VAL_26]] -> (tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_28:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref<?xi32>
+// CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_18]], %[[VAL_28]] : i32
+// CHECK: %[[VAL_30:.*]] = sparse_tensor.insert %[[VAL_29]] into %[[VAL_23]]{{\[}}%[[VAL_17]]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_30]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: } else {
+// CHECK: scf.yield %[[VAL_23]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_31:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_32:.*]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: %[[VAL_33:.*]] = sparse_tensor.load %[[VAL_34:.*]] hasInserts : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[VAL_33]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>
+func.func @and_affine_sparse1d(%arga: tensor<32xi32, #SpVec>,
+ %argb: tensor<34xi32, #SpVec>) -> tensor<32xi32, #SpVec> {
+ %argx = bufferization.alloc_tensor() : tensor<32xi32, #SpVec>
+ %0 = linalg.generic #trait2
+ ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32, #SpVec>)
+ outs(%argx: tensor<32xi32, #SpVec>) {
+ ^bb(%a: i32, %b: i32, %x: i32):
+ %0 = arith.andi %a, %b : i32
+ linalg.yield %0 : i32
+ } -> tensor<32xi32, #SpVec>
+ return %0 : tensor<32xi32, #SpVec>
+}
+
#trait3 = {
indexing_maps = [
affine_map<(i,j) -> (i,j)>, // a
@@ -162,6 +302,69 @@ func.func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>,
return %0 : tensor<32x16xf64>
}
+
+// CHECK-LABEL: func.func @mul_affine_sparse2d(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK: %[[VAL_2:.*]] = arith.constant 32 : index
+// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK: %[[VAL_5:.*]] = arith.constant 2 : index
+// CHECK: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK: %[[VAL_7:.*]] = arith.constant 3 : index
+// CHECK: %[[VAL_8:.*]] = bufferization.alloc_tensor() : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_12:.*]] = sparse_tensor.pointers %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xindex>
+// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<34x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref<?xf64>
+// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_17:.*]] = %[[VAL_8]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_16]], %[[VAL_5]] : index
+// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
+// CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_16]], %[[VAL_4]] : index
+// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref<?xindex>
+// CHECK: %[[VAL_22:.*]] = scf.for %[[VAL_23:.*]] = %[[VAL_19]] to %[[VAL_21]] step %[[VAL_4]] iter_args(%[[VAL_24:.*]] = %[[VAL_17]]) -> (tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xindex>
+// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]]] : memref<?xf64>
+// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_18]]] : memref<?xindex>
+// CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_18]], %[[VAL_4]] : index
+// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_28]]] : memref<?xindex>
+// CHECK: %[[VAL_30:.*]]:2 = scf.for %[[VAL_31:.*]] = %[[VAL_27]] to %[[VAL_29]] step %[[VAL_4]] iter_args(%[[VAL_32:.*]] = %[[VAL_6]], %[[VAL_33:.*]] = %[[VAL_24]]) -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_31]]] : memref<?xindex>
+// CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_25]], %[[VAL_7]] : index
+// CHECK: %[[VAL_36:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_35]] : index
+// CHECK: %[[VAL_37:.*]]:2 = scf.if %[[VAL_36]] -> (f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>) {
+// CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_31]]] : memref<?xf64>
+// CHECK: %[[VAL_39:.*]] = arith.mulf %[[VAL_26]], %[[VAL_38]] : f64
+// CHECK: %[[VAL_40:.*]] = arith.addf %[[VAL_32]], %[[VAL_39]] : f64
+// CHECK: scf.yield %[[VAL_40]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: } else {
+// CHECK: scf.yield %[[VAL_32]], %[[VAL_33]] : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_41:.*]]#0, %[[VAL_41]]#1 : f64, tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: %[[VAL_42:.*]] = sparse_tensor.insert %[[VAL_43:.*]]#0 into %[[VAL_43]]#1{{\[}}%[[VAL_16]], %[[VAL_25]]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: scf.yield %[[VAL_42]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: scf.yield %[[VAL_44:.*]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+// CHECK: %[[VAL_45:.*]] = sparse_tensor.load %[[VAL_46:.*]] hasInserts : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[VAL_45]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>
+func.func @mul_affine_sparse2d(%arga: tensor<32x16xf64, #CSR>,
+ %argb: tensor<34x19xf64, #CSR>) -> tensor<32x16xf64, #CSR> {
+ %argx = bufferization.alloc_tensor() : tensor<32x16xf64, #CSR>
+ %0 = linalg.generic #trait3
+ ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64, #CSR>)
+ outs(%argx: tensor<32x16xf64, #CSR>) {
+ ^bb(%a: f64, %b: f64, %x: f64):
+ %0 = arith.mulf %a, %b : f64
+ %1 = arith.addf %x, %0 : f64
+ linalg.yield %1 : f64
+ } -> tensor<32x16xf64, #CSR>
+ return %0 : tensor<32x16xf64, #CSR>
+}
+
#trait4 = {
indexing_maps = [
affine_map<(i,j) -> (i+2,j)>, // a
@@ -286,3 +489,4 @@ func.func @mul_const_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>,
} -> tensor<32x16xf64>
return %0 : tensor<32x16xf64>
}
+
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
new file mode 100644
index 0000000000000..07a2250c9a74b
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
@@ -0,0 +1,128 @@
+// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#CCC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed" ] }>
+
+#CDC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "dense", "compressed" ]
+ // FIXME: Still inadmissible; might need investigation.
+ // dimOrdering = affine_map<(i,j,k) -> (j,k,i)>
+}>
+
+// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
+func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor<?x?x?xf32> {
+ %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor<?x?x?xf32>
+ %ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ return %ret : tensor<?x?x?xf32>
+}
+
+func.func @conv_1d_nwc_wcf(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
+ %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
+ strides = dense<1> : tensor<1xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
+ outs (%arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ return %ret : tensor<?x?x?xf32>
+}
+
+func.func @conv_1d_nwc_wcf_CCC(%arg0: tensor<?x?x?xf32, #CCC>, %arg1: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC> {
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor<?x?x?xf32, #CCC>
+ %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
+ strides = dense<1> : tensor<1xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>)
+ outs (%s: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC>
+ return %ret : tensor<?x?x?xf32, #CCC>
+}
+
+func.func @conv_1d_nwc_wcf_CDC(%arg0: tensor<?x?x?xf32, #CDC>, %arg1: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC> {
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c3, %c6, %c1) : tensor<?x?x?xf32, #CDC>
+ %ret = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>,
+ strides = dense<1> : tensor<1xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>)
+ outs (%s: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC>
+ return %ret : tensor<?x?x?xf32, #CDC>
+}
+
+func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %c8 = arith.constant 8 : index
+ %f10 = arith.constant 10.00000e+00 : f32
+ %val = arith.constant 2.00000e+00 : f32
+ %zero = arith.constant 0.00000e+00 : f32
+
+ %in1D_tmp = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+ %in1D_nwc = tensor.insert %f10 into %in1D_tmp[%c0, %c3, %c0] : tensor<?x?x?xf32>
+ %filter1D_nwc = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+ %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+
+ %in1D_nwc_CCC = sparse_tensor.convert %in1D_nwc
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
+ %filter1D_nwc_CCC = sparse_tensor.convert %filter1D_nwc
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
+
+ %in1D_nwc_CDC = sparse_tensor.convert %in1D_nwc
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
+ %filter1D_nwc_CDC = sparse_tensor.convert %filter1D_nwc
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
+
+ %dense_ret = call @conv_1d_nwc_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>)
+ %CCC_ret = call @conv_1d_nwc_wcf_CCC(%in1D_nwc_CCC, %filter1D_nwc_CCC) : (tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>) -> (tensor<?x?x?xf32, #CCC>)
+ %CDC_ret = call @conv_1d_nwc_wcf_CDC(%in1D_nwc_CDC, %filter1D_nwc_CDC) : (tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>) -> (tensor<?x?x?xf32, #CDC>)
+
+ // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
+ %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<3x6x1xf32>
+ vector.print %dense_v : vector<3x6x1xf32>
+
+ // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
+ %1 = sparse_tensor.convert %CCC_ret
+ : tensor<?x?x?xf32, #CCC> to tensor<?x?x?xf32>
+ %v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<3x6x1xf32>
+ vector.print %v1 : vector<3x6x1xf32>
+
+ // CHECK: ( ( ( 12 ), ( 28 ), ( 28 ), ( 28 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ),
+ // CHECK-SAME: ( ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ), ( 12 ) ) )
+ %2 = sparse_tensor.convert %CDC_ret
+ : tensor<?x?x?xf32, #CDC> to tensor<?x?x?xf32>
+ %v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<3x6x1xf32>
+ vector.print %v2 : vector<3x6x1xf32>
+
+ // Free the resources
+ bufferization.dealloc_tensor %in1D_nwc : tensor<?x?x?xf32>
+ bufferization.dealloc_tensor %filter1D_nwc : tensor<?x?x?xf32>
+ bufferization.dealloc_tensor %out1D_nwc : tensor<?x?x?xf32>
+
+ bufferization.dealloc_tensor %in1D_nwc_CDC : tensor<?x?x?xf32, #CDC>
+ bufferization.dealloc_tensor %filter1D_nwc_CDC : tensor<?x?x?xf32, #CDC>
+ bufferization.dealloc_tensor %in1D_nwc_CCC : tensor<?x?x?xf32, #CCC>
+ bufferization.dealloc_tensor %filter1D_nwc_CCC : tensor<?x?x?xf32, #CCC>
+
+ bufferization.dealloc_tensor %CCC_ret : tensor<?x?x?xf32, #CCC>
+ bufferization.dealloc_tensor %CDC_ret : tensor<?x?x?xf32, #CDC>
+
+ return
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
new file mode 100644
index 0000000000000..06ddef8801a19
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d.mlir
@@ -0,0 +1,209 @@
+// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
+#CSR = #sparse_tensor.encoding<{dimLevelType = ["dense", "compressed"]}>
+#CSC = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed" ],
+ dimOrdering = affine_map<(i,j) -> (j,i)>
+}>
+
+// An example of a 2D convolution with a sparse filter.
+module {
+
+ func.func @conv2d(%input: tensor<8x8xi32>,
+ %filter: tensor<3x3xi32, #DCSR>,
+ %output: tensor<6x6xi32>) -> tensor<6x6xi32> {
+ %0 = linalg.conv_2d
+ ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
+ outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
+ return %0 : tensor<6x6xi32>
+ }
+
+ func.func @conv2d_sparse_out(%input: tensor<8x8xi32>,
+ %filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
+ %0 = linalg.conv_2d
+ ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
+ outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+ return %0 : tensor<6x6xi32, #DCSR>
+ }
+
+ func.func @conv2d_all_sparse_DCSR(%input: tensor<8x8xi32, #DCSR>,
+ %filter: tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> {
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR>
+ %0 = linalg.conv_2d
+ ins (%input, %filter: tensor<8x8xi32, #DCSR>, tensor<3x3xi32, #DCSR>)
+ outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+ return %0 : tensor<6x6xi32, #DCSR>
+ }
+
+ func.func @conv2d_all_sparse_CSR(%input: tensor<8x8xi32, #CSR>,
+ %filter: tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR> {
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSR>
+ %0 = linalg.conv_2d
+ ins (%input, %filter: tensor<8x8xi32, #CSR>, tensor<3x3xi32, #CSR>)
+ outs (%s: tensor<6x6xi32, #CSR>) -> tensor<6x6xi32, #CSR>
+ return %0 : tensor<6x6xi32, #CSR>
+ }
+
+ func.func @conv2d_all_sparse_CSC(%input: tensor<8x8xi32, #CSC>,
+ %filter: tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC> {
+ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #CSC>
+ %0 = linalg.conv_2d
+ ins (%input, %filter: tensor<8x8xi32, #CSC>, tensor<3x3xi32, #CSC>)
+ outs (%s: tensor<6x6xi32, #CSC>) -> tensor<6x6xi32, #CSC>
+ return %0 : tensor<6x6xi32, #CSC>
+ }
+
+ func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %i0 = arith.constant 0 : i32
+
+ // A typical edge detection filter.
+ %filter = arith.constant dense<[
+ [ 1, 0, -1 ],
+ [ 0, 0, 0 ],
+ [ -1, 0, 1 ]
+ ]> : tensor<3x3xi32>
+ %sparse_filter_DCSR = sparse_tensor.convert %filter
+ : tensor<3x3xi32> to tensor<3x3xi32, #DCSR>
+ %sparse_filter_CSR = sparse_tensor.convert %filter
+ : tensor<3x3xi32> to tensor<3x3xi32, #CSR>
+ %sparse_filter_CSC = sparse_tensor.convert %filter
+ : tensor<3x3xi32> to tensor<3x3xi32, #CSC>
+
+
+ %input = arith.constant dense<[
+ [ 1, 2, 3, 4, 0, 6, 7, 8 ],
+ [ 2, 2, 4, 4, 0, 0, 6, 8 ],
+ [ 2, 2, 4, 4, 0, 0, 6, 8 ],
+ [ 2, 2, 3, 4, 0, 0, 7, 8 ],
+ [ 1, 3, 3, 4, 0, 0, 6, 8 ],
+ [ 3, 2, 3, 4, 0, 0, 7, 8 ],
+ [ 1, 3, 3, 4, 3, 6, 6, 8 ],
+ [ 1, 3, 3, 4, 3, 0, 7, 8 ]
+ ]> : tensor<8x8xi32>
+ %sparse_input_DCSR = sparse_tensor.convert %input
+ : tensor<8x8xi32> to tensor<8x8xi32, #DCSR>
+ %sparse_input_CSR = sparse_tensor.convert %input
+ : tensor<8x8xi32> to tensor<8x8xi32, #CSR>
+ %sparse_input_CSC = sparse_tensor.convert %input
+ : tensor<8x8xi32> to tensor<8x8xi32, #CSC>
+
+ // Call the kernel.
+ %output = arith.constant dense<0> : tensor<6x6xi32>
+ %0 = call @conv2d(%input, %sparse_filter_DCSR, %output)
+ : (tensor<8x8xi32>,
+ tensor<3x3xi32, #DCSR>, tensor<6x6xi32>) -> tensor<6x6xi32>
+ %1 = call @conv2d_sparse_out(%input, %sparse_filter_DCSR)
+ : (tensor<8x8xi32>,
+ tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+ %2 = call @conv2d_all_sparse_DCSR(%sparse_input_DCSR, %sparse_filter_DCSR)
+ : (tensor<8x8xi32, #DCSR>,
+ tensor<3x3xi32, #DCSR>) -> tensor<6x6xi32, #DCSR>
+ %3 = call @conv2d_all_sparse_CSR(%sparse_input_CSR, %sparse_filter_CSR)
+ : (tensor<8x8xi32, #CSR>,
+ tensor<3x3xi32, #CSR>) -> tensor<6x6xi32, #CSR>
+ %4 = call @conv2d_all_sparse_CSC(%sparse_input_CSC, %sparse_filter_CSC)
+ : (tensor<8x8xi32, #CSC>,
+ tensor<3x3xi32, #CSC>) -> tensor<6x6xi32, #CSC>
+
+
+ // Verify the output.
+ //
+ // CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
+ // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
+ // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
+ // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
+ // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
+ // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+ //
+ %v = vector.transfer_read %0[%c0, %c0], %i0
+ : tensor<6x6xi32>, vector<6x6xi32>
+ vector.print %v : vector<6x6xi32>
+
+ //
+ // Should be the same as the dense output.
+ // CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
+ // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
+ // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
+ // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
+ // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
+ // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+ //
+ %sparse_ret = sparse_tensor.convert %1
+ : tensor<6x6xi32, #DCSR> to tensor<6x6xi32>
+ %v1 = vector.transfer_read %sparse_ret[%c0, %c0], %i0
+ : tensor<6x6xi32>, vector<6x6xi32>
+ vector.print %v1 : vector<6x6xi32>
+
+ //
+ // Should be the same as the dense output.
+ // CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
+ // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
+ // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
+ // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
+ // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
+ // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+ //
+ %all_sparse_DCSR = sparse_tensor.convert %2
+ : tensor<6x6xi32, #DCSR> to tensor<6x6xi32>
+ %v2 = vector.transfer_read %all_sparse_DCSR[%c0, %c0], %i0
+ : tensor<6x6xi32>, vector<6x6xi32>
+ vector.print %v2 : vector<6x6xi32>
+
+ //
+ // Should be the same as the dense output.
+ // CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
+ // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
+ // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
+ // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
+ // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
+ // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+ //
+ %all_sparse_CSR = sparse_tensor.convert %3
+ : tensor<6x6xi32, #CSR> to tensor<6x6xi32>
+ %v3 = vector.transfer_read %all_sparse_CSR[%c0, %c0], %i0
+ : tensor<6x6xi32>, vector<6x6xi32>
+ vector.print %v3 : vector<6x6xi32>
+
+ //
+ // Should be the same as the dense output.
+ // CHECK: ( ( 0, 0, -1, -6, -1, 6 ),
+ // CHECK-SAME: ( -1, 0, 1, 0, 1, 0 ),
+ // CHECK-SAME: ( 0, -1, 1, 0, 0, 0 ),
+ // CHECK-SAME: ( -1, 0, 0, 0, 0, 0 ),
+ // CHECK-SAME: ( 0, 0, 3, 6, -3, -6 ),
+ // CHECK-SAME: ( 2, -1, 3, 0, -3, 0 ) )
+ //
+ %all_sparse_CSC = sparse_tensor.convert %4
+ : tensor<6x6xi32, #CSC> to tensor<6x6xi32>
+ %v4 = vector.transfer_read %all_sparse_CSC[%c0, %c0], %i0
+ : tensor<6x6xi32>, vector<6x6xi32>
+ vector.print %v4 : vector<6x6xi32>
+
+ // Release the resources.
+ bufferization.dealloc_tensor %sparse_filter_DCSR : tensor<3x3xi32, #DCSR>
+ bufferization.dealloc_tensor %sparse_filter_CSR : tensor<3x3xi32, #CSR>
+ bufferization.dealloc_tensor %sparse_filter_CSC : tensor<3x3xi32, #CSC>
+
+ bufferization.dealloc_tensor %sparse_input_DCSR : tensor<8x8xi32, #DCSR>
+ bufferization.dealloc_tensor %sparse_input_CSR : tensor<8x8xi32, #CSR>
+ bufferization.dealloc_tensor %sparse_input_CSC : tensor<8x8xi32, #CSC>
+
+ bufferization.dealloc_tensor %1 : tensor<6x6xi32, #DCSR>
+ bufferization.dealloc_tensor %2 : tensor<6x6xi32, #DCSR>
+ bufferization.dealloc_tensor %3 : tensor<6x6xi32, #CSR>
+ bufferization.dealloc_tensor %4 : tensor<6x6xi32, #CSC>
+ return
+ }
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
new file mode 100644
index 0000000000000..6926a5a9e07d6
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
@@ -0,0 +1,172 @@
+// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#CCCC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed", "compressed" ]
+}>
+
+#CDCD = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "dense", "compressed", "dense" ]
+}>
+
+// Creates and returns a 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f.
+func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor<?x?x?x?xf32> {
+ %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor<?x?x?x?xf32>
+ %ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+ return %ret : tensor<?x?x?x?xf32>
+}
+
+func.func @conv_2d_nhwc_hwcf(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor<?x?x?x?xf32>, %arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
+ %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
+ strides = dense<1> : tensor<2xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
+ outs (%arg2: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
+ return %ret : tensor<?x?x?x?xf32>
+}
+
+func.func @conv_2d_nhwc_hwcf_CCCC(%arg0: tensor<?x?x?x?xf32, #CCCC>, %arg1: tensor<?x?x?x?xf32, #CCCC>) -> tensor<?x?x?x?xf32, #CCCC> {
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor<?x?x?x?xf32, #CCCC>
+ %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
+ strides = dense<1> : tensor<2xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>)
+ outs (%s: tensor<?x?x?x?xf32, #CCCC>) -> tensor<?x?x?x?xf32, #CCCC>
+ return %ret : tensor<?x?x?x?xf32, #CCCC>
+}
+
+func.func @conv_2d_nhwc_hwcf_CDCD(%arg0: tensor<?x?x?x?xf32, #CDCD>, %arg1: tensor<?x?x?x?xf32, #CDCD>) -> tensor<?x?x?x?xf32, #CDCD> {
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c3, %c6, %c6, %c1) : tensor<?x?x?x?xf32, #CDCD>
+ %ret = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
+ strides = dense<1> : tensor<2xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32, #CDCD>)
+ outs (%s: tensor<?x?x?x?xf32, #CDCD>) -> tensor<?x?x?x?xf32, #CDCD>
+ return %ret : tensor<?x?x?x?xf32, #CDCD>
+}
+
+func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %c8 = arith.constant 8 : index
+ %f10 = arith.constant 10.00000e+00 : f32
+ %val = arith.constant 2.00000e+00 : f32
+ %zero = arith.constant 0.00000e+00 : f32
+
+ %filter2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
+ %in2D_tmp = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
+ %in2D_nhwc = tensor.insert %f10 into %in2D_tmp[%c0, %c0, %c3, %c0] : tensor<?x?x?x?xf32>
+ %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (tensor<?x?x?x?xf32>)
+
+ %in2D_nhwc_CCCC = sparse_tensor.convert %in2D_nhwc
+ : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>
+ %filter2D_nhwc_CCCC = sparse_tensor.convert %filter2D_nhwc
+ : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CCCC>
+
+ %in2D_nhwc_CDCD = sparse_tensor.convert %in2D_nhwc
+ : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>
+ %filter2D_nhwc_CDCD = sparse_tensor.convert %filter2D_nhwc
+ : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32, #CDCD>
+
+ %dense_ret = call @conv_2d_nhwc_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>) -> (tensor<?x?x?x?xf32>)
+ %CCCC_ret = call @conv_2d_nhwc_hwcf_CCCC(%in2D_nhwc_CCCC, %filter2D_nhwc_CCCC) : (tensor<?x?x?x?xf32, #CCCC>, tensor<?x?x?x?xf32, #CCCC>) -> (tensor<?x?x?x?xf32, #CCCC>)
+ %CDCD_ret = call @conv_2d_nhwc_hwcf_CDCD(%in2D_nhwc_CDCD, %filter2D_nhwc_CDCD) : (tensor<?x?x?x?xf32, #CDCD>, tensor<?x?x?x?xf32, #CDCD>) -> (tensor<?x?x?x?xf32, #CDCD>)
+
+ // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
+ %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
+ vector.print %dense_v : vector<3x6x6x1xf32>
+
+ // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
+ %1 = sparse_tensor.convert %CCCC_ret
+ : tensor<?x?x?x?xf32, #CCCC> to tensor<?x?x?x?xf32>
+ %v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
+ vector.print %v1 : vector<3x6x6x1xf32>
+
+ // CHECK: ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) )
+ %2 = sparse_tensor.convert %CDCD_ret
+ : tensor<?x?x?x?xf32, #CDCD> to tensor<?x?x?x?xf32>
+ %v2 = vector.transfer_read %2[%c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?xf32>, vector<3x6x6x1xf32>
+ vector.print %v2 : vector<3x6x6x1xf32>
+
+ // Free the resources
+ bufferization.dealloc_tensor %in2D_nhwc : tensor<?x?x?x?xf32>
+ bufferization.dealloc_tensor %filter2D_nhwc : tensor<?x?x?x?xf32>
+ bufferization.dealloc_tensor %out2D_nhwc : tensor<?x?x?x?xf32>
+
+ bufferization.dealloc_tensor %in2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>
+ bufferization.dealloc_tensor %filter2D_nhwc_CDCD : tensor<?x?x?x?xf32, #CDCD>
+ bufferization.dealloc_tensor %in2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
+ bufferization.dealloc_tensor %filter2D_nhwc_CCCC : tensor<?x?x?x?xf32, #CCCC>
+
+ bufferization.dealloc_tensor %CCCC_ret : tensor<?x?x?x?xf32, #CCCC>
+ bufferization.dealloc_tensor %CDCD_ret : tensor<?x?x?x?xf32, #CDCD>
+
+ return
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
new file mode 100644
index 0000000000000..13700dfb0a19a
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
@@ -0,0 +1,219 @@
+// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+#CCC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed" ]
+}>
+
+#CDC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "dense", "compressed" ]
+}>
+
+// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
+func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor<?x?x?xf32> {
+ %buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor<?x?x?xf32>
+ %ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ return %ret : tensor<?x?x?xf32>
+}
+
+func.func @conv_3d(%arg0: tensor<?x?x?xf32>, %arg1: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
+ %ret = linalg.conv_3d
+ ins (%arg0, %arg1: tensor<?x?x?xf32>, tensor<?x?x?xf32>)
+ outs (%arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32>
+ return %ret : tensor<?x?x?xf32>
+}
+
+func.func @conv_3d_CCC(%arg0: tensor<?x?x?xf32, #CCC>, %arg1: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC> {
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor<?x?x?xf32, #CCC>
+ %ret = linalg.conv_3d
+ ins (%arg0, %arg1: tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>)
+ outs (%s: tensor<?x?x?xf32, #CCC>) -> tensor<?x?x?xf32, #CCC>
+ return %ret : tensor<?x?x?xf32, #CCC>
+}
+
+func.func @conv_3d_CDC(%arg0: tensor<?x?x?xf32, #CDC>, %arg1: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC> {
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c6, %c6, %c6) : tensor<?x?x?xf32, #CDC>
+ %ret = linalg.conv_3d
+ ins (%arg0, %arg1: tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>)
+ outs (%s: tensor<?x?x?xf32, #CDC>) -> tensor<?x?x?xf32, #CDC>
+ return %ret : tensor<?x?x?xf32, #CDC>
+}
+
+func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %c8 = arith.constant 8 : index
+ %f10 = arith.constant 10.00000e+00 : f32
+ %val = arith.constant 2.00000e+00 : f32
+ %zero = arith.constant 0.00000e+00 : f32
+
+ %filter3D = call @alloc_3d_filled_f32(%c3, %c3, %c3, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+ %in3D_tmp = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+ %in3D = tensor.insert %f10 into %in3D_tmp[%c0, %c3, %c0] : tensor<?x?x?xf32>
+ %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (tensor<?x?x?xf32>)
+
+ %in3D_CCC = sparse_tensor.convert %in3D
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
+ %filter3D_CCC = sparse_tensor.convert %filter3D
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CCC>
+
+ %in3D_CDC = sparse_tensor.convert %in3D
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
+ %filter3D_CDC = sparse_tensor.convert %filter3D
+ : tensor<?x?x?xf32> to tensor<?x?x?xf32, #CDC>
+
+ %dense_ret = call @conv_3d(%in3D, %filter3D, %out3D) : (tensor<?x?x?xf32>, tensor<?x?x?xf32>, tensor<?x?x?xf32>) -> (tensor<?x?x?xf32>)
+ %CCC_ret = call @conv_3d_CCC(%in3D_CCC, %filter3D_CCC) : (tensor<?x?x?xf32, #CCC>, tensor<?x?x?xf32, #CCC>) -> (tensor<?x?x?xf32, #CCC>)
+ %CDC_ret = call @conv_3d_CDC(%in3D_CDC, %filter3D_CDC) : (tensor<?x?x?xf32, #CDC>, tensor<?x?x?xf32, #CDC>) -> (tensor<?x?x?xf32, #CDC>)
+
+ // CHECK: ( ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
+ %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<6x6x6xf32>
+ vector.print %dense_v : vector<6x6x6xf32>
+
+ // CHECK-NEXT: ( ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
+ %1 = sparse_tensor.convert %CCC_ret
+ : tensor<?x?x?xf32, #CCC> to tensor<?x?x?xf32>
+ %v1 = vector.transfer_read %1[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<6x6x6xf32>
+ vector.print %v1 : vector<6x6x6xf32>
+
+ // CHECK-NEXT: ( ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 124, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ),
+ // CHECK-SAME: ( ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ),
+ // CHECK-SAME: ( 108, 108, 108, 108, 108, 108 ) ) )
+ %2 = sparse_tensor.convert %CDC_ret
+ : tensor<?x?x?xf32, #CDC> to tensor<?x?x?xf32>
+ %v2 = vector.transfer_read %2[%c0, %c0, %c0], %zero
+ : tensor<?x?x?xf32>, vector<6x6x6xf32>
+ vector.print %v2 : vector<6x6x6xf32>
+
+ // Free the resources
+ bufferization.dealloc_tensor %in3D : tensor<?x?x?xf32>
+ bufferization.dealloc_tensor %filter3D : tensor<?x?x?xf32>
+ bufferization.dealloc_tensor %out3D : tensor<?x?x?xf32>
+
+ bufferization.dealloc_tensor %in3D_CDC : tensor<?x?x?xf32, #CDC>
+ bufferization.dealloc_tensor %filter3D_CDC : tensor<?x?x?xf32, #CDC>
+ bufferization.dealloc_tensor %in3D_CCC : tensor<?x?x?xf32, #CCC>
+ bufferization.dealloc_tensor %filter3D_CCC : tensor<?x?x?xf32, #CCC>
+
+ bufferization.dealloc_tensor %CCC_ret : tensor<?x?x?xf32, #CCC>
+ bufferization.dealloc_tensor %CDC_ret : tensor<?x?x?xf32, #CDC>
+
+ return
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
new file mode 100644
index 0000000000000..872e0ad494a65
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
@@ -0,0 +1,239 @@
+// RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=true | \
+// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false enable-buffer-initialization=true" | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
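+// The two RUN invocations exercise both sparsification paths: the first
+// lowers through the sparse runtime support library, the second through
+// direct codegen (enable-runtime-library=false) with allocated buffers
+// initialized. Both paths must print identical results for FileCheck.
+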
+#CCCCC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed", "compressed", "compressed" ]
+}>
+
+#CDCDC = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "dense", "compressed", "dense", "compressed"]
+}>
+
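+// In these encodings, a "compressed" level stores only the coordinates that
+// carry nonzeros (through pointer and index arrays), whereas a "dense" level
+// is materialized in full, so #CDCDC mixes both storage styles. As a purely
+// illustrative variant (not used in this test), an all-dense encoding of the
+// same rank would read:
+//   dimLevelType = [ "dense", "dense", "dense", "dense", "dense" ]
+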
+// Creates and returns a 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
+func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor<?x?x?x?x?xf32> {
+ %buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor<?x?x?x?x?xf32>
+ %ret = linalg.fill ins(%f : f32) outs(%buf : tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+ return %ret : tensor<?x?x?x?x?xf32>
+}
+
+func.func @conv_3d_ndhwc_dhwcf(%arg0: tensor<?x?x?x?x?xf32>,
+ %arg1: tensor<?x?x?x?x?xf32>,
+ %arg2: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32> {
+ %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
+ strides = dense<1> : tensor<3xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>)
+ outs (%arg2: tensor<?x?x?x?x?xf32>) -> tensor<?x?x?x?x?xf32>
+ return %ret : tensor<?x?x?x?x?xf32>
+}
+
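+// Note that linalg.conv_3d_ndhwc_dhwcf reads its input at
+// in[n, d + kd, h + kh, w + kw, c], i.e., the sparse input levels are
+// addressed through compound affine expressions such as d + kd, which is
+// the sparsification case exercised here. With an 8-wide input, a 3-wide
+// filter, and unit strides and dilations, every output spatial dimension
+// is 8 - 3 + 1 = 6, matching the 1x6x6x6x1 vectors checked below.
+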
+func.func @conv_3d_ndhwc_dhwcf_CCCCC(%arg0: tensor<?x?x?x?x?xf32, #CCCCC>,
+ %arg1: tensor<?x?x?x?x?xf32, #CCCCC>)
+ -> tensor<?x?x?x?x?xf32, #CCCCC> {
+ %c1 = arith.constant 1 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1)
+ : tensor<?x?x?x?x?xf32, #CCCCC>
+ %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
+ strides = dense<1> : tensor<3xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?x?xf32, #CCCCC>, tensor<?x?x?x?x?xf32, #CCCCC>)
+ outs (%s: tensor<?x?x?x?x?xf32, #CCCCC>) -> tensor<?x?x?x?x?xf32, #CCCCC>
+ return %ret : tensor<?x?x?x?x?xf32, #CCCCC>
+}
+
+func.func @conv_3d_ndhwc_dhwcf_CDCDC(%arg0: tensor<?x?x?x?x?xf32, #CDCDC>,
+ %arg1: tensor<?x?x?x?x?xf32, #CDCDC>)
+ -> tensor<?x?x?x?x?xf32, #CDCDC> {
+ %c1 = arith.constant 1 : index
+ %c6 = arith.constant 6 : index
+ %s = bufferization.alloc_tensor(%c1, %c6, %c6, %c6, %c1)
+ : tensor<?x?x?x?x?xf32, #CDCDC>
+ %ret = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>,
+ strides = dense<1> : tensor<3xi64>}
+ ins (%arg0, %arg1: tensor<?x?x?x?x?xf32, #CDCDC>, tensor<?x?x?x?x?xf32, #CDCDC>)
+ outs (%s: tensor<?x?x?x?x?xf32, #CDCDC>) -> tensor<?x?x?x?x?xf32, #CDCDC>
+ return %ret : tensor<?x?x?x?x?xf32, #CDCDC>
+}
+
+func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c3 = arith.constant 3 : index
+ %c6 = arith.constant 6 : index
+ %c8 = arith.constant 8 : index
+ %f10 = arith.constant 10.00000e+00 : f32
+ %val = arith.constant 2.00000e+00 : f32
+ %zero = arith.constant 0.00000e+00 : f32
+
+ %in3D_tmp = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
+ %in3D_ndhwc = tensor.insert %f10 into %in3D_tmp[%c0, %c0, %c0, %c3, %c0] : tensor<?x?x?x?x?xf32>
+
+ %filter3D_ndhwc = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
+ %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (tensor<?x?x?x?x?xf32>)
+
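+// For reference on the expected output: every input and filter entry is 2.0
+// except the single 10.0 inserted at [0, 0, 0, 3, 0]. A 3x3x3 window of
+// all-2.0 taps contributes 27 * (2.0 * 2.0) = 108, while a window covering
+// the 10.0 entry contributes 26 * 4.0 + 10.0 * 2.0 = 124, which is why only
+// the three outputs at [0, 0, 0, 1..3, 0] print as 124 below.
+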
+ %in3D_ndhwc_CCCCC = sparse_tensor.convert %in3D_ndhwc
+ : tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CCCCC>
+ %filter3D_ndhwc_CCCCC = sparse_tensor.convert %filter3D_ndhwc
+ : tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CCCCC>
+
+ %in3D_ndhwc_CDCDC = sparse_tensor.convert %in3D_ndhwc
+ : tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>
+ %filter3D_ndhwc_CDCDC = sparse_tensor.convert %filter3D_ndhwc
+ : tensor<?x?x?x?x?xf32> to tensor<?x?x?x?x?xf32, #CDCDC>
+
+ // CHECK:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
+ %dense_ret = call @conv_3d_ndhwc_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc)
+ : (tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>, tensor<?x?x?x?x?xf32>) -> (tensor<?x?x?x?x?xf32>)
+ %dense_v = vector.transfer_read %dense_ret[%c0, %c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
+ vector.print %dense_v : vector<1x6x6x6x1xf32>
+
+ %CCCCC_ret = call @conv_3d_ndhwc_dhwcf_CCCCC(%in3D_ndhwc_CCCCC, %filter3D_ndhwc_CCCCC)
+ : (tensor<?x?x?x?x?xf32, #CCCCC>,
+ tensor<?x?x?x?x?xf32, #CCCCC>) -> (tensor<?x?x?x?x?xf32, #CCCCC>)
+
+ // CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
+ %1 = sparse_tensor.convert %CCCCC_ret
+ : tensor<?x?x?x?x?xf32, #CCCCC> to tensor<?x?x?x?x?xf32>
+ %v1 = vector.transfer_read %1[%c0, %c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
+ vector.print %v1 : vector<1x6x6x6x1xf32>
+
+ %CDCDC_ret = call @conv_3d_ndhwc_dhwcf_CDCDC(%in3D_ndhwc_CDCDC, %filter3D_ndhwc_CDCDC)
+ : (tensor<?x?x?x?x?xf32, #CDCDC>,
+ tensor<?x?x?x?x?xf32, #CDCDC>) -> (tensor<?x?x?x?x?xf32, #CDCDC>)
+
+ // CHECK-NEXT:( ( ( ( ( 108 ), ( 124 ), ( 124 ), ( 124 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ),
+ // CHECK-SAME: ( ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ),
+ // CHECK-SAME: ( ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ), ( 108 ) ) ) ) )
+ %2 = sparse_tensor.convert %CDCDC_ret
+ : tensor<?x?x?x?x?xf32, #CDCDC> to tensor<?x?x?x?x?xf32>
+ %v2 = vector.transfer_read %2[%c0, %c0, %c0, %c0, %c0], %zero
+ : tensor<?x?x?x?x?xf32>, vector<1x6x6x6x1xf32>
+ vector.print %v2 : vector<1x6x6x6x1xf32>
+
+ // Free the resources
+ bufferization.dealloc_tensor %in3D_ndhwc : tensor<?x?x?x?x?xf32>
+ bufferization.dealloc_tensor %filter3D_ndhwc : tensor<?x?x?x?x?xf32>
+ bufferization.dealloc_tensor %out3D_ndhwc : tensor<?x?x?x?x?xf32>
+
+ bufferization.dealloc_tensor %in3D_ndhwc_CDCDC : tensor<?x?x?x?x?xf32, #CDCDC>
+ bufferization.dealloc_tensor %filter3D_ndhwc_CDCDC : tensor<?x?x?x?x?xf32, #CDCDC>
+ bufferization.dealloc_tensor %in3D_ndhwc_CCCCC : tensor<?x?x?x?x?xf32, #CCCCC>
+ bufferization.dealloc_tensor %filter3D_ndhwc_CCCCC : tensor<?x?x?x?x?xf32, #CCCCC>
+
+ bufferization.dealloc_tensor %CCCCC_ret : tensor<?x?x?x?x?xf32, #CCCCC>
+ bufferization.dealloc_tensor %CDCDC_ret : tensor<?x?x?x?x?xf32, #CDCDC>
+
+ return
+}