[Mlir-commits] [mlir] fad84c3 - [mlir][sparse] Support sparse2sparse collapse for dynamic sizes
Author: Anlun Xu
Date: 2022-09-27T18:40:59-07:00
New Revision: fad84c3dbe85f3dd4a74ece6deab4306c813248e
URL: https://github.com/llvm/llvm-project/commit/fad84c3dbe85f3dd4a74ece6deab4306c813248e
DIFF: https://github.com/llvm/llvm-project/commit/fad84c3dbe85f3dd4a74ece6deab4306c813248e.diff
LOG: [mlir][sparse] Support sparse2sparse collapse for dynamic sizes
This patch implements sparse2sparse collapse for operands with dynamic shapes: the destination sizes are now computed at runtime from the source sizes instead of being read from a static destination type.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D131599
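As an illustration, a collapse of a sparse matrix with a dynamic dimension is now handled by the conversion. A minimal sketch, mirroring the new test in sparse_reshape.mlir (#SparseMatrix and #SparseVector are the encoding aliases defined there):

  %0 = tensor.collapse_shape %arg0 [[0, 1]] :
      tensor<10x?xf64, #SparseMatrix> into tensor<?xf64, #SparseVector>

Rather than reading the destination size from a static type, the conversion now derives it at runtime as the product of the collapsed source dimensions.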
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 3469eb5613977..43de81da1fda8 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -478,20 +478,20 @@ static bool canUseDirectConversion(
static void translateIndices(Location loc, ConversionPatternRewriter &rewriter,
ArrayRef<ReassociationIndices> reassociation,
TensorType dstTp, TensorType srcTp, Value dstIdx,
- Value srcIdx) {
+ Value srcIdx, ArrayRef<Value> dstShape,
+ ArrayRef<Value> srcShape) {
unsigned dstRank = dstTp.getRank();
unsigned srcRank = srcTp.getRank();
unsigned start = 0;
unsigned i = 0;
bool isExpand = srcRank > dstRank;
- ArrayRef<int64_t> shape = isExpand ? srcTp.getShape() : dstTp.getShape();
+ ArrayRef<Value> shape = isExpand ? srcShape : dstShape;
// Iterate over reassociation map.
for (const auto &map : llvm::enumerate(reassociation)) {
// Prepare strides information in dimension slice.
- uint64_t linear = 1;
+ Value linear = constantIndex(rewriter, loc, 1);
for (unsigned j = start, end = start + map.value().size(); j < end; j++) {
- assert(!ShapedType::isDynamic(shape[j]));
- linear *= shape[j];
+ linear = rewriter.create<arith::MulIOp>(loc, linear, shape[j]);
}
// Start collapse.
Value idx = constantIndex(rewriter, loc, i++);
@@ -500,22 +500,17 @@ static void translateIndices(Location loc, ConversionPatternRewriter &rewriter,
val = rewriter.create<memref::LoadOp>(loc, srcIdx, idx);
// Iterate over dimension slice.
for (unsigned j = start, end = start + map.value().size(); j < end; j++) {
- linear /= shape[j];
- Value stride = constantIndex(rewriter, loc, linear);
+ linear = rewriter.create<arith::DivUIOp>(loc, linear, shape[j]);
Value jdx = constantIndex(rewriter, loc, j);
if (isExpand) {
Value old = rewriter.create<memref::LoadOp>(loc, srcIdx, jdx);
- Value mul = linear == 1
- ? old
- : rewriter.create<arith::MulIOp>(loc, old, stride);
+ Value mul = rewriter.create<arith::MulIOp>(loc, old, linear);
val = val ? rewriter.create<arith::AddIOp>(loc, val, mul) : mul;
} else {
Value old = val;
- if (linear != 1)
- val = rewriter.create<arith::DivUIOp>(loc, val, stride);
+ val = rewriter.create<arith::DivUIOp>(loc, val, linear);
rewriter.create<memref::StoreOp>(loc, val, dstIdx, jdx);
- if (linear != 1)
- val = rewriter.create<arith::RemUIOp>(loc, old, stride);
+ val = rewriter.create<arith::RemUIOp>(loc, old, linear);
}
}
// Finalize expansion.
@@ -527,6 +522,65 @@ static void translateIndices(Location loc, ConversionPatternRewriter &rewriter,
assert((isExpand && i == dstRank) || (!isExpand && i == srcRank));
}
+/// Helper method to compute the shape of the destination tensor of a reshape
+/// operator. This is only used when the operands have dynamic shapes. The
+/// computed destination shape is stored in dstShape.
+void genReshapeDstShape(Location loc, ConversionPatternRewriter &rewriter,
+ SmallVector<Value, 4> &dstShape,
+ ArrayRef<Value> srcShape,
+ ArrayRef<int64_t> staticDstShape,
+ ArrayRef<ReassociationIndices> reassociation) {
+ // Collapse shape.
+ if (reassociation.size() < srcShape.size()) {
+ unsigned start = 0;
+ for (const auto &map : llvm::enumerate(reassociation)) {
+ auto dstDim = constantIndex(rewriter, loc, 1);
+ for (unsigned i = start; i < start + map.value().size(); i++) {
+ dstDim = rewriter.create<arith::MulIOp>(loc, dstDim, srcShape[i]);
+ }
+ dstShape.push_back(dstDim);
+ start = start + map.value().size();
+ }
+ assert(start == srcShape.size());
+ return;
+ }
+
+ // Expand shape.
+ assert(reassociation.size() == srcShape.size());
+ unsigned start = 0;
+ // Expand the i-th dimension in srcShape.
+ for (unsigned i = 0, size = srcShape.size(); i < size; i++) {
+ auto map = reassociation[i];
+ auto srcDim = srcShape[i];
+ // Iterate through dimensions expanded from the i-th dimension.
+ for (unsigned j = start; j < start + map.size(); j++) {
+ // There can be only one dynamically sized dimension among the dimensions
+ // expanded from the i-th dimension in srcShape. For example, if srcDim = 8,
+ // then the expanded shape could be <2x?x2>, but not <2x?x?>.
+ if (staticDstShape[j] == ShapedType::kDynamicSize) {
+ // The expanded dimension has dynamic size. We compute the dimension
+ // by dividing srcDim by the product of the static dimensions.
+ int64_t product = 1;
+ for (unsigned k = start; k < start + map.size(); k++) {
+ if (staticDstShape[k] != ShapedType::kDynamicSize) {
+ product *= staticDstShape[k];
+ }
+ }
+ // Compute the dynamic dimension size.
+ Value productVal = constantIndex(rewriter, loc, product);
+ Value dynamicSize =
+ rewriter.create<arith::DivUIOp>(loc, srcDim, productVal);
+ dstShape.push_back(dynamicSize);
+ } else {
+ // The expanded dimension is statically known.
+ dstShape.push_back(constantIndex(rewriter, loc, staticDstShape[j]));
+ }
+ }
+ start = start + map.size();
+ }
+ assert(start == staticDstShape.size());
+}
+
/// Generate code for a general sparse to sparse reshaping operation.
/// Note that unlike dense reshaping (which can be done with a "cheap"
/// change of view), sparse reshaping is currently done with actual
@@ -562,19 +616,23 @@ genSparse2SparseReshape(ReshapeOp op, typename ReshapeOp::Adaptor adaptor,
auto noPerm = SparseTensorEncodingAttr::get(
op->getContext(), encSrc.getDimLevelType(), AffineMap(),
encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth());
- SmallVector<Value, 4> sizes;
+ SmallVector<Value, 4> srcSizes;
SmallVector<Value, 8> params;
- sizesFromPtr(rewriter, sizes, loc, encSrc, srcTp, adaptor.getSrc());
- newParams(rewriter, params, loc, srcTp, noPerm, Action::kToIterator, sizes,
+ sizesFromPtr(rewriter, srcSizes, loc, encSrc, srcTp, adaptor.getSrc());
+ newParams(rewriter, params, loc, srcTp, noPerm, Action::kToIterator, srcSizes,
adaptor.getSrc());
Value iter = genNewCall(rewriter, loc, params);
// Start a new COO for the destination tensor.
- sizes.clear();
+ SmallVector<Value, 4> dstSizes;
params.clear();
- // Fills sizes array using the sizes from destination type.
- assert(dstTp.hasStaticShape());
- sizesFromType(rewriter, sizes, loc, dstTp);
- newParams(rewriter, params, loc, dstTp, encDst, Action::kEmptyCOO, sizes);
+ if (dstTp.hasStaticShape()) {
+ sizesFromType(rewriter, dstSizes, loc, dstTp);
+ } else {
+ ArrayRef<int64_t> dstShape = dstTp.getShape();
+ genReshapeDstShape(loc, rewriter, dstSizes, srcSizes, dstShape,
+ op.getReassociationIndices());
+ }
+ newParams(rewriter, params, loc, dstTp, encDst, Action::kEmptyCOO, dstSizes);
Value coo = genNewCall(rewriter, loc, params);
Value dstPerm = params[2];
// Construct a while loop over the iterator.
@@ -593,7 +651,7 @@ genSparse2SparseReshape(ReshapeOp op, typename ReshapeOp::Adaptor adaptor,
Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
rewriter.setInsertionPointToStart(after);
translateIndices(loc, rewriter, op.getReassociationIndices(), dstTp, srcTp,
- dstIdx, srcIdx);
+ dstIdx, srcIdx, dstSizes, srcSizes);
genAddEltCall(rewriter, loc, elemTp, coo, elemPtr, dstIdx, dstPerm);
rewriter.create<scf::YieldOp>(loc);
// Final call to construct sparse tensor storage and free temporary resources.
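The new genReshapeDstShape helper above derives the destination sizes from the runtime source sizes: for a collapse, each destination dimension is the product of the source dimensions in its reassociation group; for an expand, the single dynamic destination dimension in a group is the source dimension divided by the product of the group's static sizes. A rough sketch of the generated arithmetic for the dynamic test cases below (value names are illustrative, not the exact IR emitted by the pass, which obtains the source sizes via sizesFromPtr):

  // Collapse tensor<10x?xf64> into tensor<?xf64>: dst0 = 10 * d1.
  %d1   = tensor.dim %src2d, %c1 : tensor<10x?xf64, #SparseMatrix>
  %dst0 = arith.muli %c10, %d1 : index

  // Expand tensor<?xf64> into tensor<?x10xf64>: dst0 = d0 / 10, dst1 = 10.
  %d0   = tensor.dim %src1d, %c0 : tensor<?xf64, #SparseVector>
  %dstA = arith.divui %d0, %c10 : index

translateIndices then uses these runtime sizes, rather than constant strides, to linearize or delinearize coordinates while entries are copied through the temporary COO.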
diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
index 65eb56b9bac37..c58e34be65839 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s | mlir-opt | FileCheck %s --check-prefix=CHECK-ROUND
-// RUN: mlir-opt %s --sparse-tensor-conversion --cse | FileCheck %s --check-prefix=CHECK-CONV
+// RUN: mlir-opt %s --sparse-tensor-conversion --cse --canonicalize | FileCheck %s --check-prefix=CHECK-CONV
#SparseVector = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>
#SparseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
@@ -22,13 +22,13 @@
// CHECK-CONV-DAG: call @newSparseTensor
// CHECK-CONV: scf.while : () -> () {
// CHECK-CONV: call @getNextF64
-// CHECK-CONV: scf.condition(%13)
+// CHECK-CONV: scf.condition
// CHECK-CONV: } do {
-// CHECK-CONV: %[[X:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<?xindex>
+// CHECK-CONV: %[[X:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<1xindex>
// CHECK-CONV: %[[D:.*]] = arith.divui %[[X]], %[[C10]] : index
-// CHECK-CONV: memref.store %[[D]], %{{.*}}[%[[C0]]] : memref<?xindex>
+// CHECK-CONV: memref.store %[[D]], %{{.*}}[%[[C0]]] : memref<2xindex>
// CHECK-CONV: %[[R:.*]] = arith.remui %[[X]], %[[C10]] : index
-// CHECK-CONV: memref.store %[[R]], %{{.*}}[%[[C1]]] : memref<?xindex>
+// CHECK-CONV: memref.store %[[R]], %{{.*}}[%[[C1]]] : memref<2xindex>
// CHECK-CONV: call @addEltF64
// CHECK-CONV: scf.yield
// CHECK-CONV: }
@@ -61,13 +61,13 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x
// CHECK-CONV-DAG: call @newSparseTensor
// CHECK-CONV: scf.while : () -> () {
// CHECK-CONV: call @getNextF64
-// CHECK-CONV: scf.condition(%13)
+// CHECK-CONV: scf.condition
// CHECK-CONV: } do {
-// CHECK-CONV: %[[X:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<?xindex>
+// CHECK-CONV: %[[X:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<2xindex>
// CHECK-CONV: %[[M:.*]] = arith.muli %[[X]], %[[C10]] : index
-// CHECK-CONV: %[[Y:.*]] = memref.load %{{.*}}[%[[C1]]] : memref<?xindex>
+// CHECK-CONV: %[[Y:.*]] = memref.load %{{.*}}[%[[C1]]] : memref<2xindex>
// CHECK-CONV: %[[A:.*]] = arith.addi %[[M]], %[[Y]] : index
-// CHECK-CONV: memref.store %[[A]], %{{.*}}[%[[C0]]] : memref<?xindex>
+// CHECK-CONV: memref.store %[[A]], %{{.*}}[%[[C0]]] : memref<1xindex>
// CHECK-CONV: call @addEltF64
// CHECK-CONV: scf.yield
// CHECK-CONV: }
@@ -81,3 +81,90 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10
tensor<10x10xf64, #SparseMatrix> into tensor<100xf64, #SparseVector>
return %0 : tensor<100xf64, #SparseVector>
}
+
+//
+// roundtrip:
+//
+// CHECK-ROUND-LABEL: func.func @dynamic_sparse_expand(
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND: return %[[E]] : tensor<?x10xf64, #sparse_tensor.encoding<{{{.*}}}>>
+//
+// conversion:
+//
+// CHECK-CONV-LABEL: func.func @dynamic_sparse_expand(
+// CHECK-CONV-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONV-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONV-DAG: %[[C10:.*]] = arith.constant 10 : index
+// CHECK-CONV-DAG: %[[D1:.*]] = arith.divui %{{.*}}, %[[C10]] : index
+// CHECK-CONV-DAG: call @newSparseTensor
+// CHECK-CONV-DAG: call @newSparseTensor
+// CHECK-CONV: scf.while : () -> () {
+// CHECK-CONV: call @getNextF64
+// CHECK-CONV: scf.condition
+// CHECK-CONV: } do {
+// CHECK-CONV: %[[M:.*]] = arith.muli %[[D1]], %[[C10]] : index
+// CHECK-CONV: %[[L:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<1xindex>
+// CHECK-CONV: %[[D2:.*]] = arith.divui %[[M]], %[[D1]] : index
+// CHECK-CONV: %[[D3:.*]] = arith.divui %[[L]], %[[D2]] : index
+// CHECK-CONV: memref.store %[[D3]], %{{.*}}[%[[C0]]] : memref<2xindex>
+// CHECK-CONV: %[[R:.*]] = arith.remui %[[L]], %[[D2]] : index
+// CHECK-CONV: %[[D4:.*]] = arith.divui %[[D2]], %[[C10]] : index
+// CHECK-CONV: %[[D5:.*]] = arith.divui %[[R]], %[[D4]] : index
+// CHECK-CONV: memref.store %[[D5]], %{{.*}}[%[[C1]]] : memref<2xindex>
+// CHECK-CONV: call @addEltF64
+// CHECK-CONV: scf.yield
+// CHECK-CONV: }
+// CHECK-CONV: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONV: call @delSparseTensorCOOF64
+// CHECK-CONV: call @delSparseTensorCOOF64
+// CHECK-CONV: return %[[N]] : !llvm.ptr<i8>
+//
+func.func @dynamic_sparse_expand(%arg0: tensor<?xf64, #SparseVector>) -> tensor<?x10xf64, #SparseMatrix> {
+ %0 = tensor.expand_shape %arg0 [[0, 1]] :
+ tensor<?xf64, #SparseVector> into tensor<?x10xf64, #SparseMatrix>
+ return %0 : tensor<?x10xf64, #SparseMatrix>
+}
+
+//
+// roundtrip:
+//
+// CHECK-ROUND-LABEL: func.func @dynamic_sparse_collapse(
+// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK-ROUND: return %[[C]] : tensor<?xf64, #sparse_tensor.encoding<{{{.*}}}>>
+//
+// conversion:
+//
+// CHECK-CONV-LABEL: func.func @dynamic_sparse_collapse(
+// CHECK-CONV-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-CONV-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-CONV-DAG: %[[C10:.*]] = arith.constant 10 : index
+// CHECK-CONV-DAG: %[[M1:.*]] = arith.muli %{{.*}}, %[[C10]] : index
+// CHECK-CONV-DAG: call @newSparseTensor
+// CHECK-CONV-DAG: call @newSparseTensor
+// CHECK-CONV: scf.while : () -> () {
+// CHECK-CONV: call @getNextF64
+// CHECK-CONV: scf.condition
+// CHECK-CONV: } do {
+// CHECK-CONV: %[[D1:.*]] = arith.divui %[[M1]], %[[C10]] : index
+// CHECK-CONV: %[[X:.*]] = memref.load %{{.*}}[%[[C0]]] : memref<2xindex>
+// CHECK-CONV: %[[M2:.*]] = arith.muli %[[X]], %[[D1]] : index
+// CHECK-CONV: %[[D2:.*]] = arith.divui %[[D1]], %{{.*}} : index
+// CHECK-CONV: %[[Y:.*]] = memref.load %{{.*}}[%[[C1]]] : memref<2xindex>
+// CHECK-CONV: %[[M3:.*]] = arith.muli %[[Y]], %[[D2]] : index
+// CHECK-CONV: %[[A:.*]] = arith.addi %[[M2]], %[[M3]] : index
+// CHECK-CONV: memref.store %[[A]], %{{.*}}[%[[C0]]] : memref<1xindex>
+// CHECK-CONV: call @addEltF64
+// CHECK-CONV: scf.yield
+// CHECK-CONV: }
+// CHECK-CONV: %[[N:.*]] = call @newSparseTensor
+// CHECK-CONV: call @delSparseTensorCOOF64
+// CHECK-CONV: call @delSparseTensorCOOF64
+// CHECK-CONV: return %[[N]] : !llvm.ptr<i8>
+//
+func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor<?xf64, #SparseVector> {
+ %0 = tensor.collapse_shape %arg0 [[0, 1]] :
+ tensor<10x?xf64, #SparseMatrix> into tensor<?xf64, #SparseVector>
+ return %0 : tensor<?xf64, #SparseVector>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir
index 105cd2ed67f6b..09a294f34baa8 100755
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reshape.mlir
@@ -120,6 +120,11 @@ module {
return %0 : tensor<?x2x?xf64, #Sparse3dTensor>
}
+ func.func @expand_sparse2sparse_dyn(%arg0: tensor<?x?xf64, #SparseMatrix>) -> tensor<?x2x?xf64, #Sparse3dTensor> {
+ %0 = tensor.expand_shape %arg0 [[0], [1, 2]] : tensor<?x?xf64, #SparseMatrix> into tensor<?x2x?xf64, #Sparse3dTensor>
+ return %0 : tensor<?x2x?xf64, #Sparse3dTensor>
+ }
+
func.func @collapse_dense_dyn(%arg0: tensor<?x?x?x?xf64>) -> tensor<?x?xf64> {
%0 = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?x?x?xf64> into tensor<?x?xf64>
return %0 : tensor<?x?xf64>
@@ -135,6 +140,11 @@ module {
return %0 : tensor<?x?xf64, #SparseMatrix>
}
+ func.func @collapse_sparse2sparse_dyn(%arg0: tensor<?x?x?x?xf64, #Sparse4dTensor>) -> tensor<?x?xf64, #SparseMatrix> {
+ %0 = tensor.collapse_shape %arg0 [[0, 1], [2, 3]] : tensor<?x?x?x?xf64, #Sparse4dTensor> into tensor<?x?xf64, #SparseMatrix>
+ return %0 : tensor<?x?xf64, #SparseMatrix>
+ }
+
//
// Main driver.
//
@@ -177,6 +187,7 @@ module {
%expand8 = call @expand_dense_dyn(%dm) : (tensor<?x?xf64>) -> tensor<?x2x?xf64>
%expand9 = call @expand_from_sparse_dyn(%sdm) : (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x2x?xf64>
%expand10 = call @expand_to_sparse_dyn(%dm) : (tensor<?x?xf64>) -> tensor<?x2x?xf64, #Sparse3dTensor>
+ %expand11 = call @expand_sparse2sparse_dyn(%sdm) : (tensor<?x?xf64, #SparseMatrix>) -> tensor<?x2x?xf64, #Sparse3dTensor>
%collapse0 = call @collapse_dense(%m) : (tensor<3x4xf64>) -> tensor<12xf64>
%collapse1 = call @collapse_from_sparse(%sm) : (tensor<3x4xf64, #SparseMatrix>) -> tensor<12xf64>
@@ -189,6 +200,7 @@ module {
%collapse8 = call @collapse_dense_dyn(%dn) : (tensor<?x?x?x?xf64>) -> tensor<?x?xf64>
%collapse9 = call @collapse_from_sparse_dyn(%sdn) : (tensor<?x?x?x?xf64, #Sparse4dTensor>) -> tensor<?x?xf64>
%collapse10 = call @collapse_to_sparse_dyn(%dn) : (tensor<?x?x?x?xf64>) -> tensor<?x?xf64, #SparseMatrix>
+ %collapse11 = call @collapse_sparse2sparse_dyn(%sdn) : (tensor<?x?x?x?xf64, #Sparse4dTensor>) -> tensor<?x?xf64, #SparseMatrix>
//
// Verify results of expand
@@ -204,6 +216,7 @@ module {
// CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
// CHECK-NEXT: ( ( ( 1.1, 1.2 ), ( 1.3, 1.4 ) ), ( ( 2.1, 2.2 ), ( 2.3, 2.4 ) ), ( ( 3.1, 3.2 ), ( 3.3, 3.4 ) ) )
// CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4, -1, -1, -1, -1 )
+ // CHECK-NEXT: ( 1.1, 1.2, 1.3, 1.4, 2.1, 2.2, 2.3, 2.4, 3.1, 3.2, 3.3, 3.4, -1, -1, -1, -1 )
//
%m0 = vector.transfer_read %expand0[%c0, %c0], %df: tensor<3x4xf64>, vector<3x4xf64>
@@ -235,6 +248,10 @@ module {
%a10 = sparse_tensor.values %expand10 : tensor<?x2x?xf64, #Sparse3dTensor> to memref<?xf64>
%m10 = vector.transfer_read %a10[%c0], %df: memref<?xf64>, vector<16xf64>
vector.print %m10 : vector<16xf64>
+ %a11 = sparse_tensor.values %expand11 : tensor<?x2x?xf64, #Sparse3dTensor> to memref<?xf64>
+ %m11 = vector.transfer_read %a11[%c0], %df: memref<?xf64>, vector<16xf64>
+ vector.print %m11 : vector<16xf64>
+
//
// Verify results of collapse
@@ -250,6 +267,7 @@ module {
// CHECK-NEXT: ( ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ), ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ), ( 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 ), ( 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 ), ( 41, 42, 43, 44, 45, 26, 47, 48, 49, 50 ), ( 51, 52, 53, 54, 55, 56, 57, 58, 59, 60 ) )
// CHECK-NEXT: ( ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ), ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ), ( 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 ), ( 31, 32, 33, 34, 35, 36, 37, 38, 39, 40 ), ( 41, 42, 43, 44, 45, 26, 47, 48, 49, 50 ), ( 51, 52, 53, 54, 55, 56, 57, 58, 59, 60 ) )
// CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 26, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, -1, -1, -1, -1 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 26, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, -1, -1, -1, -1 )
//
%v0 = vector.transfer_read %collapse0[%c0], %df: tensor<12xf64>, vector<12xf64>
@@ -281,6 +299,10 @@ module {
%b10 = sparse_tensor.values %collapse10 : tensor<?x?xf64, #SparseMatrix> to memref<?xf64>
%v10 = vector.transfer_read %b10[%c0], %df: memref<?xf64>, vector<64xf64>
vector.print %v10 : vector<64xf64>
+ %b11 = sparse_tensor.values %collapse11 : tensor<?x?xf64, #SparseMatrix> to memref<?xf64>
+ %v11 = vector.transfer_read %b11[%c0], %df: memref<?xf64>, vector<64xf64>
+ vector.print %v11 : vector<64xf64>
+
// Release sparse resources.
bufferization.dealloc_tensor %sv : tensor<12xf64, #SparseVector>
@@ -293,11 +315,13 @@ module {
bufferization.dealloc_tensor %expand6 : tensor<3x2x2xf64, #Sparse3dTensor>
bufferization.dealloc_tensor %expand7 : tensor<3x2x2xf64, #Sparse3dTensor>
bufferization.dealloc_tensor %expand10 : tensor<?x2x?xf64, #Sparse3dTensor>
+ bufferization.dealloc_tensor %expand11 : tensor<?x2x?xf64, #Sparse3dTensor>
bufferization.dealloc_tensor %collapse2 : tensor<12xf64, #SparseVector>
bufferization.dealloc_tensor %collapse3 : tensor<12xf64, #SparseVector>
bufferization.dealloc_tensor %collapse6 : tensor<6x10xf64, #SparseMatrix>
bufferization.dealloc_tensor %collapse7 : tensor<6x10xf64, #SparseMatrix>
bufferization.dealloc_tensor %collapse10 : tensor<?x?xf64, #SparseMatrix>
+ bufferization.dealloc_tensor %collapse11 : tensor<?x?xf64, #SparseMatrix>
// Release dense resources.
bufferization.dealloc_tensor %expand1 : tensor<3x4xf64>