[Mlir-commits] [mlir] fa6726e - [mlir][sparse] supports sparse_tensor.pack on libgen path

Peiming Liu llvmlistbot at llvm.org
Tue Aug 15 13:30:44 PDT 2023


Author: Peiming Liu
Date: 2023-08-15T20:20:54Z
New Revision: fa6726e27bb872ada13fe44c6609e9336785dd36

URL: https://github.com/llvm/llvm-project/commit/fa6726e27bb872ada13fe44c6609e9336785dd36
DIFF: https://github.com/llvm/llvm-project/commit/fa6726e27bb872ada13fe44c6609e9336785dd36.diff

LOG: [mlir][sparse] supports sparse_tensor.pack on libgen path

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D158012

Added: 
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir

Modified: 
    mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
    mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
    mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
    mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 4b601ff7b5772f..72eefeaa9dc985 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -150,6 +150,7 @@ enum class Action : uint32_t {
   kEmptyCOO = 4,
   kToCOO = 5,
   kToIterator = 6,
+  kPack = 7,
 };
 
 /// This enum defines all the sparse representations supportable by

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index c38ed59068be53..ac9f1a840ab0a6 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -335,6 +335,18 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
                       const uint64_t *lvl2dim,
                       SparseTensorEnumeratorBase<V> &lvlEnumerator);
 
+  /// Constructs a sparse tensor with the given encoding, and initializes
+  /// the contents from the level buffers.  This ctor allocates exactly
+  /// the required amount of overhead storage, not using any heuristics.
+  /// It assumes that the data provided by `lvlBufs` can be directly used to
+  /// interpret the resulting sparse tensor and performs *NO* integrity test
+  /// on the input data. It also assumes that the trailing COO coordinate
+  /// buffer is passed in as a single AoS memory buffer.
+  SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
+                      uint64_t lvlRank, const uint64_t *lvlSizes,
+                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
+                      const intptr_t *lvlBufs);
+
   /// Allocates a new empty sparse tensor.  The preconditions/assertions
   /// are as per the `SparseTensorStorageBase` ctor; which is to say,
   /// the `dimSizes` and `lvlSizes` must both be "sizes" not "shapes",
@@ -403,6 +415,19 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
                       uint64_t srcRank, const uint64_t *src2lvl,
                       const SparseTensorStorageBase &source);
 
+  /// Allocates a new sparse tensor and initializes it directly with the
+  /// data stored in the level buffers.
+  ///
+  /// Preconditions:
+  /// * as per the `SparseTensorStorageBase` ctor.
+  /// * the integrity of the data in `buffers` is already guaranteed by the caller.
+  static SparseTensorStorage<P, C, V> *
+  packFromLvlBuffers(uint64_t dimRank, const uint64_t *dimShape,
+                     uint64_t lvlRank, const uint64_t *lvlSizes,
+                     const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
+                     uint64_t srcRank, const uint64_t *src2lvl,
+                     const intptr_t *buffers);
+
   ~SparseTensorStorage() final = default;
 
   /// Partially specialize these getter methods based on template types.
@@ -626,7 +651,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Finalizes the sparse position structure at this level.
   void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
     if (count == 0)
-      return; // Short-circuit, since it'll be a nop.
+      return;                       // Short-circuit, since it'll be a nop.
     const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
     if (isCompressedDLT(dlt)) {
       appendPos(l, coordinates[l].size(), count);
@@ -995,6 +1020,18 @@ SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromSparseTensor(
   return tensor;
 }
 
+template <typename P, typename C, typename V>
+SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::packFromLvlBuffers(
+    uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
+    const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+    const uint64_t *lvl2dim, uint64_t srcRank, const uint64_t *src2lvl,
+    const intptr_t *buffers) {
+  assert(dimShape && "Got nullptr for dimension shape");
+  auto *tensor = new SparseTensorStorage<P, C, V>(
+      dimRank, dimShape, lvlRank, lvlSizes, lvlTypes, lvl2dim, buffers);
+  return tensor;
+}
+
 template <typename P, typename C, typename V>
 SparseTensorStorage<P, C, V>::SparseTensorStorage(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
@@ -1153,6 +1190,59 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
   }
 }
 
+template <typename P, typename C, typename V>
+SparseTensorStorage<P, C, V>::SparseTensorStorage(
+    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
+    const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+    const uint64_t *lvl2dim, const intptr_t *lvlBufs)
+    : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
+                          lvl2dim) {
+  uint64_t trailCOOLen = 0, parentSz = 1, bufIdx = 0;
+  for (uint64_t l = 0; l < lvlRank; l++) {
+    if (!isUniqueLvl(l) && isCompressedLvl(l)) {
+      // A `compressed-nu` level marks the start of the trailing COO region.
+      // Since the coordinate buffer for the trailing COO is passed in using
+      // an AoS scheme while SparseTensorStorage uses a SoA scheme, we cannot
+      // simply copy the values from the provided buffers.
+      trailCOOLen = lvlRank - l;
+      break;
+    }
+    assert(!isSingletonLvl(l) &&
+           "Singleton level not following a compressed-nu level");
+    if (isCompressedLvl(l)) {
+      P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
+      C *crdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
+      // Copies the level buffers into the vectors. The buffers cannot simply
+      // be reused because the memory passed in by users is not necessarily
+      // heap-allocated.
+      positions[l].assign(posPtr, posPtr + parentSz + 1);
+      coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
+    } else {
+      assert(isDenseLvl(l) && "Level is not dense");
+    }
+    parentSz = assembledSize(parentSz, l);
+  }
+
+  if (trailCOOLen != 0) {
+    uint64_t cooStartLvl = lvlRank - trailCOOLen;
+    assert(!isUniqueLvl(cooStartLvl) && isCompressedLvl(cooStartLvl));
+    P *posPtr = reinterpret_cast<P *>(lvlBufs[bufIdx++]);
+    C *aosCrdPtr = reinterpret_cast<C *>(lvlBufs[bufIdx++]);
+    positions[cooStartLvl].assign(posPtr, posPtr + parentSz + 1);
+    P crdLen = positions[cooStartLvl][parentSz];
+    for (uint64_t l = cooStartLvl; l < lvlRank; l++) {
+      coordinates[l].resize(crdLen);
+      for (uint64_t n = 0; n < crdLen; n++) {
+        coordinates[l][n] = *(aosCrdPtr + (l - cooStartLvl) + n * trailCOOLen);
+      }
+    }
+    parentSz = assembledSize(parentSz, cooStartLvl);
+  }
+
+  V *valPtr = reinterpret_cast<V *>(lvlBufs[bufIdx]);
+  values.assign(valPtr, valPtr + parentSz);
+}
+
 #undef ASSERT_DENSE_DLT
 
 } // namespace sparse_tensor
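
For context, the trailing-COO handling in the new ctor above boils down to an
AoS-to-SoA scatter. Below is a minimal standalone sketch (illustrative names
only; just the index arithmetic mirrors the constructor's inner loop):

    #include <cstdint>
    #include <vector>

    // Scatter an AoS coordinate buffer (x0,y0, x1,y1, ...) into one SoA
    // vector per COO level, as the new ctor does for the trailing COO region.
    std::vector<std::vector<uint64_t>>
    aosToSoa(const uint64_t *aosCrd, uint64_t nnz, uint64_t cooRank) {
      std::vector<std::vector<uint64_t>> soa(cooRank);
      for (uint64_t l = 0; l < cooRank; l++) {
        soa[l].resize(nnz);
        for (uint64_t n = 0; n < nnz; n++)
          soa[l][n] = aosCrd[n * cooRank + l];
      }
      return soa;
    }

For the SortedCOO test below, aosCrd = {1,2, 5,6, 7,8} with cooRank = 2 yields
soa[0] = {1,5,7} and soa[1] = {2,6,8}, matching the printed coordinates.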

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 67d074e85de667..06d101886ae726 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -661,6 +661,14 @@ Value sparse_tensor::reshapeValuesToLevels(OpBuilder &builder, Location loc,
   return builder.create<memref::ReshapeOp>(loc, resTp, valuesBuffer, lvlCoords);
 }
 
+TypedValue<BaseMemRefType>
+sparse_tensor::genToMemref(OpBuilder &builder, Location loc, Value tensor) {
+  auto tTp = llvm::cast<TensorType>(tensor.getType());
+  auto mTp = MemRefType::get(tTp.getShape(), tTp.getElementType());
+  return builder.create<bufferization::ToMemrefOp>(loc, mTp, tensor)
+      .getResult();
+}
+
 Value sparse_tensor::genToPositions(OpBuilder &builder, Location loc,
                                     Value tensor, Level lvl) {
   const auto srcTp = getSparseTensorType(tensor);

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
index caa549e45e417c..a6468b3e14795f 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -420,6 +420,10 @@ inline bool isZeroRankedTensorOrScalar(Type type) {
   return !rtp || rtp.getRank() == 0;
 }
 
+/// Generates code to cast a tensor to a memref.
+TypedValue<BaseMemRefType> genToMemref(OpBuilder &builder, Location loc,
+                                       Value tensor);
+
 /// Infers the result type and generates `ToPositionsOp`.
 Value genToPositions(OpBuilder &builder, Location loc, Value tensor, Level lvl);
 

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 73840ba1327572..2acb359f1d0fb4 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -539,15 +539,8 @@ static void genEndInsert(OpBuilder &builder, Location loc,
   }
 }
 
-static TypedValue<BaseMemRefType> genToMemref(OpBuilder &builder, Location loc,
-                                              Value tensor) {
-  auto tTp = llvm::cast<TensorType>(tensor.getType());
-  auto mTp = MemRefType::get(tTp.getShape(), tTp.getElementType());
-  return builder.create<bufferization::ToMemrefOp>(loc, mTp, tensor)
-      .getResult();
-}
-
-Value genSliceToSize(OpBuilder &builder, Location loc, Value mem, Value sz) {
+static Value genSliceToSize(OpBuilder &builder, Location loc, Value mem,
+                            Value sz) {
   auto elemTp = llvm::cast<MemRefType>(mem.getType()).getElementType();
   return builder
       .create<memref::SubViewOp>(

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 5784506836a2f5..e7ba2debe0353e 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -131,8 +131,11 @@ static void fillDimSizes(OpBuilder &builder, Location loc, SparseTensorType stt,
 }
 
 /// Returns an array with the dimension-sizes of the given tensor.
+/// If the *tensor* parameter is null, the tensor type is assumed to have a
+/// static shape.
 static SmallVector<Value> getDimSizes(OpBuilder &builder, Location loc,
-                                      SparseTensorType stt, Value tensor) {
+                                      SparseTensorType stt,
+                                      Value tensor = Value()) {
   SmallVector<Value> out;
   fillDimSizes(builder, loc, stt, tensor, out);
   return out;
@@ -210,6 +213,32 @@ static Value genLvlTypesBuffer(OpBuilder &builder, Location loc,
   return allocaBuffer(builder, loc, lvlTypes);
 }
 
+/// Extracts the bare (aligned) pointer to the given tensor's buffer.
+static Value extractBarePtrFromTensor(OpBuilder &builder, Location loc,
+                                      Value tensor) {
+  auto buf = genToMemref(builder, loc, tensor);
+  return builder.create<memref::ExtractAlignedPointerAsIndexOp>(loc, buf);
+}
+
+/// Generates a temporary buffer holding pointers to the level/value buffers.
+static Value genLvlPtrsBuffers(OpBuilder &builder, Location loc,
+                               ValueRange lvlTensors, Value valTensor) {
+  SmallVector<Value> lvlBarePtrs;
+  lvlBarePtrs.reserve(lvlTensors.size() + 1);
+  // Pass in the level buffer pointers.
+  for (const auto lvl : lvlTensors)
+    lvlBarePtrs.push_back(extractBarePtrFromTensor(builder, loc, lvl));
+
+  // Pass in the value buffer pointer.
+  lvlBarePtrs.push_back(extractBarePtrFromTensor(builder, loc, valTensor));
+  Value idxPtr = builder.create<memref::ExtractAlignedPointerAsIndexOp>(
+      loc, allocaBuffer(builder, loc, lvlBarePtrs));
+  Value idxCast =
+      builder.create<arith::IndexCastOp>(loc, builder.getI64Type(), idxPtr);
+  return builder.create<LLVM::IntToPtrOp>(loc, getOpaquePointerType(builder),
+                                          idxCast);
+}
+
 /// This class abstracts over the API of `_mlir_ciface_newSparseTensor`:
 /// the "swiss army knife" method of the sparse runtime support library
 /// for materializing sparse tensors into the computation.  This abstraction
@@ -1282,7 +1311,7 @@ class SparseTensorConcatConverter : public OpConversionPattern<ConcatenateOp> {
     const Dimension concatDim = op.getDimension();
     const Dimension dimRank = dstTp.getDimRank();
 
-    Value dst;     // destination tensor
+    Value dst;         // destination tensor
     Value dstDimToLvl; // destination tensor permutation (if sparse out)
     // A pointer to the value being inserted (if dense => sparse)
     Value elemPtr;
@@ -1437,6 +1466,29 @@ class SparseTensorOutConverter : public OpConversionPattern<OutOp> {
   }
 };
 
+/// Sparse conversion rule for the sparse_tensor.pack operator.
+class SparseTensorPackConverter : public OpConversionPattern<PackOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(PackOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    const Location loc = op->getLoc();
+    const auto dstTp = getSparseTensorType(op.getResult());
+    // PackOp always returns a statically shaped tensor result.
+    assert(dstTp.hasStaticDimShape());
+    SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, dstTp);
+    Value dst =
+        NewCallParams(rewriter, loc)
+            .genBuffers(dstTp.withoutDimToLvl(), dimSizes)
+            .genNewCall(Action::kPack,
+                        genLvlPtrsBuffers(rewriter, loc, adaptor.getLevels(),
+                                          adaptor.getValues()));
+    rewriter.replaceOp(op, dst);
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -1457,18 +1509,18 @@ mlir::SparseTensorTypeToPtrConverter::SparseTensorTypeToPtrConverter() {
 void mlir::populateSparseTensorConversionPatterns(
     TypeConverter &typeConverter, RewritePatternSet &patterns,
     const SparseTensorConversionOptions &options) {
-  patterns
-      .add<SparseReturnConverter, SparseTensorToDimSizeConverter,
-           SparseCastConverter, SparseTensorNewConverter,
-           SparseReshapeConverter<tensor::ExpandShapeOp>,
-           SparseReshapeConverter<tensor::CollapseShapeOp>,
-           SparseTensorConcatConverter, SparseTensorAllocConverter,
-           SparseTensorDeallocConverter, SparseTensorToPositionsConverter,
-           SparseTensorToCoordinatesConverter, SparseTensorToValuesConverter,
-           SparseNumberOfEntriesConverter, SparseTensorLoadConverter,
-           SparseTensorInsertConverter, SparseTensorExpandConverter,
-           SparseTensorCompressConverter, SparseTensorOutConverter>(
-          typeConverter, patterns.getContext());
+  patterns.add<SparseReturnConverter, SparseTensorToDimSizeConverter,
+               SparseCastConverter, SparseTensorNewConverter,
+               SparseReshapeConverter<tensor::ExpandShapeOp>,
+               SparseReshapeConverter<tensor::CollapseShapeOp>,
+               SparseTensorConcatConverter, SparseTensorAllocConverter,
+               SparseTensorDeallocConverter, SparseTensorToPositionsConverter,
+               SparseTensorToCoordinatesConverter,
+               SparseTensorToValuesConverter, SparseNumberOfEntriesConverter,
+               SparseTensorLoadConverter, SparseTensorInsertConverter,
+               SparseTensorExpandConverter, SparseTensorCompressConverter,
+               SparseTensorOutConverter, SparseTensorPackConverter>(
+      typeConverter, patterns.getContext());
 
   patterns.add<SparseTensorConvertConverter>(typeConverter,
                                              patterns.getContext(), options);
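
For context, `genLvlPtrsBuffers` above establishes the handoff to the runtime:
one bare buffer address per level-buffer operand of the pack op, followed by
one for the values buffer, all packed into an array whose address travels as a
single opaque pointer (the runtime reads it back as `intptr_t *buffers` under
`Action::kPack`, as shown in the SparseTensorRuntime.cpp hunk below). A hedged
host-side sketch of that array layout for the CSR case, runtime call elided:

    #include <cstdint>
    #include <vector>

    int main() {
      // CSR level buffers as in the new test: positions, coordinates, values.
      std::vector<int32_t> pos = {0, 1, 3};
      std::vector<int32_t> crd = {1, 0, 1};
      std::vector<double> val = {1.0, 2.0, 3.0};

      // The array whose (opaque) address would reach the runtime.
      intptr_t buffers[] = {reinterpret_cast<intptr_t>(pos.data()),
                            reinterpret_cast<intptr_t>(crd.data()),
                            reinterpret_cast<intptr_t>(val.data())};
      (void)buffers; // Handed to Action::kPack in the real flow.
      return 0;
    }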

diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index eb5f4ac2419620..ff6d4a483a079b 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -313,6 +313,13 @@ extern "C" {
       auto *coo = tensor.toCOO(lvlRank, lvlSizes, dimRank, dim2lvl);           \
       return new SparseTensorIterator<V>(coo);                                 \
     }                                                                          \
+    case Action::kPack: {                                                      \
+      assert(ptr && "Received nullptr for SparseTensorStorage object");        \
+      intptr_t *buffers = static_cast<intptr_t *>(ptr);                        \
+      return SparseTensorStorage<P, C, V>::packFromLvlBuffers(                 \
+          dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, lvl2dim, dimRank,    \
+          dim2lvl, buffers);                                                   \
+    }                                                                          \
     }                                                                          \
     MLIR_SPARSETENSOR_FATAL("unknown action: %d\n",                            \
                             static_cast<uint32_t>(action));                    \

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
index 0f7af5b126c5ec..5e861fe38358ae 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack.mlir
@@ -24,7 +24,7 @@
 // REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=4
 // RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
 
-// TODO: Pack only support CodeGen Path
+// TODO: support sparse_tensor.unpack on the libgen path.
 
 #SortedCOO = #sparse_tensor.encoding<{
   lvlTypes = [ "compressed-nu", "singleton" ]

diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir
new file mode 100644
index 00000000000000..3e4194a5b478b0
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_pack_libgen.mlir
@@ -0,0 +1,165 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+//  do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// RUN: %{compile} | %{run} | FileCheck %s
+//
+// Do the same run, but now with VLA vectorization.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false vl=4
+// RUN: %if mlir_arm_sve_tests %{ %{compile_sve} | %{run_sve} | FileCheck %s %}
+
+// TODO: This is considered to be a short-lived test and should be merged with
+// sparse_pack.mlir after sparse_tensor.unpack is supported on the libgen path.
+
+#SortedCOO = #sparse_tensor.encoding<{
+  lvlTypes = [ "compressed-nu", "singleton" ]
+}>
+
+#SortedCOOI32 = #sparse_tensor.encoding<{
+  lvlTypes = [ "compressed-nu", "singleton" ],
+  posWidth = 32,
+  crdWidth = 32
+}>
+
+#CSR = #sparse_tensor.encoding<{
+  lvlTypes = [ "dense", "compressed" ],
+  posWidth = 32,
+  crdWidth = 32
+}>
+
+// TODO: "compressed-hi" is not supported by libgen path.
+// #BCOO = #sparse_tensor.encoding<{
+//   lvlTypes = [ "dense", "compressed-hi-nu", "singleton" ]
+//}>
+
+module {
+  //
+  // Main driver.
+  //
+  func.func @entry() {
+    %c0 = arith.constant 0 : index
+    %f0 = arith.constant 0.0 : f64
+    %i0 = arith.constant 0 : i32
+    //
+    // Initialize a 3-dim dense tensor.
+    //
+    %data = arith.constant dense<
+       [  1.0,  2.0,  3.0]
+    > : tensor<3xf64>
+
+    %pos = arith.constant dense<
+       [0, 3]
+    > : tensor<2xindex>
+
+    %index = arith.constant dense<
+       [[  1,  2],
+        [  5,  6],
+        [  7,  8]]
+    > : tensor<3x2xindex>
+
+    %pos32 = arith.constant dense<
+       [0, 3]
+    > : tensor<2xi32>
+
+    %index32 = arith.constant dense<
+       [[  1,  2],
+        [  5,  6],
+        [  7,  8]]
+    > : tensor<3x2xi32>
+
+    %s4 = sparse_tensor.pack %data, %pos, %index : tensor<3xf64>, tensor<2xindex>, tensor<3x2xindex>
+                                          to tensor<10x10xf64, #SortedCOO>
+    %s5 = sparse_tensor.pack %data, %pos32, %index32 : tensor<3xf64>, tensor<2xi32>, tensor<3x2xi32>
+                                           to tensor<10x10xf64, #SortedCOOI32>
+
+    %csr_data = arith.constant dense<
+       [  1.0,  2.0,  3.0,  4.0]
+    > : tensor<4xf64>
+
+    %csr_pos32 = arith.constant dense<
+       [0, 1, 3]
+    > : tensor<3xi32>
+
+    %csr_index32 = arith.constant dense<
+       [1, 0, 1]
+    > : tensor<3xi32>
+    %csr = sparse_tensor.pack %csr_data, %csr_pos32, %csr_index32 : tensor<4xf64>, tensor<3xi32>, tensor<3xi32>
+                                           to tensor<2x2xf64, #CSR>
+
+    // CHECK:1
+    // CHECK-NEXT:2
+    // CHECK-NEXT:1
+    //
+    // CHECK-NEXT:5
+    // CHECK-NEXT:6
+    // CHECK-NEXT:2
+    //
+    // CHECK-NEXT:7
+    // CHECK-NEXT:8
+    // CHECK-NEXT:3
+    sparse_tensor.foreach in %s4 : tensor<10x10xf64, #SortedCOO> do {
+      ^bb0(%1: index, %2: index, %v: f64) :
+        vector.print %1: index
+        vector.print %2: index
+        vector.print %v: f64
+     }
+
+    // CHECK-NEXT:1
+    // CHECK-NEXT:2
+    // CHECK-NEXT:1
+    //
+    // CHECK-NEXT:5
+    // CHECK-NEXT:6
+    // CHECK-NEXT:2
+    //
+    // CHECK-NEXT:7
+    // CHECK-NEXT:8
+    // CHECK-NEXT:3
+    sparse_tensor.foreach in %s5 : tensor<10x10xf64, #SortedCOOI32> do {
+      ^bb0(%1: index, %2: index, %v: f64) :
+        vector.print %1: index
+        vector.print %2: index
+        vector.print %v: f64
+     }
+
+    // CHECK-NEXT:0
+    // CHECK-NEXT:1
+    // CHECK-NEXT:1
+    //
+    // CHECK-NEXT:1
+    // CHECK-NEXT:0
+    // CHECK-NEXT:2
+    //
+    // CHECK-NEXT:1
+    // CHECK-NEXT:1
+    // CHECK-NEXT:3
+    sparse_tensor.foreach in %csr : tensor<2x2xf64, #CSR> do {
+      ^bb0(%1: index, %2: index, %v: f64) :
+        vector.print %1: index
+        vector.print %2: index
+        vector.print %v: f64
+     }
+
+
+    bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
+    bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
+    bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
+
+    return
+  }
+}
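
As a quick sanity check of the CSR expectations above: with pos = [0, 1, 3],
crd = [1, 0, 1], and values = [1, 2, 3], row r owns the entries in the
half-open range [pos[r], pos[r+1]), which yields exactly the (row, col, value)
triples in the CHECK lines. A small standalone decode, for illustration only:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t pos[] = {0, 1, 3};
      int32_t crd[] = {1, 0, 1};
      double val[] = {1.0, 2.0, 3.0};
      for (int32_t r = 0; r < 2; r++)
        for (int32_t i = pos[r]; i < pos[r + 1]; i++)
          std::printf("%d %d %g\n", (int)r, (int)crd[i], val[i]);
      // Prints: (0,1,1), (1,0,2), (1,1,3).
      return 0;
    }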