[Mlir-commits] [mlir] 28882b6 - [mlir][sparse] Implementing sparse=>dense conversion.
Author: wren romano
Date: 2021-10-28T15:27:35-07:00
New Revision: 28882b6575d2ceda68c8cf09f191898f662b2103
URL: https://github.com/llvm/llvm-project/commit/28882b6575d2ceda68c8cf09f191898f662b2103
DIFF: https://github.com/llvm/llvm-project/commit/28882b6575d2ceda68c8cf09f191898f662b2103.diff
LOG: [mlir][sparse] Implementing sparse=>dense conversion.
Depends On D110882, D110883, D110884
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D110790
Added:
mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
Modified:
mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
mlir/lib/ExecutionEngine/SparseUtils.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
index 1886c98535da1..414b757dcec7a 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td
@@ -106,6 +106,7 @@ def SparseTensorConversion : Pass<"sparse-tensor-conversion", "ModuleOp"> {
let dependentDialects = [
"arith::ArithmeticDialect",
"LLVM::LLVMDialect",
+ "linalg::LinalgDialect",
"memref::MemRefDialect",
"scf::SCFDialect",
"sparse_tensor::SparseTensorDialect",
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 8a2daa9ecb9d1..694853f7cd11a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -36,7 +36,8 @@ enum Action : uint32_t {
kFromFile = 1,
kFromCOO = 2,
kEmptyCOO = 3,
- kToCOO = 4
+ kToCOO = 4,
+ kToIter = 5
};
//===----------------------------------------------------------------------===//
@@ -202,7 +203,9 @@ static void sizesFromPtr(ConversionPatternRewriter &rewriter,
sizes.push_back(constantIndex(rewriter, op->getLoc(), shape[i]));
}
-/// Generates a temporary buffer of the given size and type.
+/// Generates an uninitialized temporary buffer of the given size and
+/// type, but returns it as type `memref<? x $tp>` (rather than as type
+/// `memref<$sz x $tp>`).
static Value genAlloca(ConversionPatternRewriter &rewriter, Location loc,
unsigned sz, Type tp) {
auto memTp = MemRefType::get({ShapedType::kDynamicSize}, tp);
@@ -210,6 +213,13 @@ static Value genAlloca(ConversionPatternRewriter &rewriter, Location loc,
return rewriter.create<memref::AllocaOp>(loc, memTp, ValueRange{a});
}
+/// Generates an uninitialized temporary buffer with room for one value
+/// of the given type, and returns the `memref<$tp>`.
+static Value genAllocaScalar(ConversionPatternRewriter &rewriter, Location loc,
+ Type tp) {
+ return rewriter.create<memref::AllocaOp>(loc, MemRefType::get({}, tp));
+}
+
/// Generates a temporary buffer of the given type and given contents.
static Value genBuffer(ConversionPatternRewriter &rewriter, Location loc,
ArrayRef<Value> values) {
@@ -345,6 +355,39 @@ static void genAddEltCall(ConversionPatternRewriter &rewriter, Operation *op,
rewriter.create<CallOp>(loc, pTp, fn, params);
}
+/// Generates a call to `iter->getNext()`. If there is a next element,
+/// then it is copied into the out-parameters `ind` and `elemPtr`,
+/// and the return value is true. If there isn't a next element, then
+/// the return value is false.
+static Value genGetNextCall(ConversionPatternRewriter &rewriter, Operation *op,
+ Value iter, Value ind, Value elemPtr) {
+ Location loc = op->getLoc();
+ Type elemTp = elemPtr.getType().cast<ShapedType>().getElementType();
+ StringRef name;
+ if (elemTp.isF64())
+ name = "getNextF64";
+ else if (elemTp.isF32())
+ name = "getNextF32";
+ else if (elemTp.isInteger(64))
+ name = "getNextI64";
+ else if (elemTp.isInteger(32))
+ name = "getNextI32";
+ else if (elemTp.isInteger(16))
+ name = "getNextI16";
+ else if (elemTp.isInteger(8))
+ name = "getNextI8";
+ else
+ llvm_unreachable("Unknown element type");
+ SmallVector<Value, 3> params;
+ params.push_back(iter);
+ params.push_back(ind);
+ params.push_back(elemPtr);
+ Type i1 = rewriter.getI1Type();
+ auto fn = getFunc(op, name, i1, params, /*emitCInterface=*/true);
+ auto call = rewriter.create<CallOp>(loc, i1, fn, params);
+ return call.getResult(0);
+}
+
/// If the tensor is a sparse constant, generates and returns the pair of
/// the constants for the indices and the values.
static Optional<std::pair<Value, Value>>
@@ -379,6 +422,37 @@ static Value genIndexAndValueForSparse(ConversionPatternRewriter &rewriter,
return rewriter.create<tensor::ExtractOp>(loc, values, ivs[0]);
}
+/// Generates code to allocate a tensor of the given type, and zero
+/// initialize it. This function assumes the TensorType is fully
+/// specified (i.e., has static rank and sizes).
+// TODO(D112674): support dynamic sizes.
+static Value allocDenseTensor(ConversionPatternRewriter &rewriter, Location loc,
+ RankedTensorType tensorTp) {
+ Type elemTp = tensorTp.getElementType();
+ auto memTp = MemRefType::get(tensorTp.getShape(), elemTp);
+ Value mem = rewriter.create<memref::AllocOp>(loc, memTp);
+ Value zero = constantZero(rewriter, loc, elemTp);
+ rewriter.create<linalg::FillOp>(loc, zero, mem).result();
+ return mem;
+}
+
+/// Inserts the element returned by genGetNextCall(_, ind, elemPtr) into
+/// the tensor created by allocDenseTensor(). The `rank` is the rank
+/// of the `tensor` and the length of `ind`.
+static void insertScalarIntoDenseTensor(ConversionPatternRewriter &rewriter,
+ Location loc, Value elemPtr,
+ Value tensor, unsigned rank,
+ Value ind) {
+ SmallVector<Value, 4> ivs;
+ ivs.reserve(rank);
+ for (unsigned i = 0; i < rank; i++) {
+ Value idx = constantIndex(rewriter, loc, i);
+ ivs.push_back(rewriter.create<memref::LoadOp>(loc, ind, idx));
+ }
+ Value elemV = rewriter.create<memref::LoadOp>(loc, elemPtr);
+ rewriter.create<memref::StoreOp>(loc, elemV, tensor, ivs);
+}
+
//===----------------------------------------------------------------------===//
// Conversion rules.
//===----------------------------------------------------------------------===//
@@ -509,8 +583,49 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
rewriter.replaceOp(op, genNewCall(rewriter, op, params));
return success();
}
- if (!encDst || encSrc) {
- // TODO: sparse => dense
+ if (!encDst && encSrc) {
+ // This is sparse => dense conversion, which is handled as follows:
+ // dst = new Tensor(0);
+ // iter = src->toCOO()->getIterator();
+ // while (elem = iter->getNext()) {
+ // dst[elem.indices] = elem.value;
+ // }
+ Location loc = op->getLoc();
+ RankedTensorType tensorTp = resType.dyn_cast<RankedTensorType>();
+ if (!tensorTp)
+ return failure();
+ unsigned rank = tensorTp.getRank();
+ Value dst = allocDenseTensor(rewriter, loc, tensorTp);
+ Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
+ Value elemPtr = genAllocaScalar(rewriter, loc, tensorTp.getElementType());
+ encDst = SparseTensorEncodingAttr::get(
+ op->getContext(),
+ SmallVector<SparseTensorEncodingAttr::DimLevelType>(
+ rank, SparseTensorEncodingAttr::DimLevelType::Dense),
+ AffineMap(), encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth());
+ SmallVector<Value, 4> sizes;
+ SmallVector<Value, 8> params;
+ // TODO(D112674): support dynamic sizes.
+ sizesFromType(rewriter, sizes, loc, tensorTp);
+ newParams(rewriter, params, op, encDst, kToIter, sizes, src);
+ Value iter = genNewCall(rewriter, op, params);
+ SmallVector<Value> noArgs;
+ SmallVector<Type> noTypes;
+ auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
+ Block *before = rewriter.createBlock(&whileOp.before(), {}, noTypes);
+ rewriter.setInsertionPointToEnd(before);
+ Value cond = genGetNextCall(rewriter, op, iter, ind, elemPtr);
+ rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
+ Block *after = rewriter.createBlock(&whileOp.after(), {}, noTypes);
+ rewriter.setInsertionPointToStart(after);
+ insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, rank, ind);
+ rewriter.create<scf::YieldOp>(loc);
+ rewriter.setInsertionPointAfter(whileOp);
+ rewriter.replaceOpWithNewOp<memref::TensorLoadOp>(op, resType, dst);
+ return success();
+ }
+ if (!encDst && !encSrc) {
+ // dense => dense
return failure();
}
// This is a dense => sparse conversion or a sparse constant in COO =>
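[The rewrite rule above emits IR that follows the pseudocode in its comment: allocate and zero-fill the dense destination, obtain a COO iterator from the runtime, then copy elements in a while-loop. The following is a minimal C++ sketch of that control flow, not part of the patch: GetNextFn and densify are hypothetical names, and the callback stands in for the runtime's getNextF64/getNextI32/... entry points.]

#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical stand-in for the runtime's getNext* C-interface calls:
// fills `ind` (length == rank) and `elem`, returns false when exhausted.
using GetNextFn = std::function<bool(std::vector<uint64_t> &, double &)>;

// Mirrors the IR emitted for sparse => dense conversion:
//   dst = new Tensor(0);          // memref.alloc + linalg.fill with zero
//   while (getNext(ind, elem))    // scf.while around the getNext* call
//     dst[ind] = elem;            // memref.load of indices + memref.store
void densify(const GetNextFn &getNext, const std::vector<uint64_t> &sizes,
             std::vector<double> &dst) {
  uint64_t total = 1;
  for (uint64_t s : sizes)
    total *= s;
  dst.assign(total, 0.0); // zero-initialize, like linalg.fill
  const uint64_t rank = sizes.size();
  std::vector<uint64_t> ind(rank);
  double elem;
  while (getNext(ind, elem)) {
    // Row-major linearization of the coordinates, as a store into a
    // statically shaped memref would do.
    uint64_t offset = 0;
    for (uint64_t r = 0; r < rank; r++)
      offset = offset * sizes[r] + ind[r];
    dst[offset] = elem;
  }
}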
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
index 69657445336bb..ae7dd0c2dd4d4 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp
@@ -118,7 +118,8 @@ struct SparseTensorConversionPass
// The following operations and dialects may be introduced by the
// rewriting rules, and are therefore marked as legal.
target.addLegalOp<arith::CmpFOp, arith::CmpIOp, arith::ConstantOp,
- arith::IndexCastOp, tensor::ExtractOp>();
+ arith::IndexCastOp, linalg::FillOp, linalg::YieldOp,
+ tensor::ExtractOp>();
target.addLegalDialect<LLVM::LLVMDialect, memref::MemRefDialect,
scf::SCFDialect>();
// Populate with rules and apply rewriting rules.
diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
index f3a12dbd333f8..0b82378c2db2d 100644
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -87,12 +87,13 @@ template <typename V>
struct SparseTensorCOO {
public:
SparseTensorCOO(const std::vector<uint64_t> &szs, uint64_t capacity)
- : sizes(szs) {
+ : sizes(szs), iteratorLocked(false), iteratorPos(0) {
if (capacity)
elements.reserve(capacity);
}
/// Adds element as indices and value.
void add(const std::vector<uint64_t> &ind, V val) {
+ assert(!iteratorLocked && "Attempt to add() after startIterator()");
uint64_t rank = getRank();
assert(rank == ind.size());
for (uint64_t r = 0; r < rank; r++)
@@ -100,7 +101,10 @@ struct SparseTensorCOO {
elements.emplace_back(ind, val);
}
/// Sorts elements lexicographically by index.
- void sort() { std::sort(elements.begin(), elements.end(), lexOrder); }
+ void sort() {
+ assert(!iteratorLocked && "Attempt to sort() after startIterator()");
+ std::sort(elements.begin(), elements.end(), lexOrder);
+ }
/// Returns rank.
uint64_t getRank() const { return sizes.size(); }
/// Getter for sizes array.
@@ -108,6 +112,20 @@ struct SparseTensorCOO {
/// Getter for elements array.
const std::vector<Element<V>> &getElements() const { return elements; }
+ /// Switch into iterator mode.
+ void startIterator() {
+ iteratorLocked = true;
+ iteratorPos = 0;
+ }
+ /// Get the next element.
+ const Element<V> *getNext() {
+ assert(iteratorLocked && "Attempt to getNext() before startIterator()");
+ if (iteratorPos < elements.size())
+ return &(elements[iteratorPos++]);
+ iteratorLocked = false;
+ return nullptr;
+ }
+
/// Factory method. Permutes the original dimensions according to
/// the given ordering and expects subsequent add() calls to honor
/// that same ordering for the given indices. The result is a
@@ -134,8 +152,10 @@ struct SparseTensorCOO {
}
return false;
}
- std::vector<uint64_t> sizes; // per-dimension sizes
+ const std::vector<uint64_t> sizes; // per-dimension sizes
std::vector<Element<V>> elements;
+ bool iteratorLocked;
+ unsigned iteratorPos;
};
/// Abstract base class of sparse tensor storage. Note that we use
@@ -539,25 +559,35 @@ enum Action : uint32_t {
kFromFile = 1,
kFromCOO = 2,
kEmptyCOO = 3,
- kToCOO = 4
+ kToCOO = 4,
+ kToIter = 5
};
#define CASE(p, i, v, P, I, V) \
if (ptrTp == (p) && indTp == (i) && valTp == (v)) { \
SparseTensorCOO<V> *tensor = nullptr; \
- if (action == kFromFile) \
- tensor = \
- openSparseTensorCOO<V>(static_cast<char *>(ptr), rank, sizes, perm); \
- else if (action == kFromCOO) \
- tensor = static_cast<SparseTensorCOO<V> *>(ptr); \
- else if (action == kEmptyCOO) \
+ if (action <= kFromCOO) { \
+ if (action == kFromFile) { \
+ char *filename = static_cast<char *>(ptr); \
+ tensor = openSparseTensorCOO<V>(filename, rank, sizes, perm); \
+ } else if (action == kFromCOO) { \
+ tensor = static_cast<SparseTensorCOO<V> *>(ptr); \
+ } else { \
+ assert(action == kEmpty); \
+ } \
+ return SparseTensorStorage<P, I, V>::newSparseTensor(rank, sizes, perm, \
+ sparsity, tensor); \
+ } else if (action == kEmptyCOO) { \
return SparseTensorCOO<V>::newSparseTensorCOO(rank, sizes, perm); \
- else if (action == kToCOO) \
- return static_cast<SparseTensorStorage<P, I, V> *>(ptr)->toCOO(perm); \
- else \
- assert(action == kEmpty); \
- return SparseTensorStorage<P, I, V>::newSparseTensor(rank, sizes, perm, \
- sparsity, tensor); \
+ } else { \
+ tensor = static_cast<SparseTensorStorage<P, I, V> *>(ptr)->toCOO(perm); \
+ if (action == kToIter) { \
+ tensor->startIterator(); \
+ } else { \
+ assert(action == kToCOO); \
+ } \
+ return tensor; \
+ } \
}
#define IMPL1(NAME, TYPE, LIB) \
@@ -604,6 +634,23 @@ enum Action : uint32_t {
return tensor; \
}
+#define IMPL_GETNEXT(NAME, V) \
+ bool _mlir_ciface_##NAME(void *ptr, StridedMemRefType<uint64_t, 1> *iref, \
+ StridedMemRefType<V, 0> *vref) { \
+ assert(iref->strides[0] == 1); \
+ uint64_t *indx = iref->data + iref->offset; \
+ V *value = vref->data + vref->offset; \
+ const uint64_t isize = iref->sizes[0]; \
+ auto iter = static_cast<SparseTensorCOO<V> *>(ptr); \
+ const Element<V> *elem = iter->getNext(); \
+ if (elem == nullptr) \
+ return false; \
+ for (uint64_t r = 0; r < isize; r++) \
+ indx[r] = elem->indices[r]; \
+ *value = elem->value; \
+ return true; \
+ }
+
/// Constructs a new sparse tensor. This is the "swiss army knife"
/// method for materializing sparse tensors into the computation.
///
@@ -613,6 +660,7 @@ enum Action : uint32_t {
/// kFromCOO = returns storage, where ptr contains coordinate scheme to assign
/// kEmptyCOO = returns empty coordinate scheme to fill and use with kFromCOO
/// kToCOO = returns coordinate scheme from storage in ptr to use with kFromCOO
+/// kToIter = returns iterator from storage in ptr (call IMPL_GETNEXT to use)
void *
_mlir_ciface_newSparseTensor(StridedMemRefType<uint8_t, 1> *aref, // NOLINT
StridedMemRefType<index_t, 1> *sref,
@@ -710,10 +758,19 @@ IMPL3(addEltI32, int32_t)
IMPL3(addEltI16, int16_t)
IMPL3(addEltI8, int8_t)
+/// Helper to enumerate elements of coordinate scheme, one per value type.
+IMPL_GETNEXT(getNextF64, double)
+IMPL_GETNEXT(getNextF32, float)
+IMPL_GETNEXT(getNextI64, int64_t)
+IMPL_GETNEXT(getNextI32, int32_t)
+IMPL_GETNEXT(getNextI16, int16_t)
+IMPL_GETNEXT(getNextI8, int8_t)
+
#undef CASE
#undef IMPL1
#undef IMPL2
#undef IMPL3
+#undef IMPL_GETNEXT
//===----------------------------------------------------------------------===//
//
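[To make the new iterator protocol in SparseTensorCOO concrete, here is a small self-contained sketch that mirrors only the members this patch adds (startIterator, getNext, and the iteratorLocked/iteratorPos bookkeeping). MiniCOO is a stripped-down illustrative stand-in, not the real class in SparseUtils.cpp, which also carries per-dimension sizes, capacity handling, sorting, and the factory method.]

#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

template <typename V>
struct Element {
  std::vector<uint64_t> indices;
  V value;
};

// Stripped-down stand-in for SparseTensorCOO, keeping only the parts
// relevant to the iterator mode added by this patch.
template <typename V>
struct MiniCOO {
  void add(const std::vector<uint64_t> &ind, V val) {
    assert(!iteratorLocked && "Attempt to add() after startIterator()");
    elements.push_back({ind, val});
  }
  // Switch into iterator mode; subsequent add()/sort() calls would assert.
  void startIterator() {
    iteratorLocked = true;
    iteratorPos = 0;
  }
  // Return the next element, or nullptr (and unlock) when exhausted.
  const Element<V> *getNext() {
    assert(iteratorLocked && "Attempt to getNext() before startIterator()");
    if (iteratorPos < elements.size())
      return &elements[iteratorPos++];
    iteratorLocked = false;
    return nullptr;
  }
  std::vector<Element<V>> elements;
  bool iteratorLocked = false;
  unsigned iteratorPos = 0;
};

int main() {
  MiniCOO<double> coo;
  coo.add({0, 1}, 2.0);
  coo.add({1, 3}, 4.0);
  coo.startIterator();
  while (const Element<double> *e = coo.getNext())
    std::cout << "(" << e->indices[0] << ", " << e->indices[1]
              << ") = " << e->value << "\n";
}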
diff --git a/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir
new file mode 100644
index 0000000000000..3c3d0602c65f2
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/conversion_sparse2dense.mlir
@@ -0,0 +1,162 @@
+// RUN: mlir-opt %s --sparse-tensor-conversion --canonicalize --cse | FileCheck %s
+
+#SparseVector = #sparse_tensor.encoding<{
+ dimLevelType = ["compressed"]
+}>
+
+#SparseMatrix = #sparse_tensor.encoding<{
+ dimLevelType = ["dense", "compressed"]
+}>
+
+#SparseTensor = #sparse_tensor.encoding<{
+ dimLevelType = ["dense", "compressed", "compressed"],
+ dimOrdering = affine_map<(i,j,k) -> (k,i,j)>
+}>
+
+// CHECK-LABEL: func @sparse_convert_1d(
+// CHECK-SAME: %[[Arg:.*]]: !llvm.ptr<i8>) -> tensor<13xi32>
+// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[I13:.*]] = arith.constant 13 : index
+//
+// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32>
+// CHECK-DAG: %[[E0:.*]] = arith.constant 0 : i32
+// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : i32, memref<13xi32>
+// CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref<?xindex>
+// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<i32>
+//
+// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<1xi8>
+// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<1xi8> to memref<?xi8>
+// CHECK-DAG: %[[Attr0:.*]] = arith.constant 0 : i8
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I0]]] : memref<1xi8>
+//
+// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<1xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I13]], %[[SizesS]][%[[I0]]] : memref<1xindex>
+//
+// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<1xindex>
+// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<1xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<1xindex>
+//
+// CHECK-DAG: %[[SecTp:.*]] = arith.constant 1 : i32
+// CHECK-DAG: %[[ElemTp:.*]] = arith.constant 4 : i32
+// CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 5 : i32
+// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %[[SecTp]], %[[SecTp]], %[[ElemTp]], %[[ActionToIter]], %[[Arg]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[Cond:.*]] = call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<i32>) -> i1
+// CHECK: scf.condition(%[[Cond]])
+// CHECK: } do {
+// CHECK: %[[Iv0:.*]] = memref.load %[[IndS]][%[[I0]]] : memref<1xindex>
+// CHECK: %[[ElemVal:.*]] = memref.load %[[ElemBuffer]][] : memref<i32>
+// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]]] : memref<13xi32>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<13xi32>
+// CHECK: return %[[T]] : tensor<13xi32>
+func @sparse_convert_1d(%arg0: tensor<13xi32, #SparseVector>) -> tensor<13xi32> {
+ %0 = sparse_tensor.convert %arg0 : tensor<13xi32, #SparseVector> to tensor<13xi32>
+ return %0 : tensor<13xi32>
+}
+
+// CHECK-LABEL: func @sparse_convert_2d(
+// CHECK-SAME: %[[Arg:.*]]: !llvm.ptr<i8>) -> tensor<2x4xf64>
+// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index
+//
+// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64>
+// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<2x4xf64>
+// CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
+//
+// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<2xi8>
+// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<2xi8> to memref<?xi8>
+// CHECK-DAG: %[[Attr0:.*]] = arith.constant 0 : i8
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I0]]] : memref<2xi8>
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I1]]] : memref<2xi8>
+//
+// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<2xindex>
+// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I1]]] : memref<2xindex>
+//
+// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<2xindex>
+// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<2xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<2xindex>
+// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<2xindex>
+//
+// CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 5 : i32
+// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[Cond]])
+// CHECK: } do {
+// CHECK: %[[Iv0:.*]] = memref.load %[[IndS]][%[[I0]]] : memref<2xindex>
+// CHECK: %[[Iv1:.*]] = memref.load %[[IndS]][%[[I1]]] : memref<2xindex>
+// CHECK: %[[ElemVal:.*]] = memref.load %[[ElemBuffer]][] : memref<f64>
+// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]]] : memref<2x4xf64>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<2x4xf64>
+// CHECK: return %[[T]] : tensor<2x4xf64>
+func @sparse_convert_2d(%arg0: tensor<2x4xf64, #SparseMatrix>) -> tensor<2x4xf64> {
+ %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64, #SparseMatrix> to tensor<2x4xf64>
+ return %0 : tensor<2x4xf64>
+}
+
+// CHECK-LABEL: func @sparse_convert_3d(
+// CHECK-SAME: %[[Arg:.*]]: !llvm.ptr<i8>) -> tensor<2x3x4xf64>
+// CHECK-DAG: %[[I0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[I1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[I2:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[I3:.*]] = arith.constant 3 : index
+// CHECK-DAG: %[[I4:.*]] = arith.constant 4 : index
+//
+// CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64>
+// CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG: linalg.fill(%[[E0]], %[[M]]) : f64, memref<2x3x4xf64>
+// CHECK-DAG: %[[IndS:.*]] = memref.alloca() : memref<3xindex>
+// CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<3xindex> to memref<?xindex>
+// CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref<f64>
+//
+// CHECK-DAG: %[[AttrsS:.*]] = memref.alloca() : memref<3xi8>
+// CHECK-DAG: %[[AttrsD:.*]] = memref.cast %[[AttrsS]] : memref<3xi8> to memref<?xi8>
+// CHECK-DAG: %[[Attr0:.*]] = arith.constant 0 : i8
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I0]]] : memref<3xi8>
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I1]]] : memref<3xi8>
+// CHECK-DAG: memref.store %[[Attr0]], %[[AttrsS]][%[[I2]]] : memref<3xi8>
+//
+// CHECK-DAG: %[[SizesS:.*]] = memref.alloca() : memref<3xindex>
+// CHECK-DAG: %[[SizesD:.*]] = memref.cast %[[SizesS]] : memref<3xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I2]], %[[SizesS]][%[[I0]]] : memref<3xindex>
+// CHECK-DAG: memref.store %[[I3]], %[[SizesS]][%[[I1]]] : memref<3xindex>
+// CHECK-DAG: memref.store %[[I4]], %[[SizesS]][%[[I2]]] : memref<3xindex>
+//
+// CHECK-DAG: %[[PermS:.*]] = memref.alloca() : memref<3xindex>
+// CHECK-DAG: %[[PermD:.*]] = memref.cast %[[PermS]] : memref<3xindex> to memref<?xindex>
+// CHECK-DAG: memref.store %[[I0]], %[[PermS]][%[[I0]]] : memref<3xindex>
+// CHECK-DAG: memref.store %[[I1]], %[[PermS]][%[[I1]]] : memref<3xindex>
+// CHECK-DAG: memref.store %[[I2]], %[[PermS]][%[[I2]]] : memref<3xindex>
+//
+// CHECK-DAG: %[[ActionToIter:.*]] = arith.constant 5 : i32
+// CHECK: %[[Iter:.*]] = call @newSparseTensor(%[[AttrsD]], %[[SizesD]], %[[PermD]], %{{.*}}, %{{.*}}, %{{.*}}, %[[ActionToIter]], %[[Arg]]) : (memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK: scf.while : () -> () {
+// CHECK: %[[Cond:.*]] = call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr<i8>, memref<?xindex>, memref<f64>) -> i1
+// CHECK: scf.condition(%[[Cond]])
+// CHECK: } do {
+// CHECK: %[[Iv0:.*]] = memref.load %[[IndS]][%[[I0]]] : memref<3xindex>
+// CHECK: %[[Iv1:.*]] = memref.load %[[IndS]][%[[I1]]] : memref<3xindex>
+// CHECK: %[[Iv2:.*]] = memref.load %[[IndS]][%[[I2]]] : memref<3xindex>
+// CHECK: %[[ElemVal:.*]] = memref.load %[[ElemBuffer]][] : memref<f64>
+// CHECK: memref.store %[[ElemVal]], %[[M]][%[[Iv0]], %[[Iv1]], %[[Iv2]]] : memref<2x3x4xf64>
+// CHECK: scf.yield
+// CHECK: }
+// CHECK: %[[T:.*]] = memref.tensor_load %[[M]] : memref<2x3x4xf64>
+// CHECK: return %[[T]] : tensor<2x3x4xf64>
+func @sparse_convert_3d(%arg0: tensor<2x3x4xf64, #SparseTensor>) -> tensor<2x3x4xf64> {
+ %0 = sparse_tensor.convert %arg0 : tensor<2x3x4xf64, #SparseTensor> to tensor<2x3x4xf64>
+ return %0 : tensor<2x3x4xf64>
+}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
new file mode 100644
index 0000000000000..a242840ccbb1a
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_sparse2dense.mlir
@@ -0,0 +1,144 @@
+// RUN: mlir-opt %s \
+// RUN: -sparsification -sparse-tensor-conversion \
+// RUN: -linalg-bufferize -convert-linalg-to-loops \
+// RUN: -convert-vector-to-scf -convert-scf-to-std \
+// RUN: -func-bufferize -tensor-constant-bufferize -tensor-bufferize \
+// RUN: -std-bufferize -finalizing-bufferize \
+// RUN: -convert-vector-to-llvm -convert-memref-to-llvm -convert-std-to-llvm \
+// RUN: -reconcile-unrealized-casts \
+// RUN: | \
+// RUN: mlir-cpu-runner \
+// RUN: -e entry -entry-point-result=void \
+// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext \
+// RUN: | \
+// RUN: FileCheck %s
+
+#Tensor1 = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (i,j,k)>
+}>
+
+#Tensor2 = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (j,k,i)>
+}>
+
+#Tensor3 = #sparse_tensor.encoding<{
+ dimLevelType = [ "compressed", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (k,i,j)>
+}>
+
+#Tensor4 = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (i,j,k)>
+}>
+
+#Tensor5 = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (j,k,i)>
+}>
+
+#Tensor6 = #sparse_tensor.encoding<{
+ dimLevelType = [ "dense", "compressed", "compressed" ],
+ dimOrdering = affine_map<(i,j,k) -> (k,i,j)>
+}>
+
+//
+// Integration test that tests conversions from sparse to dense tensors.
+//
+module {
+ //
+ // Output utilities.
+ //
+ func @dumpf64(%arg0: tensor<2x3x4xf64>) {
+ %c0 = arith.constant 0 : index
+ %d0 = arith.constant -1.0 : f64
+ %0 = vector.transfer_read %arg0[%c0, %c0, %c0], %d0: tensor<2x3x4xf64>, vector<2x3x4xf64>
+ vector.print %0 : vector<2x3x4xf64>
+ return
+ }
+
+ //
+ // Main driver.
+ //
+ func @entry() {
+ //
+ // Initialize a 3-dim dense tensor.
+ //
+ %t = arith.constant dense<[
+ [ [ 1.0, 2.0, 3.0, 4.0 ],
+ [ 5.0, 6.0, 7.0, 8.0 ],
+ [ 9.0, 10.0, 11.0, 12.0 ] ],
+ [ [ 13.0, 14.0, 15.0, 16.0 ],
+ [ 17.0, 18.0, 19.0, 20.0 ],
+ [ 21.0, 22.0, 23.0, 24.0 ] ]
+ ]> : tensor<2x3x4xf64>
+
+ //
+ // Convert dense tensor directly to various sparse tensors.
+ // tensor1: stored as 2x3x4
+ // tensor2: stored as 3x4x2
+ // tensor3: stored as 4x2x3
+ //
+ %1 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor1>
+ %2 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor2>
+ %3 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor3>
+ %4 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor4>
+ %5 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor5>
+ %6 = sparse_tensor.convert %t : tensor<2x3x4xf64> to tensor<2x3x4xf64, #Tensor6>
+
+ //
+ // Convert sparse tensor back to dense.
+ //
+ %a = sparse_tensor.convert %1 : tensor<2x3x4xf64, #Tensor1> to tensor<2x3x4xf64>
+ %b = sparse_tensor.convert %2 : tensor<2x3x4xf64, #Tensor2> to tensor<2x3x4xf64>
+ %c = sparse_tensor.convert %3 : tensor<2x3x4xf64, #Tensor3> to tensor<2x3x4xf64>
+ %d = sparse_tensor.convert %4 : tensor<2x3x4xf64, #Tensor4> to tensor<2x3x4xf64>
+ %e = sparse_tensor.convert %5 : tensor<2x3x4xf64, #Tensor5> to tensor<2x3x4xf64>
+ %f = sparse_tensor.convert %6 : tensor<2x3x4xf64, #Tensor6> to tensor<2x3x4xf64>
+
+ //
+ // Check round-trip equality.
+ //
+ // CHECK: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ // CHECK-NEXT: ( ( ( 1, 2, 3, 4 ), ( 5, 6, 7, 8 ), ( 9, 10, 11, 12 ) ), ( ( 13, 14, 15, 16 ), ( 17, 18, 19, 20 ), ( 21, 22, 23, 24 ) ) )
+ call @dumpf64(%t) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%a) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%b) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%c) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%d) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%e) : (tensor<2x3x4xf64>) -> ()
+ call @dumpf64(%f) : (tensor<2x3x4xf64>) -> ()
+
+ //
+ // Release the resources.
+ //
+ sparse_tensor.release %1 : tensor<2x3x4xf64, #Tensor1>
+ sparse_tensor.release %2 : tensor<2x3x4xf64, #Tensor2>
+ sparse_tensor.release %3 : tensor<2x3x4xf64, #Tensor3>
+ sparse_tensor.release %4 : tensor<2x3x4xf64, #Tensor4>
+ sparse_tensor.release %5 : tensor<2x3x4xf64, #Tensor5>
+ sparse_tensor.release %6 : tensor<2x3x4xf64, #Tensor6>
+
+ %ma = memref.buffer_cast %a : memref<2x3x4xf64>
+ %mb = memref.buffer_cast %b : memref<2x3x4xf64>
+ %mc = memref.buffer_cast %c : memref<2x3x4xf64>
+ %md = memref.buffer_cast %d : memref<2x3x4xf64>
+ %me = memref.buffer_cast %e : memref<2x3x4xf64>
+ %mf = memref.buffer_cast %f : memref<2x3x4xf64>
+
+ memref.dealloc %ma : memref<2x3x4xf64>
+ memref.dealloc %mb : memref<2x3x4xf64>
+ memref.dealloc %mc : memref<2x3x4xf64>
+ memref.dealloc %md : memref<2x3x4xf64>
+ memref.dealloc %me : memref<2x3x4xf64>
+ memref.dealloc %mf : memref<2x3x4xf64>
+
+ return
+ }
+}