[llvm] [mlir][sparse] add a forwarding insertion to SparseTensorStorage (PR #68939)

Thu Oct 12 17:55:47 PDT 2023

https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/68939

>From 8943e100e4675b3bd537f672cc28524410cd5b3d Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Thu, 12 Oct 2023 15:04:58 -0700
Subject: [PATCH 1/5] [mlir][sparse] refactor dense2sparse and const2sparse
 conversion

The new code uses the MapRef data structure, which prepares
our move to non-permutation types, such as block-sparsity
---
 .../mlir/Dialect/SparseTensor/IR/Enums.h      |   5 +-
 .../ExecutionEngine/SparseTensor/Storage.h    | 189 +++++++-----
 .../ExecutionEngine/SparseTensorRuntime.h     |  35 +--
 .../Transforms/SparseTensorConversion.cpp     | 217 ++++++++++++-
 .../ExecutionEngine/SparseTensorRuntime.cpp   |  55 ++--
 .../test/Dialect/SparseTensor/conversion.mlir |   2 +-
 .../SparseTensor/convert_dense2sparse.mlir    | 292 ++++++++++++++++++
 .../Dialect/SparseTensor/sparse_expand.mlir   |   6 +-
 .../SparseTensor/sparse_fill_zero.mlir        |   2 +-
 9 files changed, 664 insertions(+), 139 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index ca9555248130f08..f1643d66c26a195 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -143,11 +143,10 @@ constexpr bool isComplexPrimaryType(PrimaryType valTy) {
 /// The actions performed by @newSparseTensor.
 enum class Action : uint32_t {
   kEmpty = 0,
-  // newSparseTensor no longer handles `kFromFile=1`, so we leave this
-  // number reserved to help catch any code that still needs updating.
+  kEmptyForward = 1,
   kFromCOO = 2,
   kSparseToSparse = 3,
-  kEmptyCOO = 4,
+  kFuture = 4, // not used
   kToCOO = 5,
   kToIterator = 6,
   kPack = 7,
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 607be1cbf956a7d..0d95c60a08689d2 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -33,7 +33,6 @@
     assert((isCompressedDLT(dlt) || isSingletonDLT(dlt)) &&                    \
            "Level is neither compressed nor singleton");                       \
   } while (false)
-#define ASSERT_DENSE_DLT(dlt) assert(isDenseDLT(dlt) && "Level is not dense");
 
 namespace mlir {
 namespace sparse_tensor {
@@ -44,6 +43,12 @@ class SparseTensorEnumeratorBase;
 template <typename P, typename C, typename V>
 class SparseTensorEnumerator;
 
+//===----------------------------------------------------------------------===//
+//
+//  SparseTensorStorage
+//
+//===----------------------------------------------------------------------===//
+
 /// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
 /// takes responsibility for all the `<P,C,V>`-independent aspects
 /// of the tensor (e.g., shape, sparsity, mapping). In addition,
@@ -97,7 +102,7 @@ class SparseTensorStorageBase {
 
   /// Safely looks up the size of the given tensor-dimension.
   uint64_t getDimSize(uint64_t d) const {
-    assert(d < getDimRank() && "Dimension is out of bounds");
+    assert(d < getDimRank());
     return dimSizes[d];
   }
 
@@ -106,7 +111,7 @@ class SparseTensorStorageBase {
 
   /// Safely looks up the size of the given storage-level.
   uint64_t getLvlSize(uint64_t l) const {
-    assert(l < getLvlRank() && "Level is out of bounds");
+    assert(l < getLvlRank());
     return lvlSizes[l];
   }
 
@@ -115,7 +120,7 @@ class SparseTensorStorageBase {
 
   /// Safely looks up the type of the given level.
   DimLevelType getLvlType(uint64_t l) const {
-    assert(l < getLvlRank() && "Level is out of bounds");
+    assert(l < getLvlRank());
     return lvlTypes[l];
   }
 
@@ -173,6 +178,13 @@ class SparseTensorStorageBase {
   MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
 #undef DECL_GETVALUES
 
+  /// Element-wise forwarding insertions. The first argument is the
+  /// dimension-coordinates for the value being inserted.
+#define DECL_FORWARDINGINSERT(VNAME, V)                                        \
+  virtual void forwardingInsert(const uint64_t *, V);
+  MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT)
+#undef DECL_FORWARDINGINSERT
+
   /// Element-wise insertion in lexicographic coordinate order. The first
   /// argument is the level-coordinates for the value being inserted.
 #define DECL_LEXINSERT(VNAME, V) virtual void lexInsert(const uint64_t *, V);
@@ -182,24 +194,17 @@ class SparseTensorStorageBase {
   /// Expanded insertion.  Note that this method resets the
   /// values/filled-switch array back to all-zero/false while only
   /// iterating over the nonzero elements.
-  ///
-  /// Arguments:
-  /// * `lvlCoords` the level-coordinates shared by the values being inserted.
-  /// * `values` a map from last-level coordinates to their associated value.
-  /// * `filled` a map from last-level coordinates to bool, indicating
-  ///   whether `values` contains a valid value to be inserted.
-  /// * `added` a map from `[0..count)` to last-level coordinates for
-  ///   which `filled` is true and `values` contains the assotiated value.
-  /// * `count` the size of `added`.
-  /// * `expsz` the size of the expanded vector (verification only).
 #define DECL_EXPINSERT(VNAME, V)                                               \
   virtual void expInsert(uint64_t *, V *, bool *, uint64_t *, uint64_t,        \
                          uint64_t);
   MLIR_SPARSETENSOR_FOREVERY_V(DECL_EXPINSERT)
 #undef DECL_EXPINSERT
 
-  /// Finishes insertion.
-  virtual void endInsert() = 0;
+  /// Finalizes forwarding insertions.
+  virtual void endForwardingInsert() = 0;
+
+  /// Finalizes lexicographic insertions.
+  virtual void endLexInsert() = 0;
 
 private:
   const std::vector<uint64_t> dimSizes;
@@ -207,6 +212,8 @@ class SparseTensorStorageBase {
   const std::vector<DimLevelType> lvlTypes;
   const std::vector<uint64_t> dim2lvlVec;
   const std::vector<uint64_t> lvl2dimVec;
+
+protected:
   const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
 };
 
@@ -229,7 +236,8 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
                       const uint64_t *lvl2dim)
       : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                                 dim2lvl, lvl2dim),
-        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}
+        positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank), lvlCOO() {
+  }
 
 public:
   /// Constructs a sparse tensor with the given encoding, and allocates
@@ -242,11 +250,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
                       const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
-                      const uint64_t *lvl2dim, bool initializeValuesIfAllDense);
+                      const uint64_t *lvl2dim, SparseTensorCOO<V> *coo,
+                      bool initializeValuesIfAllDense);
 
   /// Constructs a sparse tensor with the given encoding, and initializes
   /// the contents from the COO. This ctor performs the same heuristic
-  /// overhead-storage allocation as the ctor taking a `bool`.
+  /// overhead-storage allocation as the ctor above.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const DimLevelType *lvlTypes,
                       const uint64_t *dim2lvl, const uint64_t *lvl2dim,
@@ -279,10 +288,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   static SparseTensorStorage<P, C, V> *
   newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
            const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
-           const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
-    return new SparseTensorStorage<P, C, V>(
-        dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, true);
-  }
+           const uint64_t *dim2lvl, const uint64_t *lvl2dim, bool forwarding);
 
   /// Allocates a new sparse tensor and initializes it from the given COO.
   /// The preconditions are as per the `SparseTensorStorageBase` ctor
@@ -303,19 +309,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
 
   /// Allocates a new sparse tensor and initializes it with the contents
   /// of another sparse tensor.
-  ///
-  /// Preconditions:
-  /// * as per the `SparseTensorStorageBase` ctor.
-  /// * `src2lvl` must be valid for `srcRank`, must map coordinates valid
-  ///    for `source.getDimSizes()` to coordinates valid for `lvlSizes`,
-  ///    and therefore must be the inverse of `lvl2dim`.
-  /// * `source` must have the same value type `V`.
-  ///
-  /// Asserts:
-  /// * `dimRank` and `lvlRank` are nonzero.
-  /// * `srcRank == source.getDimRank()`.
-  /// * `lvlSizes` contains only nonzero sizes.
-  /// * `source.getDimSizes()` is a refinement of `dimShape`.
   //
   // TODO: The `dimRank` and `dimShape` arguments are only used for
   // verifying that the source tensor has the expected shape.  So if we
@@ -337,10 +330,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
 
   /// Allocates a new sparse tensor and initialize it with the data stored level
   /// buffers directly.
-  ///
-  /// Precondition:
-  /// * as per the `SparseTensorStorageBase` ctor.
-  /// * the data integrity stored in `buffers` is guaranteed by users already.
   static SparseTensorStorage<P, C, V> *packFromLvlBuffers(
       uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
       const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
@@ -352,12 +341,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Partially specialize these getter methods based on template types.
   void getPositions(std::vector<P> **out, uint64_t lvl) final {
     assert(out && "Received nullptr for out parameter");
-    assert(lvl < getLvlRank() && "Level is out of bounds");
+    assert(lvl < getLvlRank());
     *out = &positions[lvl];
   }
   void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
     assert(out && "Received nullptr for out parameter");
-    assert(lvl < getLvlRank() && "Level is out of bounds");
+    assert(lvl < getLvlRank());
     *out = &coordinates[lvl];
   }
   void getValues(std::vector<V> **out) final {
@@ -365,15 +354,23 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     *out = &values;
   }
 
+  /// Returns coordinate at given position.
   uint64_t getCrd(uint64_t lvl, uint64_t pos) const final {
     ASSERT_COMPRESSED_OR_SINGLETON_LVL(lvl);
-    assert(pos < coordinates[lvl].size() && "Position is out of bounds");
+    assert(pos < coordinates[lvl].size());
     return coordinates[lvl][pos]; // Converts the stored `C` into `uint64_t`.
   }
 
+  /// Partially specialize forwarding insertions based on template types.
+  void forwardingInsert(const uint64_t *dimCoords, V val) final {
+    assert(dimCoords && lvlCOO);
+    map.pushforward(dimCoords, lvlCursor.data());
+    lvlCOO->add(lvlCursor, val);
+  }
+
   /// Partially specialize lexicographical insertions based on template types.
   void lexInsert(const uint64_t *lvlCoords, V val) final {
-    assert(lvlCoords && "Received nullptr for level-coordinates");
+    assert(lvlCoords);
     // TODO: get rid of this! canonicalize all-dense "sparse" array into dense
     // tensors.
     bool allDense = std::all_of(getLvlTypes().begin(), getLvlTypes().end(),
@@ -429,8 +426,22 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     }
   }
 
+  /// Finalizes forwarding insertions.
+  void endForwardingInsert() final {
+    // Ensure lvlCOO is sorted.
+    assert(lvlCOO);
+    lvlCOO->sort();
+    // Now actually insert the `elements`.
+    const auto &elements = lvlCOO->getElements();
+    const uint64_t nse = elements.size();
+    assert(values.size() == 0);
+    values.reserve(nse);
+    fromCOO(elements, 0, nse, 0);
+    delete lvlCOO;
+  }
+
   /// Finalizes lexicographic insertions.
-  void endInsert() final {
+  void endLexInsert() final {
     if (values.empty())
       finalizeSegment(0);
     else
@@ -533,7 +544,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// does not check that `pos` is semantically valid (i.e., larger than
   /// the previous position and smaller than `coordinates[lvl].capacity()`).
   void appendPos(uint64_t lvl, uint64_t pos, uint64_t count = 1) {
-    assert(isCompressedLvl(lvl) && "Level is not compressed");
+    assert(isCompressedLvl(lvl));
     positions[lvl].insert(positions[lvl].end(), count,
                           detail::checkOverflowCast<P>(pos));
   }
@@ -552,7 +563,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) {
       coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
     } else { // Dense level.
-      ASSERT_DENSE_DLT(dlt);
+      assert(isDenseDLT(dlt));
       assert(crd >= full && "Coordinate was already filled");
       if (crd == full)
         return; // Short-circuit, since it'll be a nop.
@@ -572,7 +583,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     // Subscript assignment to `std::vector` requires that the `pos`-th
     // entry has been initialized; thus we must be sure to check `size()`
     // here, instead of `capacity()` as would be ideal.
-    assert(pos < coordinates[lvl].size() && "Position is out of bounds");
+    assert(pos < coordinates[lvl].size());
     coordinates[lvl][pos] = detail::checkOverflowCast<C>(crd);
   }
 
@@ -644,7 +655,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
     } else if (isSingletonDLT(dlt)) {
       return; // Nothing to finalize.
     } else {  // Dense dimension.
-      ASSERT_DENSE_DLT(dlt);
+      assert(isDenseDLT(dlt));
       const uint64_t sz = getLvlSizes()[l];
       assert(sz >= full && "Segment is overfull");
       count = detail::checkedMul(count, sz - full);
@@ -663,7 +674,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   void endPath(uint64_t diffLvl) {
     const uint64_t lvlRank = getLvlRank();
     const uint64_t lastLvl = lvlRank - 1;
-    assert(diffLvl <= lvlRank && "Level-diff is out of bounds");
+    assert(diffLvl <= lvlRank);
     const uint64_t stop = lvlRank - diffLvl;
     for (uint64_t i = 0; i < stop; ++i) {
       const uint64_t l = lastLvl - i;
@@ -676,7 +687,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   void insPath(const uint64_t *lvlCoords, uint64_t diffLvl, uint64_t full,
                V val) {
     const uint64_t lvlRank = getLvlRank();
-    assert(diffLvl <= lvlRank && "Level-diff is out of bounds");
+    assert(diffLvl <= lvlRank);
     for (uint64_t l = diffLvl; l < lvlRank; ++l) {
       const uint64_t c = lvlCoords[l];
       appendCrd(l, full, c);
@@ -716,11 +727,17 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   std::vector<std::vector<C>> coordinates;
   std::vector<V> values;
   std::vector<uint64_t> lvlCursor; // cursor for lexicographic insertion.
+  SparseTensorCOO<V> *lvlCOO;      // COO used during forwarding
 };
 
 #undef ASSERT_COMPRESSED_OR_SINGLETON_LVL
 
 //===----------------------------------------------------------------------===//
+//
+//  SparseTensorEnumerator
+//
+//===----------------------------------------------------------------------===//
+
 /// A (higher-order) function object for enumerating the elements of some
 /// `SparseTensorStorage` under a permutation.  That is, the `forallElements`
 /// method encapsulates the loop-nest for enumerating the elements of
@@ -808,7 +825,6 @@ class SparseTensorEnumeratorBase {
   std::vector<uint64_t> trgCursor; // in target order.
 };
 
-//===----------------------------------------------------------------------===//
 template <typename P, typename C, typename V>
 class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
   using Base = SparseTensorEnumeratorBase<V>;
@@ -848,8 +864,7 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
     // Recover the `<P,C,V>` type parameters of `src`.
     const auto &src = static_cast<const StorageImpl &>(this->src);
     if (l == src.getLvlRank()) {
-      assert(parentPos < src.values.size() &&
-             "Value position is out of bounds");
+      assert(parentPos < src.values.size());
       // TODO: <https://github.com/llvm/llvm-project/issues/54179>
       yield(this->trgCursor, src.values[parentPos]);
       return;
@@ -860,13 +875,12 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
       // Look up the bounds of the `l`-level segment determined by the
       // `(l - 1)`-level position `parentPos`.
       const std::vector<P> &positionsL = src.positions[l];
-      assert(parentPos + 1 < positionsL.size() &&
-             "Parent position is out of bounds");
+      assert(parentPos + 1 < positionsL.size());
       const uint64_t pstart = static_cast<uint64_t>(positionsL[parentPos]);
       const uint64_t pstop = static_cast<uint64_t>(positionsL[parentPos + 1]);
       // Loop-invariant code for looking up the `l`-level coordinates.
       const std::vector<C> &coordinatesL = src.coordinates[l];
-      assert(pstop <= coordinatesL.size() && "Stop position is out of bounds");
+      assert(pstop <= coordinatesL.size());
       for (uint64_t pos = pstart; pos < pstop; ++pos) {
         cursorL = static_cast<uint64_t>(coordinatesL[pos]);
         forallElements(yield, pos, l + 1);
@@ -875,7 +889,7 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
       cursorL = src.getCrd(l, parentPos);
       forallElements(yield, parentPos, l + 1);
     } else { // Dense level.
-      ASSERT_DENSE_DLT(dlt);
+      assert(isDenseDLT(dlt));
       const uint64_t sz = src.getLvlSizes()[l];
       const uint64_t pstart = parentPos * sz;
       for (uint64_t c = 0; c < sz; ++c) {
@@ -887,6 +901,11 @@ class SparseTensorEnumerator final : public SparseTensorEnumeratorBase<V> {
 };
 
 //===----------------------------------------------------------------------===//
+//
+//  SparseTensorNNZ
+//
+//===----------------------------------------------------------------------===//
+
 /// Statistics regarding the number of nonzero subtensors in
 /// a source tensor, for direct sparse=>sparse conversion a la
 /// <https://arxiv.org/abs/2001.02609>.
@@ -959,7 +978,23 @@ class SparseTensorNNZ final {
 };
 
 //===----------------------------------------------------------------------===//
-// Definitions of the ctors and factories of `SparseTensorStorage<P,C,V>`.
+//
+//  SparseTensorStorage Factories
+//
+//===----------------------------------------------------------------------===//
+
+template <typename P, typename C, typename V>
+SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newEmpty(
+    uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
+    const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+    const uint64_t *dim2lvl, const uint64_t *lvl2dim, bool forwarding) {
+  SparseTensorCOO<V> *lvlCOO = nullptr;
+  if (forwarding)
+    lvlCOO = new SparseTensorCOO<V>(lvlRank, lvlSizes);
+  return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank, lvlSizes,
+                                          lvlTypes, dim2lvl, lvl2dim, lvlCOO,
+                                          !forwarding);
+}
 
 // TODO: MapRef
 template <typename P, typename C, typename V>
@@ -967,8 +1002,7 @@ SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
     uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
     const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
     const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO) {
-  assert(dimShape && "Got nullptr for dimension shape");
-  assert(lvl2dim && "Got nullptr for level-to-dimension mapping");
+  assert(dimShape && dim2lvl && lvl2dim);
   const auto &lvlSizes = lvlCOO.getDimSizes();
   assert(lvlRank == lvlSizes.size() && "Level-rank mismatch");
   // Must reconstruct `dimSizes` from `lvlSizes`.  While this is easy
@@ -1026,14 +1060,21 @@ SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::packFromLvlBuffers(
   return tensor;
 }
 
+//===----------------------------------------------------------------------===//
+//
+//  SparseTensorStorage Constructors
+//
+//===----------------------------------------------------------------------===//
+
 template <typename P, typename C, typename V>
 SparseTensorStorage<P, C, V>::SparseTensorStorage(
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
     const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
-    const uint64_t *dim2lvl, const uint64_t *lvl2dim,
+    const uint64_t *dim2lvl, const uint64_t *lvl2dim, SparseTensorCOO<V> *coo,
     bool initializeValuesIfAllDense)
     : SparseTensorStorage(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
                           dim2lvl, lvl2dim) {
+  lvlCOO = coo;
   // Provide hints on capacity of positions and coordinates.
   // TODO: needs much fine-tuning based on actual sparsity; currently
   // we reserve position/coordinate space based on all previous dense
@@ -1054,7 +1095,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
       sz = 1;
       allDense = false;
     } else { // Dense level.
-      ASSERT_DENSE_DLT(dlt);
+      assert(isDenseDLT(dlt));
       sz = detail::checkedMul(sz, lvlSizes[l]);
     }
   }
@@ -1062,6 +1103,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
     values.resize(sz, 0);
 }
 
+// TODO: share more code with forwarding methods?
 template <typename P, typename C, typename V>
 SparseTensorStorage<P, C, V>::SparseTensorStorage( // NOLINT
     uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
@@ -1069,14 +1111,14 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage( // NOLINT
     const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO)
     : SparseTensorStorage(dimRank, dimSizes, lvlRank,
                           lvlCOO.getDimSizes().data(), lvlTypes, dim2lvl,
-                          lvl2dim, false) {
+                          lvl2dim, nullptr, false) {
+  // Ensure lvlCOO is sorted.
   assert(lvlRank == lvlCOO.getDimSizes().size() && "Level-rank mismatch");
-  // Ensure the preconditions of `fromCOO`.  (One is already ensured by
-  // using `lvlSizes = lvlCOO.getDimSizes()` in the ctor above.)
   lvlCOO.sort();
   // Now actually insert the `elements`.
   const auto &elements = lvlCOO.getElements();
   const uint64_t nse = elements.size();
+  assert(values.size() == 0);
   values.reserve(nse);
   fromCOO(elements, 0, nse, 0);
 }
@@ -1123,7 +1165,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
       if (isCompressedDLT(dlt) || isSingletonDLT(dlt))
         coordinates[l].resize(parentSz, 0);
       else
-        ASSERT_DENSE_DLT(dlt); // Future-proofing.
+        assert(isDenseDLT(dlt));
     }
     values.resize(parentSz, 0); // Both allocate and zero-initialize.
   }
@@ -1137,7 +1179,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
         // however, it's semantically invalid here since that entry
         // does not represent a segment of `coordinates[l]`.  Moreover, that
         // entry must be immutable for `assembledSize` to remain valid.
-        assert(parentPos < parentSz && "Parent position is out of bounds");
+        assert(parentPos < parentSz);
         const uint64_t currentPos = positions[l][parentPos];
         // This increment won't overflow the `P` type, since it can't
         // exceed the original value of `positions[l][parentPos+1]`
@@ -1150,12 +1192,12 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
         writeCrd(l, parentPos, lvlCoords[l]);
         // the new parentPos equals the old parentPos.
       } else { // Dense level.
-        ASSERT_DENSE_DLT(dlt);
+        assert(isDenseDLT(dlt));
         parentPos = parentPos * getLvlSizes()[l] + lvlCoords[l];
       }
       parentSz = assembledSize(parentSz, l);
     }
-    assert(parentPos < values.size() && "Value position is out of bounds");
+    assert(parentPos < values.size());
     values[parentPos] = val;
   });
   // The finalizeYieldPos loop
@@ -1175,8 +1217,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
     } else {
       // Both dense and singleton are no-ops for the finalizeYieldPos loop.
       // This assertion is for future-proofing.
-      assert((isDenseDLT(dlt) || isSingletonDLT(dlt)) &&
-             "Level is neither dense nor singleton");
+      assert((isDenseDLT(dlt) || isSingletonDLT(dlt)));
     }
     parentSz = assembledSize(parentSz, l);
   }
@@ -1210,7 +1251,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
       positions[l].assign(posPtr, posPtr + parentSz + 1);
       coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
     } else {
-      assert(isDenseLvl(l) && "Level is not dense");
+      assert(isDenseLvl(l));
     }
     parentSz = assembledSize(parentSz, l);
   }
@@ -1235,8 +1276,6 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
   values.assign(valPtr, valPtr + parentSz);
 }
 
-#undef ASSERT_DENSE_DLT
-
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
index e723a354345849d..776f74cdc804ea0 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
@@ -37,7 +37,6 @@ extern "C" {
 //
 //===----------------------------------------------------------------------===//
 
-/// The @newSparseTensor function for constructing a new sparse tensor.
 /// This is the "swiss army knife" method for materializing sparse
 /// tensors into the computation.  The types of the `ptr` argument and
 /// the result depend on the action, as explained in the following table
@@ -45,14 +44,13 @@ extern "C" {
 /// a coordinate-scheme object, and "Iterator" means an iterator object).
 ///
 /// Action:         `ptr`:          Returns:
-/// kEmpty          unused          STS, empty
-/// kEmptyCOO       unused          COO, empty
-/// kFromFile       char* filename  STS, read from the file
+/// kEmpty          -               STS, empty
+/// kEmptyForward   -               STS, empty, with forwarding COO
 /// kFromCOO        COO             STS, copied from the COO source
-/// kToCOO          STS             COO, copied from the STS source
 /// kSparseToSparse STS             STS, copied from the STS source
-/// kToIterator     STS             Iterator, call @getNext to use and
-///                                 @delSparseTensorIterator to free.
+/// kToCOO          STS             COO, copied from the STS source
+/// kToIterator     STS             Iterator (@getNext/@delSparseTensorIterator)
+/// kPack           buffers         STS, from level buffers
 MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_newSparseTensor( // NOLINT
     StridedMemRefType<index_type, 1> *dimSizesRef,
     StridedMemRefType<index_type, 1> *lvlSizesRef,
@@ -84,19 +82,15 @@ MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSEPOSITIONS)
 MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSECOORDINATES)
 #undef DECL_SPARSECOORDINATES
 
-/// Coordinate-scheme method for adding a new element.
-/// TODO: remove dim2lvl
-#define DECL_ADDELT(VNAME, V)                                                  \
-  MLIR_CRUNNERUTILS_EXPORT void *_mlir_ciface_addElt##VNAME(                   \
-      void *lvlCOO, StridedMemRefType<V, 0> *vref,                             \
-      StridedMemRefType<index_type, 1> *dimCoordsRef,                          \
-      StridedMemRefType<index_type, 1> *dim2lvlRef);
-MLIR_SPARSETENSOR_FOREVERY_V(DECL_ADDELT)
-#undef DECL_ADDELT
+/// Tensor-storage method for a dim to lvl forwarding insertion.
+#define DECL_FORWARDINGINSERT(VNAME, V)                                        \
+  MLIR_CRUNNERUTILS_EXPORT void _mlir_ciface_forwardingInsert##VNAME(          \
+      void *tensor, StridedMemRefType<V, 0> *vref,                             \
+      StridedMemRefType<index_type, 1> *dimCoordsRef);                         \
+  MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT)
 
+#undef DECL_FORWARDINGINSERT
 /// Coordinate-scheme method for getting the next element while iterating.
-/// The `cref` argument uses the same coordinate-space as the `iter` (which
-/// can be either dim- or lvl-coords, depending on context).
 #define DECL_GETNEXT(VNAME, V)                                                 \
   MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getNext##VNAME(                   \
       void *iter, StridedMemRefType<index_type, 1> *cref,                      \
@@ -185,8 +179,11 @@ MLIR_CRUNNERUTILS_EXPORT index_type sparseLvlSize(void *tensor, index_type l);
 /// Tensor-storage method to get the size of the given dimension.
 MLIR_CRUNNERUTILS_EXPORT index_type sparseDimSize(void *tensor, index_type d);
 
+/// Tensor-storage method to finalize forwarding insertions.
+MLIR_CRUNNERUTILS_EXPORT void endForwardingInsert(void *tensor);
+
 /// Tensor-storage method to finalize lexicographic insertions.
-MLIR_CRUNNERUTILS_EXPORT void endInsert(void *tensor);
+MLIR_CRUNNERUTILS_EXPORT void endLexInsert(void *tensor);
 
 /// Coordinate-scheme method to write to file in extended FROSTT format.
 #define DECL_OUTSPARSETENSOR(VNAME, V)                                         \
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 4c2d6be29c02f43..88559ddaf0a4a29 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -241,16 +241,6 @@ class NewCallParams final {
     return true;
   }
 
-  /// Gets the dimension-to-level mapping.
-  //
-  // TODO: This is only ever used for passing into `genAddEltCall`;
-  // is there a better way to encapsulate that pattern (both to avoid
-  // this one-off getter, and to avoid potential mixups)?
-  Value getDimToLvl() const {
-    assert(isInitialized() && "Must initialize before getDimToLvl");
-    return params[kParamDim2Lvl];
-  }
-
   /// Generates a function call, with the current static parameters
   /// and the given dynamic arguments.
   Value genNewCall(Action action, Value ptr = Value()) {
@@ -299,6 +289,74 @@ static void genDelCOOCall(OpBuilder &builder, Location loc, Type elemTp,
   createFuncCall(builder, loc, name, {}, coo, EmitCInterface::Off);
 }
 
+<<<<<<< HEAD
+=======
+/// Generates a call to release/delete a `SparseTensorIterator`.
+static void genDelIteratorCall(OpBuilder &builder, Location loc, Type elemTp,
+                               Value iter) {
+  SmallString<26> name{"delSparseTensorIterator",
+                       primaryTypeFunctionSuffix(elemTp)};
+  createFuncCall(builder, loc, name, {}, iter, EmitCInterface::Off);
+}
+
+/// Generates a call with a forwarding insertion.
+static void genForwardingInsert(OpBuilder &builder, Location loc, Type eltType,
+                                Value tensor, Value valPtr, Value dimCoords) {
+  SmallString<9> name{"forwardingInsert", primaryTypeFunctionSuffix(eltType)};
+  SmallVector<Value, 4> params{tensor, valPtr, dimCoords};
+  Type pTp = getOpaquePointerType(builder);
+  createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On);
+}
+
+/// Generates a call to `iter->getNext()`.  If there is a next element,
+/// then it is copied into the out-parameters `coords` and `elemPtr`,
+/// and the return value is true.  If there isn't a next element, then
+/// the return value is false.
+///
+/// The `coords` argument uses the same coordinate-space as the `iter`
+/// (which can be either dim- or lvl-coords, depending on context).
+static Value genGetNextCall(OpBuilder &builder, Location loc, Value iter,
+                            Value coords, Value elemPtr) {
+  Type elemTp = cast<ShapedType>(elemPtr.getType()).getElementType();
+  SmallString<10> name{"getNext", primaryTypeFunctionSuffix(elemTp)};
+  SmallVector<Value, 3> params{iter, coords, elemPtr};
+  Type i1 = builder.getI1Type();
+  return createFuncCall(builder, loc, name, i1, params, EmitCInterface::On)
+      .getResult(0);
+}
+
+/// Loads the value stored in `elemPtr`, and stores it at the coordinates
+/// `cvs` into a dense tensor created by `allocDenseTensor`.
+static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc,
+                                        Value elemPtr, Value tensor,
+                                        ValueRange cvs) {
+  Value elemV = builder.create<memref::LoadOp>(loc, elemPtr);
+  builder.create<memref::StoreOp>(loc, elemV, tensor, cvs);
+}
+
+/// Determine if the runtime library supports direct conversion to the
+/// given target `dimTypes`.
+static bool canUseDirectConversion(ArrayRef<DimLevelType> dimTypes) {
+  bool alreadyCompressed = false;
+  for (const auto dlt : dimTypes) {
+    if (isCompressedDLT(dlt)) {
+      if (alreadyCompressed)
+        return false; // Multiple compressed dimensions not yet supported.
+      alreadyCompressed = true;
+    } else if (isDenseDLT(dlt)) {
+      if (alreadyCompressed)
+        return false; // Dense after Compressed not yet supported.
+    } else if (isSingletonDLT(dlt)) {
+      // Direct conversion doesn't have any particular problems with
+      // singleton after compressed.
+    } else { // TODO: investigate
+      return false;
+    }
+  }
+  return true;
+}
+
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 //===----------------------------------------------------------------------===//
 // Conversion rules.
 //===----------------------------------------------------------------------===//
@@ -482,6 +540,7 @@ class SparseTensorReorderCOOConverter
     const auto srcTp = getSparseTensorType(op.getInputCoo());
     const auto dstTp = getSparseTensorType(op);
 
+<<<<<<< HEAD
     const Value src = adaptor.getInputCoo();
 
     NewCallParams params(rewriter, loc);
@@ -489,6 +548,142 @@ class SparseTensorReorderCOOConverter
     rewriter.replaceOp(op, params.genBuffers(dstTp, dimSizes)
                                .genNewCall(Action::kSortCOOInPlace, src));
 
+=======
+    const Dimension dimRank = srcTp.getDimRank();
+    const Type elemTp = srcTp.getElementType();
+    const Value src = adaptor.getOperands()[0];
+    if (srcTp.hasEncoding() && dstTp.hasEncoding()) {
+      const auto srcEnc = srcTp.getEncoding();
+      const auto dstEnc = dstTp.getEncoding();
+      // This is a sparse => sparse conversion, which is handled as follows:
+      //   t = src->toCOO();         ; src to COO in dst order
+      //   dst = newSparseTensor(t)
+      // Using the coordinate scheme as an intermediate does not always
+      // yield the fastest conversion but avoids the need for a full
+      // O(N^2) conversion matrix.
+      if (dstEnc == srcEnc) {
+        rewriter.replaceOp(op, adaptor.getOperands()); // hidden nop cast
+        return success();
+      }
+      NewCallParams params(rewriter, loc);
+      SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, srcTp, src);
+      bool useDirectConversion;
+      switch (options.sparseToSparseStrategy) {
+      case SparseToSparseConversionStrategy::kViaCOO:
+        useDirectConversion = false;
+        break;
+      case SparseToSparseConversionStrategy::kDirect:
+        useDirectConversion = true;
+        assert(canUseDirectConversion(dstEnc.getLvlTypes()) &&
+               "Unsupported target for direct sparse-to-sparse conversion");
+        break;
+      case SparseToSparseConversionStrategy::kAuto:
+        useDirectConversion = canUseDirectConversion(dstEnc.getLvlTypes());
+        break;
+      }
+      if (useDirectConversion) {
+        rewriter.replaceOp(
+            op, params.genBuffers(srcTp.withEncoding(dstEnc), dimSizes)
+                    .genNewCall(Action::kSparseToSparse, src));
+      } else { // use via-COO conversion.
+        // Set up encoding with right mix of src and dst so that the two
+        // method calls can share most parameters, while still providing
+        // the correct sparsity information to either of them.
+        const auto mixedEnc =
+            dstEnc.withBitWidths(srcEnc.getPosWidth(), srcEnc.getCrdWidth());
+        // TODO: This is the only place where `kToCOO` (or `kToIterator`)
+        // is called with a non-identity permutation.  Is there any clean
+        // way to push the permutation over to the `kFromCOO` side instead?
+        Value coo = params.genBuffers(srcTp.withEncoding(mixedEnc), dimSizes)
+                        .genNewCall(Action::kToCOO, src);
+        Value dst = params.setTemplateTypes(srcTp.withEncoding(dstEnc))
+                        .genNewCall(Action::kFromCOO, coo);
+        genDelCOOCall(rewriter, loc, elemTp, coo);
+        rewriter.replaceOp(op, dst);
+      }
+      return success();
+    }
+    if (srcTp.hasEncoding() && !dstTp.hasEncoding()) {
+      const auto srcEnc = srcTp.getEncoding();
+      // This is sparse => dense conversion, which is handled as follows:
+      //   dst = new Tensor(0);
+      //   iter = new SparseTensorIterator(src);
+      //   while (elem = iter->getNext()) {
+      //     dst[elem.coords] = elem.value;
+      //   }
+      //   delete iter;
+      //
+      // Fabricate a no-permutation encoding for NewCallParams
+      // The position/coordinate types must be those of `src`.
+      // The dimLevelTypes aren't actually used by Action::kToIterator.
+      const auto dstEnc = SparseTensorEncodingAttr::get(
+          op->getContext(),
+          SmallVector<DimLevelType>(dimRank, DimLevelType::Dense), AffineMap(),
+          AffineMap(), srcEnc.getPosWidth(), srcEnc.getCrdWidth());
+      SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, srcTp, src);
+      Value iter = NewCallParams(rewriter, loc)
+                       .genBuffers(dstTp.withEncoding(dstEnc), dimSizes)
+                       .genNewCall(Action::kToIterator, src);
+      const Type iTp = rewriter.getIndexType();
+      Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp);
+      Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+      // TODO: Dense buffers should be allocated/deallocated via the callback
+      // in BufferizationOptions.
+      Value dst = allocDenseTensor(rewriter, loc, dstTp, dimSizes);
+      const SmallVector<Value> noArgs;
+      const SmallVector<Type> noTypes;
+      auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
+      Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes);
+      rewriter.setInsertionPointToEnd(before);
+      Value cond = genGetNextCall(rewriter, loc, iter, dimCoords, elemPtr);
+      rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
+      Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
+      rewriter.setInsertionPointToStart(after);
+      const auto dcvs = loadAll(rewriter, loc, dimRank, dimCoords);
+      insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, dcvs);
+      rewriter.create<scf::YieldOp>(loc);
+      rewriter.setInsertionPointAfter(whileOp);
+      genDelIteratorCall(rewriter, loc, elemTp, iter);
+      rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(
+          op, dstTp.getRankedTensorType(), dst);
+      return success();
+    }
+    assert(!srcTp.hasEncoding() && dstTp.hasEncoding());
+    // This is a "dense => sparse conversion" or a "sparse constant => sparse
+    // conversion" which is conceptually handled as follows, with an additional
+    // test for nonzero values for the dense case.
+    //
+    //   st = newSparseTensor()                  ; ST with forwarding COO
+    //   for i1 in dim1, ..., ik in dimk         ; loop nest or range(NNZ) loop
+    //     val = a[i1,..,ik]                     ;
+    //     st->insertForwarding(val, [i1,..,ik]) ; maps dim to level
+    //   st->endForwardingInsert()               ; finalize forwarding
+    //
+    // Note that the traversal code is actually implemented using MLIR IR to
+    // avoid having to expose too much low-level memref traversal details to
+    // the runtime support library.
+    SmallVector<Value> dimSizes;
+    sizesFromSrc(rewriter, dimSizes, loc, src);
+    NewCallParams params(rewriter, loc);
+    Value tensor =
+        params.genBuffers(dstTp, dimSizes).genNewCall(Action::kEmptyForward);
+    const Type iTp = rewriter.getIndexType();
+    Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp);
+    Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
+    genDenseTensorOrSparseConstantIterLoop(
+        rewriter, loc, src, dimRank,
+        [&](OpBuilder &builder, Location loc, Value val, ValueRange dcvs) {
+          assert(dcvs.size() == static_cast<size_t>(dimRank));
+          storeAll(builder, loc, dimCoords, dcvs);
+          builder.create<memref::StoreOp>(loc, val, elemPtr);
+          genForwardingInsert(builder, loc, elemTp, tensor, elemPtr, dimCoords);
+        });
+    // Final call to construct sparse tensor storage.
+    StringRef name = "endForwardingInsert";
+    createFuncCall(rewriter, op->getLoc(), name, {}, tensor,
+                   EmitCInterface::Off);
+    rewriter.replaceOp(op, tensor);
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
     return success();
   }
 };
@@ -606,7 +801,7 @@ class SparseTensorLoadConverter : public OpConversionPattern<LoadOp> {
                   ConversionPatternRewriter &rewriter) const override {
     if (op.getHasInserts()) {
       // Finalize any pending insertions.
-      StringRef name = "endInsert";
+      StringRef name = "endLexInsert";
       createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(),
                      EmitCInterface::Off);
     }
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 83ceecaf5a30ee1..ac2e318719c0286 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -177,9 +177,16 @@ extern "C" {
 #define CASE(p, c, v, P, C, V)                                                 \
   if (posTp == (p) && crdTp == (c) && valTp == (v)) {                          \
     switch (action) {                                                          \
-    case Action::kEmpty:                                                       \
+    case Action::kEmpty: {                                                     \
       return SparseTensorStorage<P, C, V>::newEmpty(                           \
-          dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim);   \
+          dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,    \
+          false);                                                              \
+    }                                                                          \
+    case Action::kEmptyForward: {                                              \
+      return SparseTensorStorage<P, C, V>::newEmpty(                           \
+          dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,    \
+          true);                                                               \
+    }                                                                          \
     case Action::kFromCOO: {                                                   \
       assert(ptr && "Received nullptr for SparseTensorCOO object");            \
       auto &coo = *static_cast<SparseTensorCOO<V> *>(ptr);                     \
@@ -193,8 +200,9 @@ extern "C" {
           dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim,    \
           dimRank, tensor);                                                    \
     }                                                                          \
-    case Action::kEmptyCOO:                                                    \
-      return new SparseTensorCOO<V>(lvlRank, lvlSizes);                        \
+    case Action::kFuture: {                                                    \
+      break;                                                                   \
+    }                                                                          \
     case Action::kToCOO: {                                                     \
       assert(ptr && "Received nullptr for SparseTensorStorage object");        \
       auto &tensor = *static_cast<SparseTensorStorage<P, C, V> *>(ptr);        \
@@ -405,29 +413,20 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES)
 #undef IMPL_SPARSECOORDINATES
 #undef IMPL_GETOVERHEAD
 
-// TODO: use MapRef here for translation of coordinates
-// TODO: remove dim2lvl
-#define IMPL_ADDELT(VNAME, V)                                                  \
-  void *_mlir_ciface_addElt##VNAME(                                            \
-      void *lvlCOO, StridedMemRefType<V, 0> *vref,                             \
-      StridedMemRefType<index_type, 1> *dimCoordsRef,                          \
-      StridedMemRefType<index_type, 1> *dim2lvlRef) {                          \
-    assert(lvlCOO &&vref);                                                     \
+#define IMPL_FORWARDINGINSERT(VNAME, V)                                        \
+  void _mlir_ciface_forwardingInsert##VNAME(                                   \
+      void *t, StridedMemRefType<V, 0> *vref,                                  \
+      StridedMemRefType<index_type, 1> *dimCoordsRef) {                        \
+    assert(t &&vref);                                                          \
+    auto &tensor = *static_cast<SparseTensorStorageBase *>(t);                 \
     ASSERT_NO_STRIDE(dimCoordsRef);                                            \
-    ASSERT_NO_STRIDE(dim2lvlRef);                                              \
-    const uint64_t rank = MEMREF_GET_USIZE(dimCoordsRef);                      \
-    ASSERT_USIZE_EQ(dim2lvlRef, rank);                                         \
     const index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef);            \
-    const index_type *dim2lvl = MEMREF_GET_PAYLOAD(dim2lvlRef);                \
-    std::vector<index_type> lvlCoords(rank);                                   \
-    for (uint64_t d = 0; d < rank; ++d)                                        \
-      lvlCoords[dim2lvl[d]] = dimCoords[d];                                    \
-    V *value = MEMREF_GET_PAYLOAD(vref);                                       \
-    static_cast<SparseTensorCOO<V> *>(lvlCOO)->add(lvlCoords, *value);         \
-    return lvlCOO;                                                             \
+    assert(dimCoords);                                                         \
+    const V *value = MEMREF_GET_PAYLOAD(vref);                                 \
+    tensor.forwardingInsert(dimCoords, *value);                                \
   }
-MLIR_SPARSETENSOR_FOREVERY_V(IMPL_ADDELT)
-#undef IMPL_ADDELT
+MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT)
+#undef IMPL_FORWARDINGINSERT
 
 // NOTE: the `cref` argument uses the same coordinate-space as the `iter`
 // (which can be either dim- or lvl-coords, depending on context).
@@ -692,8 +691,12 @@ index_type sparseDimSize(void *tensor, index_type d) {
   return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
 }
 
-void endInsert(void *tensor) {
-  return static_cast<SparseTensorStorageBase *>(tensor)->endInsert();
+void endForwardingInsert(void *tensor) {
+  return static_cast<SparseTensorStorageBase *>(tensor)->endForwardingInsert();
+}
+
+void endLexInsert(void *tensor) {
+  return static_cast<SparseTensorStorageBase *>(tensor)->endLexInsert();
 }
 
 #define IMPL_OUTSPARSETENSOR(VNAME, V)                                         \
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
index 29093a055ab2e04..96300a98a6a4bc5 100644
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -296,7 +296,7 @@ func.func @sparse_reconstruct(%arg0: tensor<128xf32, #SparseVector>) -> tensor<1
 
 // CHECK-LABEL: func @sparse_reconstruct_ins(
 //  CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>
-//       CHECK: call @endInsert(%[[A]]) : (!llvm.ptr<i8>) -> ()
+//       CHECK: call @endLexInsert(%[[A]]) : (!llvm.ptr<i8>) -> ()
 //       CHECK: return %[[A]] : !llvm.ptr<i8>
 func.func @sparse_reconstruct_ins(%arg0: tensor<128xf32, #SparseVector>) -> tensor<128xf32, #SparseVector> {
   %0 = sparse_tensor.load %arg0 hasInserts : tensor<128xf32, #SparseVector>
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index 4dba16df39f5c65..aaa2c6f2a91106c 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -16,45 +16,215 @@
   map = (d0, d1, d2) -> (d2 : dense, d0 : compressed, d1 : compressed)
 }>
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_1d
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_convert_1d(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<?xi32>) -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 6 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 8 : i8
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?xi32>
+// CHECK:           %[[VAL_8:.*]] = memref.alloca() : memref<1xi8>
+// CHECK:           %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<1xi8>
+// CHECK:           %[[VAL_10:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<1xindex>
+// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<1xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<1xindex>
+// CHECK:           %[[VAL_14:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_15:.*]] = call @newSparseTensor(%[[VAL_11]], %[[VAL_11]], %[[VAL_9]], %[[VAL_13]], %[[VAL_13]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_14]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<1xindex> to memref<?xindex>
+// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<i32>
+// CHECK:           scf.for %[[VAL_19:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_4]] {
+// CHECK:             %[[VAL_20:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_19]]] : tensor<?xi32>
+// CHECK:             %[[VAL_21:.*]] = arith.cmpi ne, %[[VAL_20]], %[[VAL_3]] : i32
+// CHECK:             scf.if %[[VAL_21]] {
+// CHECK:               memref.store %[[VAL_19]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<1xindex>
+// CHECK:               memref.store %[[VAL_20]], %[[VAL_18]][] : memref<i32>
+// CHECK:               %[[VAL_22:.*]] = func.call @forwardingInsertI32(%[[VAL_15]], %[[VAL_18]], %[[VAL_17]]) : (!llvm.ptr<i8>, memref<i32>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_15]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_15]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_1d(%arg0: tensor<?xi32>) -> tensor<?xi32, #SparseVector> {
   %0 = sparse_tensor.convert %arg0 : tensor<?xi32> to tensor<?xi32, #SparseVector>
   return %0 : tensor<?xi32, #SparseVector>
 }
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_complex
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_convert_complex(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<100xcomplex<f64>>) -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_1:.*]] = complex.constant [0.000000e+00, 0.000000e+00] : complex<f64>
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 9 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 100 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 8 : i8
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_9:.*]] = memref.alloca() : memref<1xi8>
+// CHECK:           %[[VAL_10:.*]] = memref.cast %[[VAL_9]] : memref<1xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<1xi8>
+// CHECK:           %[[VAL_11:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<1xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<1xindex>
+// CHECK:           %[[VAL_13:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<1xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<1xindex>
+// CHECK:           %[[VAL_15:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_12]], %[[VAL_12]], %[[VAL_10]], %[[VAL_14]], %[[VAL_14]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_15]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_17:.*]] = memref.alloca() : memref<1xindex>
+// CHECK:           %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<1xindex> to memref<?xindex>
+// CHECK:           %[[VAL_19:.*]] = memref.alloca() : memref<complex<f64>>
+// CHECK:           scf.for %[[VAL_20:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_8]] {
+// CHECK:             %[[VAL_21:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_20]]] : tensor<100xcomplex<f64>>
+// CHECK:             %[[VAL_22:.*]] = complex.neq %[[VAL_21]], %[[VAL_1]] : complex<f64>
+// CHECK:             scf.if %[[VAL_22]] {
+// CHECK:               memref.store %[[VAL_20]], %[[VAL_17]]{{\[}}%[[VAL_5]]] : memref<1xindex>
+// CHECK:               memref.store %[[VAL_21]], %[[VAL_19]][] : memref<complex<f64>>
+// CHECK:               %[[VAL_23:.*]] = func.call @forwardingInsertC64(%[[VAL_16]], %[[VAL_19]], %[[VAL_18]]) : (!llvm.ptr<i8>, memref<complex<f64>>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_16]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_16]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_complex(%arg0: tensor<100xcomplex<f64>>) -> tensor<100xcomplex<f64>, #SparseVector> {
   %0 = sparse_tensor.convert %arg0 : tensor<100xcomplex<f64>> to tensor<100xcomplex<f64>, #SparseVector>
   return %0 : tensor<100xcomplex<f64>, #SparseVector>
 }
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_2d
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_convert_2d(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<2x4xf64>) -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 2 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 4 : index
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 4 : i8
+// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 8 : i8
+// CHECK:           %[[VAL_10:.*]] = memref.alloca() : memref<2xi8>
+// CHECK:           %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<2xi8>
+// CHECK:           memref.store %[[VAL_9]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<2xi8>
+// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xindex>
+// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_4]], %[[VAL_14]]{{\[}}%[[VAL_4]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex>
+// CHECK:           %[[VAL_16:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_17:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_2]], %[[VAL_16]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<2xindex> to memref<?xindex>
+// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<f64>
+// CHECK:           scf.for %[[VAL_21:.*]] = %[[VAL_4]] to %[[VAL_5]] step %[[VAL_6]] {
+// CHECK:             scf.for %[[VAL_22:.*]] = %[[VAL_4]] to %[[VAL_7]] step %[[VAL_6]] {
+// CHECK:               %[[VAL_23:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_21]], %[[VAL_22]]] : tensor<2x4xf64>
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_23]], %[[VAL_1]] : f64
+// CHECK:               scf.if %[[VAL_24]] {
+// CHECK:                 memref.store %[[VAL_21]], %[[VAL_18]]{{\[}}%[[VAL_4]]] : memref<2xindex>
+// CHECK:                 memref.store %[[VAL_22]], %[[VAL_18]]{{\[}}%[[VAL_6]]] : memref<2xindex>
+// CHECK:                 memref.store %[[VAL_23]], %[[VAL_20]][] : memref<f64>
+// CHECK:                 %[[VAL_25:.*]] = func.call @forwardingInsertF64(%[[VAL_17]], %[[VAL_20]], %[[VAL_19]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_17]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_17]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> {
   %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64> to tensor<2x4xf64, #CSR>
   return %0 : tensor<2x4xf64, #CSR>
 }
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_constant
 // CHECK:           sparse_tensor.foreach
 // CHECK-NOT:         scf.if
 // CHECK:               sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_constant() -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32>
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64>
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 2 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 8 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 7 : index
+// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 4 : i8
+// CHECK-DAG:       %[[VAL_10:.*]] = arith.constant 8 : i8
+// CHECK-DAG:       %[[VAL_11:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xi8>
+// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8>
+// CHECK:           memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8>
+// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_18]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref<?xindex>
+// CHECK:           %[[VAL_22:.*]] = memref.alloca() : memref<f32>
+// CHECK:           scf.for %[[VAL_23:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] {
+// CHECK:             %[[VAL_24:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_5]]] : tensor<2x2xi64>
+// CHECK:             %[[VAL_25:.*]] = arith.index_cast %[[VAL_24]] : i64 to index
+// CHECK:             %[[VAL_26:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_7]]] : tensor<2x2xi64>
+// CHECK:             %[[VAL_27:.*]] = arith.index_cast %[[VAL_26]] : i64 to index
+// CHECK:             %[[VAL_28:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_23]]] : tensor<2xf32>
+// CHECK:             memref.store %[[VAL_25]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:             memref.store %[[VAL_27]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:             memref.store %[[VAL_28]], %[[VAL_22]][] : memref<f32>
+// CHECK:             %[[VAL_29:.*]] = func.call @forwardingInsertF32(%[[VAL_19]], %[[VAL_22]], %[[VAL_21]]) : (!llvm.ptr<i8>, memref<f32>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_19]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_19]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
   // Initialize a tensor.
   %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>
@@ -63,12 +233,67 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
   return %1 : tensor<8x7xf32, #CSR>
 }
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_constant_csc
 // CHECK:           sparse_tensor.foreach
 // CHECK-NOT:         scf.if
 // CHECK:               sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_constant_csc() -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32>
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64>
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 2 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 8 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 7 : index
+// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 4 : i8
+// CHECK-DAG:       %[[VAL_10:.*]] = arith.constant 8 : i8
+// CHECK-DAG:       %[[VAL_11:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xi8>
+// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8>
+// CHECK:           memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8>
+// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_18]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:           %[[VAL_22:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_23:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_21]], %[[VAL_13]], %[[VAL_17]], %[[VAL_19]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_22]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_24:.*]] = memref.alloca() : memref<2xindex>
+// CHECK:           %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<2xindex> to memref<?xindex>
+// CHECK:           %[[VAL_26:.*]] = memref.alloca() : memref<f32>
+// CHECK:           scf.for %[[VAL_27:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] {
+// CHECK:             %[[VAL_28:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_5]]] : tensor<2x2xi64>
+// CHECK:             %[[VAL_29:.*]] = arith.index_cast %[[VAL_28]] : i64 to index
+// CHECK:             %[[VAL_30:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_7]]] : tensor<2x2xi64>
+// CHECK:             %[[VAL_31:.*]] = arith.index_cast %[[VAL_30]] : i64 to index
+// CHECK:             %[[VAL_32:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_27]]] : tensor<2xf32>
+// CHECK:             memref.store %[[VAL_29]], %[[VAL_24]]{{\[}}%[[VAL_5]]] : memref<2xindex>
+// CHECK:             memref.store %[[VAL_31]], %[[VAL_24]]{{\[}}%[[VAL_7]]] : memref<2xindex>
+// CHECK:             memref.store %[[VAL_32]], %[[VAL_26]][] : memref<f32>
+// CHECK:             %[[VAL_33:.*]] = func.call @forwardingInsertF32(%[[VAL_23]], %[[VAL_26]], %[[VAL_25]]) : (!llvm.ptr<i8>, memref<f32>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_23]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_23]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
   // Initialize a tensor.
   %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>
@@ -77,6 +302,7 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
   return %1 : tensor<8x7xf32, #CSC>
 }
 
+<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_3d
 // CHECK:           sparse_tensor.foreach
 // CHECK:             scf.if
@@ -86,6 +312,72 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
 // CHECK:           sparse_tensor.foreach
 // CHECK:             sparse_tensor.insert
 // CHECK:           sparse_tensor.load
+=======
+// CHECK-LABEL:   func.func @sparse_convert_3d(
+// CHECK-SAME:      %[[VAL_0:.*]]: tensor<?x?x?xf64>) -> !llvm.ptr<i8> {
+// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64
+// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
+// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
+// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 8 : i8
+// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 4 : i8
+// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_8]] : tensor<?x?x?xf64>
+// CHECK:           %[[VAL_10:.*]] = tensor.dim %[[VAL_0]], %[[VAL_7]] : tensor<?x?x?xf64>
+// CHECK:           %[[VAL_11:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf64>
+// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<3xi8>
+// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<3xi8> to memref<?xi8>
+// CHECK:           memref.store %[[VAL_5]], %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<3xi8>
+// CHECK:           memref.store %[[VAL_4]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<3xi8>
+// CHECK:           memref.store %[[VAL_4]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<3xi8>
+// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<3xindex>
+// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<3xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_9]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_10]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_11]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:           %[[VAL_18:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:           %[[VAL_19:.*]] = memref.alloca() : memref<3xindex>
+// CHECK:           %[[VAL_20:.*]] = memref.cast %[[VAL_19]] : memref<3xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_19]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_19]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:           %[[VAL_21:.*]] = memref.alloca() : memref<3xindex>
+// CHECK:           %[[VAL_22:.*]] = memref.cast %[[VAL_21]] : memref<3xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_6]], %[[VAL_21]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_8]], %[[VAL_21]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_7]], %[[VAL_21]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:           %[[VAL_23:.*]] = memref.alloca() : memref<3xindex>
+// CHECK:           %[[VAL_24:.*]] = memref.cast %[[VAL_23]] : memref<3xindex> to memref<?xindex>
+// CHECK:           memref.store %[[VAL_16]], %[[VAL_23]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_17]], %[[VAL_23]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:           memref.store %[[VAL_18]], %[[VAL_23]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:           %[[VAL_25:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
+// CHECK:           %[[VAL_26:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_24]], %[[VAL_13]], %[[VAL_20]], %[[VAL_22]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_2]], %[[VAL_25]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
+// CHECK:           %[[VAL_27:.*]] = memref.alloca() : memref<3xindex>
+// CHECK:           %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<3xindex> to memref<?xindex>
+// CHECK:           %[[VAL_29:.*]] = memref.alloca() : memref<f64>
+// CHECK:           scf.for %[[VAL_30:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_7]] {
+// CHECK:             scf.for %[[VAL_31:.*]] = %[[VAL_8]] to %[[VAL_10]] step %[[VAL_7]] {
+// CHECK:               scf.for %[[VAL_32:.*]] = %[[VAL_8]] to %[[VAL_11]] step %[[VAL_7]] {
+// CHECK:                 %[[VAL_33:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_30]], %[[VAL_31]], %[[VAL_32]]] : tensor<?x?x?xf64>
+// CHECK:                 %[[VAL_34:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_1]] : f64
+// CHECK:                 scf.if %[[VAL_34]] {
+// CHECK:                   memref.store %[[VAL_30]], %[[VAL_27]]{{\[}}%[[VAL_8]]] : memref<3xindex>
+// CHECK:                   memref.store %[[VAL_31]], %[[VAL_27]]{{\[}}%[[VAL_7]]] : memref<3xindex>
+// CHECK:                   memref.store %[[VAL_32]], %[[VAL_27]]{{\[}}%[[VAL_6]]] : memref<3xindex>
+// CHECK:                   memref.store %[[VAL_33]], %[[VAL_29]][] : memref<f64>
+// CHECK:                   %[[VAL_35:.*]] = func.call @forwardingInsertF64(%[[VAL_26]], %[[VAL_29]], %[[VAL_28]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>) -> !llvm.ptr<i8>
+// CHECK:                 }
+// CHECK:               }
+// CHECK:             }
+// CHECK:           }
+// CHECK:           call @endForwardingInsert(%[[VAL_26]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           return %[[VAL_26]] : !llvm.ptr<i8>
+// CHECK:         }
+>>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_3d(%arg0: tensor<?x?x?xf64>) -> tensor<?x?x?xf64, #SparseTensor> {
   %0 = sparse_tensor.convert %arg0 : tensor<?x?x?xf64> to tensor<?x?x?xf64, #SparseTensor>
   return %0 : tensor<?x?x?xf64, #SparseTensor>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
index d19d7fe2871d674..9d8db10aa423022 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir
@@ -62,7 +62,7 @@
 // CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
 // CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
 // CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
-// CHECK-CONVERT: call @endInsert
+// CHECK-CONVERT: call @endLexInsert
 //
 func.func @kernel(%arga: tensor<?x?xf64, #DCSC>) -> tensor<?xf64, #SV> {
   %c0 = arith.constant 0 : index
@@ -115,7 +115,7 @@ func.func @kernel(%arga: tensor<?x?xf64, #DCSC>) -> tensor<?xf64, #SV> {
 // CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
 // CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
 // CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
-// CHECK-CONVERT: call @endInsert
+// CHECK-CONVERT: call @endLexInsert
 //
 func.func @matmul1(%A: tensor<8x2xf64, #CSR>,
                    %B: tensor<2x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> {
@@ -163,7 +163,7 @@ func.func @matmul1(%A: tensor<8x2xf64, #CSR>,
 // CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
 // CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
 // CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
-// CHECK-CONVERT: call @endInsert
+// CHECK-CONVERT: call @endLexInsert
 //
 func.func @matmul2(%A: tensor<8x2xf64, #CSC>,
                    %B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
index 7d852ca9cc1aa26..8ecbc1da965a156 100644
--- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir
@@ -112,7 +112,7 @@
 // CHECK:           memref.dealloc %[[VAL_20]] : memref<300xf64>
 // CHECK:           memref.dealloc %[[VAL_22]] : memref<300xi1>
 // CHECK:           memref.dealloc %[[VAL_24]] : memref<300xindex>
-// CHECK:           call @endInsert(%[[VAL_19]]) : (!llvm.ptr<i8>) -> ()
+// CHECK:           call @endLexInsert(%[[VAL_19]]) : (!llvm.ptr<i8>) -> ()
 // CHECK:           return %[[VAL_19]] : !llvm.ptr<i8>
 // CHECK:       }
 func.func @fill_zero_after_alloc(%arg0: tensor<100x200xf64, #DCSR>,

>From 50b556f31def5521cb835318c326ae45bf92a60b Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Thu, 12 Oct 2023 15:40:37 -0700
Subject: [PATCH 2/5] whitespace

---
 mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
index 776f74cdc804ea0..f9312c866f36317 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h
@@ -88,8 +88,8 @@ MLIR_SPARSETENSOR_FOREVERY_O(DECL_SPARSECOORDINATES)
       void *tensor, StridedMemRefType<V, 0> *vref,                             \
       StridedMemRefType<index_type, 1> *dimCoordsRef);                         \
   MLIR_SPARSETENSOR_FOREVERY_V(DECL_FORWARDINGINSERT)
-
 #undef DECL_FORWARDINGINSERT
+
 /// Coordinate-scheme method for getting the next element while iterating.
 #define DECL_GETNEXT(VNAME, V)                                                 \
   MLIR_CRUNNERUTILS_EXPORT bool _mlir_ciface_getNext##VNAME(                   \

>From 18eccc2f721f3a5721a4491cde31f9c1470d1346 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Thu, 12 Oct 2023 16:00:46 -0700
Subject: [PATCH 3/5] add missing file

---
 mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp | 7 +++++++
 mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp  | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index 1d654cae3b4b125..050dff2da1fa476 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -80,6 +80,13 @@ MLIR_SPARSETENSOR_FOREVERY_FIXED_O(IMPL_GETCOORDINATES)
 MLIR_SPARSETENSOR_FOREVERY_V(IMPL_GETVALUES)
 #undef IMPL_GETVALUES
 
+#define IMPL_FORWARDINGINSERT(VNAME, V)                                        \
+  void SparseTensorStorageBase::forwardingInsert(const uint64_t *, V) {        \
+    FATAL_PIV("forwardingInsert" #VNAME);                                      \
+  }
+MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT)
+#undef IMPL_FORWARDINGINSERT
+
 #define IMPL_LEXINSERT(VNAME, V)                                               \
   void SparseTensorStorageBase::lexInsert(const uint64_t *, V) {               \
     FATAL_PIV("lexInsert" #VNAME);                                             \
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index ac2e318719c0286..cd1b663578a48ce 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -418,12 +418,12 @@ MLIR_SPARSETENSOR_FOREVERY_O(IMPL_SPARSECOORDINATES)
       void *t, StridedMemRefType<V, 0> *vref,                                  \
       StridedMemRefType<index_type, 1> *dimCoordsRef) {                        \
     assert(t &&vref);                                                          \
-    auto &tensor = *static_cast<SparseTensorStorageBase *>(t);                 \
     ASSERT_NO_STRIDE(dimCoordsRef);                                            \
     const index_type *dimCoords = MEMREF_GET_PAYLOAD(dimCoordsRef);            \
     assert(dimCoords);                                                         \
     const V *value = MEMREF_GET_PAYLOAD(vref);                                 \
-    tensor.forwardingInsert(dimCoords, *value);                                \
+    static_cast<SparseTensorStorageBase *>(t)->forwardingInsert(dimCoords,     \
+                                                                *value);       \
   }
 MLIR_SPARSETENSOR_FOREVERY_V(IMPL_FORWARDINGINSERT)
 #undef IMPL_FORWARDINGINSERT

>From 2833d39fdf0873cb2dc99b82805ede269d79d4f4 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Thu, 12 Oct 2023 16:50:57 -0700
Subject: [PATCH 4/5] revert to Peiming's version

---
 .../Transforms/SparseTensorConversion.cpp     | 217 +------------
 .../SparseTensor/convert_dense2sparse.mlir    | 292 ------------------
 2 files changed, 11 insertions(+), 498 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 88559ddaf0a4a29..4c2d6be29c02f43 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -241,6 +241,16 @@ class NewCallParams final {
     return true;
   }
 
+  /// Gets the dimension-to-level mapping.
+  //
+  // TODO: This is only ever used for passing into `genAddEltCall`;
+  // is there a better way to encapsulate that pattern (both to avoid
+  // this one-off getter, and to avoid potential mixups)?
+  Value getDimToLvl() const {
+    assert(isInitialized() && "Must initialize before getDimToLvl");
+    return params[kParamDim2Lvl];
+  }
+
   /// Generates a function call, with the current static parameters
   /// and the given dynamic arguments.
   Value genNewCall(Action action, Value ptr = Value()) {
@@ -289,74 +299,6 @@ static void genDelCOOCall(OpBuilder &builder, Location loc, Type elemTp,
   createFuncCall(builder, loc, name, {}, coo, EmitCInterface::Off);
 }
 
-<<<<<<< HEAD
-=======
-/// Generates a call to release/delete a `SparseTensorIterator`.
-static void genDelIteratorCall(OpBuilder &builder, Location loc, Type elemTp,
-                               Value iter) {
-  SmallString<26> name{"delSparseTensorIterator",
-                       primaryTypeFunctionSuffix(elemTp)};
-  createFuncCall(builder, loc, name, {}, iter, EmitCInterface::Off);
-}
-
-/// Generates a call with a forwarding insertion.
-static void genForwardingInsert(OpBuilder &builder, Location loc, Type eltType,
-                                Value tensor, Value valPtr, Value dimCoords) {
-  SmallString<9> name{"forwardingInsert", primaryTypeFunctionSuffix(eltType)};
-  SmallVector<Value, 4> params{tensor, valPtr, dimCoords};
-  Type pTp = getOpaquePointerType(builder);
-  createFuncCall(builder, loc, name, pTp, params, EmitCInterface::On);
-}
-
-/// Generates a call to `iter->getNext()`.  If there is a next element,
-/// then it is copied into the out-parameters `coords` and `elemPtr`,
-/// and the return value is true.  If there isn't a next element, then
-/// the return value is false.
-///
-/// The `coords` argument uses the same coordinate-space as the `iter`
-/// (which can be either dim- or lvl-coords, depending on context).
-static Value genGetNextCall(OpBuilder &builder, Location loc, Value iter,
-                            Value coords, Value elemPtr) {
-  Type elemTp = cast<ShapedType>(elemPtr.getType()).getElementType();
-  SmallString<10> name{"getNext", primaryTypeFunctionSuffix(elemTp)};
-  SmallVector<Value, 3> params{iter, coords, elemPtr};
-  Type i1 = builder.getI1Type();
-  return createFuncCall(builder, loc, name, i1, params, EmitCInterface::On)
-      .getResult(0);
-}
-
-/// Loads the value stored in `elemPtr`, and stores it at the coordinates
-/// `cvs` into a dense tensor created by `allocDenseTensor`.
-static void insertScalarIntoDenseTensor(OpBuilder &builder, Location loc,
-                                        Value elemPtr, Value tensor,
-                                        ValueRange cvs) {
-  Value elemV = builder.create<memref::LoadOp>(loc, elemPtr);
-  builder.create<memref::StoreOp>(loc, elemV, tensor, cvs);
-}
-
-/// Determine if the runtime library supports direct conversion to the
-/// given target `dimTypes`.
-static bool canUseDirectConversion(ArrayRef<DimLevelType> dimTypes) {
-  bool alreadyCompressed = false;
-  for (const auto dlt : dimTypes) {
-    if (isCompressedDLT(dlt)) {
-      if (alreadyCompressed)
-        return false; // Multiple compressed dimensions not yet supported.
-      alreadyCompressed = true;
-    } else if (isDenseDLT(dlt)) {
-      if (alreadyCompressed)
-        return false; // Dense after Compressed not yet supported.
-    } else if (isSingletonDLT(dlt)) {
-      // Direct conversion doesn't have any particular problems with
-      // singleton after compressed.
-    } else { // TODO: investigate
-      return false;
-    }
-  }
-  return true;
-}
-
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 //===----------------------------------------------------------------------===//
 // Conversion rules.
 //===----------------------------------------------------------------------===//
@@ -540,7 +482,6 @@ class SparseTensorReorderCOOConverter
     const auto srcTp = getSparseTensorType(op.getInputCoo());
     const auto dstTp = getSparseTensorType(op);
 
-<<<<<<< HEAD
     const Value src = adaptor.getInputCoo();
 
     NewCallParams params(rewriter, loc);
@@ -548,142 +489,6 @@ class SparseTensorReorderCOOConverter
     rewriter.replaceOp(op, params.genBuffers(dstTp, dimSizes)
                                .genNewCall(Action::kSortCOOInPlace, src));
 
-=======
-    const Dimension dimRank = srcTp.getDimRank();
-    const Type elemTp = srcTp.getElementType();
-    const Value src = adaptor.getOperands()[0];
-    if (srcTp.hasEncoding() && dstTp.hasEncoding()) {
-      const auto srcEnc = srcTp.getEncoding();
-      const auto dstEnc = dstTp.getEncoding();
-      // This is a sparse => sparse conversion, which is handled as follows:
-      //   t = src->toCOO();         ; src to COO in dst order
-      //   dst = newSparseTensor(t)
-      // Using the coordinate scheme as an intermediate does not always
-      // yield the fastest conversion but avoids the need for a full
-      // O(N^2) conversion matrix.
-      if (dstEnc == srcEnc) {
-        rewriter.replaceOp(op, adaptor.getOperands()); // hidden nop cast
-        return success();
-      }
-      NewCallParams params(rewriter, loc);
-      SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, srcTp, src);
-      bool useDirectConversion;
-      switch (options.sparseToSparseStrategy) {
-      case SparseToSparseConversionStrategy::kViaCOO:
-        useDirectConversion = false;
-        break;
-      case SparseToSparseConversionStrategy::kDirect:
-        useDirectConversion = true;
-        assert(canUseDirectConversion(dstEnc.getLvlTypes()) &&
-               "Unsupported target for direct sparse-to-sparse conversion");
-        break;
-      case SparseToSparseConversionStrategy::kAuto:
-        useDirectConversion = canUseDirectConversion(dstEnc.getLvlTypes());
-        break;
-      }
-      if (useDirectConversion) {
-        rewriter.replaceOp(
-            op, params.genBuffers(srcTp.withEncoding(dstEnc), dimSizes)
-                    .genNewCall(Action::kSparseToSparse, src));
-      } else { // use via-COO conversion.
-        // Set up encoding with right mix of src and dst so that the two
-        // method calls can share most parameters, while still providing
-        // the correct sparsity information to either of them.
-        const auto mixedEnc =
-            dstEnc.withBitWidths(srcEnc.getPosWidth(), srcEnc.getCrdWidth());
-        // TODO: This is the only place where `kToCOO` (or `kToIterator`)
-        // is called with a non-identity permutation.  Is there any clean
-        // way to push the permutation over to the `kFromCOO` side instead?
-        Value coo = params.genBuffers(srcTp.withEncoding(mixedEnc), dimSizes)
-                        .genNewCall(Action::kToCOO, src);
-        Value dst = params.setTemplateTypes(srcTp.withEncoding(dstEnc))
-                        .genNewCall(Action::kFromCOO, coo);
-        genDelCOOCall(rewriter, loc, elemTp, coo);
-        rewriter.replaceOp(op, dst);
-      }
-      return success();
-    }
-    if (srcTp.hasEncoding() && !dstTp.hasEncoding()) {
-      const auto srcEnc = srcTp.getEncoding();
-      // This is sparse => dense conversion, which is handled as follows:
-      //   dst = new Tensor(0);
-      //   iter = new SparseTensorIterator(src);
-      //   while (elem = iter->getNext()) {
-      //     dst[elem.coords] = elem.value;
-      //   }
-      //   delete iter;
-      //
-      // Fabricate a no-permutation encoding for NewCallParams
-      // The position/coordinate types must be those of `src`.
-      // The dimLevelTypes aren't actually used by Action::kToIterator.
-      const auto dstEnc = SparseTensorEncodingAttr::get(
-          op->getContext(),
-          SmallVector<DimLevelType>(dimRank, DimLevelType::Dense), AffineMap(),
-          AffineMap(), srcEnc.getPosWidth(), srcEnc.getCrdWidth());
-      SmallVector<Value> dimSizes = getDimSizes(rewriter, loc, srcTp, src);
-      Value iter = NewCallParams(rewriter, loc)
-                       .genBuffers(dstTp.withEncoding(dstEnc), dimSizes)
-                       .genNewCall(Action::kToIterator, src);
-      const Type iTp = rewriter.getIndexType();
-      Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp);
-      Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
-      // TODO: Dense buffers should be allocated/deallocated via the callback
-      // in BufferizationOptions.
-      Value dst = allocDenseTensor(rewriter, loc, dstTp, dimSizes);
-      const SmallVector<Value> noArgs;
-      const SmallVector<Type> noTypes;
-      auto whileOp = rewriter.create<scf::WhileOp>(loc, noTypes, noArgs);
-      Block *before = rewriter.createBlock(&whileOp.getBefore(), {}, noTypes);
-      rewriter.setInsertionPointToEnd(before);
-      Value cond = genGetNextCall(rewriter, loc, iter, dimCoords, elemPtr);
-      rewriter.create<scf::ConditionOp>(loc, cond, before->getArguments());
-      Block *after = rewriter.createBlock(&whileOp.getAfter(), {}, noTypes);
-      rewriter.setInsertionPointToStart(after);
-      const auto dcvs = loadAll(rewriter, loc, dimRank, dimCoords);
-      insertScalarIntoDenseTensor(rewriter, loc, elemPtr, dst, dcvs);
-      rewriter.create<scf::YieldOp>(loc);
-      rewriter.setInsertionPointAfter(whileOp);
-      genDelIteratorCall(rewriter, loc, elemTp, iter);
-      rewriter.replaceOpWithNewOp<bufferization::ToTensorOp>(
-          op, dstTp.getRankedTensorType(), dst);
-      return success();
-    }
-    assert(!srcTp.hasEncoding() && dstTp.hasEncoding());
-    // This is a "dense => sparse conversion" or a "sparse constant => sparse
-    // conversion" which is conceptually handled as follows, with an additional
-    // test for nonzero values for the dense case.
-    //
-    //   st = newSparseTensor()                  ; ST with forwarding COO
-    //   for i1 in dim1, ..., ik in dimk         ; loop nest or range(NNZ) loop
-    //     val = a[i1,..,ik]                     ;
-    //     st->insertForwarding(val, [i1,..,ik]) ; maps dim to level
-    //   st->endForwardingInsert()               ; finalize forwarding
-    //
-    // Note that the traversal code is actually implemented using MLIR IR to
-    // avoid having to expose too much low-level memref traversal details to
-    // the runtime support library.
-    SmallVector<Value> dimSizes;
-    sizesFromSrc(rewriter, dimSizes, loc, src);
-    NewCallParams params(rewriter, loc);
-    Value tensor =
-        params.genBuffers(dstTp, dimSizes).genNewCall(Action::kEmptyForward);
-    const Type iTp = rewriter.getIndexType();
-    Value dimCoords = genAlloca(rewriter, loc, dimRank, iTp);
-    Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
-    genDenseTensorOrSparseConstantIterLoop(
-        rewriter, loc, src, dimRank,
-        [&](OpBuilder &builder, Location loc, Value val, ValueRange dcvs) {
-          assert(dcvs.size() == static_cast<size_t>(dimRank));
-          storeAll(builder, loc, dimCoords, dcvs);
-          builder.create<memref::StoreOp>(loc, val, elemPtr);
-          genForwardingInsert(builder, loc, elemTp, tensor, elemPtr, dimCoords);
-        });
-    // Final call to construct sparse tensor storage.
-    StringRef name = "endForwardingInsert";
-    createFuncCall(rewriter, op->getLoc(), name, {}, tensor,
-                   EmitCInterface::Off);
-    rewriter.replaceOp(op, tensor);
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
     return success();
   }
 };
@@ -801,7 +606,7 @@ class SparseTensorLoadConverter : public OpConversionPattern<LoadOp> {
                   ConversionPatternRewriter &rewriter) const override {
     if (op.getHasInserts()) {
       // Finalize any pending insertions.
-      StringRef name = "endLexInsert";
+      StringRef name = "endInsert";
       createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(),
                      EmitCInterface::Off);
     }
diff --git a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
index aaa2c6f2a91106c..4dba16df39f5c65 100644
--- a/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_dense2sparse.mlir
@@ -16,215 +16,45 @@
   map = (d0, d1, d2) -> (d2 : dense, d0 : compressed, d1 : compressed)
 }>
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_1d
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_convert_1d(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<?xi32>) -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 6 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 1 : index
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 8 : i8
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 0 : index
-// CHECK:           %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?xi32>
-// CHECK:           %[[VAL_8:.*]] = memref.alloca() : memref<1xi8>
-// CHECK:           %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<1xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<1xi8>
-// CHECK:           %[[VAL_10:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<1xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<1xindex>
-// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<1xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<1xindex>
-// CHECK:           %[[VAL_14:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_15:.*]] = call @newSparseTensor(%[[VAL_11]], %[[VAL_11]], %[[VAL_9]], %[[VAL_13]], %[[VAL_13]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_1]], %[[VAL_14]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<1xindex> to memref<?xindex>
-// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<i32>
-// CHECK:           scf.for %[[VAL_19:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_4]] {
-// CHECK:             %[[VAL_20:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_19]]] : tensor<?xi32>
-// CHECK:             %[[VAL_21:.*]] = arith.cmpi ne, %[[VAL_20]], %[[VAL_3]] : i32
-// CHECK:             scf.if %[[VAL_21]] {
-// CHECK:               memref.store %[[VAL_19]], %[[VAL_16]]{{\[}}%[[VAL_6]]] : memref<1xindex>
-// CHECK:               memref.store %[[VAL_20]], %[[VAL_18]][] : memref<i32>
-// CHECK:               %[[VAL_22:.*]] = func.call @forwardingInsertI32(%[[VAL_15]], %[[VAL_18]], %[[VAL_17]]) : (!llvm.ptr<i8>, memref<i32>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:             }
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_15]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_15]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_1d(%arg0: tensor<?xi32>) -> tensor<?xi32, #SparseVector> {
   %0 = sparse_tensor.convert %arg0 : tensor<?xi32> to tensor<?xi32, #SparseVector>
   return %0 : tensor<?xi32, #SparseVector>
 }
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_complex
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_convert_complex(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<100xcomplex<f64>>) -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_1:.*]] = complex.constant [0.000000e+00, 0.000000e+00] : complex<f64>
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 9 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 100 : index
-// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 8 : i8
-// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 1 : index
-// CHECK:           %[[VAL_9:.*]] = memref.alloca() : memref<1xi8>
-// CHECK:           %[[VAL_10:.*]] = memref.cast %[[VAL_9]] : memref<1xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref<1xi8>
-// CHECK:           %[[VAL_11:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_12:.*]] = memref.cast %[[VAL_11]] : memref<1xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref<1xindex>
-// CHECK:           %[[VAL_13:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<1xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref<1xindex>
-// CHECK:           %[[VAL_15:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_16:.*]] = call @newSparseTensor(%[[VAL_12]], %[[VAL_12]], %[[VAL_10]], %[[VAL_14]], %[[VAL_14]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_15]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_17:.*]] = memref.alloca() : memref<1xindex>
-// CHECK:           %[[VAL_18:.*]] = memref.cast %[[VAL_17]] : memref<1xindex> to memref<?xindex>
-// CHECK:           %[[VAL_19:.*]] = memref.alloca() : memref<complex<f64>>
-// CHECK:           scf.for %[[VAL_20:.*]] = %[[VAL_5]] to %[[VAL_6]] step %[[VAL_8]] {
-// CHECK:             %[[VAL_21:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_20]]] : tensor<100xcomplex<f64>>
-// CHECK:             %[[VAL_22:.*]] = complex.neq %[[VAL_21]], %[[VAL_1]] : complex<f64>
-// CHECK:             scf.if %[[VAL_22]] {
-// CHECK:               memref.store %[[VAL_20]], %[[VAL_17]]{{\[}}%[[VAL_5]]] : memref<1xindex>
-// CHECK:               memref.store %[[VAL_21]], %[[VAL_19]][] : memref<complex<f64>>
-// CHECK:               %[[VAL_23:.*]] = func.call @forwardingInsertC64(%[[VAL_16]], %[[VAL_19]], %[[VAL_18]]) : (!llvm.ptr<i8>, memref<complex<f64>>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:             }
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_16]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_16]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_complex(%arg0: tensor<100xcomplex<f64>>) -> tensor<100xcomplex<f64>, #SparseVector> {
   %0 = sparse_tensor.convert %arg0 : tensor<100xcomplex<f64>> to tensor<100xcomplex<f64>, #SparseVector>
   return %0 : tensor<100xcomplex<f64>, #SparseVector>
 }
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_2d
 // CHECK:           sparse_tensor.foreach
 // CHECK:            scf.if
 // CHECK:              sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_convert_2d(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<2x4xf64>) -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : index
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 2 : index
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 1 : index
-// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 4 : index
-// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 4 : i8
-// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 8 : i8
-// CHECK:           %[[VAL_10:.*]] = memref.alloca() : memref<2xi8>
-// CHECK:           %[[VAL_11:.*]] = memref.cast %[[VAL_10]] : memref<2xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref<2xi8>
-// CHECK:           memref.store %[[VAL_9]], %[[VAL_10]]{{\[}}%[[VAL_6]]] : memref<2xi8>
-// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_12]]{{\[}}%[[VAL_4]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<2xindex>
-// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_4]], %[[VAL_14]]{{\[}}%[[VAL_4]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<2xindex>
-// CHECK:           %[[VAL_16:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_17:.*]] = call @newSparseTensor(%[[VAL_13]], %[[VAL_13]], %[[VAL_11]], %[[VAL_15]], %[[VAL_15]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_2]], %[[VAL_16]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<2xindex> to memref<?xindex>
-// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<f64>
-// CHECK:           scf.for %[[VAL_21:.*]] = %[[VAL_4]] to %[[VAL_5]] step %[[VAL_6]] {
-// CHECK:             scf.for %[[VAL_22:.*]] = %[[VAL_4]] to %[[VAL_7]] step %[[VAL_6]] {
-// CHECK:               %[[VAL_23:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_21]], %[[VAL_22]]] : tensor<2x4xf64>
-// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_23]], %[[VAL_1]] : f64
-// CHECK:               scf.if %[[VAL_24]] {
-// CHECK:                 memref.store %[[VAL_21]], %[[VAL_18]]{{\[}}%[[VAL_4]]] : memref<2xindex>
-// CHECK:                 memref.store %[[VAL_22]], %[[VAL_18]]{{\[}}%[[VAL_6]]] : memref<2xindex>
-// CHECK:                 memref.store %[[VAL_23]], %[[VAL_20]][] : memref<f64>
-// CHECK:                 %[[VAL_25:.*]] = func.call @forwardingInsertF64(%[[VAL_17]], %[[VAL_20]], %[[VAL_19]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:               }
-// CHECK:             }
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_17]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_17]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_2d(%arg0: tensor<2x4xf64>) -> tensor<2x4xf64, #CSR> {
   %0 = sparse_tensor.convert %arg0 : tensor<2x4xf64> to tensor<2x4xf64, #CSR>
   return %0 : tensor<2x4xf64, #CSR>
 }
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_constant
 // CHECK:           sparse_tensor.foreach
 // CHECK-NOT:         scf.if
 // CHECK:               sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_constant() -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32>
-// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64>
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 2 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 8 : index
-// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 7 : index
-// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 4 : i8
-// CHECK-DAG:       %[[VAL_10:.*]] = arith.constant 8 : i8
-// CHECK-DAG:       %[[VAL_11:.*]] = arith.constant 2 : index
-// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xi8>
-// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8>
-// CHECK:           memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8>
-// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_18:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_19:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_15]], %[[VAL_13]], %[[VAL_17]], %[[VAL_17]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_18]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref<?xindex>
-// CHECK:           %[[VAL_22:.*]] = memref.alloca() : memref<f32>
-// CHECK:           scf.for %[[VAL_23:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] {
-// CHECK:             %[[VAL_24:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_5]]] : tensor<2x2xi64>
-// CHECK:             %[[VAL_25:.*]] = arith.index_cast %[[VAL_24]] : i64 to index
-// CHECK:             %[[VAL_26:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_23]], %[[VAL_7]]] : tensor<2x2xi64>
-// CHECK:             %[[VAL_27:.*]] = arith.index_cast %[[VAL_26]] : i64 to index
-// CHECK:             %[[VAL_28:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_23]]] : tensor<2xf32>
-// CHECK:             memref.store %[[VAL_25]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:             memref.store %[[VAL_27]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:             memref.store %[[VAL_28]], %[[VAL_22]][] : memref<f32>
-// CHECK:             %[[VAL_29:.*]] = func.call @forwardingInsertF32(%[[VAL_19]], %[[VAL_22]], %[[VAL_21]]) : (!llvm.ptr<i8>, memref<f32>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_19]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_19]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
   // Initialize a tensor.
   %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>
@@ -233,67 +63,12 @@ func.func @sparse_constant() -> tensor<8x7xf32, #CSR>{
   return %1 : tensor<8x7xf32, #CSR>
 }
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_constant_csc
 // CHECK:           sparse_tensor.foreach
 // CHECK-NOT:         scf.if
 // CHECK:               sparse_tensor.insert
 // CHECK-NOT:       sparse_tensor.reorder_coo
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_constant_csc() -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_0:.*]] = arith.constant dense<[1.000000e+00, 5.000000e+00]> : tensor<2xf32>
-// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant dense<{{\[\[}}0, 0], [1, 6]]> : tensor<2x2xi64>
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 2 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 0 : index
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 8 : index
-// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 7 : index
-// CHECK-DAG:       %[[VAL_9:.*]] = arith.constant 4 : i8
-// CHECK-DAG:       %[[VAL_10:.*]] = arith.constant 8 : i8
-// CHECK-DAG:       %[[VAL_11:.*]] = arith.constant 2 : index
-// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<2xi8>
-// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<2xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_9]], %[[VAL_12]]{{\[}}%[[VAL_5]]] : memref<2xi8>
-// CHECK:           memref.store %[[VAL_10]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<2xi8>
-// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_14]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_16:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_17:.*]] = memref.cast %[[VAL_16]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_16]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_16]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_18:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_19:.*]] = memref.cast %[[VAL_18]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_18]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_20:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<2xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_20]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_20]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:           %[[VAL_22:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_23:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_21]], %[[VAL_13]], %[[VAL_17]], %[[VAL_19]], %[[VAL_4]], %[[VAL_4]], %[[VAL_3]], %[[VAL_2]], %[[VAL_22]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_24:.*]] = memref.alloca() : memref<2xindex>
-// CHECK:           %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<2xindex> to memref<?xindex>
-// CHECK:           %[[VAL_26:.*]] = memref.alloca() : memref<f32>
-// CHECK:           scf.for %[[VAL_27:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_7]] {
-// CHECK:             %[[VAL_28:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_5]]] : tensor<2x2xi64>
-// CHECK:             %[[VAL_29:.*]] = arith.index_cast %[[VAL_28]] : i64 to index
-// CHECK:             %[[VAL_30:.*]] = tensor.extract %[[VAL_1]]{{\[}}%[[VAL_27]], %[[VAL_7]]] : tensor<2x2xi64>
-// CHECK:             %[[VAL_31:.*]] = arith.index_cast %[[VAL_30]] : i64 to index
-// CHECK:             %[[VAL_32:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_27]]] : tensor<2xf32>
-// CHECK:             memref.store %[[VAL_29]], %[[VAL_24]]{{\[}}%[[VAL_5]]] : memref<2xindex>
-// CHECK:             memref.store %[[VAL_31]], %[[VAL_24]]{{\[}}%[[VAL_7]]] : memref<2xindex>
-// CHECK:             memref.store %[[VAL_32]], %[[VAL_26]][] : memref<f32>
-// CHECK:             %[[VAL_33:.*]] = func.call @forwardingInsertF32(%[[VAL_23]], %[[VAL_26]], %[[VAL_25]]) : (!llvm.ptr<i8>, memref<f32>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_23]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_23]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
   // Initialize a tensor.
   %0 = arith.constant sparse<[[0, 0], [1, 6]], [1.0, 5.0]> : tensor<8x7xf32>
@@ -302,7 +77,6 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
   return %1 : tensor<8x7xf32, #CSC>
 }
 
-<<<<<<< HEAD
 // CHECK-LABEL:   func.func @sparse_convert_3d
 // CHECK:           sparse_tensor.foreach
 // CHECK:             scf.if
@@ -312,72 +86,6 @@ func.func @sparse_constant_csc() -> tensor<8x7xf32, #CSC>{
 // CHECK:           sparse_tensor.foreach
 // CHECK:             sparse_tensor.insert
 // CHECK:           sparse_tensor.load
-=======
-// CHECK-LABEL:   func.func @sparse_convert_3d(
-// CHECK-SAME:      %[[VAL_0:.*]]: tensor<?x?x?xf64>) -> !llvm.ptr<i8> {
-// CHECK-DAG:       %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64
-// CHECK-DAG:       %[[VAL_2:.*]] = arith.constant 1 : i32
-// CHECK-DAG:       %[[VAL_3:.*]] = arith.constant 0 : i32
-// CHECK-DAG:       %[[VAL_4:.*]] = arith.constant 8 : i8
-// CHECK-DAG:       %[[VAL_5:.*]] = arith.constant 4 : i8
-// CHECK-DAG:       %[[VAL_6:.*]] = arith.constant 2 : index
-// CHECK-DAG:       %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK-DAG:       %[[VAL_8:.*]] = arith.constant 0 : index
-// CHECK:           %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_8]] : tensor<?x?x?xf64>
-// CHECK:           %[[VAL_10:.*]] = tensor.dim %[[VAL_0]], %[[VAL_7]] : tensor<?x?x?xf64>
-// CHECK:           %[[VAL_11:.*]] = tensor.dim %[[VAL_0]], %[[VAL_6]] : tensor<?x?x?xf64>
-// CHECK:           %[[VAL_12:.*]] = memref.alloca() : memref<3xi8>
-// CHECK:           %[[VAL_13:.*]] = memref.cast %[[VAL_12]] : memref<3xi8> to memref<?xi8>
-// CHECK:           memref.store %[[VAL_5]], %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref<3xi8>
-// CHECK:           memref.store %[[VAL_4]], %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref<3xi8>
-// CHECK:           memref.store %[[VAL_4]], %[[VAL_12]]{{\[}}%[[VAL_6]]] : memref<3xi8>
-// CHECK:           %[[VAL_14:.*]] = memref.alloca() : memref<3xindex>
-// CHECK:           %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<3xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_9]], %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_10]], %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_11]], %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:           %[[VAL_16:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:           %[[VAL_17:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:           %[[VAL_18:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:           %[[VAL_19:.*]] = memref.alloca() : memref<3xindex>
-// CHECK:           %[[VAL_20:.*]] = memref.cast %[[VAL_19]] : memref<3xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_19]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_19]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:           %[[VAL_21:.*]] = memref.alloca() : memref<3xindex>
-// CHECK:           %[[VAL_22:.*]] = memref.cast %[[VAL_21]] : memref<3xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_6]], %[[VAL_21]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_8]], %[[VAL_21]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_7]], %[[VAL_21]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:           %[[VAL_23:.*]] = memref.alloca() : memref<3xindex>
-// CHECK:           %[[VAL_24:.*]] = memref.cast %[[VAL_23]] : memref<3xindex> to memref<?xindex>
-// CHECK:           memref.store %[[VAL_16]], %[[VAL_23]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_17]], %[[VAL_23]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:           memref.store %[[VAL_18]], %[[VAL_23]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:           %[[VAL_25:.*]] = llvm.mlir.zero : !llvm.ptr<i8>
-// CHECK:           %[[VAL_26:.*]] = call @newSparseTensor(%[[VAL_15]], %[[VAL_24]], %[[VAL_13]], %[[VAL_20]], %[[VAL_22]], %[[VAL_3]], %[[VAL_3]], %[[VAL_2]], %[[VAL_2]], %[[VAL_25]]) : (memref<?xindex>, memref<?xindex>, memref<?xi8>, memref<?xindex>, memref<?xindex>, i32, i32, i32, i32, !llvm.ptr<i8>) -> !llvm.ptr<i8>
-// CHECK:           %[[VAL_27:.*]] = memref.alloca() : memref<3xindex>
-// CHECK:           %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<3xindex> to memref<?xindex>
-// CHECK:           %[[VAL_29:.*]] = memref.alloca() : memref<f64>
-// CHECK:           scf.for %[[VAL_30:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_7]] {
-// CHECK:             scf.for %[[VAL_31:.*]] = %[[VAL_8]] to %[[VAL_10]] step %[[VAL_7]] {
-// CHECK:               scf.for %[[VAL_32:.*]] = %[[VAL_8]] to %[[VAL_11]] step %[[VAL_7]] {
-// CHECK:                 %[[VAL_33:.*]] = tensor.extract %[[VAL_0]]{{\[}}%[[VAL_30]], %[[VAL_31]], %[[VAL_32]]] : tensor<?x?x?xf64>
-// CHECK:                 %[[VAL_34:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_1]] : f64
-// CHECK:                 scf.if %[[VAL_34]] {
-// CHECK:                   memref.store %[[VAL_30]], %[[VAL_27]]{{\[}}%[[VAL_8]]] : memref<3xindex>
-// CHECK:                   memref.store %[[VAL_31]], %[[VAL_27]]{{\[}}%[[VAL_7]]] : memref<3xindex>
-// CHECK:                   memref.store %[[VAL_32]], %[[VAL_27]]{{\[}}%[[VAL_6]]] : memref<3xindex>
-// CHECK:                   memref.store %[[VAL_33]], %[[VAL_29]][] : memref<f64>
-// CHECK:                   %[[VAL_35:.*]] = func.call @forwardingInsertF64(%[[VAL_26]], %[[VAL_29]], %[[VAL_28]]) : (!llvm.ptr<i8>, memref<f64>, memref<?xindex>) -> !llvm.ptr<i8>
-// CHECK:                 }
-// CHECK:               }
-// CHECK:             }
-// CHECK:           }
-// CHECK:           call @endForwardingInsert(%[[VAL_26]]) : (!llvm.ptr<i8>) -> ()
-// CHECK:           return %[[VAL_26]] : !llvm.ptr<i8>
-// CHECK:         }
->>>>>>> 3d8f29859dbb ([mlir][sparse] refactor dense2sparse and const2sparse conversion)
 func.func @sparse_convert_3d(%arg0: tensor<?x?x?xf64>) -> tensor<?x?x?xf64, #SparseTensor> {
   %0 = sparse_tensor.convert %arg0 : tensor<?x?x?xf64> to tensor<?x?x?xf64, #SparseTensor>
   return %0 : tensor<?x?x?xf64, #SparseTensor>

>From fc6b8745dd9f0f5040687daea541ba9470f4ed3c Mon Sep 17 00:00:00 2001
From: Aart Bik <39774503+aartbik at users.noreply.github.com>
Date: Thu, 12 Oct 2023 17:55:09 -0700
Subject: [PATCH 5/5] Update SparseTensorConversion.cpp

---
 .../Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 8e2dbcf864f97ba..ce3b49915319ceb 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -596,7 +596,7 @@ class SparseTensorLoadConverter : public OpConversionPattern<LoadOp> {
                   ConversionPatternRewriter &rewriter) const override {
     if (op.getHasInserts()) {
       // Finalize any pending insertions.
-      StringRef name = "endInsert";
+      StringRef name = "endLexInsert";
       createFuncCall(rewriter, op->getLoc(), name, {}, adaptor.getOperands(),
                      EmitCInterface::Off);
     }