[Mlir-commits] [mlir] 36c95ee - [mlir][sparse] group tensor id and levels into pairs in loop emitter
Peiming Liu
llvmlistbot at llvm.org
Thu May 4 09:15:47 PDT 2023
Author: Peiming Liu
Date: 2023-05-04T16:15:42Z
New Revision: 36c95ee739c0d7e49dc69e0d8d86d30667c14d49
URL: https://github.com/llvm/llvm-project/commit/36c95ee739c0d7e49dc69e0d8d86d30667c14d49
DIFF: https://github.com/llvm/llvm-project/commit/36c95ee739c0d7e49dc69e0d8d86d30667c14d49.diff
LOG: [mlir][sparse] group tensor id and levels into pairs in loop emitter
This addresses some unresolved comments in https://reviews.llvm.org/D142930.
Reviewed By: aartbik, wrengr
Differential Revision: https://reviews.llvm.org/D148565
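The pair compression introduced by this patch is simple arithmetic: a TensorLevel packs (tid, lvl) as lvl * numTensors + tid, so unpacking is a mod/div pair. A minimal standalone sketch follows; it mirrors the makeTensorLevel/unpackTensorLevel helpers added to LoopEmitter.h, but the free functions, the explicit numTensors parameter, and the driver are illustrative only, not part of the patch.

// Sketch only: TensorLevel packs a (TensorId, Level) pair into one unsigned.
#include <cassert>
#include <cstdio>
#include <utility>

using TensorId = unsigned;
using Level = unsigned;
using TensorLevel = unsigned;

// Packs (t, l) as l * numTensors + t; valid whenever t < numTensors.
static TensorLevel makeTensorLevel(TensorId t, Level l, unsigned numTensors) {
  assert(t < numTensors && "tensor ids must be dense in [0, numTensors)");
  return l * numTensors + t;
}

// Recovers the pair with a mod/div, the inverse of the packing above.
static std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tl,
                                                    unsigned numTensors) {
  return {tl % numTensors, tl / numTensors};
}

int main() {
  const unsigned numTensors = 3; // e.g., two inputs + one output
  for (TensorId t = 0; t < numTensors; ++t)
    for (Level l = 0; l < 4; ++l) {
      auto [t2, l2] = unpackTensorLevel(makeTensorLevel(t, l, numTensors),
                                        numTensors);
      assert(t2 == t && l2 == l); // every pair round-trips losslessly
    }
  std::printf("all (tid, lvl) pairs round-trip\n");
}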
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
index 1ee5c19b284fe..bf8bd722a50ad 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
@@ -85,6 +85,25 @@ class CodegenEnv {
return latticeMerger.getDimLevelType(b);
}
+ //
+ // LoopEmitter delegates.
+ //
+
+ constexpr TensorLevel makeTensorLevel(TensorId t, Level l) const {
+ // Make sure LoopEmitter, GenericOp, and Merger agree on the number of
+ // tensors. Merger has one more synthetic tensor for loop invariants.
+ assert(loopEmitter.getNumTensors() == linalgOp->getNumOperands() &&
+ loopEmitter.getNumTensors() == latticeMerger.getNumTensors() - 1);
+ return loopEmitter.makeTensorLevel(t, l);
+ }
+ std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tl) const {
+ return loopEmitter.unpackTensorLevel(tl);
+ }
+ template <class ContainerTy>
+ auto unpackTensorLevelRange(ContainerTy &&c) const {
+ return loopEmitter.unpackTensorLevelRange(std::forward<ContainerTy>(c));
+ }
+
//
// Code generation environment verify functions.
//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
index ba6b4641408a5..731a1a9e460e5 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp
@@ -456,13 +456,12 @@ void LoopEmitter::initializeLoopEmit(OpBuilder &builder, Location loc,
}
void LoopEmitter::enterNewLoopSeq(OpBuilder &builder, Location loc,
- ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls) {
+ ArrayRef<TensorLevel> tidLvls) {
// TODO: sort
assert(loopSeqStack.size() == loopStack.size());
// Prepares for all the tensors used in the current loop sequence.
std::vector<std::tuple<TensorId, Level, bool>> slicedTids;
- for (auto [tid, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
if (!dependentLvlMap[tid][lvl].empty()) {
bool fullyRed = genSliceBegin(builder, loc, tid, lvl);
slicedTids.emplace_back(tid, lvl, fullyRed);
@@ -660,17 +659,19 @@ Operation *LoopEmitter::emitWhileLoopOverSliceAtSparseLvl(
return loop;
}
-Operation *LoopEmitter::enterLoopOverTensorAtLvl(
- OpBuilder &builder, Location loc, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls, MutableArrayRef<Value> reduc, bool isParallel) {
+Operation *LoopEmitter::enterLoopOverTensorAtLvl(OpBuilder &builder,
+ Location loc,
+ ArrayRef<TensorLevel> tidLvls,
+ MutableArrayRef<Value> reduc,
+ bool isParallel) {
// TODO: support multiple return on parallel for?
assert(!isParallel || reduc.size() <= 1);
bool isSparseCond = false, isSparseSliceCond = false;
- size_t tid = tids.front(), lvl = lvls.front();
+ auto [tid, lvl] = unpackTensorLevel(tidLvls.front());
// Finds out the tensor level that we should use to generate loops. Among all
// the tensor levels, there is at most one sparse tensor level.
- for (auto [t, l] : llvm::zip(tids, lvls)) {
+ for (auto [t, l] : unpackTensorLevelRange(tidLvls)) {
assert(lvlTypes[t].size() > l); // Must be a valid tid, dim pair
assert(!coords[t][l] || // We cannot re-enter the same level
!dependentLvlMap[t][l].empty()); // unless it is a slice-driver loop
@@ -712,12 +713,9 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
Operation *l = nullptr;
// At most one tensor used as condition in for loop;
- SmallVector<TensorId, 1> condTid;
- SmallVector<Level, 1> condLvl;
- // There Might be multiple dense slice driven tensor.
- SmallVector<TensorId> sliceTids;
- SmallVector<Level> sliceLvls;
- SmallVector<bool> sliceReduc;
+ SmallVector<TensorLevel, 1> condTidLvl;
+ // There might be multiple dense slice-driven tensors.
+ SmallVector<SliceLoopInfo> sliceDrivenInfo;
// Generates loops differently depending on whether we need a slice-driven
// loop or a simple level traversal loop.
@@ -734,9 +732,7 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
lvl, reduc);
}
levelReducedDep[tid][lvl]++;
- sliceTids.push_back(tid);
- sliceLvls.push_back(lvl);
- sliceReduc.push_back(fullyReduced);
+ sliceDrivenInfo.emplace_back(tid, lvl, fullyReduced);
} else {
Value lo = isSparseCond ? posits[tid][lvl] // current offset
: loopSeqStack.back().first; // universal index
@@ -747,21 +743,19 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
// Adjust for loop hi for dense slice-driven loop.
if (fullyReduced) {
hi = sliceSz;
- condTid.push_back(tid);
- condLvl.push_back(lvl);
+ condTidLvl.push_back(makeTensorLevel(tid, lvl));
} else {
hi = SUBI(lvlSizes[tid][lvl], sliceSz);
hi = ADDI(hi, C_IDX(1));
}
} else {
- condTid.push_back(tid);
- condLvl.push_back(lvl);
+ condTidLvl.push_back(makeTensorLevel(tid, lvl));
}
l = emitForLoopOverTensorAtLvl(builder, loc, tid, lvl, lo, hi, reduc,
isParallel);
}
Value iv = coords[tid][lvl];
- for (auto [t, l] : llvm::zip(tids, lvls)) {
+ for (auto [t, l] : unpackTensorLevelRange(tidLvls)) {
// We only need to handle slice-driven loops on dense level here.
// If it is a slice-driven loop on sparse level, it needs a while loop to
// insert break statements, and it must have been handled correctly in L692.
@@ -774,9 +768,7 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
} else {
// Puts sliced dense loop into LoopInfo so that LoopEmitter knows how to
// exit it.
- sliceTids.push_back(t);
- sliceLvls.push_back(l);
- sliceReduc.push_back(fullyReduc);
+ sliceDrivenInfo.emplace_back(t, l, fullyReduc);
// Update the slice information as we enter the new loop.
assert(*info.slicedOnLvl == l);
info.minCrd = info.offset = iv;
@@ -787,10 +779,10 @@ Operation *LoopEmitter::enterLoopOverTensorAtLvl(
}
// NOTE: we can also prepare for next dim here in advance
// Pushes the loop into stack.
- loopStack.emplace_back(condTid, condLvl, sliceTids, sliceLvls, sliceReduc, l,
+ loopStack.emplace_back(condTidLvl, sliceDrivenInfo, l,
builder.getInsertionBlock(), iv, loopTag);
// Emit extra locals.
- emitExtraLocalsForTensorsAtDenseLvls(builder, loc, tids, lvls);
+ emitExtraLocalsForTensorsAtDenseLvls(builder, loc, tidLvls);
return l;
}
@@ -854,16 +846,17 @@ Operation *LoopEmitter::enterFilterLoopOverTensorAtLvl(
// NOTE: we can also prepare for next lvl here in advance
// Push the loop into stack
- loopStack.emplace_back(ArrayRef<TensorId>(tid), ArrayRef<Level>(lvl),
- ArrayRef<TensorId>(), ArrayRef<Level>(),
- ArrayRef<bool>(), forOp, builder.getInsertionBlock(),
- coords[tid][lvl], nullptr);
+ loopStack.emplace_back(ArrayRef<TensorLevel>(makeTensorLevel(tid, lvl)),
+ ArrayRef<SliceLoopInfo>(), forOp,
+ builder.getInsertionBlock(), coords[tid][lvl],
+ nullptr);
return forOp;
}
void LoopEmitter::genDenseAffineAddress(OpBuilder &builder, Location loc,
- TensorId tid, Level lvl,
+ TensorLevel tidLvl,
AffineExpr lvlExpr) {
+ auto [tid, lvl] = unpackTensorLevel(tidLvl);
assert(isDenseDLT(lvlTypes[tid][lvl]));
// For dense levels, the level-coordinate also serves as the position.
Value lvlCrd = genAffine(builder, loc, lvlExpr);
@@ -871,16 +864,15 @@ void LoopEmitter::genDenseAffineAddress(OpBuilder &builder, Location loc,
}
Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
- OpBuilder &builder, Location loc, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls, bool needsUniv, MutableArrayRef<Value> reduc) {
+ OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
+ bool needsUniv, MutableArrayRef<Value> reduc) {
// NOTE: the slice driven tensor-related reduction variable must
// appear before normal tensors.
- assert(tids.size() == lvls.size());
SmallVector<Type> types;
SmallVector<Value> operands;
// Construct the while-loop with a parameter for each coordinate.
const Type indexType = builder.getIndexType();
- for (auto [tid, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
// TODO: support coiteration with slice driven tensors.
const auto lvlTp = lvlTypes[tid][lvl];
assert(dependentLvlMap[tid][lvl].empty() && "TODO: not yet implemented");
@@ -922,7 +914,7 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
builder.setInsertionPointToStart(&whileOp.getBefore().front());
Value cond;
unsigned o = 0;
- for (auto [t, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [t, lvl] : unpackTensorLevelRange(tidLvls)) {
const TensorId tid = t; // Why `t` can not be captured by lambda?
const auto lvlTp = lvlTypes[tid][lvl];
if (isCompressedDLT(lvlTp) || isSingletonDLT(lvlTp) ||
@@ -956,7 +948,7 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
SmallVector<std::pair<Value, unsigned>> slicesPreds;
unsigned i = 0;
- for (auto [tid, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
// Prepares for next level.
const auto lvlTp = lvlTypes[tid][lvl];
if (isCompressedDLT(lvlTp) || isSingletonDLT(lvlTp) ||
@@ -1007,7 +999,7 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
Value min;
// Finds the minimum coordinate
if (!needsUniv) {
- for (auto [tid, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
const auto lvlTp = lvlTypes[tid][lvl];
if (isCompressedDLT(lvlTp) || isSingletonDLT(lvlTp) ||
isCompressedWithHiDLT(lvlTp)) {
@@ -1027,12 +1019,11 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
}
// Sets up the loop stack.
- loopStack.emplace_back(tids, lvls, ArrayRef<TensorId>(), ArrayRef<Level>(),
- ArrayRef<bool>(), whileOp, builder.getInsertionBlock(),
- min, loopTag);
+ loopStack.emplace_back(tidLvls, ArrayRef<SliceLoopInfo>(), whileOp,
+ builder.getInsertionBlock(), min, loopTag);
assert(loopStack.size() == loopSeqStack.size());
- for (auto [tid, dstLvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, dstLvl] : unpackTensorLevelRange(tidLvls)) {
const auto reassoc = getCollapseReassociation(tid, dstLvl);
assert(reassoc.size() == 1 || isUniqueCOOType(tensors[tid].getType()));
// TODO: Refactors this into smaller functions.
@@ -1079,7 +1070,7 @@ Operation *LoopEmitter::enterCoIterationOverTensorsAtLvls(
}
// Emits extra locals
- emitExtraLocalsForTensorsAtDenseLvls(builder, loc, tids, lvls);
+ emitExtraLocalsForTensorsAtDenseLvls(builder, loc, tidLvls);
// Updates reduction variables
assert(after->getNumArguments() == o + reduc.size() + (needsUniv ? 1 : 0));
@@ -1140,15 +1131,12 @@ void LoopEmitter::prepareLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
llvm_unreachable("Unrecognized level-type!");
}
-void LoopEmitter::emitExtraLocalsForTensorsAtDenseLvls(OpBuilder &builder,
- Location loc,
- ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls) {
+void LoopEmitter::emitExtraLocalsForTensorsAtDenseLvls(
+ OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls) {
// Initialize dense positions. Note that we generate dense coordinates of the
// output tensor unconditionally, since they may not appear in the lattice,
// but may be needed for linearized codegen.
- assert(tids.size() == lvls.size());
- for (auto [tid, lvl] : llvm::zip(tids, lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(tidLvls)) {
if (isDenseDLT(lvlTypes[tid][lvl])) {
// Slice-driven dense level should have been handled already.
if (!dependentLvlMap[tid][lvl].empty())
@@ -1175,8 +1163,7 @@ void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
MutableArrayRef<Value> reduc) {
const LoopInfo &loopInfo = loopStack.back();
rewriter.setInsertionPointToEnd(loopInfo.userCodeBlock);
- for (auto [tid, lvl, reduced] : llvm::zip(
- loopInfo.slicedTids, loopInfo.slicedLvls, loopInfo.sliceReduced)) {
+ for (auto [tid, lvl, reduced] : loopInfo.sliceDrivenInfo) {
SliceInfo &info = sliceStack[tid].back();
assert(isDenseDLT(lvlTypes[tid][lvl]));
assert(*info.slicedOnLvl == lvl && !reduced);
@@ -1253,7 +1240,7 @@ void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc,
// Finished iterating a tensor, clean up
// We only do the clean up on for loop as while loops do not necessarily
// finish the iteration on a sparse tensor
- for (auto [tid, lvl] : llvm::zip(loopInfo.tids, loopInfo.lvls)) {
+ for (auto [tid, lvl] : unpackTensorLevelRange(loopInfo.tidLvls)) {
// Reset to null.
coords[tid][lvl] = Value();
posits[tid][lvl] = Value();
@@ -1278,8 +1265,7 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
unsigned o = 0;
SmallVector<Value> operands;
unsigned delta = 0;
- for (auto [tid, lvl, resolved] : llvm::zip(
- loopInfo.slicedTids, loopInfo.slicedLvls, loopInfo.sliceReduced)) {
+ for (auto [tid, lvl, resolved] : loopInfo.sliceDrivenInfo) {
// TODO: handle dense.
assert(isCompressedDLT(lvlTypes[tid][lvl]));
levelReducedDep[tid][lvl]--;
@@ -1291,7 +1277,7 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
// fully reduced while op for iterating one slices.
// FIXME: since we didn't implement coiteration, this must be iteration
// just on fully resolved slice.
- assert(loopInfo.slicedTids.size() == 1 && loopInfo.tids.empty());
+ assert(loopInfo.sliceDrivenInfo.size() == 1 && loopInfo.tidLvls.empty());
// The if guard to filter out out-range coordinates.
assert(llvm::isa<scf::IfOp>(builder.getInsertionBlock()->getParentOp()));
posits[tid][lvl] = whileOp->getResult(o++);
@@ -1308,7 +1294,7 @@ void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc,
};
Value one = C_IDX(1);
- for (auto [tid, dstLvl] : llvm::zip(loopInfo.tids, loopInfo.lvls)) {
+ for (auto [tid, dstLvl] : unpackTensorLevelRange(loopInfo.tidLvls)) {
const auto lvlTp = lvlTypes[tid][dstLvl];
if (isCompressedDLT(lvlTp) || isSingletonDLT(lvlTp) ||
isCompressedWithHiDLT(lvlTp)) {
@@ -1376,7 +1362,6 @@ void LoopEmitter::exitCurrentLoop(RewriterBase &rewriter, Location loc,
// Clean up the values; it would help us discover a potential bug at an
// earlier stage (instead of silently using a wrong value).
const LoopInfo &loopInfo = loopStack.back();
- assert(loopInfo.tids.size() == loopInfo.lvls.size());
SmallVector<Value> red;
if (llvm::isa<scf::WhileOp>(loopInfo.loop)) {
exitWhileLoop(rewriter, loc, reduc);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
index d069633836e6e..fe4354de3bb45 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h
@@ -42,6 +42,8 @@ namespace sparse_tensor {
// typecheck this to avoid mixups in the code.
using LoopOrd = unsigned;
+// A compressed <tensor id, level> pair.
+using TensorLevel = unsigned;
//===----------------------------------------------------------------------===//
// SparseTensorLoopEmiter class, manages sparse tensors and helps to
// generate loop structure to (co)-iterate sparse tensors.
@@ -134,7 +136,7 @@ class LoopEmitter {
/// // loop sequence end.
/// }
void enterNewLoopSeq(OpBuilder &builder, Location loc,
- ArrayRef<TensorId> tids, ArrayRef<Level> lvls);
+ ArrayRef<TensorLevel> tidLvls);
/// Exits the current loop sequence, this will reset universal index to 0.
void exitCurrentLoopSeq(OpBuilder &builder, Location loc);
@@ -149,8 +151,7 @@ class LoopEmitter {
/// The function will also perform in-place update on the `reduc` vector to
/// return the reduction variable used inside the generated loop.
Operation *enterLoopOverTensorAtLvl(OpBuilder &builder, Location loc,
- ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls,
+ ArrayRef<TensorLevel> tidLvls,
MutableArrayRef<Value> reduc = {},
bool isParallel = false);
@@ -159,13 +160,13 @@ class LoopEmitter {
AffineExpr affine,
MutableArrayRef<Value> reduc = {});
- void genDenseAffineAddress(OpBuilder &builder, Location loc, TensorId tid,
- Level lvl, AffineExpr lvlExpr);
+ void genDenseAffineAddress(OpBuilder &builder, Location loc,
+ TensorLevel tidLvl, AffineExpr lvlExpr);
/// Emits a co-iteration loop over a set of tensors.
Operation *enterCoIterationOverTensorsAtLvls(
- OpBuilder &builder, Location loc, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls, bool needsUniv, MutableArrayRef<Value> reduc = {});
+ OpBuilder &builder, Location loc, ArrayRef<TensorLevel> tidLvls,
+ bool needsUniv, MutableArrayRef<Value> reduc = {});
void exitCurrentLoop(RewriterBase &rewriter, Location loc,
MutableArrayRef<Value> reduc = {});
@@ -190,6 +191,31 @@ class LoopEmitter {
return n < getCurrentDepth() ? loopStack[n].iv : Value();
}
+ /// Gets the total number of tensors that loopEmitter is operating on.
+ unsigned getNumTensors() const { return tensors.size(); }
+
+ /// Compresses a TensorId and Level into a TensorLevel.
+ constexpr TensorLevel makeTensorLevel(TensorId t, Level l) const {
+ return l * getNumTensors() + t;
+ }
+
+ /// De-compresses a TensorLevel back to a pair of TensorId and Level.
+ std::pair<TensorId, Level> unpackTensorLevel(TensorLevel tidLvl) const {
+ unsigned nt = getNumTensors();
+ return std::make_pair(tidLvl % nt, tidLvl / nt);
+ }
+
+ /// Converts a range of TensorLevel to a range of std::pair<TensorId, Level>
+ template <class ContainerTy>
+ auto unpackTensorLevelRange(ContainerTy &&c) const {
+ using EltTy = decltype(*c.begin());
+ static_assert(std::is_same_v<llvm::remove_cvref_t<EltTy>, TensorLevel>,
+ "Must be unpacking a TensorLevel range");
+ return llvm::map_range(std::forward<ContainerTy>(c), [this](EltTy tl) {
+ return this->unpackTensorLevel(tl);
+ });
+ }
+
///
/// Getters.
///
@@ -209,32 +235,30 @@ class LoopEmitter {
}
private:
+ // A tuple that stores the slice-driven loop information.
+ struct SliceLoopInfo final {
+ SliceLoopInfo(TensorId tid, Level lvl, bool reduced)
+ : tid(tid), lvl(lvl), reduced(reduced) {}
+ TensorId tid;
+ Level lvl;
+ bool reduced;
+ };
// LoopInfo stores information of a loop generated by LoopEmitter. E.g.,
// the set of tensors levels that the loop is iterating over.
struct LoopInfo final {
- LoopInfo(ArrayRef<TensorId> tids, ArrayRef<Level> lvls,
- ArrayRef<TensorId> slicedTids, ArrayRef<Level> slicedLvls,
- ArrayRef<bool> sliceReduced, Operation *loop, Block *userBlock,
- Value iv, StringAttr loopTag)
- : tids(tids), lvls(lvls), slicedTids(slicedTids),
- slicedLvls(slicedLvls), sliceReduced(sliceReduced), loop(loop),
+ LoopInfo(ArrayRef<TensorLevel> tidLvls,
+ ArrayRef<SliceLoopInfo> sliceDrivenInfo, Operation *loop,
+ Block *userBlock, Value iv, StringAttr loopTag)
+ : tidLvls(tidLvls), sliceDrivenInfo(sliceDrivenInfo), loop(loop),
userCodeBlock(userBlock), iv(iv) {
// Attached a special tag to loop emitter generated loop.
if (loopTag)
loop->setAttr(LoopEmitter::getLoopEmitterLoopAttrName(), loopTag);
}
- // TODO: maybe use a vector<pair> for tid and lvl?
- // (Or compress them together with a `TensorLoopId`.)
- // The set of tensors that the loop is operating on
- const llvm::SmallVector<TensorId> tids;
- // The corresponding levels for the tensors
- const llvm::SmallVector<Level> lvls;
- // The set of tensors for slice-driven loop conditions.
- const llvm::SmallVector<TensorId> slicedTids;
- // The corresponding level for slice-driven tensors.
- const llvm::SmallVector<Level> slicedLvls;
- // Whether the tensor is fully reduced (e.g., i + j => j).
- const llvm::SmallVector<bool> sliceReduced;
+ // The set of <tensor, lvl> that the loop is operating on
+ const llvm::SmallVector<TensorLevel> tidLvls;
+ // Slice-driven loop conditions.
+ const llvm::SmallVector<SliceLoopInfo> sliceDrivenInfo;
const Operation *loop; // the loop operation
Block *const userCodeBlock; // the block holding users' generated code.
const Value iv; // the induction variable for the loop
@@ -295,8 +319,6 @@ class LoopEmitter {
Location loc, Value crd,
TensorId tid, Level lvl);
- unsigned getNumTensors() const { return tensors.size(); }
-
bool isOutputTensor(TensorId tid) const {
return hasOutput && tid == getNumTensors() - 1;
}
@@ -318,8 +340,7 @@ class LoopEmitter {
/// point used to generate the loops, but are still required to generate
/// expressions.
void emitExtraLocalsForTensorsAtDenseLvls(OpBuilder &builder, Location loc,
- ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls);
+ ArrayRef<TensorLevel> tidLvls);
/// Emits a for loop to iterate over a tensor level with the provided lower
/// bound `lo` and upper bound `hi`.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 73cbe611aa376..4aba829d7dbdd 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -990,14 +990,12 @@ struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
for (Level l = 0; l < lvlRank; l++) {
// TODO: provide utility function for loop sequences that only contains
// one for loop?
- // FIXME(wrengr): what is this "ld" supposed to be really?
- const Level ld = op.getOrder() ? op.getOrder()->getDimPosition(l) : l;
- const SmallVector<TensorId, 1> tids{0};
- loopEmitter.enterNewLoopSeq(rewriter, loc, tids, ld);
+ const SmallVector<TensorLevel, 1> tidLvls{
+ loopEmitter.makeTensorLevel(0, l)};
+ loopEmitter.enterNewLoopSeq(rewriter, loc, tidLvls);
// Note that reduc will be taken care of by loop emitter and get updated
// in place.
-
- loopEmitter.enterLoopOverTensorAtLvl(rewriter, loc, tids, l, reduc);
+ loopEmitter.enterLoopOverTensorAtLvl(rewriter, loc, tidLvls, reduc);
}
SmallVector<Value> lcvs;
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 4ed879121e98c..afeabb33fcd78 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -1296,13 +1296,16 @@ static bool isParallelFor(CodegenEnv &env, bool isOuter, bool isSparse) {
/// Generates a for-loop on a single index.
static Operation *genFor(CodegenEnv &env, OpBuilder &builder, bool isOuter,
- bool isInner, LoopId ldx, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls) {
+ bool isInner, LoopId ldx,
+ ArrayRef<TensorLevel> tidLvls) {
linalg::GenericOp op = env.op();
Location loc = op.getLoc();
auto iteratorTypes = op.getIteratorTypesArray();
- bool isSparse = llvm::any_of(tids, [ldx, &env](TensorId tid) {
- const auto dlt = env.dlt(tid, ldx);
+ bool isSparse = llvm::any_of(tidLvls, [ldx, &env](TensorLevel tidLvl) {
+ // Queries the DLT based on the tensor id and loop idx, as requested by
+ // `CodegenEnv::dlt(TensorId, LoopIdx)`. The returned DLT from CodegenEnv
+ // should be consistent with the DLT indexed by <TensorId, Level>.
+ const auto dlt = env.dlt(env.unpackTensorLevel(tidLvl).first, ldx);
return isCompressedDLT(dlt) || isSingletonDLT(dlt);
});
@@ -1310,11 +1313,10 @@ static Operation *genFor(CodegenEnv &env, OpBuilder &builder, bool isOuter,
Operation *loop = *env.genLoopBoundary([&](MutableArrayRef<Value> reduc) {
if (env.merger().isFilterLoop(ldx)) {
- const TensorId tid = tids.front();
- const Level lvl = lvls.front();
+ const auto [tid, lvl] = env.unpackTensorLevel(tidLvls.front());
// tids/lvls must only have one value because filter loops only
// corresponding to the one and only sparse tensor level.
- assert(isSparse && tids.size() == 1 && lvls.size() == 1);
+ assert(isSparse && tidLvls.size() == 1);
OpOperand *t = &op->getOpOperand(tid);
auto enc = getSparseTensorEncoding(t->get().getType());
// Retrieves the affine expression for the filter loop.
@@ -1324,8 +1326,8 @@ static Operation *genFor(CodegenEnv &env, OpBuilder &builder, bool isOuter,
return env.emitter().enterFilterLoopOverTensorAtLvl(builder, loc, tid,
lvl, a, reduc);
}
- return env.emitter().enterLoopOverTensorAtLvl(builder, loc, tids, lvls,
- reduc, isParallel);
+ return env.emitter().enterLoopOverTensorAtLvl(builder, loc, tidLvls, reduc,
+ isParallel);
});
assert(loop);
return loop;
@@ -1333,13 +1335,12 @@ static Operation *genFor(CodegenEnv &env, OpBuilder &builder, bool isOuter,
/// Emit a while-loop for co-iteration over multiple indices.
static Operation *genWhile(CodegenEnv &env, OpBuilder &builder, LoopId idx,
- bool needsUniv, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls) {
+ bool needsUniv, ArrayRef<TensorLevel> tidLvls) {
Operation *loop = *env.genLoopBoundary([&](MutableArrayRef<Value> reduc) {
// Construct the while-loop with a parameter for each
// index.
return env.emitter().enterCoIterationOverTensorsAtLvls(
- builder, env.op().getLoc(), tids, lvls, needsUniv, reduc);
+ builder, env.op().getLoc(), tidLvls, needsUniv, reduc);
});
assert(loop);
return loop;
@@ -1348,16 +1349,15 @@ static Operation *genWhile(CodegenEnv &env, OpBuilder &builder, LoopId idx,
/// Generates a for-loop or a while-loop, depending on whether it implements
/// singleton iteration or co-iteration over the given conjunction.
static Operation *genLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at,
- bool needsUniv, ArrayRef<TensorId> tids,
- ArrayRef<Level> lvls, bool isFor) {
- assert(tids.size() == lvls.size());
+ bool needsUniv, ArrayRef<TensorLevel> tidLvls,
+ bool isFor) {
const LoopId idx = env.topSortAt(at);
if (isFor) {
bool isOuter = at == 0;
bool isInner = at == env.topSortSize() - 1;
- return genFor(env, builder, isOuter, isInner, idx, tids, lvls);
+ return genFor(env, builder, isOuter, isInner, idx, tidLvls);
}
- return genWhile(env, builder, idx, needsUniv, tids, lvls);
+ return genWhile(env, builder, idx, needsUniv, tidLvls);
}
/// Generates the induction structure for a while-loop.
@@ -1480,8 +1480,7 @@ static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp,
const LatPointId l0 = env.set(lts)[0];
bool needsUniv = false;
- SmallVector<TensorId> tids;
- SmallVector<Level> lvls;
+ SmallVector<TensorLevel> tidLvls;
env.merger().foreachTensorLoopId(l0, [&](TensorLoopId b, TensorId tid,
std::optional<Level> lvl,
DimLevelType dlt, bool isIdxReduc) {
@@ -1493,12 +1492,11 @@ static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp,
// Only when this is a index reduction loop, can the dlt be undefined.
assert(!isUndefDLT(dlt) || isIdxReduc);
// sparse/singleton levels, or a dense/sparse index reduction loop.
- tids.push_back(tid);
- lvls.push_back(*lvl);
+ tidLvls.push_back(env.makeTensorLevel(tid, *lvl));
}
});
- env.emitter().enterNewLoopSeq(builder, env.op().getLoc(), tids, lvls);
+ env.emitter().enterNewLoopSeq(builder, env.op().getLoc(), tidLvls);
// Maintain the universal index only if it is actually
// consumed by a subsequent lattice point.
@@ -1529,7 +1527,8 @@ static void genConstantDenseAddressFromLevel(CodegenEnv &env,
// FIXME: `toOrigDim` is deprecated.
AffineExpr lvlExpr = lvlExprs[toOrigDim(enc, l)];
if (enc.isDenseLvl(l) && lvlExpr.isa<AffineConstantExpr>())
- env.emitter().genDenseAffineAddress(builder, loc, tid, l, lvlExpr);
+ env.emitter().genDenseAffineAddress(
+ builder, loc, env.makeTensorLevel(tid, l), lvlExpr);
else
return; // break on first non-dense non-constant level
}
@@ -1548,23 +1547,21 @@ static void genInitConstantDenseAddress(CodegenEnv &env,
/// Return true if the lattices bit can be iterated by a for loop.
static bool translateBitsToTidLvlPairs(
- CodegenEnv &env, LatPointId li, LoopId ldx, SmallVectorImpl<TensorId> &tids,
- SmallVectorImpl<Level> &lvls, SmallVectorImpl<TensorId> &affineTids,
- SmallVectorImpl<Level> &affineLvls, SmallVectorImpl<AffineExpr> &exps) {
+ CodegenEnv &env, LatPointId li, LoopId ldx,
+ SmallVectorImpl<TensorLevel> &tidLvls,
+ SmallVectorImpl<std::pair<TensorLevel, AffineExpr>> &affineTidLvls) {
const BitVector &simple = env.lat(li).simple;
const TensorId outTid = env.merger().getOutTensorID();
const std::optional<Level> outLvl = env.merger().getLvl(outTid, ldx);
unsigned numloopCond = 0;
bool hasNonUnique = false;
-
env.merger().foreachTensorLoopId(
li, [&, ldx](TensorLoopId b, TensorId tid, std::optional<Level> lvl,
DimLevelType dlt, bool isIdxReduc) {
if (simple[b]) {
if (isIdxReduc) {
- tids.push_back(tid);
- lvls.push_back(*lvl);
+ tidLvls.push_back(env.makeTensorLevel(tid, *lvl));
numloopCond++;
return;
}
@@ -1582,12 +1579,10 @@ static bool translateBitsToTidLvlPairs(
return;
}
hasNonUnique = !isUniqueDLT(dlt) || hasNonUnique;
- tids.push_back(tid);
- lvls.push_back(*lvl);
+ tidLvls.push_back(env.makeTensorLevel(tid, *lvl));
numloopCond++;
} else if (isDenseDLT(dlt) || isIdxReduc) {
- tids.push_back(tid);
- lvls.push_back(*lvl);
+ tidLvls.push_back(env.makeTensorLevel(tid, *lvl));
} else {
assert(isUndefDLT(dlt));
linalg::GenericOp op = env.op();
@@ -1625,9 +1620,7 @@ static bool translateBitsToTidLvlPairs(
// computeIterationGraph), another more admissible approach
// might be accepting out-of-order access between consecutive
// dense levels.
- affineTids.push_back(tid);
- affineLvls.push_back(l);
- exps.push_back(exp);
+ affineTidLvls.emplace_back(env.makeTensorLevel(tid, l), exp);
}
}
}
@@ -1638,8 +1631,7 @@ static bool translateBitsToTidLvlPairs(
// Note that we generate dense indices of the output tensor
// unconditionally, since they may not appear in the lattice, but may be
// needed for linearized env.
- tids.push_back(outTid);
- lvls.push_back(*outLvl);
+ tidLvls.push_back(env.makeTensorLevel(outTid, *outLvl));
}
assert(numloopCond > 0);
@@ -1653,29 +1645,27 @@ static std::pair<Operation *, bool> startLoop(CodegenEnv &env,
OpBuilder &builder, LoopOrd at,
LatPointId li, bool needsUniv) {
// The set of tensors + lvls to generate loops on
- SmallVector<TensorId> tids, affineTids;
- SmallVector<Level> lvls, affineLvls;
+ SmallVector<TensorLevel> tidLvls;
// The set of dense tensors with non-trivial affine expression that just
// becomes invariant and the address shall now be generated at the current
// level.
- SmallVector<AffineExpr> affines;
- bool isSingleCond = translateBitsToTidLvlPairs(
- env, li, env.topSortAt(at), tids, lvls, affineTids, affineLvls, affines);
+ SmallVector<std::pair<TensorLevel, AffineExpr>> affineTidLvls;
+ bool isSingleCond = translateBitsToTidLvlPairs(env, li, env.topSortAt(at),
+ tidLvls, affineTidLvls);
// Emit the for/while-loop control.
- Operation *loop =
- genLoop(env, builder, at, needsUniv, tids, lvls, isSingleCond);
+ Operation *loop = genLoop(env, builder, at, needsUniv, tidLvls, isSingleCond);
Location loc = env.op().getLoc();
- for (auto [tid, lvl, exp] : llvm::zip(affineTids, affineLvls, affines)) {
- env.emitter().genDenseAffineAddress(builder, loc, tid, lvl, exp);
+ for (auto [tidLvl, exp] : affineTidLvls) {
+ env.emitter().genDenseAffineAddress(builder, loc, tidLvl, exp);
}
// Until now, we have entered every <tid, lvl> pair in {cond, extra,
// affine}Tids/Lvls. The addresses of the upcoming levels which are dependent
// on constant affines expression may now be determined.
- auto allTids = llvm::concat<TensorId>(tids, affineTids);
- auto allLvls = llvm::concat<Level>(lvls, affineLvls);
- for (auto [tid, lvl] : llvm::zip(allTids, allLvls)) {
+ auto allTidLvls =
+ llvm::concat<TensorLevel>(tidLvls, llvm::make_first_range(affineTidLvls));
+ for (auto [tid, lvl] : env.unpackTensorLevelRange(allTidLvls)) {
if (tid != env.merger().getOutTensorID())
genConstantDenseAddressFromLevel(env, builder, tid, lvl + 1);
}