[Mlir-commits] [mlir] [mlir][sparse] implement non-permutation MapRef encoding (PR #69406)

Wed Oct 18 11:52:34 PDT 2023

https://github.com/aartbik updated https://github.com/llvm/llvm-project/pull/69406

>From 20f86596420141e6d1867b7cda44625cdd83009b Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 18:11:45 -0700
Subject: [PATCH 1/9] [mlir][sparse] implement non-permutation MapRef encoding

This enables reading block-sparse tensors from file using libgen!
(Direct IR codegen will follow soon.)
---
 .../mlir/Dialect/SparseTensor/IR/Enums.h      | 23 +++++
 .../ExecutionEngine/SparseTensor/MapRef.h     | 56 +++++++++---
 .../SparseTensor/Transforms/CodegenUtils.cpp  | 83 ++++++++++++++----
 .../ExecutionEngine/SparseTensor/MapRef.cpp   | 44 ++++++++--
 .../ExecutionEngine/SparseTensor/Storage.cpp  |  4 +-
 .../ExecutionEngine/SparseTensorRuntime.cpp   |  8 +-
 .../Dialect/SparseTensor/CPU/block.mlir       | 85 +++++++++++++++++++
 mlir/test/Integration/data/block.mtx          | 10 +++
 8 files changed, 273 insertions(+), 40 deletions(-)
 create mode 100755 mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
 create mode 100755 mlir/test/Integration/data/block.mtx

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 08887abcd0f1055..b59b7c8998eb26c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -443,6 +443,29 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
                !isUniqueDLT(DimLevelType::LooseCompressedNuNo)),
               "isUniqueDLT definition is broken");
 
+/// Bit manipulations for affine encoding.
+constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
+  assert(i <= 0xffffffffu && cf <= 0x3fffffffu && cm <= 0x3fffffffu);
+  if (cf != 0)
+    return (0x1L << 62) | (cf << 32) | i;
+  if (cm != 0)
+    return (0x2L << 62) | (cm << 32) | i;
+  return i;
+}
+constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
+  assert(i <= 0xffffffffu && c <= 0xffffu && ii <= 0xffffu);
+  if (c != 0)
+    return (0x3L << 62) | (c << 32) | (ii << 48) | i;
+  return i;
+}
+constexpr bool isEncodedFloor(uint64_t v) { return (v >> 62) == 0x01; }
+constexpr bool isEncodedMod(uint64_t v) { return (v >> 62) == 0x02; }
+constexpr bool isEncodedMul(uint64_t v) { return (v >> 62) == 0x03; }
+constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xffffffffu; }
+constexpr uint64_t decodeConst(uint64_t v) { return (v >> 32) & 0x3fffffffu; }
+constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 32) & 0xffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 48) & 0xffffu; }
+
 } // namespace sparse_tensor
 } // namespace mlir
 
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 22ae70a61d95eff..5631b59abe9c8fb 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -30,6 +30,23 @@ namespace sparse_tensor {
 /// The pushforward/backward operations are fast for (1) but incur some obvious
 /// overhead for situation (2).
 ///
+/// Note that because the indices in the mappings refer to dimensions
+/// and levels (and *not* the sizes of these dimensions and levels), the
+/// 64-bit encoding gives ample room for a compact encoding of affine
+/// operations in the higher 32-bit.
+///
+/// The compact encoding is as follows:
+///
+/// | 00       |                             main index |  e.g. i
+/// | 01 floor |       constant | 32-bit for main index |  e.g. i floor c
+/// | 10 mod   |       constant | 32-bit for main index |  e.g. i mod c
+/// | 11 mul   | index/constant | 32-bit for main index |  e.g. i + 2 * ii
+///
+/// This encoding provides sufficient generality for currently supported
+/// sparse tensor types. To generalize this more, we will need to provide
+/// a broader encoding scheme for affine functions. Also, the library
+/// encoding may be replaced with pure "direct-IR" code in the future.
+///
 class MapRef final {
 public:
   MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);
@@ -38,13 +55,22 @@ class MapRef final {
   // Push forward maps from dimensions to levels.
   //
 
-  template <typename T>
-  inline void pushforward(const T *in, T *out) const {
+  // Map from dimRank in to lvlRank out.
+  template <typename T> inline void pushforward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < lvlRank; ++i)
-        out[i] = in[lvl2dim[i]];
+      for (uint64_t l = 0; l < lvlRank; l++) {
+        out[l] = in[dim2lvl[l]];
+      }
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c;
+      for (uint64_t l = 0; l < lvlRank; l++)
+        if (isFloor(l, i, c)) {
+          out[l] = in[i] / c;
+        } else if (isMod(l, i, c)) {
+          out[l] = in[i] % c;
+        } else {
+          out[l] = in[dim2lvl[l]];
+        }
     }
   }
 
@@ -52,13 +78,19 @@ class MapRef final {
   // Push backward maps from levels to dimensions.
   //
 
-  template <typename T>
-  inline void pushbackward(const T *in, T *out) const {
+  // Map from lvlRank in to dimRank out.
+  template <typename T> inline void pushbackward(const T *in, T *out) const {
     if (isPermutation) {
-      for (uint64_t i = 0; i < dimRank; ++i)
-        out[i] = in[dim2lvl[i]];
+      for (uint64_t d = 0; d < dimRank; d++)
+        out[d] = in[lvl2dim[d]];
     } else {
-      assert(0 && "coming soon");
+      uint64_t i, c, ii;
+      for (uint64_t d = 0; d < dimRank; d++)
+        if (isMul(d, i, c, ii)) {
+          out[d] = in[i] + c * in[ii];
+        } else {
+          out[d] = in[lvl2dim[d]];
+        }
     }
   }
 
@@ -68,6 +100,10 @@ class MapRef final {
 private:
   bool isPermutationMap() const;
 
+  bool isFloor(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMod(uint64_t l, uint64_t &i, uint64_t &c) const;
+  bool isMul(uint64_t d, uint64_t &i, uint64_t &c, uint64_t &ii) const;
+
   const uint64_t dimRank;
   const uint64_t lvlRank;
   const uint64_t *const dim2lvl; // non-owning pointer
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 298ff098835564d..88677c09615101d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -675,9 +675,9 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
                                    /*out*/ Value &lvl2dimBuffer) {
   const Dimension dimRank = stt.getDimRank();
   const Level lvlRank = stt.getLvlRank();
-  // For an identity mapping, the dim2lvl and lvl2dim mappings are
-  // identical as are dimSizes and lvlSizes, so buffers are reused
-  // as much as possible.
+  //  For an identity mapping, the dim2lvl and lvl2dim mappings are
+  //  identical as are dimSizes and lvlSizes, so buffers are reused
+  //  as much as possible.
   if (stt.isIdentity()) {
     assert(dimRank == lvlRank);
     SmallVector<Value> iotaValues;
@@ -688,25 +688,72 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
     return dimSizesBuffer;
   }
   // Otherwise, some code needs to be generated to set up the buffers.
-  // TODO: use the lvl2dim once available and deal with non-permutations!
+  // This code deals with permutations as well as non-permutations that
+  // arise from rank-changing blocking.
   const auto dimToLvl = stt.getDimToLvl();
-  assert(dimToLvl.isPermutation());
-  SmallVector<Value> dim2lvlValues(dimRank);
-  SmallVector<Value> lvl2dimValues(lvlRank);
+  SmallVector<Value> dim2lvlValues(lvlRank); // for each lvl, expr in dim vars
+  SmallVector<Value> lvl2dimValues(dimRank); // for each dim, expr in lvl vars
   SmallVector<Value> lvlSizesValues(lvlRank);
+  // Generate dim2lvl.
+  assert(lvlRank == dimToLvl.getNumResults());
   for (Level l = 0; l < lvlRank; l++) {
-    // The `d`th source variable occurs in the `l`th result position.
-    Dimension d = dimToLvl.getDimPosition(l);
-    Value lvl = constantIndex(builder, loc, l);
-    Value dim = constantIndex(builder, loc, d);
-    dim2lvlValues[d] = lvl;
-    lvl2dimValues[l] = dim;
-    if (stt.isDynamicDim(d))
-      lvlSizesValues[l] =
-          builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
-    else
-      lvlSizesValues[l] = dimShapesValues[d];
+    AffineExpr exp = dimToLvl.getResult(l);
+    // We expect:
+    //    (1) l = d
+    //    (2) l = d / c
+    //    (3) l = d % c
+    Dimension d = 0;
+    uint64_t cf = 0, cm = 0;
+    switch (exp.getKind()) {
+    case AffineExprKind::DimId:
+      d = exp.cast<AffineDimExpr>().getPosition();
+      break;
+    case AffineExprKind::FloorDiv:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cf = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    case AffineExprKind::Mod:
+      d = exp.cast<AffineBinaryOpExpr>()
+              .getLHS()
+              .cast<AffineDimExpr>()
+              .getPosition();
+      cm = exp.cast<AffineBinaryOpExpr>()
+               .getRHS()
+               .cast<AffineConstantExpr>()
+               .getValue();
+      break;
+    default:
+      llvm::report_fatal_error("unsupported dim2lvl in sparse tensor type");
+    }
+    llvm::dbgs() << "ENCODE " << d << " " << cf << " " << cm << " into "
+                 << encodeDim(d, cf, cm) << "\n";
+    dim2lvlValues[l] = constantIndex(builder, loc, encodeDim(d, cf, cm));
+    lvl2dimValues[d] = constantIndex(builder, loc, l); // FIXME, use lvlToDim
+    // Compute the level sizes.
+    //    (1) l = d        : size(d)
+    //    (2) l = d / c    : size(d) / c
+    //    (3) l = d % c    : c
+    Value lvlSz;
+    if (cm == 0) {
+      lvlSz = dimShapesValues[d];
+      if (stt.isDynamicDim(d))
+        lvlSz = builder.create<memref::LoadOp>(loc, dimSizesBuffer,
+                                               constantIndex(builder, loc, d));
+      if (cf != 0)
+        lvlSz = builder.create<arith::DivUIOp>(loc, lvlSz,
+                                               constantIndex(builder, loc, cf));
+    } else {
+      lvlSz = constantIndex(builder, loc, cm);
+    }
+    lvlSizesValues[l] = lvlSz;
   }
+  // Return buffers.
   dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
   lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
   return allocaBuffer(builder, loc, lvlSizesValues);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
index ee4d6fa0d34b491..ace6ac8152a29cb 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
@@ -7,14 +7,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
+#include "mlir/Dialect/SparseTensor/IR/Enums.h"
 
 mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l,
                                     const uint64_t *l2d)
     : dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d),
       isPermutation(isPermutationMap()) {
   if (isPermutation) {
-    for (uint64_t i = 0; i < dimRank; i++)
-      assert(lvl2dim[dim2lvl[i]] == i);
+    for (uint64_t l = 0; l < lvlRank; l++)
+      assert(lvl2dim[dim2lvl[l]] == l);
   }
 }
 
@@ -22,11 +23,42 @@ bool mlir::sparse_tensor::MapRef::isPermutationMap() const {
   if (dimRank != lvlRank)
     return false;
   std::vector<bool> seen(dimRank, false);
-  for (uint64_t i = 0; i < dimRank; i++) {
-    const uint64_t j = dim2lvl[i];
-    if (j >= dimRank || seen[j])
+  for (uint64_t l = 0; l < lvlRank; l++) {
+    const uint64_t d = dim2lvl[l];
+    if (d >= dimRank || seen[d])
       return false;
-    seen[j] = true;
+    seen[d] = true;
   }
   return true;
 }
+
+bool mlir::sparse_tensor::MapRef::isFloor(uint64_t l, uint64_t &i,
+                                          uint64_t &c) const {
+  if (isEncodedFloor(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMod(uint64_t l, uint64_t &i,
+                                        uint64_t &c) const {
+  if (isEncodedMod(dim2lvl[l])) {
+    i = decodeIndex(dim2lvl[l]);
+    c = decodeConst(dim2lvl[l]);
+    return true;
+  }
+  return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMul(uint64_t d, uint64_t &i, uint64_t &c,
+                                        uint64_t &ii) const {
+  if (isEncodedMul(lvl2dim[d])) {
+    i = decodeIndex(lvl2dim[d]);
+    c = decodeMulc(lvl2dim[d]);
+    ii = decodeMuli(lvl2dim[d]);
+    return true;
+  }
+  return false;
+}
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index f5890ebb6f3ff6f..40805a179d4b385 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -24,8 +24,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
     : dimSizes(dimSizes, dimSizes + dimRank),
       lvlSizes(lvlSizes, lvlSizes + lvlRank),
       lvlTypes(lvlTypes, lvlTypes + lvlRank),
-      dim2lvlVec(dim2lvl, dim2lvl + dimRank),
-      lvl2dimVec(lvl2dim, lvl2dim + lvlRank),
+      dim2lvlVec(dim2lvl, dim2lvl + lvlRank),
+      lvl2dimVec(lvl2dim, lvl2dim + dimRank),
       map(dimRank, lvlRank, dim2lvlVec.data(), lvl2dimVec.data()) {
   assert(dimSizes && lvlSizes && lvlTypes && dim2lvl && lvl2dim);
   // Validate dim-indexed parameters.
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 36d888a08de6d60..7a6756e689b27be 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -185,8 +185,8 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
   const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
   const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
   ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
-  ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
-  ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
+  ASSERT_USIZE_EQ(dim2lvlRef, lvlRank);
+  ASSERT_USIZE_EQ(lvl2dimRef, dimRank);
   const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
   const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
   const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
@@ -423,10 +423,10 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
     ASSERT_NO_STRIDE(cref);                                                    \
     ASSERT_NO_STRIDE(vref);                                                    \
     const uint64_t dimRank = reader.getRank();                                 \
-    const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef);                     \
+    const uint64_t lvlRank = MEMREF_GET_USIZE(dim2lvlRef);                     \
     const uint64_t cSize = MEMREF_GET_USIZE(cref);                             \
     const uint64_t vSize = MEMREF_GET_USIZE(vref);                             \
-    ASSERT_USIZE_EQ(dim2lvlRef, dimRank);                                      \
+    ASSERT_USIZE_EQ(lvl2dimRef, dimRank);                                      \
     assert(cSize >= lvlRank * vSize);                                          \
     assert(vSize >= reader.getNSE() && "Not enough space in buffers");         \
     (void)dimRank;                                                             \
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
new file mode 100755
index 000000000000000..d0b5e77bd4a724e
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+//  do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#BSR = #sparse_tensor.encoding<{
+  map = (i, j) ->
+    ( i floordiv 2 : dense
+    , j floordiv 2 : compressed
+    , i mod 2 : dense
+    , j mod 2 : dense
+    )
+}>
+
+!Filename = !llvm.ptr<i8>
+
+//
+// Example 2x2 block storage:
+//
+//  +-----+-----+-----+    +-----+-----+-----+
+//  | 1 2 | . . | 4 . |    | 1 2 |     | 4 0 |
+//  | . 3 | . . | . 5 |    | 0 3 |     | 0 5 |
+//  +-----+-----+-----+ => +-----+-----+-----+
+//  | . . | 6 7 | . . |    |     | 6 7 |     |
+//  | . . | 8 . | . . |    |     | 8 0 |     |
+//  +-----+-----+-----+    +-----+-----+-----+
+//
+// Stored as:
+//
+//    positions[1]   : 0 2 3
+//    coordinates[1] : 0 2 1
+//    values         : 1.000000 2.000000 0.000000 3.000000 4.000000 0.000000 0.000000 5.000000 6.000000 7.000000 8.000000 0.000000
+//
+module {
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  func.func @entry() {
+    %c0 = arith.constant 0   : index
+    %f0 = arith.constant 0.0 : f64
+
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %A = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #BSR>
+
+    // CHECK:      ( 0, 2, 3 )
+    // CHECK-NEXT: ( 0, 2, 1 )
+    // CHECK-NEXT: ( 1, 2, 0, 3, 4, 0, 0, 5, 6, 7, 8, 0 )
+    %pos = sparse_tensor.positions %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecp : vector<3xindex>
+    %crd = sparse_tensor.coordinates %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+    %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<3xindex>
+    vector.print %vecc : vector<3xindex>
+    %val = sparse_tensor.values %A : tensor<?x?xf64, #BSR> to memref<?xf64>
+    %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<12xf64>
+    vector.print %vecv : vector<12xf64>
+
+    // Release the resources.
+    bufferization.dealloc_tensor %A: tensor<?x?xf64, #BSR>
+
+    return
+  }
+}
diff --git a/mlir/test/Integration/data/block.mtx b/mlir/test/Integration/data/block.mtx
new file mode 100755
index 000000000000000..9bb3ea7d50a104b
--- /dev/null
+++ b/mlir/test/Integration/data/block.mtx
@@ -0,0 +1,10 @@
+%%MatrixMarket matrix coordinate real general
+4 6 8
+1 1 1.0
+1 2 2.0
+1 5 4.0
+2 2 3.0
+2 6 5.0
+3 3 6.0
+3 4 7.0
+4 3 8.0

>From 41fa9288472143c21b48c3756bc4eeb2b0bef33f Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 18:18:25 -0700
Subject: [PATCH 2/9] minor edits

---
 mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h   | 6 ++++--
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 2 --
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 5631b59abe9c8fb..36e04d82824b260 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -56,7 +56,8 @@ class MapRef final {
   //
 
   // Map from dimRank in to lvlRank out.
-  template <typename T> inline void pushforward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushforward(const T *in, T *out) const {
     if (isPermutation) {
       for (uint64_t l = 0; l < lvlRank; l++) {
         out[l] = in[dim2lvl[l]];
@@ -79,7 +80,8 @@ class MapRef final {
   //
 
   // Map from lvlRank in to dimRank out.
-  template <typename T> inline void pushbackward(const T *in, T *out) const {
+  template <typename T>
+  inline void pushbackward(const T *in, T *out) const {
     if (isPermutation) {
       for (uint64_t d = 0; d < dimRank; d++)
         out[d] = in[lvl2dim[d]];
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 88677c09615101d..de3243848e4222a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -731,8 +731,6 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
     default:
       llvm::report_fatal_error("unsupported dim2lvl in sparse tensor type");
     }
-    llvm::dbgs() << "ENCODE " << d << " " << cf << " " << cm << " into "
-                 << encodeDim(d, cf, cm) << "\n";
     dim2lvlValues[l] = constantIndex(builder, loc, encodeDim(d, cf, cm));
     lvl2dimValues[d] = constantIndex(builder, loc, l); // FIXME, use lvlToDim
     // Compute the level sizes.

>From 727a482539175b138f799f619b2c057172b9d5ce Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 18:20:18 -0700
Subject: [PATCH 3/9] include assert

---
 mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index b59b7c8998eb26c..864ac052d856153 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -31,6 +31,7 @@
 // NOTE: Client code will need to include "mlir/ExecutionEngine/Float16bits.h"
 // if they want to use the `MLIR_SPARSETENSOR_FOREVERY_V` macro.
 
+#include <cassert>
 #include <cinttypes>
 #include <complex>
 #include <optional>

>From 6280ba338f40b7122394e0dde6c72d43ea3dee76 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 21:21:24 -0700
Subject: [PATCH 4/9] minor edit

---
 mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 864ac052d856153..51d2cf8c90398c6 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -454,7 +454,7 @@ constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
   return i;
 }
 constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
-  assert(i <= 0xffffffffu && c <= 0xffffu && ii <= 0xffffu);
+  assert(i <= 0xffffffffu && c <= 0xffffu && ii <= 0x3fffu);
   if (c != 0)
     return (0x3L << 62) | (c << 32) | (ii << 48) | i;
   return i;
@@ -465,7 +465,7 @@ constexpr bool isEncodedMul(uint64_t v) { return (v >> 62) == 0x03; }
 constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xffffffffu; }
 constexpr uint64_t decodeConst(uint64_t v) { return (v >> 32) & 0x3fffffffu; }
 constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 32) & 0xffffu; }
-constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 48) & 0xffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 48) & 0x3fffu; }
 
 } // namespace sparse_tensor
 } // namespace mlir

>From c661ee266f9ed8b236e1a3885ef4bfb91c48dbee Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 21:24:06 -0700
Subject: [PATCH 5/9] whitespace

---
 mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index de3243848e4222a..98b412c8ec9eb5b 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -675,9 +675,9 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
                                    /*out*/ Value &lvl2dimBuffer) {
   const Dimension dimRank = stt.getDimRank();
   const Level lvlRank = stt.getLvlRank();
-  //  For an identity mapping, the dim2lvl and lvl2dim mappings are
-  //  identical as are dimSizes and lvlSizes, so buffers are reused
-  //  as much as possible.
+  // For an identity mapping, the dim2lvl and lvl2dim mappings are
+  // identical as are dimSizes and lvlSizes, so buffers are reused
+  // as much as possible.
   if (stt.isIdentity()) {
     assert(dimRank == lvlRank);
     SmallVector<Value> iotaValues;

>From b86d4f4b338160b9326353a67cde05ebb8bb5d23 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Wed, 18 Oct 2023 10:00:48 -0700
Subject: [PATCH 6/9] document bit encoding better

---
 .../mlir/Dialect/SparseTensor/IR/Enums.h      | 57 ++++++++++++++-----
 .../ExecutionEngine/SparseTensor/MapRef.h     | 17 ------
 2 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 51d2cf8c90398c6..991de4a4e406bf8 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -445,27 +445,54 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
               "isUniqueDLT definition is broken");
 
 /// Bit manipulations for affine encoding.
+///
+/// Note that because the indices in the mappings refer to dimensions
+/// and levels (and *not* the sizes of these dimensions and levels), the
+/// 64-bit encoding gives ample room for a compact encoding of affine
+/// operations in the higher bits. Pure permutations still allow for
+/// 60-bit indices, whereas non-permutations reserve 20 bits for each
+/// of the potential three components (index i, constant, index ii).
+///
+/// The compact encoding is as follows:
+///
+///  0xffffffffffffffff
+/// |0000      |                        60-bit idx|  e.g. i + 2 * ii e.g. i
+/// |0001 floor|           20-bit const|20-bit idx|  e.g. i + 2 * ii e.g. i floor c
+/// |0010 mod  |           20-bit const|20-bit idx|  e.g. i + 2 * ii e.g. i mod c
+/// |0011 mul  |20-bit idx|20-bit const|20-bit idx|  e.g. i + 2 * ii
+///
+/// This encoding provides sufficient generality for currently supported
+/// sparse tensor types. To generalize this more, we will need to provide
+/// a broader encoding scheme for affine functions. Also, the library
+/// encoding may be replaced with pure "direct-IR" code in the future.
+///
 constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
-  assert(i <= 0xffffffffu && cf <= 0x3fffffffu && cm <= 0x3fffffffu);
-  if (cf != 0)
-    return (0x1L << 62) | (cf << 32) | i;
-  if (cm != 0)
-    return (0x2L << 62) | (cm << 32) | i;
+  if (cf != 0) {
+    assert(cf <= 0xfffff && cm == 0 && i <= 0xfffff);
+    return (0x01L << 60) | (cf << 20) | i;
+  }
+  if (cm != 0) {
+    assert(cf == 0 && cm <= 0xfffff i <= 0xfffff);
+    return (0x02L << 60) | (cm << 20) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
   return i;
 }
 constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
-  assert(i <= 0xffffffffu && c <= 0xffffu && ii <= 0x3fffu);
-  if (c != 0)
-    return (0x3L << 62) | (c << 32) | (ii << 48) | i;
+  if (c != 0) {
+    assert(c <= 0xfffff && ii <= 0xfffff && i <= 0xfffff);
+    return (0x03L << 60) | (c << 20) | (ii << 40) | i;
+  }
+  assert(i <= 0x0fffffffffffffffu);
   return i;
 }
-constexpr bool isEncodedFloor(uint64_t v) { return (v >> 62) == 0x01; }
-constexpr bool isEncodedMod(uint64_t v) { return (v >> 62) == 0x02; }
-constexpr bool isEncodedMul(uint64_t v) { return (v >> 62) == 0x03; }
-constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xffffffffu; }
-constexpr uint64_t decodeConst(uint64_t v) { return (v >> 32) & 0x3fffffffu; }
-constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 32) & 0xffffu; }
-constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 48) & 0x3fffu; }
+constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
+constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
+constexpr bool isEncodedMul(uint64_t v) { return (v >> 60) == 0x03; }
+constexpr uint64_t decodeIndex(uint64_t v)  { return v & 0xfffffu; }
+constexpr uint64_t decodeCons(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 40) & 0xfffffu; }
 
 } // namespace sparse_tensor
 } // namespace mlir
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 36e04d82824b260..5c09aa4e4b60c16 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -30,23 +30,6 @@ namespace sparse_tensor {
 /// The pushforward/backward operations are fast for (1) but incur some obvious
 /// overhead for situation (2).
 ///
-/// Note that because the indices in the mappings refer to dimensions
-/// and levels (and *not* the sizes of these dimensions and levels), the
-/// 64-bit encoding gives ample room for a compact encoding of affine
-/// operations in the higher 32-bit.
-///
-/// The compact encoding is as follows:
-///
-/// | 00       |                             main index |  e.g. i
-/// | 01 floor |       constant | 32-bit for main index |  e.g. i floor c
-/// | 10 mod   |       constant | 32-bit for main index |  e.g. i mod c
-/// | 11 mul   | index/constant | 32-bit for main index |  e.g. i + 2 * ii
-///
-/// This encoding provides sufficient generality for currently supported
-/// sparse tensor types. To generalize this more, we will need to provide
-/// a broader encoding scheme for affine functions. Also, the library
-/// encoding may be replaced with pure "direct-IR" code in the future.
-///
 class MapRef final {
 public:
   MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);

>From c511c4bfa0574897aab3ac3c59b8099b98bba271 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Wed, 18 Oct 2023 10:07:19 -0700
Subject: [PATCH 7/9] edit

---
 mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 991de4a4e406bf8..ddf9deef0471219 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -456,10 +456,10 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
 /// The compact encoding is as follows:
 ///
 ///  0xffffffffffffffff
-/// |0000      |                        60-bit idx|  e.g. i + 2 * ii e.g. i
-/// |0001 floor|           20-bit const|20-bit idx|  e.g. i + 2 * ii e.g. i floor c
-/// |0010 mod  |           20-bit const|20-bit idx|  e.g. i + 2 * ii e.g. i mod c
-/// |0011 mul  |20-bit idx|20-bit const|20-bit idx|  e.g. i + 2 * ii
+/// |0000      |                        60-bit idx| e.g. i
+/// |0001 floor|           20-bit const|20-bit idx| e.g. i floor c
+/// |0010 mod  |           20-bit const|20-bit idx| e.g. i mod c
+/// |0011 mul  |20-bit idx|20-bit const|20-bit idx| e.g. i + c * ii
 ///
 /// This encoding provides sufficient generality for currently supported
 /// sparse tensor types. To generalize this more, we will need to provide
@@ -472,7 +472,7 @@ constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
     return (0x01L << 60) | (cf << 20) | i;
   }
   if (cm != 0) {
-    assert(cf == 0 && cm <= 0xfffff i <= 0xfffff);
+    assert(cm <= 0xfffff && i <= 0xfffff);
     return (0x02L << 60) | (cm << 20) | i;
   }
   assert(i <= 0x0fffffffffffffffu);

>From ad747b22c5c7a6357f8f67a0b5c1a62f573911cf Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Wed, 18 Oct 2023 10:11:24 -0700
Subject: [PATCH 8/9] edit

---
 mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index ddf9deef0471219..2442232e2844a5e 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -490,7 +490,7 @@ constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
 constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
 constexpr bool isEncodedMul(uint64_t v) { return (v >> 60) == 0x03; }
 constexpr uint64_t decodeIndex(uint64_t v)  { return v & 0xfffffu; }
-constexpr uint64_t decodeCons(uint64_t v) { return (v >> 20) & 0xfffffu; }
+constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }
 constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 20) & 0xfffffu; }
 constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 40) & 0xfffffu; }
 

>From 8a4e24b31207a3cc90dc8ffd5be44d30c7a7d5df Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Wed, 18 Oct 2023 11:51:34 -0700
Subject: [PATCH 9/9] clang-format, of course

---
 mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 2442232e2844a5e..c65a27567d59d9a 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -489,7 +489,7 @@ constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
 constexpr bool isEncodedFloor(uint64_t v) { return (v >> 60) == 0x01; }
 constexpr bool isEncodedMod(uint64_t v) { return (v >> 60) == 0x02; }
 constexpr bool isEncodedMul(uint64_t v) { return (v >> 60) == 0x03; }
-constexpr uint64_t decodeIndex(uint64_t v)  { return v & 0xfffffu; }
+constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xfffffu; }
 constexpr uint64_t decodeConst(uint64_t v) { return (v >> 20) & 0xfffffu; }
 constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 20) & 0xfffffu; }
 constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 40) & 0xfffffu; }