[Mlir-commits] [mlir] [mlir][sparse] refactor dim2lvl/lvl2dim passing into MapRef (PR #68649)

Mon Oct 9 17:58:30 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-mlir-sparse

Author: Aart Bik (aartbik)

<details>
<summary>Changes</summary>

This revision refactors all "swiss army knife" entry points to pass both the dim2lvl and lvl2dim mappings, so that the callee can construct a MapRef (shown here for the SparseTensorStorage class). This is the next step towards completely centralizing the mapping code into a single MapRef class.

---

Patch is 32.79 KiB, truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/68649.diff


4 Files Affected:

- (modified) mlir/include/mlir/ExecutionEngine/SparseTensor/File.h (+1-1) 
- (modified) mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h (+84-125) 
- (modified) mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp (+5-6) 
- (modified) mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp (+9-12) 


``````````diff

diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
index 9157bfa7e773239..efc3f82d6a307ea 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -203,7 +203,7 @@ class SparseTensorReader final {
     MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
     auto *coo = readCOO<V>(map, lvlSizes);
     auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
-        dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo);
+        dimRank, getDimSizes(), lvlRank, lvlTypes, dim2lvl, lvl2dim, *coo);
     delete coo;
     return tensor;
   }
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 0407bccaae8790c..303a41bc471d5d9 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -24,13 +24,8 @@
 #include "mlir/ExecutionEngine/SparseTensor/ArithmeticUtils.h"
 #include "mlir/ExecutionEngine/SparseTensor/COO.h"
 #include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h"
+#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
 
-#define ASSERT_VALID_DIM(d)                                                    \
-  assert(d < getDimRank() && "Dimension is out of bounds");
-#define ASSERT_VALID_LVL(l)                                                    \
-  assert(l < getLvlRank() && "Level is out of bounds");
-#define ASSERT_COMPRESSED_LVL(l)                                               \
-  assert(isCompressedLvl(l) && "Level is not compressed");
 #define ASSERT_COMPRESSED_OR_SINGLETON_LVL(l)                                  \
   do {                                                                         \
     const DimLevelType dlt = getLvlType(l);                                    \
@@ -49,9 +44,9 @@ class SparseTensorEnumeratorBase;
 template <typename P, typename C, typename V>
 class SparseTensorEnumerator;
 
-/// Abstract base class for `SparseTensorStorage<P,C,V>`.  This class
+/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
 /// takes responsibility for all the `<P,C,V>`-independent aspects
-/// of the tensor (e.g., shape, sparsity, permutation).  In addition,
+/// of the tensor (e.g., shape, sparsity, mapping). In addition,
 /// we use function overloading to implement "partial" method
 /// specialization, which the C-API relies on to catch type errors
 /// arising from our use of opaque pointers.
@@ -62,24 +57,20 @@ class SparseTensorEnumerator;
 /// coordinate spaces (and their associated rank, shape, sizes, etc).
 /// Denotationally, we have the *dimensions* of the tensor represented
 /// by this object.  Operationally, we have the *levels* of the storage
-/// representation itself.  We use this "dimension" vs "level" terminology
-/// throughout, since alternative terminology like "tensor-dimension",
-/// "original-dimension", "storage-dimension", etc, is both more verbose
-/// and prone to introduce confusion whenever the qualifiers are dropped.
-/// Where necessary, we use "axis" as the generic term.
+/// representation itself.
 ///
 /// The *size* of an axis is the cardinality of possible coordinate
 /// values along that axis (regardless of which coordinates have stored
-/// element values).  As such, each size must be non-zero since if any
+/// element values). As such, each size must be non-zero since if any
 /// axis has size-zero then the whole tensor would have trivial storage
-/// (since there are no possible coordinates).  Thus we use the plural
+/// (since there are no possible coordinates). Thus we use the plural
 /// term *sizes* for a collection of non-zero cardinalities, and use
-/// this term whenever referring to run-time cardinalities.  Whereas we
+/// this term whenever referring to run-time cardinalities. Whereas we
 /// use the term *shape* for a collection of compile-time cardinalities,
 /// where zero is used to indicate cardinalities which are dynamic (i.e.,
-/// unknown/unspecified at compile-time).  At run-time, these dynamic
+/// unknown/unspecified at compile-time). At run-time, these dynamic
 /// cardinalities will be inferred from or checked against sizes otherwise
-/// specified.  Thus, dynamic cardinalities always have an "immutable but
+/// specified. Thus, dynamic cardinalities always have an "immutable but
 /// unknown" value; so the term "dynamic" should not be taken to indicate
 /// run-time mutability.
 class SparseTensorStorageBase {
@@ -89,25 +80,10 @@ class SparseTensorStorageBase {
 
 public:
   /// Constructs a new sparse-tensor storage object with the given encoding.
-  ///
-  /// Preconditions:
-  /// * `dimSizes`, `lvlSizes`, `lvlTypes`, and `lvl2dim` must be nonnull.
-  /// * `dimSizes` must be valid for `dimRank`.
-  /// * `lvlSizes`, `lvlTypes`, and `lvl2dim` must be valid for `lvlRank`.
-  /// * `lvl2dim` must map `lvlSizes`-coordinates to `dimSizes`-coordinates.
-  ///
-  /// Asserts:
-  /// * `dimRank` and `lvlRank` are nonzero.
-  /// * `dimSizes` and `lvlSizes` contain only nonzero sizes.
   SparseTensorStorageBase(uint64_t dimRank, const uint64_t *dimSizes,
                           uint64_t lvlRank, const uint64_t *lvlSizes,
-                          const DimLevelType *lvlTypes,
+                          const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
                           const uint64_t *lvl2dim);
-  // NOTE: For the most part we only need the `dimRank`.  But we need
-  // `dimSizes` for `toCOO` to support the identity permutation nicely
-  // (i.e., without the caller needing to already know the tensor's
-  // dimension-sizes; e.g., as in `fromMLIRSparseTensor`).
-
   virtual ~SparseTensorStorageBase() = default;
 
   /// Gets the number of tensor-dimensions.
@@ -121,7 +97,7 @@ class SparseTensorStorageBase {
 
   /// Safely looks up the size of the given tensor-dimension.
   uint64_t getDimSize(uint64_t d) const {
-    ASSERT_VALID_DIM(d);
+    assert(d < getDimRank() && "Dimension is out of bounds");
     return dimSizes[d];
   }
 
@@ -130,19 +106,16 @@ class SparseTensorStorageBase {
 
   /// Safely looks up the size of the given storage-level.
   uint64_t getLvlSize(uint64_t l) const {
-    ASSERT_VALID_LVL(l);
+    assert(l < getLvlRank() && "Level is out of bounds");
     return lvlSizes[l];
   }
 
-  /// Gets the level-to-dimension mapping.
-  const std::vector<uint64_t> &getLvl2Dim() const { return lvl2dim; }
-
   /// Gets the level-types array.
   const std::vector<DimLevelType> &getLvlTypes() const { return lvlTypes; }
 
   /// Safely looks up the type of the given level.
   DimLevelType getLvlType(uint64_t l) const {
-    ASSERT_VALID_LVL(l);
+    assert(l < getLvlRank() && "Level is out of bounds");
     return lvlTypes[l];
   }
 
@@ -165,6 +138,10 @@ class SparseTensorStorageBase {
   /// Safely checks if the level is unique.
   bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); }
 
+  /// Gets the level-to-dimension mapping.
+  // TODO: REMOVE THIS
+  const std::vector<uint64_t> &getLvl2Dim() const { return lvl2dimVec; }
+
   /// Allocates a new enumerator.  Callers must make sure to delete
   /// the enumerator when they're done with it. The first argument
   /// is the out-parameter for storing the newly allocated enumerator;
@@ -228,12 +205,14 @@ class SparseTensorStorageBase {
   const std::vector<uint64_t> dimSizes;
   const std::vector<uint64_t> lvlSizes;
   const std::vector<DimLevelType> lvlTypes;
-  const std::vector<uint64_t> lvl2dim;
+  const std::vector<uint64_t> dim2lvlVec;
+  const std::vector<uint64_t> lvl2dimVec;
+  const MapRef map; // non-owning pointers into dim2lvl/lvl2dim vectors
 };
 
 /// A memory-resident sparse tensor using a storage scheme based on
-/// per-level sparse/dense annotations.  This data structure provides
-/// a bufferized form of a sparse tensor type.  In contrast to generating
+/// per-level sparse/dense annotations. This data structure provides
+/// a bufferized form of a sparse tensor type. In contrast to generating
 /// setup methods for each differently annotated sparse tensor, this
 /// method provides a convenient "one-size-fits-all" solution that simply
 /// takes an input tensor and annotations to implement all required setup
@@ -244,58 +223,45 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Beware that the object is not necessarily guaranteed to be in a
   /// valid state after this constructor alone; e.g., `isCompressedLvl(l)`
   /// doesn't entail `!(positions[l].empty())`.
-  ///
-  /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
-                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim)
+                      const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+                      const uint64_t *lvl2dim)
       : SparseTensorStorageBase(dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes,
-                                lvl2dim),
+                                dim2lvl, lvl2dim),
         positions(lvlRank), coordinates(lvlRank), lvlCursor(lvlRank) {}
 
 public:
   /// Constructs a sparse tensor with the given encoding, and allocates
-  /// overhead storage according to some simple heuristics.  When the
+  /// overhead storage according to some simple heuristics. When the
   /// `bool` argument is true and `lvlTypes` are all dense, then this
-  /// ctor will also initialize the values array with zeros.  That
+  /// ctor will also initialize the values array with zeros. That
   /// argument should be true when an empty tensor is intended; whereas
   /// it should usually be false when the ctor will be followed up by
   /// some other form of initialization.
-  ///
-  /// Preconditions/assertions are as per the `SparseTensorStorageBase` ctor.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
-                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-                      bool initializeValuesIfAllDense);
+                      const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+                      const uint64_t *lvl2dim, bool initializeValuesIfAllDense);
 
   /// Constructs a sparse tensor with the given encoding, and initializes
-  /// the contents from the COO.  This ctor performs the same heuristic
-  /// overhead-storage allocation as the ctor taking a `bool`, and
-  /// has the same preconditions/assertions (where we define `lvlSizes =
-  /// lvlCOO.getDimSizes().data()`), with the following addition:
-  ///
-  /// Asserts:
-  /// * `lvlRank == lvlCOO.getRank()`.
+  /// the contents from the COO. This ctor performs the same heuristic
+  /// overhead-storage allocation as the ctor taking a `bool`.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const DimLevelType *lvlTypes,
-                      const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO);
+                      const uint64_t *dim2lvl, const uint64_t *lvl2dim,
+                      SparseTensorCOO<V> &lvlCOO);
 
   /// Constructs a sparse tensor with the given encoding, and initializes
-  /// the contents from the enumerator.  This ctor allocates exactly
+  /// the contents from the enumerator. This ctor allocates exactly
   /// the required amount of overhead storage, not using any heuristics.
-  /// Preconditions/assertions are as per the `SparseTensorStorageBase`
-  /// ctor (where we define `lvlSizes = lvlEnumerator.getTrgSizes().data()`),
-  /// with the following addition:
-  ///
-  /// Asserts:
-  /// * `lvlRank == lvlEnumerator.getTrgRank()`.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const DimLevelType *lvlTypes,
-                      const uint64_t *lvl2dim,
+                      const uint64_t *dim2lvl, const uint64_t *lvl2dim,
                       SparseTensorEnumeratorBase<V> &lvlEnumerator);
 
   /// Constructs a sparse tensor with the given encoding, and initializes
-  /// the contents from the level buffers.  This ctor allocates exactly
+  /// the contents from the level buffers. This ctor allocates exactly
   /// the required amount of overhead storage, not using any heuristics.
   /// It assumes that the data provided by `lvlBufs` can be directly used to
   /// interpret the result sparse tensor and performs *NO* integrity test on the
@@ -303,8 +269,8 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// passed in as a single AoS memory.
   SparseTensorStorage(uint64_t dimRank, const uint64_t *dimSizes,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
-                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-                      const intptr_t *lvlBufs);
+                      const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+                      const uint64_t *lvl2dim, const intptr_t *lvlBufs);
 
   /// Allocates a new empty sparse tensor. The preconditions/assertions
   /// are as per the `SparseTensorStorageBase` ctor; which is to say,
@@ -313,21 +279,15 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   static SparseTensorStorage<P, C, V> *
   newEmpty(uint64_t dimRank, const uint64_t *dimSizes, uint64_t lvlRank,
            const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
-           const uint64_t *lvl2dim) {
-    return new SparseTensorStorage<P, C, V>(dimRank, dimSizes, lvlRank,
-                                            lvlSizes, lvlTypes, lvl2dim, true);
+           const uint64_t *dim2lvl, const uint64_t *lvl2dim) {
+    return new SparseTensorStorage<P, C, V>(
+        dimRank, dimSizes, lvlRank, lvlSizes, lvlTypes, dim2lvl, lvl2dim, true);
   }
 
   /// Allocates a new sparse tensor and initializes it from the given COO.
   /// The preconditions are as per the `SparseTensorStorageBase` ctor
   /// (where we define `lvlSizes = lvlCOO.getDimSizes().data()`), but
   /// using the following assertions in lieu of the base ctor's assertions:
-  ///
-  /// Asserts:
-  /// * `dimRank` and `lvlRank` are nonzero.
-  /// * `lvlRank == lvlCOO.getRank()`.
-  /// * `lvlCOO.getDimSizes()` under the `lvl2dim` mapping is a refinement
-  ///   of `dimShape`.
   //
   // TODO: The ability to reconstruct dynamic dimensions-sizes does not
   // easily generalize to arbitrary `lvl2dim` mappings.  When compiling
@@ -338,8 +298,8 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   // to update the type/preconditions of this factory too.
   static SparseTensorStorage<P, C, V> *
   newFromCOO(uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
-             const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-             SparseTensorCOO<V> &lvlCOO);
+             const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+             const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO);
 
   /// Allocates a new sparse tensor and initializes it with the contents
   /// of another sparse tensor.
@@ -370,8 +330,9 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   static SparseTensorStorage<P, C, V> *
   newFromSparseTensor(uint64_t dimRank, const uint64_t *dimShape,
                       uint64_t lvlRank, const uint64_t *lvlSizes,
-                      const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-                      uint64_t srcRank, const uint64_t *src2lvl,
+                      const DimLevelType *lvlTypes,
+                      const uint64_t *src2lvl, // FIXME: dim2lvl,
+                      const uint64_t *lvl2dim, uint64_t srcRank,
                       const SparseTensorStorageBase &source);
 
   /// Allocates a new sparse tensor and initialize it with the data stored level
@@ -380,24 +341,23 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// Precondition:
   /// * as per the `SparseTensorStorageBase` ctor.
   /// * the data integrity stored in `buffers` is guaranteed by users already.
-  static SparseTensorStorage<P, C, V> *
-  packFromLvlBuffers(uint64_t dimRank, const uint64_t *dimShape,
-                     uint64_t lvlRank, const uint64_t *lvlSizes,
-                     const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-                     uint64_t srcRank, const uint64_t *src2lvl,
-                     const intptr_t *buffers);
+  static SparseTensorStorage<P, C, V> *packFromLvlBuffers(
+      uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
+      const uint64_t *lvlSizes, const DimLevelType *lvlTypes,
+      const uint64_t *src2lvl, // FIXME: dim2lvl
+      const uint64_t *lvl2dim, uint64_t srcRank, const intptr_t *buffers);
 
   ~SparseTensorStorage() final = default;
 
   /// Partially specialize these getter methods based on template types.
   void getPositions(std::vector<P> **out, uint64_t lvl) final {
     assert(out && "Received nullptr for out parameter");
-    ASSERT_VALID_LVL(lvl);
+    assert(lvl < getLvlRank() && "Level is out of bounds");
     *out = &positions[lvl];
   }
   void getCoordinates(std::vector<C> **out, uint64_t lvl) final {
     assert(out && "Received nullptr for out parameter");
-    ASSERT_VALID_LVL(lvl);
+    assert(lvl < getLvlRank() && "Level is out of bounds");
     *out = &coordinates[lvl];
   }
   void getValues(std::vector<V> **out) final {
@@ -477,12 +437,12 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
 
   /// Allocates a new COO object and initializes it with the contents
   /// of this tensor under the given mapping from the `getDimSizes()`
-  /// coordinate-space to the `trgSizes` coordinate-space.  Callers must
+  /// coordinate-space to the `trgSizes` coordinate-space. Callers must
   /// make sure to delete the COO when they're done with it.
-  ///
-  /// Preconditions/assertions are as per the `SparseTensorEnumerator` ctor.
   SparseTensorCOO<V> *toCOO(uint64_t trgRank, const uint64_t *trgSizes,
-                            uint64_t srcRank, const uint64_t *src2trg) const {
+                            uint64_t srcRank,
+                            const uint64_t *src2trg, // FIXME: dim2lvl
+                            const uint64_t *lvl2dim) const {
     // We inline `newEnumerator` to avoid virtual dispatch and allocation.
     // TODO: use MapRef here too for the translation
     SparseTensorEnumerator<P, C, V> enumerator(*this, trgRank, trgSizes,
@@ -503,7 +463,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
   /// does not check that `pos` is semantically valid (i.e., larger than
   /// the previous position and smaller than `coordinates[lvl].capacity()`).
   void appendPos(uint64_t lvl, uint64_t pos, uint64_t count = 1) {
-    ASSERT_COMPRESSED_LVL(lvl);
+    assert(isCompressedLvl(lvl) && "Level is not compressed");
     positions[lvl].insert(positions[lvl].end(), count,
                           detail::checkOverflowCast<P>(pos));
   }
@@ -689,9 +649,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
 };
 
 #undef ASSERT_COMPRESSED_OR_SINGLETON_LVL
-#undef ASSERT_COMPRESSED_LVL
-#undef ASSERT_VALID_LVL
-#undef ASSERT_VALID_DIM
 
 //===----------------------------------------------------------------------===//
 /// A (higher-order) function object for enumerating the elements of some
@@ -934,11 +891,12 @@ class SparseTensorNNZ final {
 //===----------------------------------------------------------------------===//
 // Definitions of the ctors and factories of `SparseTensorStorage<P,C,V>`.
 
+// TODO: MapRef
 template <typename P, typename C, typename V>
 SparseTensorStorage<P, C, V> *SparseTensorStorage<P, C, V>::newFromCOO(
     uint64_t dimRank, const uint64_t *dimShape, uint64_t lvlRank,
-    const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
-    SparseTensorCOO<V> &lvlCOO) {
+    const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+    const uint64_t *lvl2dim, SparseTensorCOO<V> &lvlCOO) {
   assert(dimShape && "Got nullptr for dimension shape");
   assert(lvl2dim && "Got nullptr for level-to-dimension mapping");
   const auto &lvlSizes = lvlCOO.getDimSizes();
@@ -955,14 +913,15 @@ SparseTensorStorage<P, C, V> *Sparse...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/68649