[Mlir-commits] [mlir] 2754604 - [mlir][sparse] sparse runtime support library improvements

Aart Bik llvmlistbot at llvm.org
Wed Sep 1 16:51:28 PDT 2021


Author: Aart Bik
Date: 2021-09-01T16:51:14-07:00
New Revision: 2754604e5442d20540144c8c46a633792577a7a0

URL: https://github.com/llvm/llvm-project/commit/2754604e5442d20540144c8c46a633792577a7a0
DIFF: https://github.com/llvm/llvm-project/commit/2754604e5442d20540144c8c46a633792577a7a0.diff

LOG: [mlir][sparse] sparse runtime support library improvements

(1) renamed SparseTensor to SparseTensorCOO; the other class remains SparseTensorStorage, to emphasize the contrast between the two

(2) documents the difference between the public API intended exclusively for compiler-generated code and the methods that could be used by other runtimes (TBD) that want to interact with MLIR

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D109039

Added: 
    

Modified: 
    mlir/lib/ExecutionEngine/SparseUtils.cpp

Removed: 
    


################################################################################
diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
index 51195b03018ef..e39c06ae0ad6b 100644
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -34,10 +34,11 @@
 // The following memory-resident sparse storage schemes are supported:
 //
 // (a) A coordinate scheme for temporarily storing and lexicographically
-//     sorting a sparse tensor by index.
+//     sorting a sparse tensor by index (SparseTensorCOO).
 //
 // (b) A "one-size-fits-all" sparse tensor storage scheme defined by per-rank
-//     sparse/dense annnotations to be used by generated MLIR code.
+//     sparse/dense annotations together with a dimension ordering to be
+//     used by MLIR compiler-generated code (SparseTensorStorage).
 //
 // The following external formats are supported:
 //
@@ -47,6 +48,19 @@
 // (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
 //     http://frostt.io/tensors/file-formats.html
 //
+// Two public APIs are supported:
+//
+// (I) Methods operating on MLIR buffers (memrefs) to interact with sparse
+//     tensors. These methods should be used exclusively by MLIR
+//     compiler-generated code.
+//
+// (II) Methods that accept C-style data structures to interact with sparse
+//      tensors. These methods can be used by any external runtime that wants
+//      to interact with MLIR compiler-generated code.
+//
+// In both cases (I) and (II), the SparseTensorStorage format is externally
+// only visible as an opaque pointer.
+//
 //===----------------------------------------------------------------------===//
 
 namespace {
@@ -69,9 +83,9 @@ struct Element {
 /// by indices before passing it back to the client (most packed storage
 /// formats require the elements to appear in lexicographic index order).
 template <typename V>
-struct SparseTensor {
+struct SparseTensorCOO {
 public:
-  SparseTensor(const std::vector<uint64_t> &szs, uint64_t capacity)
+  SparseTensorCOO(const std::vector<uint64_t> &szs, uint64_t capacity)
       : sizes(szs), pos(0) {
     if (capacity)
       elements.reserve(capacity);
@@ -98,13 +112,13 @@ struct SparseTensor {
   /// the given ordering and expects subsequent add() calls to honor
   /// that same ordering for the given indices. The result is a
   /// fully permuted coordinate scheme.
-  static SparseTensor<V> *newSparseTensor(uint64_t size, uint64_t *sizes,
-                                          uint64_t *perm,
-                                          uint64_t capacity = 0) {
+  static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t size, uint64_t *sizes,
+                                                uint64_t *perm,
+                                                uint64_t capacity = 0) {
     std::vector<uint64_t> permsz(size);
     for (uint64_t r = 0; r < size; r++)
       permsz[perm[r]] = sizes[r];
-    return new SparseTensor<V>(permsz, capacity);
+    return new SparseTensorCOO<V>(permsz, capacity);
   }
 
 private:
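To illustrate the permutation logic in newSparseTensorCOO above, a minimal
standalone sketch (the concrete sizes and perm values are assumed for
illustration, not taken from the patch):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  int main() {
    uint64_t sizes[] = {3, 4, 5}; // dimension sizes in original order
    uint64_t perm[] = {2, 0, 1};  // dimension r is stored at position perm[r]
    std::vector<uint64_t> permsz(3);
    for (uint64_t r = 0; r < 3; r++)
      permsz[perm[r]] = sizes[r];
    // The sizes now appear in the permuted storage order: {4, 5, 3}.
    assert(permsz[0] == 4 && permsz[1] == 5 && permsz[2] == 3);
    return 0;
  }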
@@ -160,8 +174,7 @@ class SparseTensorStorageBase {
 
 /// A memory-resident sparse tensor using a storage scheme based on per-rank
 /// annotations on dense/sparse. This data structure provides a bufferized
-/// form of an imaginary SparseTensorType, until such a type becomes a
-/// first-class citizen of MLIR. In contrast to generating setup methods for
+/// form of a sparse tensor type. In contrast to generating setup methods for
 /// each differently annotated sparse tensor, this method provides a convenient
 /// "one-size-fits-all" solution that simply takes an input tensor and
 /// annotations to implement all required setup in a general manner.
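As a concrete illustration of the resulting layout (the matrix contents are
assumed for illustration, not taken from the patch): a 3x4 matrix with
nonzeros a at (0,1), b at (1,3), c at (2,0) and per-rank annotations
(dense, sparse) yields the familiar CSR arrays:

  pointers[1] = { 0, 1, 2, 3 }  // one nonzero per row
  indices[1]  = { 1, 3, 0 }     // column index of each nonzero
  values      = { a, b, c }     // in lexicographic (row, col) order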
@@ -171,7 +184,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Constructs a sparse tensor storage scheme from the given sparse
   /// tensor in coordinate scheme following the given per-rank dimension
   /// dense/sparse annotations.
-  SparseTensorStorage(SparseTensor<V> *tensor, uint8_t *sparsity,
+  SparseTensorStorage(SparseTensorCOO<V> *tensor, uint8_t *sparsity,
                       uint64_t *perm)
       : sizes(tensor->getSizes()), rev(getRank()), pointers(getRank()),
         indices(getRank()) {
@@ -217,14 +230,14 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 
   /// Returns this sparse tensor storage scheme as a new memory-resident
   /// sparse tensor in coordinate scheme with the given dimension order.
-  SparseTensor<V> *toCOO(uint64_t *perm) {
+  SparseTensorCOO<V> *toCOO(uint64_t *perm) {
     // Restore original order of the dimension sizes and allocate coordinate
     // scheme with desired new ordering specified in perm.
     uint64_t size = getRank();
     std::vector<uint64_t> orgsz(size);
     for (uint64_t r = 0; r < size; r++)
       orgsz[rev[r]] = sizes[r];
-    SparseTensor<V> *tensor = SparseTensor<V>::newSparseTensor(
+    SparseTensorCOO<V> *tensor = SparseTensorCOO<V>::newSparseTensorCOO(
         size, orgsz.data(), perm, values.size());
     // Populate coordinate scheme restored from old ordering and changed with
     // new ordering. Rather than applying both reorderings during the recursion,
@@ -240,7 +253,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Factory method. Expects a coordinate scheme that respects the same
   /// permutation as is desired for the new sparse storage scheme.
   static SparseTensorStorage<P, I, V> *
-  newSparseTensor(SparseTensor<V> *t, uint8_t *sparsity, uint64_t *perm) {
+  newSparseTensor(SparseTensorCOO<V> *t, uint8_t *sparsity, uint64_t *perm) {
     t->sort(); // sort lexicographically
     SparseTensorStorage<P, I, V> *n =
         new SparseTensorStorage<P, I, V>(t, sparsity, perm);
@@ -252,7 +265,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
   /// tensor in coordinate scheme. This method prepares the pointers and indices
   /// arrays under the given per-rank dimension dense/sparse annotations.
-  void fromCOO(SparseTensor<V> *tensor, uint8_t *sparsity, uint64_t lo,
+  void fromCOO(SparseTensorCOO<V> *tensor, uint8_t *sparsity, uint64_t lo,
                uint64_t hi, uint64_t d) {
     const std::vector<Element<V>> &elements = tensor->getElements();
     // Once dimensions are exhausted, insert the numerical values.
@@ -296,7 +309,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 
   /// Stores the sparse tensor storage scheme into a memory-resident sparse
   /// tensor in coordinate scheme.
-  void toCOO(SparseTensor<V> *tensor, std::vector<uint64_t> &reord,
+  void toCOO(SparseTensorCOO<V> *tensor, std::vector<uint64_t> &reord,
              std::vector<uint64_t> &idx, uint64_t pos, uint64_t d) {
     if (d == getRank()) {
       tensor->add(idx, values[pos]);
@@ -403,8 +416,9 @@ static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
 /// Reads a sparse tensor with the given filename into a memory-resident
 /// sparse tensor in coordinate scheme.
 template <typename V>
-static SparseTensor<V> *openTensor(char *filename, uint64_t size,
-                                   uint64_t *sizes, uint64_t *perm) {
+static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t size,
+                                               uint64_t *sizes,
+                                               uint64_t *perm) {
   // Open the file.
   FILE *file = fopen(filename, "r");
   if (!file) {
@@ -428,8 +442,8 @@ static SparseTensor<V> *openTensor(char *filename, uint64_t size,
   for (uint64_t r = 0; r < size; r++)
     assert((sizes[r] == 0 || sizes[r] == idata[2 + r]) &&
            "dimension size mismatch");
-  SparseTensor<V> *tensor =
-      SparseTensor<V>::newSparseTensor(size, idata + 2, perm, nnz);
+  SparseTensorCOO<V> *tensor =
+      SparseTensorCOO<V>::newSparseTensorCOO(size, idata + 2, perm, nnz);
   //  Read all nonzero elements.
   std::vector<uint64_t> indices(size);
   for (uint64_t k = 0; k < nnz; k++) {
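For concreteness, a tiny hand-written tensor file in the extended FROSTT
layout this reader expects (illustrative content; indices are one-based, the
first non-comment line gives rank and nnz, the next gives dimension sizes):

  # extended FROSTT format
  2 3
  3 4
  1 2 1.5
  2 4 2.5
  3 1 3.0

This encodes the same sparsity pattern as the CSR illustration earlier.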
@@ -471,13 +485,13 @@ char *getTensorFilename(uint64_t id) {
 
 //===----------------------------------------------------------------------===//
 //
-// Public API of the sparse runtime support library that support an opaque
-// implementation of a bufferized SparseTensor in MLIR. This could be replaced
-// by actual codegen in MLIR.
+// Public API with methods that operate on MLIR buffers (memrefs) to interact
+// with sparse tensors, which are only visible as opaque pointers externally.
+// These methods should be used exclusively by MLIR compiler-generated code.
 //
 // Because we cannot use C++ templates with C linkage, some macro magic is used
 // to generate implementations for all required type combinations that can be
-// called from MLIR generated code.
+// called from MLIR compiler-generated code.
 //
 //===----------------------------------------------------------------------===//
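The pattern behind that macro magic, as a minimal self-contained sketch
(hypothetical names, not the patch's actual macros): a C++ template does the
type-dependent work, and a macro stamps out one C-linkage entry point per
value type, since templates themselves cannot be given C linkage:

  // The template does the real, type-dependent work.
  template <typename V>
  static V firstValue(void *data) { return static_cast<V *>(data)[0]; }

  extern "C" {
  #define STAMP_FIRST(NAME, V)                                                 \
    V NAME(void *data) { return firstValue<V>(data); }
  STAMP_FIRST(firstValueF64, double) // C entry point for double values
  STAMP_FIRST(firstValueF32, float)  // C entry point for float values
  #undef STAMP_FIRST
  } // extern "C"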
 
@@ -492,13 +506,14 @@ char *getTensorFilename(uint64_t id) {
 
 #define CASE(p, i, v, P, I, V)                                                 \
   if (ptrTp == (p) && indTp == (i) && valTp == (v)) {                          \
-    SparseTensor<V> *tensor;                                                   \
+    SparseTensorCOO<V> *tensor;                                                \
     if (action == 0)                                                           \
-      tensor = openTensor<V>(static_cast<char *>(ptr), asize, sizes, perm);    \
+      tensor = openSparseTensorCOO<V>(static_cast<char *>(ptr), asize, sizes,  \
+                                      perm);                                   \
     else if (action == 1)                                                      \
-      tensor = static_cast<SparseTensor<V> *>(ptr);                            \
+      tensor = static_cast<SparseTensorCOO<V> *>(ptr);                         \
     else if (action == 2)                                                      \
-      return SparseTensor<V>::newSparseTensor(asize, sizes, perm);             \
+      return SparseTensorCOO<V>::newSparseTensorCOO(asize, sizes, perm);       \
     else                                                                       \
       return static_cast<SparseTensorStorage<P, I, V> *>(ptr)->toCOO(perm);    \
     return SparseTensorStorage<P, I, V>::newSparseTensor(tensor, sparsity,     \
@@ -532,7 +547,7 @@ char *getTensorFilename(uint64_t id) {
     std::vector<uint64_t> indices(isize);                                      \
     for (uint64_t r = 0; r < isize; r++)                                       \
       indices[perm[r]] = indx[r];                                              \
-    static_cast<SparseTensor<TYPE> *>(tensor)->add(indices, value);            \
+    static_cast<SparseTensorCOO<TYPE> *>(tensor)->add(indices, value);         \
     return tensor;                                                             \
   }
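For reference, the dispatch encoded in the CASE macro above: action == 0
reads a tensor from file into a coordinate scheme, action == 1 adopts a
coordinate scheme already passed in through ptr, action == 2 returns a fresh
empty coordinate scheme, and any other action converts an existing
SparseTensorStorage back to a coordinate scheme via toCOO(perm); in the first
two cases the coordinate scheme is then packed into a new SparseTensorStorage.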
 
@@ -634,11 +649,6 @@ void *newSparseTensor(uint8_t *abase, uint8_t *adata, uint64_t aoff,
   exit(1);
 }
 
-/// Returns size of sparse tensor in given dimension.
-uint64_t sparseDimSize(void *tensor, uint64_t d) {
-  return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
-}
-
 /// Methods that provide direct access to pointers, indices, and values.
 IMPL2(MemRef1DU64, sparsePointers, uint64_t, getPointers)
 IMPL2(MemRef1DU64, sparsePointers64, uint64_t, getPointers)
@@ -657,11 +667,6 @@ IMPL1(MemRef1DI32, sparseValuesI32, int32_t, getValues)
 IMPL1(MemRef1DI16, sparseValuesI16, int16_t, getValues)
 IMPL1(MemRef1DI8, sparseValuesI8, int8_t, getValues)
 
-/// Releases sparse tensor storage.
-void delSparseTensor(void *tensor) {
-  delete static_cast<SparseTensorStorageBase *>(tensor);
-}
-
 /// Helper to add value to coordinate scheme, one per value type.
 IMPL3(addEltF64, double)
 IMPL3(addEltF32, float)
@@ -676,6 +681,27 @@ IMPL3(addEltI8, int8_t)
 #undef IMPL2
 #undef IMPL3
 
+//===----------------------------------------------------------------------===//
+//
+// Public API with methods that accept C-style data structures to interact
+// with sparse tensors, which are only visible as opaque pointers externally.
+// These methods can be used by any external runtime that wants to interact
+// with MLIR compiler-generated code.
+//
+//===----------------------------------------------------------------------===//
+
+// TODO: setup sparse tensor from C style format
+
+/// Returns size of sparse tensor in given dimension.
+uint64_t sparseDimSize(void *tensor, uint64_t d) {
+  return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
+}
+
+/// Releases sparse tensor storage.
+void delSparseTensor(void *tensor) {
+  delete static_cast<SparseTensorStorageBase *>(tensor);
+}
+
 } // extern "C"
 
 #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
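To illustrate public API (II), a hedged sketch of how an external runtime
might consume the opaque pointer (the helper and the caller-supplied rank are
assumptions for illustration; only sparseDimSize and delSparseTensor come
from the patch):

  #include <cinttypes>
  #include <cstdint>
  #include <cstdio>

  extern "C" uint64_t sparseDimSize(void *tensor, uint64_t d);
  extern "C" void delSparseTensor(void *tensor);

  // Query all dimension sizes of an opaque sparse tensor, then release it.
  void inspectAndRelease(void *tensor, uint64_t rank) {
    for (uint64_t d = 0; d < rank; d++)
      std::printf("dim %" PRIu64 " = %" PRIu64 "\n", d,
                  sparseDimSize(tensor, d));
    delSparseTensor(tensor);
  }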

