[Mlir-commits] [mlir] 1d77bb9 - [mlir][sparse] template the memory resident coordinate scheme storage

Aart Bik llvmlistbot at llvm.org
Fri Jul 30 11:21:19 PDT 2021


Author: Aart Bik
Date: 2021-07-30T11:21:05-07:00
New Revision: 1d77bb9e1b82fe35688416ff0987dd9f82eb2d2d

URL: https://github.com/llvm/llvm-project/commit/1d77bb9e1b82fe35688416ff0987dd9f82eb2d2d
DIFF: https://github.com/llvm/llvm-project/commit/1d77bb9e1b82fe35688416ff0987dd9f82eb2d2d.diff

LOG: [mlir][sparse] template the memory resident coordinate scheme storage

Rationale:
External file formats always store the values as doubles, so this was
hard-coded in the memory-resident COO scheme used to pass data into the
final sparse storage scheme during setup. However, with alternative methods
for setting up these temporary COO schemes on the horizon, it is time to
properly template this data structure.

Reviewed By: gussmith23

Differential Revision: https://reviews.llvm.org/D107001
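
To illustrate the templated design described above, here is a minimal, self-contained sketch of a value-templated COO container in the spirit of the Element and SparseTensor structs touched by this patch. The names ElementSketch and SparseTensorCOO, the float instantiation, and the main() driver are illustrative only and are not part of the upstream code.

// Minimal sketch (not the MLIR runtime code itself): a coordinate-scheme
// container templated on the numerical value type V instead of hard-coding
// double, so other value types (e.g. float) can be staged the same way.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

template <typename V>
struct ElementSketch {
  ElementSketch(const std::vector<uint64_t> &ind, V val)
      : indices(ind), value(val) {}
  std::vector<uint64_t> indices;
  V value; // previously hard-coded as double
};

template <typename V>
struct SparseTensorCOO {
  explicit SparseTensorCOO(const std::vector<uint64_t> &szs) : sizes(szs) {}
  // Adds an element; the index vector must match the rank.
  void add(const std::vector<uint64_t> &ind, V val) {
    assert(ind.size() == sizes.size());
    elements.emplace_back(ind, val);
  }
  // Sorts elements lexicographically by index, as packed formats expect.
  void sort() {
    std::sort(elements.begin(), elements.end(),
              [](const ElementSketch<V> &a, const ElementSketch<V> &b) {
                return a.indices < b.indices; // lexicographic vector compare
              });
  }
  std::vector<uint64_t> sizes;
  std::vector<ElementSketch<V>> elements;
};

int main() {
  // A 2x3 sparse matrix with float values; with the old hard-coded scheme
  // only double was possible here.
  SparseTensorCOO<float> coo({2, 3});
  coo.add({1, 2}, 3.5f);
  coo.add({0, 1}, 1.5f);
  coo.sort();
  for (const auto &e : coo.elements)
    std::cout << "(" << e.indices[0] << "," << e.indices[1]
              << ") = " << e.value << "\n";
  return 0;
}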

Added: 
    

Modified: 
    mlir/lib/ExecutionEngine/SparseUtils.cpp

Removed: 
    


################################################################################
diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
index 95e861a2181dd..7e128a678df1c 100644
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -56,18 +56,19 @@ namespace {
 ///   ({i}, a[i])
 /// and a rank-5 tensor element like
 ///   ({i,j,k,l,m}, a[i,j,k,l,m])
+template <typename V>
 struct Element {
-  Element(const std::vector<uint64_t> &ind, double val)
-      : indices(ind), value(val){};
+  Element(const std::vector<uint64_t> &ind, V val) : indices(ind), value(val){};
   std::vector<uint64_t> indices;
-  double value;
+  V value;
 };
 
 /// A memory-resident sparse tensor in coordinate scheme (collection of
 /// elements). This data structure is used to read a sparse tensor from
-/// external file format into memory and sort the elements lexicographically
+/// any external format into memory and sort the elements lexicographically
 /// by indices before passing it back to the client (most packed storage
 /// formats require the elements to appear in lexicographic index order).
+template <typename V>
 struct SparseTensor {
 public:
   SparseTensor(const std::vector<uint64_t> &szs, uint64_t capacity)
@@ -75,26 +76,26 @@ struct SparseTensor {
     elements.reserve(capacity);
   }
   /// Adds element as indices and value.
-  void add(const std::vector<uint64_t> &ind, double val) {
+  void add(const std::vector<uint64_t> &ind, V val) {
     assert(getRank() == ind.size());
     for (int64_t r = 0, rank = getRank(); r < rank; r++)
       assert(ind[r] < sizes[r]); // within bounds
-    elements.emplace_back(Element(ind, val));
+    elements.emplace_back(Element<V>(ind, val));
   }
   /// Sorts elements lexicographically by index.
   void sort() { std::sort(elements.begin(), elements.end(), lexOrder); }
   /// Primitive one-time iteration.
-  const Element &next() { return elements[pos++]; }
+  const Element<V> &next() { return elements[pos++]; }
   /// Returns rank.
   uint64_t getRank() const { return sizes.size(); }
   /// Getter for sizes array.
   const std::vector<uint64_t> &getSizes() const { return sizes; }
   /// Getter for elements array.
-  const std::vector<Element> &getElements() const { return elements; }
+  const std::vector<Element<V>> &getElements() const { return elements; }
 
 private:
   /// Returns true if indices of e1 < indices of e2.
-  static bool lexOrder(const Element &e1, const Element &e2) {
+  static bool lexOrder(const Element<V> &e1, const Element<V> &e2) {
     assert(e1.indices.size() == e2.indices.size());
     for (int64_t r = 0, rank = e1.indices.size(); r < rank; r++) {
       if (e1.indices[r] == e2.indices[r])
@@ -104,7 +105,7 @@ struct SparseTensor {
     return false;
   }
   std::vector<uint64_t> sizes; // per-rank dimension sizes
-  std::vector<Element> elements;
+  std::vector<Element<V>> elements;
   uint64_t pos;
 };
 
@@ -150,12 +151,12 @@ class SparseTensorStorageBase {
 /// each differently annotated sparse tensor, this method provides a convenient
 /// "one-size-fits-all" solution that simply takes an input tensor and
 /// annotations to implement all required setup in a general manner.
-template <typename P, typename I, typename V>
+template <typename P, typename I, typename V, typename Ve>
 class SparseTensorStorage : public SparseTensorStorageBase {
 public:
   /// Constructs sparse tensor storage scheme following the given
   /// per-rank dimension dense/sparse annotations.
-  SparseTensorStorage(SparseTensor *tensor, uint8_t *sparsity)
+  SparseTensorStorage(SparseTensor<Ve> *tensor, uint8_t *sparsity)
       : sizes(tensor->getSizes()), pointers(getRank()), indices(getRank()) {
     // Provide hints on capacity.
     // TODO: needs fine-tuning based on sparsity
@@ -195,12 +196,12 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   /// representation of an external sparse tensor. This method prepares
   /// the pointers and indices arrays under the given per-rank dimension
   /// dense/sparse annotations.
-  void traverse(SparseTensor *tensor, uint8_t *sparsity, uint64_t lo,
+  void traverse(SparseTensor<Ve> *tensor, uint8_t *sparsity, uint64_t lo,
                 uint64_t hi, uint64_t d) {
-    const std::vector<Element> &elements = tensor->getElements();
+    const std::vector<Element<Ve>> &elements = tensor->getElements();
     // Once dimensions are exhausted, insert the numerical values.
     if (d == getRank()) {
-      values.push_back(lo < hi ? elements[lo].value : 0.0);
+      values.push_back(lo < hi ? elements[lo].value : 0);
       return;
     }
     // Prepare a sparse pointer structure at this dimension.
@@ -320,8 +321,9 @@ static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
 }
 
 /// Reads a sparse tensor with the given filename into a memory-resident
-/// sparse tensor in coordinate scheme.
-static SparseTensor *openTensor(char *filename, uint64_t *perm) {
+/// sparse tensor in coordinate scheme. The external formats always store
+/// the numerical values with the type double.
+static SparseTensor<double> *openTensor(char *filename, uint64_t *perm) {
   // Open the file.
   FILE *file = fopen(filename, "r");
   if (!file) {
@@ -345,7 +347,7 @@ static SparseTensor *openTensor(char *filename, uint64_t *perm) {
   std::vector<uint64_t> indices(rank);
   for (uint64_t r = 0; r < rank; r++)
     indices[perm[r]] = idata[2 + r];
-  SparseTensor *tensor = new SparseTensor(indices, nnz);
+  SparseTensor<double> *tensor = new SparseTensor<double>(indices, nnz);
   // Read all nonzero elements.
   for (uint64_t k = 0; k < nnz; k++) {
     uint64_t idx = -1;
@@ -374,10 +376,10 @@ static SparseTensor *openTensor(char *filename, uint64_t *perm) {
 template <typename P, typename I, typename V>
 void *newSparseTensor(char *filename, uint8_t *sparsity, uint64_t *perm,
                       uint64_t size) {
-  SparseTensor *t = openTensor(filename, perm);
+  SparseTensor<double> *t = openTensor(filename, perm);
   assert(size == t->getRank()); // sparsity array must match rank
   SparseTensorStorageBase *tensor =
-      new SparseTensorStorage<P, I, V>(t, sparsity);
+      new SparseTensorStorage<P, I, V, double>(t, sparsity);
   delete t;
   return tensor;
 }
@@ -521,8 +523,6 @@ void *newSparseTensor(char *filename, uint8_t *abase, uint8_t *adata,
   exit(1);
 }
 
-#undef CASE
-
 uint64_t sparseDimSize(void *tensor, uint64_t d) {
   return static_cast<SparseTensorStorageBase *>(tensor)->getDimSize(d);
 }
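
To make the role of the new fourth template parameter concrete: the readers above still build a SparseTensor<double>, since the external formats store doubles, while the values array in the final storage scheme has element type V, so each value is converted from double to V as it is inserted. The following standalone sketch, with the made-up names packValues and cooValuesDouble and a float target type, illustrates that conversion path under those assumptions.

// Sketch only: mirrors how SparseTensorStorage<P, I, V, Ve> with Ve = double
// copies double values read from a file into a values array of type V.
#include <cstdint>
#include <iostream>
#include <vector>

template <typename V>
std::vector<V> packValues(const std::vector<double> &cooValuesDouble) {
  std::vector<V> values;
  values.reserve(cooValuesDouble.size());
  for (double d : cooValuesDouble)
    values.push_back(static_cast<V>(d)); // double -> V conversion happens here
  return values;
}

int main() {
  std::vector<double> cooValuesDouble = {1.25, 2.5, 3.75};
  std::vector<float> values = packValues<float>(cooValuesDouble);
  for (float v : values)
    std::cout << v << "\n";
  return 0;
}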