[Mlir-commits] [mlir] 03fe15c - [mlir][sparse] speed up sparse tensor file I/O by more than 2x

Aart Bik llvmlistbot at llvm.org
Tue Dec 14 08:30:45 PST 2021


Author: Aart Bik
Date: 2021-12-14T08:30:31-08:00
New Revision: 03fe15cee6e2aa1444f955c86aadfed529353831

URL: https://github.com/llvm/llvm-project/commit/03fe15cee6e2aa1444f955c86aadfed529353831
DIFF: https://github.com/llvm/llvm-project/commit/03fe15cee6e2aa1444f955c86aadfed529353831.diff

LOG: [mlir][sparse] speed up sparse tensor file I/O by more than 2x

data point using the 3-dim tensor nell-2.tns

MLIR:
READ FILE INTO COO: 24424.369294 ms ---> improves to ----> 9638.501044 ms
SORT COO BEFORE PACK: 762.834831 ms
PACK COO TO TENSOR: 1243.376245 ms

TACO:
b file read: 13270.9 ms
b pack: 7137.74 ms
b size: (12092 x 9184 x 28818), 925300328 bytes

https://github.com/llvm/llvm-project/issues/52679

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D115696

Added: 
    

Modified: 
    mlir/lib/ExecutionEngine/SparseTensorUtils.cpp

Removed: 
    


################################################################################
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index 4cde9b1e050b6..364d49d37bdce 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -67,6 +67,8 @@
 
 namespace {
 
+static constexpr int kColWidth = 1025;
+
 /// A sparse tensor element in coordinate scheme (value and indices).
 /// For example, a rank-1 vector element would look like
 ///   ({i}, a[i])
@@ -297,7 +299,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   }
   void getValues(std::vector<V> **out) override { *out = &values; }
 
-  /// Partially specialize lexicographic insertions based on template types.
+  /// Partially specialize lexicographical insertions based on template types.
   void lexInsert(uint64_t *cursor, V val) override {
     // First, wrap up pending insertion path.
     uint64_t diff = 0;
@@ -547,9 +549,8 @@ static char *toLower(char *token) {
 }
 
 /// Read the MME header of a general sparse matrix of type real.
-static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
-                          bool *is_symmetric) {
-  char line[1025];
+static void readMMEHeader(FILE *file, char *filename, char *line,
+                          uint64_t *idata, bool *is_symmetric) {
   char header[64];
   char object[64];
   char format[64];
@@ -558,7 +559,7 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
   // Read header line.
   if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
              symmetry) != 5) {
-    fprintf(stderr, "Corrupt header in %s\n", name);
+    fprintf(stderr, "Corrupt header in %s\n", filename);
     exit(1);
   }
   *is_symmetric = (strcmp(toLower(symmetry), "symmetric") == 0);
@@ -568,13 +569,14 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
       strcmp(toLower(format), "coordinate") || strcmp(toLower(field), "real") ||
       (strcmp(toLower(symmetry), "general") && !(*is_symmetric))) {
     fprintf(stderr,
-            "Cannot find a general sparse matrix with type real in %s\n", name);
+            "Cannot find a general sparse matrix with type real in %s\n",
+            filename);
     exit(1);
   }
   // Skip comments.
   while (1) {
-    if (!fgets(line, 1025, file)) {
-      fprintf(stderr, "Cannot find data in %s\n", name);
+    if (!fgets(line, kColWidth, file)) {
+      fprintf(stderr, "Cannot find data in %s\n", filename);
       exit(1);
     }
     if (line[0] != '%')
@@ -584,7 +586,7 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
   idata[0] = 2; // rank
   if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
              idata + 1) != 3) {
-    fprintf(stderr, "Cannot find size in %s\n", name);
+    fprintf(stderr, "Cannot find size in %s\n", filename);
     exit(1);
   }
 }
@@ -593,12 +595,12 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
 /// format, we assume that the file starts with optional comments followed
 /// by two lines that define the rank, the number of nonzeros, and the
 /// dimensions sizes (one per rank) of the sparse tensor.
-static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
-  char line[1025];
+static void readExtFROSTTHeader(FILE *file, char *filename, char *line,
+                                uint64_t *idata) {
   // Skip comments.
   while (1) {
-    if (!fgets(line, 1025, file)) {
-      fprintf(stderr, "Cannot find data in %s\n", name);
+    if (!fgets(line, kColWidth, file)) {
+      fprintf(stderr, "Cannot find data in %s\n", filename);
       exit(1);
     }
     if (line[0] != '#')
@@ -606,16 +608,17 @@ static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
   }
   // Next line contains RANK and NNZ.
   if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) {
-    fprintf(stderr, "Cannot find metadata in %s\n", name);
+    fprintf(stderr, "Cannot find metadata in %s\n", filename);
     exit(1);
   }
   // Followed by a line with the dimension sizes (one per rank).
   for (uint64_t r = 0; r < idata[0]; r++) {
     if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) {
-      fprintf(stderr, "Cannot find dimension size %s\n", name);
+      fprintf(stderr, "Cannot find dimension size %s\n", filename);
       exit(1);
     }
   }
+  fgets(line, kColWidth, file); // end of line
 }
 
 /// Reads a sparse tensor with the given filename into a memory-resident
@@ -631,12 +634,13 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
     exit(1);
   }
   // Perform some file format dependent set up.
+  char line[kColWidth];
   uint64_t idata[512];
   bool is_symmetric = false;
   if (strstr(filename, ".mtx")) {
-    readMMEHeader(file, filename, idata, &is_symmetric);
+    readMMEHeader(file, filename, line, idata, &is_symmetric);
   } else if (strstr(filename, ".tns")) {
-    readExtFROSTTHeader(file, filename, idata);
+    readExtFROSTTHeader(file, filename, line, idata);
   } else {
     fprintf(stderr, "Unknown format %s\n", filename);
     exit(1);
@@ -653,22 +657,19 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
   //  Read all nonzero elements.
   std::vector<uint64_t> indices(rank);
   for (uint64_t k = 0; k < nnz; k++) {
-    uint64_t idx = -1u;
+    if (!fgets(line, kColWidth, file)) {
+      fprintf(stderr, "Cannot find next line of data in %s\n", filename);
+      exit(1);
+    }
+    char *linePtr = line;
     for (uint64_t r = 0; r < rank; r++) {
-      if (fscanf(file, "%" PRIu64, &idx) != 1) {
-        fprintf(stderr, "Cannot find next index in %s\n", filename);
-        exit(1);
-      }
+      uint64_t idx = strtoul(linePtr, &linePtr, 10);
       // Add 0-based index.
       indices[perm[r]] = idx - 1;
     }
     // The external formats always store the numerical values with the type
     // double, but we cast these values to the sparse tensor object type.
-    double value;
-    if (fscanf(file, "%lg\n", &value) != 1) {
-      fprintf(stderr, "Cannot find next value in %s\n", filename);
-      exit(1);
-    }
+    double value = strtod(linePtr, &linePtr);
     tensor->add(indices, value);
     // We currently chose to deal with symmetric matrices by fully constructing
     // them. In the future, we may want to make symmetry implicit for storage
@@ -972,7 +973,8 @@ IMPL_GETNEXT(getNextI32, int32_t)
 IMPL_GETNEXT(getNextI16, int16_t)
 IMPL_GETNEXT(getNextI8, int8_t)
 
-/// Helper to insert elements in lexicograph index order, one per value type.
+/// Helper to insert elements in lexicographical index order, one per value
+/// type.
 IMPL_LEXINSERT(lexInsertF64, double)
 IMPL_LEXINSERT(lexInsertF32, float)
 IMPL_LEXINSERT(lexInsertI64, int64_t)


        


More information about the Mlir-commits mailing list