[Mlir-commits] [mlir] 03fe15c - [mlir][sparse] speed up sparse tensor file I/O by more than 2x
Aart Bik
llvmlistbot at llvm.org
Tue Dec 14 08:30:45 PST 2021
Author: Aart Bik
Date: 2021-12-14T08:30:31-08:00
New Revision: 03fe15cee6e2aa1444f955c86aadfed529353831
URL: https://github.com/llvm/llvm-project/commit/03fe15cee6e2aa1444f955c86aadfed529353831
DIFF: https://github.com/llvm/llvm-project/commit/03fe15cee6e2aa1444f955c86aadfed529353831.diff
LOG: [mlir][sparse] speed up sparse tensor file I/O by more than 2x
data point using the 3-dim tensor nell-2.tns
MLIR:
READ FILE INTO COO: 24424.369294 ms ---> improves to ----> 9638.501044 ms
SORT COO BEFORE PACK: 762.834831 ms
PACK COO TO TENSOR: 1243.376245 ms
TACO:
b file read: 13270.9 ms
b pack: 7137.74 ms
b size: (12092 x 9184 x 28818), 925300328 bytes
https://github.com/llvm/llvm-project/issues/52679
Reviewed By: bixia
Differential Revision: https://reviews.llvm.org/D115696
Added:
Modified:
mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
Removed:
################################################################################
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index 4cde9b1e050b6..364d49d37bdce 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -67,6 +67,8 @@
namespace {
+static constexpr int kColWidth = 1025;
+
/// A sparse tensor element in coordinate scheme (value and indices).
/// For example, a rank-1 vector element would look like
/// ({i}, a[i])
@@ -297,7 +299,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
}
void getValues(std::vector<V> **out) override { *out = &values; }
- /// Partially specialize lexicographic insertions based on template types.
+ /// Partially specialize lexicographical insertions based on template types.
void lexInsert(uint64_t *cursor, V val) override {
// First, wrap up pending insertion path.
uint64_t diff = 0;
@@ -547,9 +549,8 @@ static char *toLower(char *token) {
}
/// Read the MME header of a general sparse matrix of type real.
-static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
- bool *is_symmetric) {
- char line[1025];
+static void readMMEHeader(FILE *file, char *filename, char *line,
+ uint64_t *idata, bool *is_symmetric) {
char header[64];
char object[64];
char format[64];
@@ -558,7 +559,7 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
// Read header line.
if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
symmetry) != 5) {
- fprintf(stderr, "Corrupt header in %s\n", name);
+ fprintf(stderr, "Corrupt header in %s\n", filename);
exit(1);
}
*is_symmetric = (strcmp(toLower(symmetry), "symmetric") == 0);
@@ -568,13 +569,14 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
strcmp(toLower(format), "coordinate") || strcmp(toLower(field), "real") ||
(strcmp(toLower(symmetry), "general") && !(*is_symmetric))) {
fprintf(stderr,
- "Cannot find a general sparse matrix with type real in %s\n", name);
+ "Cannot find a general sparse matrix with type real in %s\n",
+ filename);
exit(1);
}
// Skip comments.
while (1) {
- if (!fgets(line, 1025, file)) {
- fprintf(stderr, "Cannot find data in %s\n", name);
+ if (!fgets(line, kColWidth, file)) {
+ fprintf(stderr, "Cannot find data in %s\n", filename);
exit(1);
}
if (line[0] != '%')
@@ -584,7 +586,7 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
idata[0] = 2; // rank
if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
idata + 1) != 3) {
- fprintf(stderr, "Cannot find size in %s\n", name);
+ fprintf(stderr, "Cannot find size in %s\n", filename);
exit(1);
}
}
@@ -593,12 +595,12 @@ static void readMMEHeader(FILE *file, char *name, uint64_t *idata,
/// format, we assume that the file starts with optional comments followed
/// by two lines that define the rank, the number of nonzeros, and the
/// dimensions sizes (one per rank) of the sparse tensor.
-static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
- char line[1025];
+static void readExtFROSTTHeader(FILE *file, char *filename, char *line,
+ uint64_t *idata) {
// Skip comments.
while (1) {
- if (!fgets(line, 1025, file)) {
- fprintf(stderr, "Cannot find data in %s\n", name);
+ if (!fgets(line, kColWidth, file)) {
+ fprintf(stderr, "Cannot find data in %s\n", filename);
exit(1);
}
if (line[0] != '#')
@@ -606,16 +608,17 @@ static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
}
// Next line contains RANK and NNZ.
if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) {
- fprintf(stderr, "Cannot find metadata in %s\n", name);
+ fprintf(stderr, "Cannot find metadata in %s\n", filename);
exit(1);
}
// Followed by a line with the dimension sizes (one per rank).
for (uint64_t r = 0; r < idata[0]; r++) {
if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) {
- fprintf(stderr, "Cannot find dimension size %s\n", name);
+ fprintf(stderr, "Cannot find dimension size %s\n", filename);
exit(1);
}
}
+ fgets(line, kColWidth, file); // end of line
}
/// Reads a sparse tensor with the given filename into a memory-resident
@@ -631,12 +634,13 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
exit(1);
}
// Perform some file format dependent set up.
+ char line[kColWidth];
uint64_t idata[512];
bool is_symmetric = false;
if (strstr(filename, ".mtx")) {
- readMMEHeader(file, filename, idata, &is_symmetric);
+ readMMEHeader(file, filename, line, idata, &is_symmetric);
} else if (strstr(filename, ".tns")) {
- readExtFROSTTHeader(file, filename, idata);
+ readExtFROSTTHeader(file, filename, line, idata);
} else {
fprintf(stderr, "Unknown format %s\n", filename);
exit(1);
@@ -653,22 +657,19 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
// Read all nonzero elements.
std::vector<uint64_t> indices(rank);
for (uint64_t k = 0; k < nnz; k++) {
- uint64_t idx = -1u;
+ if (!fgets(line, kColWidth, file)) {
+ fprintf(stderr, "Cannot find next line of data in %s\n", filename);
+ exit(1);
+ }
+ char *linePtr = line;
for (uint64_t r = 0; r < rank; r++) {
- if (fscanf(file, "%" PRIu64, &idx) != 1) {
- fprintf(stderr, "Cannot find next index in %s\n", filename);
- exit(1);
- }
+ uint64_t idx = strtoul(linePtr, &linePtr, 10);
// Add 0-based index.
indices[perm[r]] = idx - 1;
}
// The external formats always store the numerical values with the type
// double, but we cast these values to the sparse tensor object type.
- double value;
- if (fscanf(file, "%lg\n", &value) != 1) {
- fprintf(stderr, "Cannot find next value in %s\n", filename);
- exit(1);
- }
+ double value = strtod(linePtr, &linePtr);
tensor->add(indices, value);
// We currently chose to deal with symmetric matrices by fully constructing
// them. In the future, we may want to make symmetry implicit for storage
@@ -972,7 +973,8 @@ IMPL_GETNEXT(getNextI32, int32_t)
IMPL_GETNEXT(getNextI16, int16_t)
IMPL_GETNEXT(getNextI8, int8_t)
-/// Helper to insert elements in lexicograph index order, one per value type.
+/// Helper to insert elements in lexicographical index order, one per value
+/// type.
IMPL_LEXINSERT(lexInsertF64, double)
IMPL_LEXINSERT(lexInsertF32, float)
IMPL_LEXINSERT(lexInsertI64, int64_t)
More information about the Mlir-commits mailing list