[llvm-branch-commits] [mlir] d8fc273 - [mlir][sparse] improved sparse runtime support library

Aart Bik via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Jan 16 12:21:05 PST 2021


Author: Aart Bik
Date: 2021-01-16T12:16:10-08:00
New Revision: d8fc27301d18f0935ba99ead7ac61aa6a53f16e4

URL: https://github.com/llvm/llvm-project/commit/d8fc27301d18f0935ba99ead7ac61aa6a53f16e4
DIFF: https://github.com/llvm/llvm-project/commit/d8fc27301d18f0935ba99ead7ac61aa6a53f16e4.diff

LOG: [mlir][sparse] improved sparse runtime support library

Added the ability to read (an extended version of) the FROSTT
file format, so that we can now read in sparse tensors of arbitrary
rank. Generalized the API to deal with more than two dimensions.
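
To make the generalized API concrete, here is a minimal C++ sketch (not
part of this commit) of how a standalone host program might drive the new
C entry points, assuming it links against the runner utils library, that
"test.tns" is a placeholder input path, and that the index array is sized
generously enough for the tensor's rank:

  #include <cinttypes>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Declarations of the C entry points added by this patch.
  extern "C" void *openTensorC(char *filename, uint64_t *idata);
  extern "C" void readTensorItemC(void *tensor, uint64_t *idata, double *ddata);
  extern "C" void closeTensor(void *tensor);

  int main() {
    char filename[] = "test.tns"; // hypothetical input file
    // idata[0] = rank, idata[1] = nnz, idata[2..2+rank) = dimension sizes.
    std::vector<uint64_t> idata(2 + 8); // room for up to a rank-8 tensor
    void *tensor = openTensorC(filename, idata.data());
    uint64_t rank = idata[0], nnz = idata[1];
    std::vector<uint64_t> indices(rank);
    double value;
    for (uint64_t k = 0; k < nnz; k++) {
      // Yields the indices and value of the next element in sorted order.
      readTensorItemC(tensor, indices.data(), &value);
      for (uint64_t r = 0; r < rank; r++)
        printf("%" PRIu64 " ", indices[r]);
      printf("-> %g\n", value);
    }
    closeTensor(tensor);
    return 0;
  }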

Also added the ability to sort the indices of sparse tensors
lexicographically. This is an important step towards supporting
automatic generation of initialization code, since sparse storage
formats are easier to initialize when the indices arrive in sorted
order. Since most external formats don't enforce this property, it
is convenient to provide the sorting in our runtime support library.
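
The sorting itself is a plain lexicographic comparison of the index
vectors; the sketch below mirrors the lexOrder helper added to
SparseUtils.cpp in this patch (the Element struct here is simplified
for illustration):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>
  #include <vector>

  struct Element {
    std::vector<int64_t> indices;
    double value;
  };

  // Returns true if the indices of e1 precede those of e2 lexicographically.
  static bool lexOrder(const Element &e1, const Element &e2) {
    assert(e1.indices.size() == e2.indices.size());
    for (size_t r = 0, rank = e1.indices.size(); r < rank; r++) {
      if (e1.indices[r] == e2.indices[r])
        continue;
      return e1.indices[r] < e2.indices[r];
    }
    return false;
  }

  int main() {
    std::vector<Element> elements = {
        {{1, 2, 0}, 3.0}, {{0, 5, 7}, 1.0}, {{1, 0, 9}, 2.0}};
    std::sort(elements.begin(), elements.end(), lexOrder);
    // Order is now (0,5,7), (1,0,9), (1,2,0).
    return 0;
  }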

Lastly, the re-entrancy problem of the original implementation
is fixed by passing an opaque object around (rather than having
a single static variable, ugh!).
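
For readers unfamiliar with that pattern, here is a minimal sketch
(illustrative only; ReaderState, openReader, and closeReader are
hypothetical names, not the patch's API) of what replacing static state
with an opaque heap-allocated handle looks like:

  #include <cstdio>
  #include <string>

  namespace {
  // Per-call state that would otherwise live in a single static variable.
  struct ReaderState {
    FILE *file;
    std::string filename;
  };
  } // namespace

  extern "C" void *openReader(const char *filename) {
    FILE *f = fopen(filename, "r");
    if (!f)
      return nullptr;
    // Each call gets its own heap-allocated state; the caller keeps the
    // opaque handle and threads it through subsequent calls.
    return new ReaderState{f, filename};
  }

  extern "C" void closeReader(void *handle) {
    ReaderState *state = static_cast<ReaderState *>(handle);
    fclose(state->file);
    delete state; // releases this reader only; other handles remain valid
  }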

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D94852

Added: 
    mlir/integration_test/Sparse/CPU/frostt-example.mlir
    mlir/integration_test/data/test.tns

Modified: 
    mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
    mlir/integration_test/CMakeLists.txt
    mlir/integration_test/Sparse/CPU/matrix-market-example.mlir
    mlir/lib/ExecutionEngine/SparseUtils.cpp

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
index edcd8e0dc545..2d0608a8656b 100644
--- a/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/CRunnerUtils.h
@@ -198,7 +198,7 @@ class DynamicMemRefType {
 };
 
 //===----------------------------------------------------------------------===//
-// Small runtime support "lib" for vector.print lowering during codegen.
+// Small runtime support library for vector.print lowering during codegen.
 //===----------------------------------------------------------------------===//
 extern "C" MLIR_CRUNNERUTILS_EXPORT void printI64(int64_t i);
 extern "C" MLIR_CRUNNERUTILS_EXPORT void printU64(uint64_t u);
@@ -210,15 +210,13 @@ extern "C" MLIR_CRUNNERUTILS_EXPORT void printComma();
 extern "C" MLIR_CRUNNERUTILS_EXPORT void printNewline();
 
 //===----------------------------------------------------------------------===//
-// Small runtime support for sparse tensors.
+// Small runtime support library for sparse tensors.
 //===----------------------------------------------------------------------===//
-extern "C" MLIR_CRUNNERUTILS_EXPORT void openMatrixC(char *filename,
-                                                     uint64_t *mdata,
-                                                     uint64_t *ndata,
-                                                     uint64_t *nnzdata);
+extern "C" MLIR_CRUNNERUTILS_EXPORT void *openTensorC(char *filename,
+                                                      uint64_t *idata);
 extern "C" MLIR_CRUNNERUTILS_EXPORT void
-readMatrixItemC(uint64_t *idata, uint64_t *jdata, double *ddata);
-extern "C" MLIR_CRUNNERUTILS_EXPORT void closeMatrix();
-extern "C" MLIR_CRUNNERUTILS_EXPORT char *getMatrix(uint64_t id);
+readTensorItemC(void *tensor, uint64_t *idata, double *ddata);
+extern "C" MLIR_CRUNNERUTILS_EXPORT void closeTensor(void *tensor);
+extern "C" MLIR_CRUNNERUTILS_EXPORT char *getTensorFilename(uint64_t id);
 
 #endif // EXECUTIONENGINE_CRUNNERUTILS_H_

diff --git a/mlir/integration_test/CMakeLists.txt b/mlir/integration_test/CMakeLists.txt
index bc5ad90e1253..fb2be5256dcc 100644
--- a/mlir/integration_test/CMakeLists.txt
+++ b/mlir/integration_test/CMakeLists.txt
@@ -31,4 +31,5 @@ add_lit_testsuites(MLIR_INTEGRATION ${CMAKE_CURRENT_SOURCE_DIR}
 
 # Copy test data over.
 file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/test.mtx
+          ${CMAKE_CURRENT_SOURCE_DIR}/data/test.tns
         DESTINATION ${MLIR_INTEGRATION_TEST_DIR}/data/)

diff --git a/mlir/integration_test/Sparse/CPU/frostt-example.mlir b/mlir/integration_test/Sparse/CPU/frostt-example.mlir
new file mode 100644
index 000000000000..8144270aa91f
--- /dev/null
+++ b/mlir/integration_test/Sparse/CPU/frostt-example.mlir
@@ -0,0 +1,149 @@
+// RUN: mlir-opt %s \
+// RUN:  -convert-scf-to-std -convert-vector-to-scf \
+// RUN:  -convert-linalg-to-llvm -convert-vector-to-llvm | \
+// RUN: TENSOR0="%mlir_integration_test_dir/data/test.tns" \
+// RUN: mlir-cpu-runner \
+// RUN:  -e entry -entry-point-result=void  \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+module {
+  //
+  // Example of using the sparse runtime support library to read a sparse tensor
+  // in the FROSTT file format (http://frostt.io/tensors/file-formats.html).
+  //
+  func private @openTensor(!llvm.ptr<i8>, memref<?xindex>) -> (!llvm.ptr<i8>)
+  func private @readTensorItem(!llvm.ptr<i8>, memref<?xindex>, memref<?xf64>) -> ()
+  func private @closeTensor(!llvm.ptr<i8>) -> ()
+  func private @getTensorFilename(index) -> (!llvm.ptr<i8>)
+
+  func @entry() {
+    %d0  = constant 0.0 : f64
+    %i0  = constant 0   : i64
+    %c0  = constant 0   : index
+    %c1  = constant 1   : index
+    %c2  = constant 2   : index
+    %c10 = constant 10  : index
+
+    //
+    // Setup memrefs to get meta data, indices and values.
+    // The index array should provide sufficient space.
+    //
+    %idata = alloc(%c10) : memref<?xindex>
+    %ddata = alloc(%c1)  : memref<?xf64>
+
+    //
+    // Obtain the sparse tensor filename through this test helper.
+    //
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!llvm.ptr<i8>)
+
+    //
+    // Read a sparse tensor. The call yields a pointer to an opaque
+    // memory-resident sparse tensor object that is only understood by
+    // other methods in the sparse runtime support library. This call also
+    // provides the rank and the number of nonzero elements (nnz) through
+    // a memref array.
+    //
+    %tensor = call @openTensor(%fileName, %idata)
+      : (!llvm.ptr<i8>, memref<?xindex>) -> (!llvm.ptr<i8>)
+
+    //
+    // Print some meta data.
+    //
+    %rank = load %idata[%c0] : memref<?xindex>
+    %nnz  = load %idata[%c1] : memref<?xindex>
+    vector.print %rank : index
+    vector.print %nnz  : index
+    scf.for %r = %c2 to %c10 step %c1 {
+      %d = load %idata[%r] : memref<?xindex>
+      vector.print %d : index
+    }
+
+    //
+    // Now we are ready to read in the nonzero elements of the sparse tensor
+    // and insert these into a sparse storage scheme. In this example, we
+    // simply print the elements on the fly.
+    //
+    scf.for %k = %c0 to %nnz step %c1 {
+      call @readTensorItem(%tensor, %idata, %ddata)
+        : (!llvm.ptr<i8>, memref<?xindex>, memref<?xf64>) -> ()
+      //
+      // Build index vector and print element (here, using the
+      // knowledge that the read sparse tensor has rank 8).
+      //
+      %0 = vector.broadcast %i0 : i64 to vector<8xi64>
+      %1 = scf.for %r = %c0 to %rank step %c1 iter_args(%in = %0) -> vector<8xi64> {
+        %i  = load %idata[%r] : memref<?xindex>
+        %ii = index_cast %i : index to i64
+        %ri = index_cast %r : index to i32
+        %out = vector.insertelement %ii, %in[%ri : i32] : vector<8xi64>
+        scf.yield %out : vector<8xi64>
+      }
+      %2 = load %ddata[%c0] : memref<?xf64>
+      vector.print %1 : vector<8xi64>
+      vector.print %2 : f64
+    }
+
+    //
+    // Since at this point we have processed the contents, make sure to
+    // close the sparse tensor to release its memory resources.
+    //
+    call @closeTensor(%tensor) : (!llvm.ptr<i8>) -> ()
+
+    //
+    // Verify that the results are as expected.
+    //
+    // CHECK: 8
+    // CHECK: 16
+    // CHECK: 7
+    // CHECK: 3
+    // CHECK: 3
+    // CHECK: 3
+    // CHECK: 3
+    // CHECK: 3
+    // CHECK: 5
+    // CHECK: 3
+    //
+    // CHECK:      ( 0, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: 1
+    // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 0, 2 )
+    // CHECK-NEXT: 1.3
+    // CHECK-NEXT: ( 0, 0, 0, 0, 0, 0, 4, 0 )
+    // CHECK-NEXT: 1.5
+    // CHECK-NEXT: ( 0, 0, 0, 1, 0, 0, 0, 1 )
+    // CHECK-NEXT: 1.22
+    // CHECK-NEXT: ( 0, 0, 0, 1, 0, 0, 0, 2 )
+    // CHECK-NEXT: 1.23
+    // CHECK-NEXT: ( 1, 0, 1, 0, 1, 1, 1, 0 )
+    // CHECK-NEXT: 2.111
+    // CHECK-NEXT: ( 1, 0, 1, 0, 1, 1, 1, 2 )
+    // CHECK-NEXT: 2.113
+    // CHECK-NEXT: ( 1, 1, 1, 0, 1, 1, 1, 0 )
+    // CHECK-NEXT: 2.11
+    // CHECK-NEXT: ( 1, 1, 1, 0, 1, 1, 1, 1 )
+    // CHECK-NEXT: 2.1
+    // CHECK-NEXT: ( 1, 1, 1, 1, 1, 1, 1, 1 )
+    // CHECK-NEXT: 2
+    // CHECK-NEXT: ( 2, 2, 2, 2, 0, 0, 1, 2 )
+    // CHECK-NEXT: 3.112
+    // CHECK-NEXT: ( 2, 2, 2, 2, 0, 1, 0, 2 )
+    // CHECK-NEXT: 3.121
+    // CHECK-NEXT: ( 2, 2, 2, 2, 0, 1, 1, 2 )
+    // CHECK-NEXT: 3.122
+    // CHECK-NEXT: ( 2, 2, 2, 2, 0, 2, 2, 2 )
+    // CHECK-NEXT: 3.1
+    // CHECK-NEXT: ( 2, 2, 2, 2, 2, 2, 2, 2 )
+    // CHECK-NEXT: 3
+    // CHECK-NEXT: ( 6, 0, 0, 0, 0, 0, 0, 0 )
+    // CHECK-NEXT: 7
+    //
+
+    //
+    // Free.
+    //
+    dealloc %idata : memref<?xindex>
+    dealloc %ddata : memref<?xf64>
+
+    return
+  }
+}

diff --git a/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir
index b03c5b111d98..b078a51b5e23 100644
--- a/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir
+++ b/mlir/integration_test/Sparse/CPU/matrix-market-example.mlir
@@ -1,99 +1,116 @@
 // RUN: mlir-opt %s \
 // RUN:  -convert-scf-to-std -convert-vector-to-scf \
 // RUN:  -convert-linalg-to-llvm -convert-vector-to-llvm | \
-// RUN: MATRIX0="%mlir_integration_test_dir/data/test.mtx" \
+// RUN: TENSOR0="%mlir_integration_test_dir/data/test.mtx" \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void  \
 // RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
 module {
-  func private @openMatrix(!llvm.ptr<i8>, memref<index>, memref<index>, memref<index>) -> ()
-  func private @readMatrixItem(memref<index>, memref<index>, memref<f64>) -> ()
-  func private @closeMatrix() -> ()
-  func private @getMatrix(index) -> (!llvm.ptr<i8>)
+  //
+  // Example of using the sparse runtime support library to read a sparse matrix
+  // in the Matrix Market Exchange Format (https://math.nist.gov/MatrixMarket).
+  //
+  func private @openTensor(!llvm.ptr<i8>, memref<?xindex>) -> (!llvm.ptr<i8>)
+  func private @readTensorItem(!llvm.ptr<i8>, memref<?xindex>, memref<?xf64>) -> ()
+  func private @closeTensor(!llvm.ptr<i8>) -> ()
+  func private @getTensorFilename(index) -> (!llvm.ptr<i8>)
 
   func @entry() {
     %d0  = constant 0.0 : f64
     %c0  = constant 0 : index
     %c1  = constant 1 : index
+    %c2  = constant 2 : index
+    %c3  = constant 3 : index
+    %c4  = constant 4 : index
     %c5  = constant 5 : index
-    %m   = alloc() : memref<index>
-    %n   = alloc() : memref<index>
-    %nnz = alloc() : memref<index>
-    %i   = alloc() : memref<index>
-    %j   = alloc() : memref<index>
-    %d   = alloc() : memref<f64>
 
     //
-    // Read the header of a sparse matrix. This yields the
-    // size (m x n) and number of nonzero elements (nnz).
+    // Setup memrefs to get meta data, indices, and values.
     //
-    %file = call @getMatrix(%c0) : (index) -> (!llvm.ptr<i8>)
-    call @openMatrix(%file, %m, %n, %nnz)
-        : (!llvm.ptr<i8>, memref<index>,
-	                  memref<index>, memref<index>) -> ()
-    %M = load %m[]   : memref<index>
-    %N = load %n[]   : memref<index>
-    %Z = load %nnz[] : memref<index>
+    %idata = alloc(%c4) : memref<?xindex>
+    %ddata = alloc(%c1) : memref<?xf64>
 
     //
-    // At this point, code should prepare a proper sparse storage
-    // scheme for an m x n matrix with nnz nonzero elements. For
-    // simplicity, however, here we simply set up a dense matrix.
+    // Obtain the sparse matrix filename through this test helper.
     //
-    %a = alloc(%M, %N) : memref<?x?xf64>
-    scf.for %ii = %c0 to %M step %c1 {
-      scf.for %jj = %c0 to %N step %c1 {
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!llvm.ptr<i8>)
+
+    //
+    // Read a sparse matrix. The call yields a pointer to an opaque
+    // memory-resident sparse tensor object that is only understood by
+    // other methods in the sparse runtime support library. This call also
+    // provides the rank (always 2 for the Matrix Market), number of
+    // nonzero elements (nnz), and the size (m x n) through a memref array.
+    //
+    %tensor = call @openTensor(%fileName, %idata)
+      : (!llvm.ptr<i8>, memref<?xindex>) -> (!llvm.ptr<i8>)
+    %rank = load %idata[%c0] : memref<?xindex>
+    %nnz  = load %idata[%c1] : memref<?xindex>
+    %m    = load %idata[%c2] : memref<?xindex>
+    %n    = load %idata[%c3] : memref<?xindex>
+
+    //
+    // At this point, code should prepare a proper sparse storage scheme for
+    // an m x n matrix with nnz nonzero elements. For simplicity, here we
+    // simply initialize a dense m x n matrix to all zeroes.
+    //
+    %a = alloc(%m, %n) : memref<?x?xf64>
+    scf.for %ii = %c0 to %m step %c1 {
+      scf.for %jj = %c0 to %n step %c1 {
         store %d0, %a[%ii, %jj] : memref<?x?xf64>
       }
     }
 
     //
-    // Now we are ready to read in the nonzero elements of the
-    // sparse matrix and insert these into a sparse storage
-    // scheme. In this example, we simply insert them in the
-    // dense matrix.
-    //
-    scf.for %k = %c0 to %Z step %c1 {
-      call @readMatrixItem(%i, %j, %d)
-          : (memref<index>, memref<index>, memref<f64>) -> ()
-      %I = load %i[] : memref<index>
-      %J = load %j[] : memref<index>
-      %D = load %d[] : memref<f64>
-      store %D, %a[%I, %J] : memref<?x?xf64>
+    // Now we are ready to read in nnz nonzero elements of the sparse matrix
+    // and insert these into a sparse storage scheme. In this example, we
+    // simply insert them in the dense matrix.
+    //
+    scf.for %k = %c0 to %nnz step %c1 {
+      call @readTensorItem(%tensor, %idata, %ddata)
+        : (!llvm.ptr<i8>, memref<?xindex>, memref<?xf64>) -> ()
+      %i = load %idata[%c0] : memref<?xindex>
+      %j = load %idata[%c1] : memref<?xindex>
+      %d = load %ddata[%c0] : memref<?xf64>
+      store %d, %a[%i, %j] : memref<?x?xf64>
     }
-    call @closeMatrix() : () -> ()
+
+    //
+    // Since at this point we have copied the sparse matrix to our own
+    // storage scheme, make sure to close the matrix to release its
+    // memory resources.
+    //
+    call @closeTensor(%tensor) : (!llvm.ptr<i8>) -> ()
 
     //
     // Verify that the results are as expected.
     //
     %A = vector.transfer_read %a[%c0, %c0], %d0 : memref<?x?xf64>, vector<5x5xf64>
-    vector.print %M : index
-    vector.print %N : index
-    vector.print %Z : index
-    vector.print %A : vector<5x5xf64>
+    vector.print %rank : index
+    vector.print %nnz  : index
+    vector.print %m    : index
+    vector.print %n    : index
+    vector.print %A    : vector<5x5xf64>
     //
+    // CHECK: 2
+    // CHECK: 9
     // CHECK: 5
     // CHECK: 5
-    // CHECK: 9
     //
-    // CHECK: ( ( 1, 0, 0, 1.4, 0 ),
-    // CHECK-SAME: ( 0, 2, 0, 0, 2.5 ),
-    // CHECK-SAME: ( 0, 0, 3, 0, 0 ),
-    // CHECK-SAME: ( 4.1, 0, 0, 4, 0 ),
-    // CHECK-SAME: ( 0, 5.2, 0, 0, 5 ) )
+    // CHECK:      ( ( 1, 0, 0, 1.4, 0 ),
+    // CHECK-SAME:   ( 0, 2, 0, 0, 2.5 ),
+    // CHECK-SAME:   ( 0, 0, 3, 0, 0 ),
+    // CHECK-SAME:   ( 4.1, 0, 0, 4, 0 ),
+    // CHECK-SAME:   ( 0, 5.2, 0, 0, 5 ) )
 
     //
     // Free.
     //
-    dealloc %m   : memref<index>
-    dealloc %n   : memref<index>
-    dealloc %nnz : memref<index>
-    dealloc %i   : memref<index>
-    dealloc %j   : memref<index>
-    dealloc %d   : memref<f64>
-    dealloc %a   : memref<?x?xf64>
+    dealloc %idata : memref<?xindex>
+    dealloc %ddata : memref<?xf64>
+    dealloc %a     : memref<?x?xf64>
 
     return
   }

diff --git a/mlir/integration_test/data/test.tns b/mlir/integration_test/data/test.tns
new file mode 100644
index 000000000000..e9faab3c3496
--- /dev/null
+++ b/mlir/integration_test/data/test.tns
@@ -0,0 +1,25 @@
+# This is a rank 8 test sparse tensor in FROSTT file format,
+# extended with two meta data lines:
+#   rank nnz
+#   dims (one per rank)
+#
+# see http://frostt.io/tensors/file-formats.html
+#
+8 16
+7 3 3 3 3 3 5 3
+2 2 2 1 2 2 2 1 2.11
+3 3 3 3 3 3 3 3 3.0
+1 1 1 1 1 1 1 3 1.3
+1 1 1 2 1 1 1 3 1.23
+1 1 1 2 1 1 1 2 1.22
+2 2 2 2 2 2 2 2 2.0
+1 1 1 1 1 1 1 1 1.0
+7 1 1 1 1 1 1 1 7.0
+3 3 3 3 1 3 3 3 3.1
+3 3 3 3 1 2 2 3 3.122
+3 3 3 3 1 1 2 3 3.112
+3 3 3 3 1 2 1 3 3.121
+2 2 2 1 2 2 2 2 2.1
+2 1 2 1 2 2 2 1 2.111
+2 1 2 1 2 2 2 3 2.113
+1 1 1 1 1 1 5 1 1.5

diff --git a/mlir/lib/ExecutionEngine/SparseUtils.cpp b/mlir/lib/ExecutionEngine/SparseUtils.cpp
index 870cf4f8efe2..0d160220639f 100644
--- a/mlir/lib/ExecutionEngine/SparseUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseUtils.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a light-weight runtime library that is useful for
-// sparse tensor manipulations. The functionality provided in this library
+// This file implements a light-weight runtime support library that is useful
+// for sparse tensor manipulations. The functionality provided in this library
 // is meant to simplify benchmarking, testing, and debugging MLIR code that
 // operates on sparse tensors. The provided functionality is **not** part
 // of core MLIR, however.
@@ -18,32 +18,83 @@
 
 #ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
 
+#include <algorithm>
+#include <cassert>
 #include <cctype>
 #include <cinttypes>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <vector>
 
 //===----------------------------------------------------------------------===//
 //
-// Internal support for reading matrices in the Matrix Market Exchange Format.
-// See https://math.nist.gov/MatrixMarket for details on this format.
+// Internal support for reading sparse tensors in one of the following
+// external file formats:
+//
+// (1) Matrix Market Exchange (MME): *.mtx
+//     https://math.nist.gov/MatrixMarket/formats.html
+//
+// (2) Formidable Repository of Open Sparse Tensors and Tools (FROSTT): *.tns
+//     http://frostt.io/tensors/file-formats.html
 //
 //===----------------------------------------------------------------------===//
 
-// Helper to convert string to lower case.
+namespace {
+
+/// A sparse tensor element in coordinate scheme (value and indices).
+/// For example, a rank-1 vector element would look like
+///   ({i}, a[i])
+/// and a rank-5 tensor element like
+///   ({i,j,k,l,m}, a[i,j,k,l,m])
+struct Element {
+  Element(const std::vector<int64_t> &ind, double val)
+      : indices(ind), value(val){};
+  std::vector<int64_t> indices;
+  double value;
+};
+
+/// A memory-resident sparse tensor in coordinate scheme (collection of
+/// elements). This data structure is used to read a sparse tensor from
+/// external file format into memory and sort the elements lexicographically
+/// by indices before passing it back to the client (most packed storage
+/// formats require the elements to appear in lexicographic index order).
+struct SparseTensor {
+public:
+  SparseTensor(int64_t capacity) : pos(0) { elements.reserve(capacity); }
+  // Add element as indices and value.
+  void add(const std::vector<int64_t> &ind, double val) {
+    elements.emplace_back(Element(ind, val));
+  }
+  // Sort elements lexicographically by index.
+  void sort() { std::sort(elements.begin(), elements.end(), lexOrder); }
+  // Primitive one-time iteration.
+  const Element &next() { return elements[pos++]; }
+
+private:
+  // Returns true if indices of e1 < indices of e2.
+  static bool lexOrder(const Element &e1, const Element &e2) {
+    assert(e1.indices.size() == e2.indices.size());
+    for (int64_t r = 0, rank = e1.indices.size(); r < rank; r++) {
+      if (e1.indices[r] == e2.indices[r])
+        continue;
+      return e1.indices[r] < e2.indices[r];
+    }
+    return false;
+  }
+  std::vector<Element> elements;
+  uint64_t pos;
+};
+
+/// Helper to convert string to lower case.
 static char *toLower(char *token) {
   for (char *c = token; *c; c++)
     *c = tolower(*c);
   return token;
 }
 
-// Read the header of a general sparse matrix of type real.
-//
-// TODO: support other formats as well?
-//
-static void readHeader(FILE *file, char *name, uint64_t *m, uint64_t *n,
-                       uint64_t *nnz) {
+/// Read the MME header of a general sparse matrix of type real.
+static void readMMEHeader(FILE *file, char *name, uint64_t *idata) {
   char line[1025];
   char header[64];
   char object[64];
@@ -75,117 +126,166 @@ static void readHeader(FILE *file, char *name, uint64_t *m, uint64_t *n,
       break;
   }
   // Next line contains M N NNZ.
-  if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64, m, n, nnz) != 3) {
+  idata[0] = 2; // rank
+  if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
+             idata + 1) != 3) {
     fprintf(stderr, "Cannot find size in %s\n", name);
     exit(1);
   }
 }
 
-// Read next data item.
-static void readItem(FILE *file, char *name, uint64_t *i, uint64_t *j,
-                     double *d) {
-  if (fscanf(file, "%" PRIu64 " %" PRIu64 " %lg\n", i, j, d) != 3) {
-    fprintf(stderr, "Cannot find next data item in %s\n", name);
+/// Read the "extended" FROSTT header. Although not part of the documented
+/// format, we assume that the file starts with optional comments followed
+/// by two lines that define the rank, the number of nonzeros, and the
+// dimension sizes (one per rank) of the sparse tensor.
+static void readExtFROSTTHeader(FILE *file, char *name, uint64_t *idata) {
+  char line[1025];
+  // Skip comments.
+  while (1) {
+    if (!fgets(line, 1025, file)) {
+      fprintf(stderr, "Cannot find data in %s\n", name);
+      exit(1);
+    }
+    if (line[0] != '#')
+      break;
+  }
+  // Next line contains RANK and NNZ.
+  if (sscanf(line, "%" PRIu64 "%" PRIu64 "\n", idata, idata + 1) != 2) {
+    fprintf(stderr, "Cannot find metadata in %s\n", name);
     exit(1);
   }
-  // Translate 1-based to 0-based.
-  *i = *i - 1;
-  *j = *j - 1;
+  // Followed by a line with the dimension sizes (one per rank).
+  for (uint64_t r = 0; r < idata[0]; r++) {
+    if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1) {
+      fprintf(stderr, "Cannot find dimension size %s\n", name);
+      exit(1);
+    }
+  }
 }
 
+} // anonymous namespace
+
 //===----------------------------------------------------------------------===//
 //
-// Public API of the sparse runtime library.
+// Public API of the sparse runtime support library that enables MLIR code
+// to read a sparse tensor from an external format (MME or FROSTT).
 //
-// Enables MLIR code to read a matrix in Matrix Market Exchange Format
-// as follows:
+// For example, a sparse matrix in MME can be read as follows.
 //
-//   call @openMatrix("A.mtx", %m, %n, %nnz) : (!llvm.ptr<i8>,
-//                                              memref<index>,
-//                                              memref<index>,
-//                                              memref<index>) -> ()
-//   .... prepare reading in m x n matrix A with nnz nonzero elements ....
-//   %u = load %nnz[] : memref<index>
-//   scf.for %k = %c0 to %u step %c1 {
-//     call @readMatrixItem(%i, %j, %d) : (memref<index>,
-//                                         memref<index>, memref<f64>) -> ()
-//     .... process next nonzero element A[i][j] = d ....
+//   %tensor = call @openTensor(%fileName, %idata)
+//     : (!llvm.ptr<i8>, memref<?xindex>) -> (!llvm.ptr<i8>)
+//   %rank = load %idata[%c0] : memref<?xindex>    # always 2 for MME
+//   %nnz  = load %idata[%c1] : memref<?xindex>
+//   %m    = load %idata[%c2] : memref<?xindex>
+//   %n    = load %idata[%c3] : memref<?xindex>
+//   .. prepare reading in m x n sparse tensor A with nnz nonzero elements ..
+//   scf.for %k = %c0 to %nnz step %c1 {
+//     call @readTensorItem(%tensor, %idata, %ddata)
+//       : (!llvm.ptr<i8>, memref<?xindex>, memref<?xf64>) -> ()
+//     %i = load %idata[%c0] : memref<?xindex>
+//     %j = load %idata[%c1] : memref<?xindex>
+//     %d = load %ddata[%c0] : memref<?xf64>
+//     .. process next nonzero element A[i][j] = d
+//        where the elements appear in lexicographic order ..
 //   }
-//   call @closeMatrix() : () -> ()
+//   call @closeTensor(%tensor) : (!llvm.ptr<i8>) -> ()
 //
-// The implementation is *not* thread-safe. Also, only *one* matrix file can
-// be open at the time. A matrix file must be closed before reading in a next.
 //
 // Note that input parameters in the "MLIRized" version of a function mimic
-// the data layout of a MemRef<T>:
+// the data layout of a MemRef<?xT>:
 //
 //   struct MemRef {
 //     T *base;
 //     T *data;
 //     int64_t off;
+//     int64_t sizes[1];
+//     int64_t strides[1];
 //   }
 //
 //===----------------------------------------------------------------------===//
 
-// Currently open matrix. This is *not* thread-safe or re-entrant.
-static FILE *sparseFile = nullptr;
-static char *sparseFilename = nullptr;
-
-extern "C" void openMatrixC(char *filename, uint64_t *mdata, uint64_t *ndata,
-                            uint64_t *nnzdata) {
-  if (sparseFile != nullptr) {
-    fprintf(stderr, "Other file still open %s vs. %s\n", sparseFilename,
-            filename);
+/// Reads in a sparse tensor with the given filename. The call yields a
+/// pointer to an opaque memory-resident sparse tensor object that is only
+/// understood by other methods in the sparse runtime support library. An
+/// array parameter is used to pass the rank, the number of nonzero elements,
+/// and the dimension sizes (one per rank).
+extern "C" void *openTensorC(char *filename, uint64_t *idata) {
+  // Open the file.
+  FILE *file = fopen(filename, "r");
+  if (!file) {
+    fprintf(stderr, "Cannot find %s\n", filename);
     exit(1);
   }
-  sparseFile = fopen(filename, "r");
-  if (!sparseFile) {
-    fprintf(stderr, "Cannot find %s\n", filename);
+  // Perform some file format dependent set up.
+  if (strstr(filename, ".mtx")) {
+    readMMEHeader(file, filename, idata);
+  } else if (strstr(filename, ".tns")) {
+    readExtFROSTTHeader(file, filename, idata);
+  } else {
+    fprintf(stderr, "Unknown format %s\n", filename);
     exit(1);
   }
-  sparseFilename = filename;
-  readHeader(sparseFile, filename, mdata, ndata, nnzdata);
+  // Read all nonzero elements.
+  uint64_t rank = idata[0];
+  uint64_t nnz = idata[1];
+  SparseTensor *tensor = new SparseTensor(nnz);
+  std::vector<int64_t> indices(rank);
+  double value;
+  for (uint64_t k = 0; k < nnz; k++) {
+    for (uint64_t r = 0; r < rank; r++) {
+      if (fscanf(file, "%" PRIu64, &indices[r]) != 1) {
+        fprintf(stderr, "Cannot find next index in %s\n", filename);
+        exit(1);
+      }
+      indices[r]--; // 0-based index
+    }
+    if (fscanf(file, "%lg\n", &value) != 1) {
+      fprintf(stderr, "Cannot find next value in %s\n", filename);
+      exit(1);
+    }
+    tensor->add(indices, value);
+  }
+  // Close the file and return sorted tensor.
+  fclose(file);
+  tensor->sort(); // sort lexicographically
+  return tensor;
 }
 
-// "MLIRized" version.
-extern "C" void openMatrix(char *filename, uint64_t *mbase, uint64_t *mdata,
-                           int64_t moff, uint64_t *nbase, uint64_t *ndata,
-                           int64_t noff, uint64_t *nnzbase, uint64_t *nnzdata,
-                           int64_t nnzoff) {
-  openMatrixC(filename, mdata, ndata, nnzdata);
+/// "MLIRized" version.
+extern "C" void *openTensor(char *filename, uint64_t *ibase, uint64_t *idata,
+                            uint64_t ioff, uint64_t isize, uint64_t istride) {
+  assert(istride == 1);
+  return openTensorC(filename, idata + ioff);
 }
 
-extern "C" void readMatrixItemC(uint64_t *idata, uint64_t *jdata,
-                                double *ddata) {
-  if (sparseFile == nullptr) {
-    fprintf(stderr, "Cannot read item from unopened matrix\n");
-    exit(1);
-  }
-  readItem(sparseFile, sparseFilename, idata, jdata, ddata);
+/// Yields the next element from the given opaque sparse tensor object.
+extern "C" void readTensorItemC(void *tensor, uint64_t *idata, double *ddata) {
+  const Element &e = static_cast<SparseTensor *>(tensor)->next();
+  for (uint64_t r = 0, rank = e.indices.size(); r < rank; r++)
+    idata[r] = e.indices[r];
+  ddata[0] = e.value;
 }
 
-// "MLIRized" version.
-extern "C" void readMatrixItem(uint64_t *ibase, uint64_t *idata, int64_t ioff,
-                               uint64_t *jbase, uint64_t *jdata, int64_t joff,
-                               double *dbase, double *ddata, int64_t doff) {
-  readMatrixItemC(idata, jdata, ddata);
+/// "MLIRized" version.
+extern "C" void readTensorItem(void *tensor, uint64_t *ibase, uint64_t *idata,
+                               uint64_t ioff, uint64_t isize, uint64_t istride,
+                               double *dbase, double *ddata, uint64_t doff,
+                               uint64_t dsize, uint64_t dstride) {
+  assert(istride == 1 && dstride == 1);
+  readTensorItemC(tensor, idata + ioff, ddata + doff);
 }
 
-extern "C" void closeMatrix() {
-  if (sparseFile == nullptr) {
-    fprintf(stderr, "Cannot close unopened matrix\n");
-    exit(1);
-  }
-  fclose(sparseFile);
-  sparseFile = nullptr;
-  sparseFilename = nullptr;
+/// Closes the given opaque sparse tensor object, releasing its memory
+/// resources. After this call, the opaque object cannot be used anymore.
+extern "C" void closeTensor(void *tensor) {
+  delete static_cast<SparseTensor *>(tensor);
 }
 
-// Helper method to read matrix filenames from the environment, defined
-// with the naming convention ${MATRIX0}, ${MATRIX1}, etc.
-extern "C" char *getMatrix(uint64_t id) {
+/// Helper method to read a sparse tensor filename from the environment,
+/// defined with the naming convention ${TENSOR0}, ${TENSOR1}, etc.
+extern "C" char *getTensorFilename(uint64_t id) {
   char var[80];
-  sprintf(var, "MATRIX%" PRIu64, id);
+  sprintf(var, "TENSOR%" PRIu64, id);
   char *env = getenv(var);
   return env;
 }
