[Mlir-commits] [mlir] 5b1c5fc - [mlir][sparse] Add complex number reading from files.

Wed Jun 8 13:33:40 PDT 2022

Author: bixia1
Date: 2022-06-08T13:33:35-07:00
New Revision: 5b1c5fc53adc3a9b6ebb966634277958c7a5bac4

URL: https://github.com/llvm/llvm-project/commit/5b1c5fc53adc3a9b6ebb966634277958c7a5bac4
DIFF: https://github.com/llvm/llvm-project/commit/5b1c5fc53adc3a9b6ebb966634277958c7a5bac4.diff

LOG: [mlir][sparse] Add complex number reading from files.

Support complex numbers for Matrix Market Exchange Formats. Add a test case.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D127138

Added: 
    mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
    mlir/test/Integration/data/test_symmetric_complex.mtx

Modified: 
    mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
    mlir/test/CMakeLists.txt
    mlir/test/Integration/data/wide.mtx

Removed: 
    


################################################################################
diff  --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index 2ec72b0e3172..a8ade3cba94e 100644

--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -1105,6 +1105,15 @@ static char *toLower(char *token) {
 /// as well as providing the buffers and methods for parsing those headers.
 class SparseTensorFile final {
 public:
+  enum class ValueKind {
+    kInvalid = 0,
+    kPattern = 1,
+    kReal = 2,
+    kInteger = 3,
+    kComplex = 4,
+    kUndefined = 5
+  };
+
   explicit SparseTensorFile(char *filename) : filename(filename) {
     assert(filename && "Received nullptr for filename");
   }
@@ -1158,32 +1167,36 @@ class SparseTensorFile final {
       readExtFROSTTHeader();
     else
       FATAL("Unknown format %s\n", filename);
-    assert(isValid && "Failed to read the header");
+    assert(isValid() && "Failed to read the header");
   }
 
+  ValueKind getValueKind() const { return valueKind_; }
+
+  bool isValid() const { return valueKind_ != ValueKind::kUndefined; }
+
   /// Gets the MME "pattern" property setting.  Is only valid after
   /// parsing the header.
   bool isPattern() const {
-    assert(isValid && "Attempt to isPattern() before readHeader()");
-    return isPattern_;
+    assert(isValid() && "Attempt to isPattern() before readHeader()");
+    return valueKind_ == ValueKind::kPattern;
   }
 
   /// Gets the MME "symmetric" property setting.  Is only valid after
   /// parsing the header.
   bool isSymmetric() const {
-    assert(isValid && "Attempt to isSymmetric() before readHeader()");
+    assert(isValid() && "Attempt to isSymmetric() before readHeader()");
     return isSymmetric_;
   }
 
   /// Gets the rank of the tensor.  Is only valid after parsing the header.
   uint64_t getRank() const {
-    assert(isValid && "Attempt to getRank() before readHeader()");
+    assert(isValid() && "Attempt to getRank() before readHeader()");
     return idata[0];
   }
 
   /// Gets the number of non-zeros.  Is only valid after parsing the header.
   uint64_t getNNZ() const {
-    assert(isValid && "Attempt to getNNZ() before readHeader()");
+    assert(isValid() && "Attempt to getNNZ() before readHeader()");
     return idata[1];
   }
 
@@ -1214,8 +1227,7 @@ class SparseTensorFile final {
 
   const char *filename;
   FILE *file = nullptr;
-  bool isValid = false;
-  bool isPattern_ = false;
+  ValueKind valueKind_ = ValueKind::kInvalid;
   bool isSymmetric_ = false;
   uint64_t idata[512];
   char line[kColWidth];
@@ -1232,14 +1244,24 @@ void SparseTensorFile::readMMEHeader() {
   if (fscanf(file, "%63s %63s %63s %63s %63s\n", header, object, format, field,
              symmetry) != 5)
     FATAL("Corrupt header in %s\n", filename);
-  // Set properties
-  isPattern_ = (strcmp(toLower(field), "pattern") == 0);
+  // Process `field`, which specify pattern or the data type of the values.
+  if (strcmp(toLower(field), "pattern") == 0)
+    valueKind_ = ValueKind::kPattern;
+  else if (strcmp(toLower(field), "real") == 0)
+    valueKind_ = ValueKind::kReal;
+  else if (strcmp(toLower(field), "integer") == 0)
+    valueKind_ = ValueKind::kInteger;
+  else if (strcmp(toLower(field), "complex") == 0)
+    valueKind_ = ValueKind::kComplex;
+  else
+    FATAL("Unexpected header field value in %s\n", filename);
+
+  // Set properties.
   isSymmetric_ = (strcmp(toLower(symmetry), "symmetric") == 0);
   // Make sure this is a general sparse matrix.
   if (strcmp(toLower(header), "%%matrixmarket") ||
       strcmp(toLower(object), "matrix") ||
       strcmp(toLower(format), "coordinate") ||
-      (strcmp(toLower(field), "real") && !isPattern_) ||
       (strcmp(toLower(symmetry), "general") && !isSymmetric_))
     FATAL("Cannot find a general sparse matrix in %s\n", filename);
   // Skip comments.
@@ -1253,7 +1275,6 @@ void SparseTensorFile::readMMEHeader() {
   if (sscanf(line, "%" PRIu64 "%" PRIu64 "%" PRIu64 "\n", idata + 2, idata + 3,
              idata + 1) != 3)
     FATAL("Cannot find size in %s\n", filename);
-  isValid = true;
 }
 
 /// Read the "extended" FROSTT header. Although not part of the documented
@@ -1275,18 +1296,78 @@ void SparseTensorFile::readExtFROSTTHeader() {
     if (fscanf(file, "%" PRIu64, idata + 2 + r) != 1)
       FATAL("Cannot find dimension size %s\n", filename);
   readLine(); // end of line
-  isValid = true;
+  // The FROSTT format does not define the data type of the nonzero elements.
+  valueKind_ = ValueKind::kUndefined;
+}
+
+// Adds a value to a tensor in coordinate scheme. If is_symmetric_value is true,
+// also adds the value to its symmetric location.
+template <typename T, typename V>
+static inline void addValue(T *coo, V value,
+                            const std::vector<uint64_t> indices,
+                            bool is_symmetric_value) {
+  // TODO: <https://github.com/llvm/llvm-project/issues/54179>
+  coo->add(indices, value);
+  // We currently chose to deal with symmetric matrices by fully constructing
+  // them. In the future, we may want to make symmetry implicit for storage
+  // reasons.
+  if (is_symmetric_value)
+    coo->add({indices[1], indices[0]}, value);
+}
+
+// Reads an element of a complex type for the current indices in coordinate
+// scheme.
+template <typename V>
+static inline void readCOOValue(SparseTensorCOO<std::complex<V>> *coo,
+                                const std::vector<uint64_t> indices,
+                                char **linePtr, bool is_pattern,
+                                bool add_symmetric_value) {
+  // Read two values to make a complex. The external formats always store
+  // numerical values with the type double, but we cast these values to the
+  // sparse tensor object type. For a pattern tensor, we arbitrarily pick the
+  // value 1 for all entries.
+  V re = is_pattern ? 1.0 : strtod(*linePtr, linePtr);
+  V im = is_pattern ? 1.0 : strtod(*linePtr, linePtr);
+  std::complex<V> value = {re, im};
+  addValue(coo, value, indices, add_symmetric_value);
+}
+
+// Reads an element of a non-complex type for the current indices in coordinate
+// scheme.
+template <typename V,
+          typename std::enable_if<
+              !std::is_same<std::complex<float>, V>::value &&
+              !std::is_same<std::complex<double>, V>::value>::type * = nullptr>
+static void inline readCOOValue(SparseTensorCOO<V> *coo,
+                                const std::vector<uint64_t> indices,
+                                char **linePtr, bool is_pattern,
+                                bool is_symmetric_value) {
+  // The external formats always store these numerical values with the type
+  // double, but we cast these values to the sparse tensor object type.
+  // For a pattern tensor, we arbitrarily pick the value 1 for all entries.
+  double value = is_pattern ? 1.0 : strtod(*linePtr, linePtr);
+  addValue(coo, value, indices, is_symmetric_value);
 }
 
 /// Reads a sparse tensor with the given filename into a memory-resident
 /// sparse tensor in coordinate scheme.
 template <typename V>
-static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
-                                               const uint64_t *shape,
-                                               const uint64_t *perm) {
+static SparseTensorCOO<V> *
+openSparseTensorCOO(char *filename, uint64_t rank, const uint64_t *shape,
+                    const uint64_t *perm, PrimaryType valTp) {
   SparseTensorFile stfile(filename);
   stfile.openFile();
   stfile.readHeader();
+  // Check tensor element type against the value type in the input file.
+  SparseTensorFile::ValueKind valueKind = stfile.getValueKind();
+  bool tensorIsInteger =
+      (valTp >= PrimaryType::kI64 && valTp <= PrimaryType::kI8);
+  bool tensorIsReal = (valTp >= PrimaryType::kF64 && valTp <= PrimaryType::kI8);
+  if ((valueKind == SparseTensorFile::ValueKind::kReal && tensorIsInteger) ||
+      (valueKind == SparseTensorFile::ValueKind::kComplex && tensorIsReal)) {
+    FATAL("Tensor element type %d not compatible with values in file %s\n",
+          valTp, filename);
+  }
   stfile.assertMatchesShape(rank, shape);
   // Prepare sparse tensor object with per-dimension sizes
   // and the number of nonzeros as initial capacity.
@@ -1302,17 +1383,8 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
       // Add 0-based index.
       indices[perm[r]] = idx - 1;
     }
-    // The external formats always store the numerical values with the type
-    // double, but we cast these values to the sparse tensor object type.
-    // For a pattern tensor, we arbitrarily pick the value 1 for all entries.
-    double value = stfile.isPattern() ? 1.0 : strtod(linePtr, &linePtr);
-    // TODO: <https://github.com/llvm/llvm-project/issues/54179>
-    coo->add(indices, value);
-    // We currently chose to deal with symmetric matrices by fully constructing
-    // them. In the future, we may want to make symmetry implicit for storage
-    // reasons.
-    if (stfile.isSymmetric() && indices[0] != indices[1])
-      coo->add({indices[1], indices[0]}, value);
+    readCOOValue(coo, indices, &linePtr, stfile.isPattern(),
+                 stfile.isSymmetric() && indices[0] != indices[1]);
   }
   // Close the file and return tensor.
   stfile.closeFile();
@@ -1441,7 +1513,7 @@ extern "C" {
     if (action <= Action::kFromCOO) {                                          \
       if (action == Action::kFromFile) {                                       \
         char *filename = static_cast<char *>(ptr);                             \
-        coo = openSparseTensorCOO<V>(filename, rank, shape, perm);             \
+        coo = openSparseTensorCOO<V>(filename, rank, shape, perm, v);          \
       } else if (action == Action::kFromCOO) {                                 \
         coo = static_cast<SparseTensorCOO<V> *>(ptr);                          \
       } else {                                                                 \

diff  --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index b43fa5c6975e..c203f2217ec5 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -48,6 +48,7 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS)
   file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/mttkrp_b.tns
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test.mtx
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test_symmetric.mtx
+            ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test_symmetric_complex.mtx
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/test.tns
             ${CMAKE_CURRENT_SOURCE_DIR}/Integration/data/wide.mtx
           DESTINATION ${MLIR_INTEGRATION_TEST_DIR}/data/)

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
new file mode 100644
index 000000000000..fe460f5b535b
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
@@ -0,0 +1,86 @@
+// RUN: mlir-opt %s --sparse-compiler | \
+// RUN: TENSOR0="%mlir_integration_test_dir/data/test_symmetric_complex.mtx" \
+// RUN: mlir-cpu-runner \
+// RUN:  -e entry -entry-point-result=void  \
+// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+!Filename = !llvm.ptr<i8>
+
+#SparseMatrix = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed", "compressed" ]
+}>
+
+#trait_sum_reduce = {
+  indexing_maps = [
+    affine_map<(i,j) -> (i,j)>, // A
+    affine_map<(i,j) -> ()>     // x (out)
+  ],
+  iterator_types = ["reduction", "reduction"],
+  doc = "x += A(i,j)"
+}
+
+//
+// Integration test that lowers a kernel annotated as sparse to
+// actual sparse code, initializes a matching sparse storage scheme
+// from file, and runs the resulting code with the JIT compiler.
+//
+module {
+  //
+  // A kernel that sum-reduces a matrix to a single scalar.
+  //
+  func.func @kernel_sum_reduce(%arga: tensor<?x?xcomplex<f64>, #SparseMatrix>,
+                          %argx: tensor<complex<f64>> {linalg.inplaceable = true}) -> tensor<complex<f64>> {
+    %0 = linalg.generic #trait_sum_reduce
+      ins(%arga: tensor<?x?xcomplex<f64>, #SparseMatrix>)
+      outs(%argx: tensor<complex<f64>>) {
+      ^bb(%a: complex<f64>, %x: complex<f64>):
+        %0 = complex.add %x, %a : complex<f64>
+        linalg.yield %0 : complex<f64>
+    } -> tensor<complex<f64>>
+    return %0 : tensor<complex<f64>>
+  }
+
+  func.func private @getTensorFilename(index) -> (!Filename)
+
+  //
+  // Main driver that reads matrix from file and calls the sparse kernel.
+  //
+  func.func @entry() {
+    //%d0 = arith.constant 0.0 : complex<f64>
+    %d0 = complex.constant [0.0 : f64, 0.0 : f64] : complex<f64>
+    %c0 = arith.constant 0 : index
+
+    // Setup memory for a single reduction scalar,
+    // initialized to zero.
+    %xdata = memref.alloc() : memref<complex<f64>>
+    memref.store %d0, %xdata[] : memref<complex<f64>>
+    %x = bufferization.to_tensor %xdata : memref<complex<f64>>
+
+    // Read the sparse matrix from file, construct sparse storage.
+    %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+    %a = sparse_tensor.new %fileName : !Filename to tensor<?x?xcomplex<f64>, #SparseMatrix>
+
+    // Call the kernel.
+    %0 = call @kernel_sum_reduce(%a, %x)
+      : (tensor<?x?xcomplex<f64>, #SparseMatrix>, tensor<complex<f64>>) -> tensor<complex<f64>>
+
+    // Print the result for verification.
+    //
+    // CHECK: 30.2
+    // CHECK-NEXT: 22.2
+    //
+    %m = bufferization.to_memref %0 : memref<complex<f64>>
+    %v = memref.load %m[] : memref<complex<f64>>
+    %real = complex.re %v : complex<f64>
+    %imag = complex.im %v : complex<f64>
+    vector.print %real : f64
+    vector.print %imag : f64
+
+    // Release the resources.
+    memref.dealloc %xdata : memref<complex<f64>>
+    sparse_tensor.release %a : tensor<?x?xcomplex<f64>, #SparseMatrix>
+
+    return
+  }
+}

diff  --git a/mlir/test/Integration/data/test_symmetric_complex.mtx b/mlir/test/Integration/data/test_symmetric_complex.mtx
new file mode 100644
index 000000000000..5e5d0702136b
--- /dev/null
+++ b/mlir/test/Integration/data/test_symmetric_complex.mtx
@@ -0,0 +1,13 @@
+%%MatrixMarket matrix coordinate complex symmetric
+%
+% This is a test sparse matrix in Matrix Market Exchange Format.
+% see https://math.nist.gov/MatrixMarket
+%
+5 5 7
+1 1 5.0 1.0
+1 3 4.1 2.1
+2 2 3.0 3.0
+2 4 2.0 4.0
+3 3 1.0 3.0
+4 4 4.0 2.0
+5 5 5.0 1.0

diff  --git a/mlir/test/Integration/data/wide.mtx b/mlir/test/Integration/data/wide.mtx
index 9e0d5f2a1132..e92457d92b5f 100644
--- a/mlir/test/Integration/data/wide.mtx
+++ b/mlir/test/Integration/data/wide.mtx
@@ -1,4 +1,4 @@
-%%MatrixMarket matrix coordinate real general
+%%MatrixMarket matrix coordinate integer general
 %
 % This is a test sparse matrix in Matrix Market Exchange Format.
 % see https://math.nist.gov/MatrixMarket