[Mlir-commits] [mlir] ad932a7 - [mlir][sparse][taco] Support true dense tensors and all dense sparse tensors.

Mon Feb 14 15:35:05 PST 2022

Author: Bixia Zheng
Date: 2022-02-14T15:35:01-08:00
New Revision: ad932a75f9ae58afbf5d43521d18c9a5a12876ff

URL: https://github.com/llvm/llvm-project/commit/ad932a75f9ae58afbf5d43521d18c9a5a12876ff
DIFF: https://github.com/llvm/llvm-project/commit/ad932a75f9ae58afbf5d43521d18c9a5a12876ff.diff

LOG: [mlir][sparse][taco] Support true dense tensors and all dense sparse tensors.

The only method to create a true dense tensor (i.e., un-annotated) in MLIR-PyTACO
is through the from_array method. However, annotated all-dense tensors are
currently also implemented as true dense tensors. This PR fixes the
implementation to support annotated all-dense sparse tensors.

Extend the tensor init method to support the construction of a tensor without
any sparsity annotation.

Change the tensor to_file method to only support writing packed sparse
tensors to file through the MLIR sparse tensor dialect. Writing a true dense
tensor or an unpacked sparse tensor now raises a ValueError.

Add unit tests for true dense tensors and all dense sparse tensors.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D119500

Added: 
    mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
    mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py

Modified: 
    mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
    mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
    mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
    mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py
    mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py

Removed: 
    


################################################################################
diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
index 021519028496..68ae7efd0319 100644

--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
@@ -1,5 +1,6 @@
 # RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
 
+import numpy as np
 import os
 import sys
 
@@ -17,7 +18,7 @@
 A = pt.tensor([2, 3])
 B = pt.tensor([2, 3])
 C = pt.tensor([2, 3])
-D = pt.tensor([2, 3], dense)
+D = pt.tensor([2, 3], compressed)
 A.insert([0, 1], 10)
 A.insert([1, 2], 40)
 B.insert([0, 0], 20)
@@ -26,5 +27,9 @@
 C.insert([1, 2], 7)
 D[i, j] = A[i, j] + B[i, j] - C[i, j]
 
-# CHECK: [20. 5. 0. 0. 0. 63.]
-print(D.to_array().reshape(6))
+indices, values = D.get_coordinates_and_values()
+passed = np.allclose(indices, [[0, 0], [0, 1], [1, 2]])
+passed += np.allclose(values, [20.0, 5.0, 63.0])
+
+# CHECK: Number of passed: 2
+print("Number of passed:", passed)

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
new file mode 100644
index 000000000000..6055e7947102
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
@@ -0,0 +1,22 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import numpy as np
+import os
+import sys
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+i, j = pt.get_index_vars(2)
+# Both tensors are true dense tensors.
+A = pt.from_array(np.full([2,3], 1, dtype=np.float64))
+B = pt.from_array(np.full([2,3], 2, dtype=np.float64))
+# Define the result tensor as a true dense tensor. The parameter is_dense=True
+# is an MLIR-PyTACO extension.
+C = pt.tensor([2, 3], is_dense=True)
+
+C[i, j] = A[i, j] + B[i, j]
+
+# CHECK: [3. 3. 3. 3. 3. 3.]
+print(C.to_array().reshape(6))

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
index 2d3b23e5ed86..9d32b2c6accb 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -298,19 +298,12 @@ def __post_init__(self) -> None:
                        f"len({self.format_pack}) != "
                        f"len({self.ordering})")
 
-  def is_dense(self) -> bool:
-    """Returns true if all the Tensor dimensions have a dense format."""
-    return all([f == ModeFormat.DENSE for f in self.format_pack.formats])
-
   def rank(self) -> int:
     """Returns the number of dimensions represented by the format."""
     return self.format_pack.rank()
 
   def mlir_tensor_attr(self) -> Optional[sparse_tensor.EncodingAttr]:
     """Constructs the MLIR attributes for the tensor format."""
-    if self.is_dense():
-      return None
-
     order = (
         range(self.rank()) if
         (self.ordering is None) else self.ordering.ordering)
@@ -467,22 +460,22 @@ class _StructOpInfo:
       op.
     dst_dtype: A DType representing the data type of the structured op result.
     dst_name: A string representing the name of the structured op result.
-    dst_format: A Format object representing the destination tensor format.
+    dst_format: An optional Format object representing the destination tensor
+      format. None represents a true dense tensor.
   """
   dst_indices: Tuple[IndexVar, ...]
   dst_dims: Tuple[int, ...]
   dst_dtype: DType
   dst_name: str
-  dst_format: Format
+  dst_format: Optional[Format]
 
   def __post_init__(self) -> None:
     """Verifies the integrity of the attribute values."""
     assert len(self.dst_indices) == len(self.dst_dims)
-    assert self.dst_format is not None
 
   def emit_tensor_init(self) -> ir.RankedTensorType:
     """Returns an initialization for the destination tensor."""
-    if self.dst_format.is_dense():
+    if self.dst_format is None:
       # Initialize the dense tensor.
       ir_type = _mlir_type_from_taco_type(self.dst_dtype)
       tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
@@ -613,7 +606,8 @@ def __init__(self,
                fmt: Optional[Union[ModeFormat, List[ModeFormat],
                                    Format]] = None,
                dtype: Optional[DType] = None,
-               name: Optional[str] = None):
+               name: Optional[str] = None,
+               is_dense: bool = False):
     """The tensor constructor interface defined by TACO API.
 
     Args:
@@ -630,25 +624,35 @@ def __init__(self,
       dtype: An object of dtype, representing the data type of the tensor.
       name: A string name of the tensor. If a name is not given, creates a
         unique name for the tensor.
+      is_dense: A boolean variable to indicate whether the tensor is a dense
+        tensor without any sparsity annotation.
 
     Raises:
       ValueError: If there is any inconsistency among the input arguments.
     """
-    # Take care of the argument default values.
-    fmt = fmt or ModeFormat.COMPRESSED
+    # Take care of the argument default values common to both sparse tensors
+    # and dense tensors.
     dtype = dtype or DType(Type.FLOAT64)
     self._name = name or self._get_unique_name()
-
-    self._dtype = dtype
     self._assignment = None
+    self._sparse_value_location = _SparseValueInfo._UNPACKED
+    self._dense_storage = None
+    self._dtype = dtype
+
+    if is_dense:
+      assert (fmt is None)
+      assert (isinstance(value_or_shape, tuple) or isinstance(
+          value_or_shape, list)) and _all_instance_of(value_or_shape, int)
+      self._shape = value_or_shape
+      self._format = None
+      return
+
+    fmt = fmt or ModeFormat.COMPRESSED
     # We currently use _coords and _values to host the sparse tensor value with
-    # COO format, and _dense_storage to host the dense tensor value. We haven't
-    # implement the conversion between the two storages yet. This will be
-    # improved in a follow up CL.
+    # COO format, and _dense_storage to host the dense tensor value. We don't
+    # support the conversion between the two storages.
     self._coords = []
     self._values = []
-    self._sparse_value_location = _SparseValueInfo._UNPACKED
-    self._dense_storage = None
     self._stats = _Stats()
     if value_or_shape is None or isinstance(value_or_shape, int) or isinstance(
         value_or_shape, float):
@@ -694,7 +698,7 @@ def unpack(self) -> None:
 
   def __repr__(self) -> str:
     self._sync_value()
-    self._unpack()
+    self.unpack()
     value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else
                  f"{repr(self._coords)} {repr(self._values)})")
     return (f"Tensor(_name={repr(self._name)} "
@@ -733,8 +737,8 @@ def insert(self, coords: List[int], val: Union[float, int]) -> None:
     self._values.append(self._dtype.value(val))
 
   def is_dense(self) -> bool:
-    """Returns true if all the Tensor dimensions have a dense format."""
-    return self._format.is_dense()
+    """Returns true if the tensor doesn't have sparsity annotation."""
+    return self._format is None
 
   def to_array(self) -> np.ndarray:
     """Returns the numpy array for the Tensor.
@@ -767,7 +771,7 @@ def from_array(array: np.ndarray) -> "Tensor":
     """
     if array.dtype != np.float64:
       raise ValueError(f"Expected float64 value type: {array.dtype}.")
-    tensor = Tensor(array.shape, ModeFormat.DENSE)
+    tensor = Tensor(array.shape, is_dense=True)
     tensor._dense_storage = np.copy(array)
     return tensor
 
@@ -843,26 +847,22 @@ def to_file(self, filename: str) -> None:
 
     Args:
       filename: A string file name.
+
+    Raises:
+       ValueError: If the tensor is dense, or an unpacked sparse tensor.
     """
     self._sync_value()
-    if not self.is_unpacked():
-      utils.output_sparse_tensor(self._packed_sparse_value, filename,
-                                 self._format.format_pack.formats)
-      return
 
-    # TODO: Use MLIR code to output the value.
-    coords, values = self.get_coordinates_and_values()
-    assert len(coords) == len(values)
-    with open(filename, "w") as file:
-      # Output a comment line and the meta data.
-      file.write("; extended FROSTT format\n")
-      file.write(f"{self.order} {len(coords)}\n")
-      file.write(f"{' '.join(map(lambda i: str(i), self.shape))}\n")
-      # Output each (coordinate value) pair in a line.
-      for c, v in zip(coords, values):
-        # The coordinates are 1-based in the text file and 0-based in memory.
-        plus_one_to_str = lambda x: str(x + 1)
-        file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n")
+    if self.is_dense():
+      raise ValueError("Writing dense tensors without sparsity annotation to "
+                       "file is not supported.")
+
+    if self.is_unpacked():
+      raise ValueError("Writing unpacked sparse tensors to file is not "
+                       "supported.")
+
+    utils.output_sparse_tensor(self._packed_sparse_value, filename,
+                               self._format.format_pack.formats)
 
   @property
   def dtype(self) -> DType:
@@ -956,8 +956,9 @@ def _sync_value(self) -> None:
 
   def mlir_tensor_type(self) -> ir.RankedTensorType:
     """Returns the MLIR type for the tensor."""
-    return _mlir_tensor_type(self._dtype, tuple(self._shape),
-                             self._format.mlir_tensor_attr())
+    mlir_attr = None if (
+        self._format is None) else self._format.mlir_tensor_attr()
+    return _mlir_tensor_type(self._dtype, tuple(self._shape), mlir_attr)
 
   def dense_dst_ctype_pointer(self) -> ctypes.pointer:
     """Returns the ctypes pointer for the pointer to an MemRefDescriptor.
@@ -990,9 +991,15 @@ def ctype_pointer(self) -> ctypes.pointer:
 
   def get_coordinates_and_values(
       self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]:
-    """Returns the coordinates and values for the non-zero elements."""
+    """Returns the coordinates and values for the non-zero elements.
+
+    This method also evaluate the assignment to the tensor and unpack the
+    sparse tensor.
+    """
+    self._sync_value()
+
     if not self.is_dense():
-      assert (self.is_unpacked())
+      self.unpack()
       return (self._coords, self._values)
 
     # Coordinates for non-zero elements, grouped by dimensions.
@@ -1627,7 +1634,12 @@ def _validate_and_collect_expr_info(
   if isinstance(expr, Access):
     src_indices = expr.indices
     src_dims = tuple(expr.tensor.shape)
-    mode_formats = tuple(expr.tensor.format.format_pack.formats)
+    if expr.tensor.format is None:
+      # Treat each dimension of a dense tensor as DENSE for the purpose of
+      # calculating temporary tensor storage format.
+      mode_formats = tuple([ModeFormat.DENSE] * len(src_dims))
+    else:
+      mode_formats = tuple(expr.tensor.format.format_pack.formats)
     assert len(src_dims) == len(mode_formats)
     dim_infos = tuple([_DimInfo(d, m) for d, m in zip(src_dims, mode_formats)])
   else:

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
index f66eb9b6fdd0..94cb740a006f 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
@@ -51,8 +51,6 @@ def read(filename: str, fmt: Format) -> Tensor:
     raise ValueError("Expected string filename ends with "
                      f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: "
                      f"{filename}.")
-  if not isinstance(fmt, Format) or fmt.is_dense():
-    raise ValueError(f"Expected a sparse Format object: {fmt}.")
 
   return Tensor.from_file(filename, fmt, DType(Type.FLOAT64))
 

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py
index 02437b441556..466c9df04298 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/testing_utils.py
@@ -36,3 +36,10 @@ def file_as_string(file: str) -> str:
   """Returns contents of file as string."""
   with open(file, "r") as f:
     return f.read()
+
+
+def run_test(f):
+  """Prints the test name and runs the test."""
+  print(f.__name__)
+  f()
+  return f

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py
new file mode 100644
index 000000000000..6b770f7eacc6
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_core.py
@@ -0,0 +1,39 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+from string import Template
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco
+from tools import testing_utils as testing_utils
+
+# Define the aliases to shorten the code.
+_COMPRESSED = mlir_pytaco.ModeFormat.COMPRESSED
+_DENSE = mlir_pytaco.ModeFormat.DENSE
+
+
+# CHECK-LABEL: test_tensor_all_dense_sparse
+ at testing_utils.run_test
+def test_tensor_all_dense_sparse():
+  a = mlir_pytaco.Tensor([4], [_DENSE])
+  passed = (not a.is_dense())
+  passed += (a.order == 1)
+  passed += (a.shape[0] == 4)
+  # CHECK: Number of passed: 3
+  print("Number of passed:", passed)
+
+
+# CHECK-LABEL: test_tensor_true_dense
+ at testing_utils.run_test
+def test_tensor_true_dense():
+  a = mlir_pytaco.Tensor.from_array(np.random.uniform(size=5))
+  passed = a.is_dense()
+  passed += (a.order == 1)
+  passed += (a.shape[0] == 5)
+  # CHECK: Number of passed: 3
+  print("Number of passed:", passed)

diff  --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
index 87246fd65f28..da5c11a1a972 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
@@ -12,18 +12,14 @@
 from tools import mlir_pytaco
 from tools import mlir_pytaco_io
 from tools import mlir_pytaco_utils as pytaco_utils
+from tools import testing_utils as testing_utils
+
 
 # Define the aliases to shorten the code.
 _COMPRESSED = mlir_pytaco.ModeFormat.COMPRESSED
 _DENSE = mlir_pytaco.ModeFormat.DENSE
 
 
-def _run(f):
-  print(f.__name__)
-  f()
-  return f
-
-
 _FORMAT = mlir_pytaco.Format([_COMPRESSED, _COMPRESSED])
 _MTX_DATA_TEMPLATE = Template(
     """%%MatrixMarket matrix coordinate real $general_or_symmetry
@@ -40,7 +36,7 @@ def _get_mtx_data(value):
 
 
 # CHECK-LABEL: test_read_mtx_matrix_general
- at _run
+ at testing_utils.run_test
 def test_read_mtx_matrix_general():
   with tempfile.TemporaryDirectory() as test_dir:
     file_name = os.path.join(test_dir, "data.mtx")
@@ -60,7 +56,7 @@ def test_read_mtx_matrix_general():
 
 
 # CHECK-LABEL: test_read_mtx_matrix_symmetry
- at _run
+ at testing_utils.run_test
 def test_read_mtx_matrix_symmetry():
   with tempfile.TemporaryDirectory() as test_dir:
     file_name = os.path.join(test_dir, "data.mtx")
@@ -91,7 +87,7 @@ def test_read_mtx_matrix_symmetry():
 
 
 # CHECK-LABEL: test_read_tns
- at _run
+ at testing_utils.run_test
 def test_read_tns():
   with tempfile.TemporaryDirectory() as test_dir:
     file_name = os.path.join(test_dir, "data.tns")
@@ -111,7 +107,7 @@ def test_read_tns():
 
 
 # CHECK-LABEL: test_write_unpacked_tns
- at _run
+ at testing_utils.run_test
 def test_write_unpacked_tns():
   a = mlir_pytaco.Tensor([2, 3])
   a.insert([0, 1], 10)
@@ -119,19 +115,15 @@ def test_write_unpacked_tns():
   a.insert([0, 0], 20)
   with tempfile.TemporaryDirectory() as test_dir:
     file_name = os.path.join(test_dir, "data.tns")
-    mlir_pytaco_io.write(file_name, a)
-    with open(file_name, "r") as file:
-      lines = file.readlines()
-  passed = 0
-  # Skip the comment line in the output.
-  if lines[1:] == ["2 3\n", "2 3\n", "1 2 10.0\n", "2 3 40.0\n", "1 1 20.0\n"]:
-    passed = 1
-  # CHECK: 1
-  print(passed)
+    try:
+      mlir_pytaco_io.write(file_name, a)
+    except ValueError as e:
+      # CHECK: Writing unpacked sparse tensors to file is not supported
+      print(e)
 
 
 # CHECK-LABEL: test_write_packed_tns
- at _run
+ at testing_utils.run_test
 def test_write_packed_tns():
   a = mlir_pytaco.Tensor([2, 3])
   a.insert([0, 1], 10)