[Mlir-commits] [mlir] 93c81f4 - [mlir][taco] Uses sparse_tensor.new to read tensor input data from files.
Bixia Zheng
llvmlistbot at llvm.org
Thu Feb 3 08:26:38 PST 2022
Author: Bixia Zheng
Date: 2022-02-03T08:26:33-08:00
New Revision: 93c81f44cce802be7f2b723a96ed8e10db6101fb
URL: https://github.com/llvm/llvm-project/commit/93c81f44cce802be7f2b723a96ed8e10db6101fb
DIFF: https://github.com/llvm/llvm-project/commit/93c81f44cce802be7f2b723a96ed8e10db6101fb.diff
LOG: [mlir][taco] Uses sparse_tensor.new to read tensor input data from files.
Replace the Python implementation for reading tensor input data from files with
create_sparse_tensor that uses sparse_tensor.new.
The MLIR TNS format has two extra metadata lines. Add the extra metadata to a
test data file.
Implement TACO tensor methods evaluate and unpack.
Add unit tests.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D118803
Added:
mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
Modified:
mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
Removed:
################################################################################
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
index a6c570c3c7d8f..b82ce864820fb 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
@@ -1,3 +1,8 @@
+# Extended FROSTT format:
+# rank number-non-zero-elements
+# dimension-sizes
+3 5
+2 4 4
1 1 1 1.0
1 2 2 2.0
1 3 4 3.0
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
index f74ae09b9087e..24f114dba64a9 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -532,6 +532,24 @@ def get_formats(self, idx: int) -> Tuple[ModeFormat]:
return tuple(self._get_element(idx).dst_format.format_pack.formats)
+class _SparseValueInfo(enum.Enum):
+ """Describes how a sparse tensor value is stored.
+ _UNPACKED: The sparse tensor value is stored as (coordinates, values) in
+ Python.
+ _PACKED: The sparse tensor value is stored as a C pointer to a packed MLIR
+ sparse tensor.
+ """
+ _UNPACKED = 0
+ _PACKED = 1
+
+
+@dataclasses.dataclass(frozen=True)
+class _Assignment:
+ """Records an assignment to a tensor T as T[indices] = expression."""
+ indices: Tuple["IndexVar", ...]
+ expression: "IndexExpr"
+
+
class Tensor:
"""The tensor class.
@@ -622,12 +640,14 @@ def __init__(self,
self._name = name or self._get_unique_name()
self._dtype = dtype
+ self._assignment = None
# We currently use _coords and _values to host the sparse tensor value with
# COO format, and _dense_storage to host the dense tensor value. We haven't
# implemented the conversion between the two storages yet. This will be
# improved in a follow up CL.
self._coords = []
self._values = []
+ self._sparse_value_location = _SparseValueInfo._UNPACKED
self._dense_storage = None
self._stats = _Stats()
if value_or_shape is None or isinstance(value_or_shape, int) or isinstance(
@@ -647,7 +667,29 @@ def __init__(self,
"Must be a tuple or list for a shape or a single value"
f"if initializing a scalar tensor: {value_or_shape}.")
+ def is_unpacked(self) -> bool:
+ """Returns true if the tensor value is not packed as MLIR sparse tensor."""
+ return (self._sparse_value_location == _SparseValueInfo._UNPACKED)
+
+ def unpack(self) -> None:
+ """Unpacks the MLIR sparse tensor representation."""
+ if self.is_dense() or self.is_unpacked():
+ return
+
+ # Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
+ # values and verify the values.
+ rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
+ self._packed_sparse_value, np.float64)
+ assert rank == self.order
+ assert np.allclose(self.shape, shape)
+ assert nse == len(values)
+ self._coords = indices
+ self._values = values
+ self._sparse_value_location = _SparseValueInfo._UNPACKED
+
def __repr__(self) -> str:
+ self._sync_value()
+ self._unpack()
value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else
f"{repr(self._coords)} {repr(self._values)})")
return (f"Tensor(_name={repr(self._name)} "
@@ -665,6 +707,11 @@ def insert(self, coords: List[int], val: Union[float, int]) -> None:
Raises:
ValueError: When there is any problem in the parameters.
"""
+ if self.is_dense():
+ raise ValueError("Insert method is not supported for dense tensors.")
+ if self._assignment != None or not self.is_unpacked():
+ raise ValueError(
+ "Can't use Insert method for a tensor constructed from a file.")
if not isinstance(coords, list):
raise ValueError(f"Non list coordinate detected: {coords}.")
if not _all_instance_of(coords, int):
@@ -692,6 +739,9 @@ def to_array(self) -> np.ndarray:
if not self.is_dense():
raise ValueError("Conversion from non-dense Tensor "
"to numpy array not supported yet.")
+
+ self._sync_value()
+
return self._dense_storage
@staticmethod
@@ -755,6 +805,32 @@ def from_coo(
return tensor
+ @staticmethod
+ def from_file(
+ filename: str,
+ fmt: Format,
+ dtype: DType,
+ ) -> "Tensor":
+ """Constructs a sparse tensor using the COO-flavored values from a file.
+
+ Args:
+ filename: A string for the name of the file that contains the sparse
+ tensor data.
+ fmt: The tensor storage format.
+ dtype: The tensor element data type.
+
+ Returns:
+ A tensor with the given non-zero values and storage format. The tensor
+ value is stored as an MLIR sparse tensor.
+ """
+ sparse_tensor, shape = utils.create_sparse_tensor(filename,
+ fmt.format_pack.formats)
+ tensor = Tensor(shape.tolist(), fmt)
+ tensor._sparse_value_location = _SparseValueInfo._PACKED
+ tensor._packed_sparse_value = sparse_tensor
+
+ return tensor
+
@property
def dtype(self) -> DType:
"""Returns the data type for the Tensor."""
@@ -827,7 +903,13 @@ def __setitem__(self, key, value) -> None:
raise ValueError("Mismatch between indices and tensor rank: "
f"len({indices}) != {self.order}.")
- result = value.evaluate(self, indices)
+ self._assignment = _Assignment(indices, value)
+
+ def evaluate(self) -> None:
+ """Evaluates the assignment to the tensor."""
+ result = self._assignment.expression.evaluate(self,
+ self._assignment.indices)
+ self._assignment = None
if self.is_dense():
assert isinstance(result, np.ndarray)
self._dense_storage = result
@@ -836,6 +918,11 @@ def __setitem__(self, key, value) -> None:
assert (result[0].ndim, result[1].ndim) == (1, 2)
(self._values, self._coords) = result
+ def _sync_value(self) -> None:
+ """Updates the tensor value by evaluating the pending assignment."""
+ if self._assignment is not None:
+ self.evaluate()
+
def mlir_tensor_type(self) -> ir.RankedTensorType:
"""Returns the MLIR type for the tensor."""
return _mlir_tensor_type(self._dtype, tuple(self._shape),
@@ -860,16 +947,21 @@ def ctype_pointer(self) -> ctypes.pointer:
self._dense_storage = np.zeros(self._shape, self._dtype.value)
return _ctype_pointer_from_array(self._dense_storage)
- shape = np.array(self._shape, np.int64)
- indices = np.array(self._coords, np.int64)
- values = np.array(self._values, self._dtype.value)
- ptr = utils.coo_tensor_to_sparse_tensor(shape, values, indices)
+ if self.is_unpacked():
+ shape = np.array(self._shape, np.int64)
+ indices = np.array(self._coords, np.int64)
+ values = np.array(self._values, self._dtype.value)
+ ptr = utils.coo_tensor_to_sparse_tensor(shape, values, indices)
+ else:
+ ptr = self._packed_sparse_value
+
return ctypes.pointer(ctypes.cast(ptr, ctypes.c_void_p))
def get_coordinates_and_values(
self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]:
"""Returns the coordinates and values for the non-zero elements."""
if not self.is_dense():
+ assert (self.is_unpacked())
return (self._coords, self._values)
# Coordinates for non-zero elements, grouped by dimensions.
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
index 0ee69c78da37a..5d446d6af1636 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
@@ -30,118 +30,6 @@
_MTX_FILENAME_SUFFIX = ".mtx"
_TNS_FILENAME_SUFFIX = ".tns"
-_MTX_HEAD = "%%MatrixMarket"
-_MTX_MATRIX = "matrix"
-_MTX_COORDINATE = "coordinate"
-_MTX_REAL = "real"
-_MTX_SYMMETRY = "symmetric"
-_MTX_GENERAL = "general"
-_SYMMETRY_FIELD_ID = 4
-
-# The TACO supported header for .mtx has the following five fields:
-# . %%MatrixMarket
-# . matrix | tensor
-# . coordinate | array
-# . real
-# . symmetric | general
-#
-# This is what we support currently.
-_SUPPORTED_HEADER_FIELDS = ((_MTX_HEAD,), (_MTX_MATRIX,), (_MTX_COORDINATE,),
- (_MTX_REAL,), (_MTX_GENERAL, _MTX_SYMMETRY))
-
-_A_SPACE = " "
-_MTX_COMMENT = "%"
-_TNS_COMMENT = "#"
-
-
-def _coordinate_from_strings(strings: List[str]) -> List[int]:
- """"Return the coordinate represented by the input strings."""
- # Coordinates are 1-based in the text file and 0-based in memory.
- return [int(s) - 1 for s in strings]
-
-
-def _read_coordinate_format(file: TextIO, tensor: Tensor,
- is_symmetric: bool) -> None:
- """Reads tensor values in coordinate format."""
- rank = tensor.order
- # Process the data for the tensor.
- for line in file:
- if not line:
- continue
-
- fields = line.split(_A_SPACE)
- if rank != len(fields) - 1:
- raise ValueError("The format and data have mismatched ranks: "
- f"{rank} vs {len(fields)-1}.")
- coordinate = _coordinate_from_strings(fields[:-1])
- value = float(fields[-1])
- tensor.insert(coordinate, value)
- if is_symmetric and coordinate[0] != coordinate[-1]:
- coordinate.reverse()
- tensor.insert(coordinate, value)
-
-
-def _read_mtx(file: TextIO, fmt: Format) -> Tensor:
- """Inputs tensor from a text file with .mtx format."""
- # The first line should have this five fields:
- # head tensor-kind format data-type symmetry
- fields = file.readline().rstrip("\n").split(_A_SPACE)
- tuple_to_str = lambda x: "|".join(x)
- if len(fields) != len(_SUPPORTED_HEADER_FIELDS):
- raise ValueError(
- "Expected first line with theses fields "
- f"{' '.join(map(tuple_to_str, _SUPPORTED_HEADER_FIELDS))}: "
- f"{' '.join(fields)}")
-
- for i, values in enumerate(_SUPPORTED_HEADER_FIELDS):
- if fields[i] not in values:
- raise ValueError(f"The {i}th field can only be one of these values "
- f"{tuple_to_str(values)}: {fields[i]}")
-
- is_symmetric = (fields[_SYMMETRY_FIELD_ID] == _MTX_SYMMETRY)
- # Skip leading empty lines or comment lines.
- line = file.readline()
- while not line or line[0] == _MTX_COMMENT:
- line = file.readline()
-
- # Process the first data line with dimensions and number of non-zero values.
- fields = line.split(_A_SPACE)
- rank = fmt.rank()
- if rank != len(fields) - 1:
- raise ValueError("The format and data have mismatched ranks: "
- f"{rank} vs {len(fields)-1}.")
- shape = fields[:-1]
- shape = [int(s) for s in shape]
- num_non_zero = float(fields[-1])
-
- # Read the tensor values in coordinate format.
- tensor = Tensor(shape, fmt)
- _read_coordinate_format(file, tensor, is_symmetric)
- return tensor
-
-
-def _read_tns(file: TextIO, fmt: Format) -> Tensor:
- """Inputs tensor from a text file with .tns format."""
- rank = fmt.rank()
- coordinates = []
- values = []
- dtype = DType(Type.FLOAT64)
-
- for line in file:
- # Skip empty lines and comment lines.
- if not line or line[0] == _TNS_COMMENT:
- continue
-
- # Process each line with a coordinate and the value at the coordinate.
- fields = line.split(_A_SPACE)
- if rank != len(fields) - 1:
- raise ValueError("The format and data have mismatched ranks: "
- f"{rank} vs {len(fields)-1}.")
- coordinates.append(tuple(_coordinate_from_strings(fields[:-1])))
- values.append(dtype.value(fields[-1]))
-
- return Tensor.from_coo(coordinates, values, fmt, dtype)
-
def _write_tns(file: TextIO, tensor: Tensor) -> None:
"""Outputs a tensor to a file using .tns format."""
@@ -177,9 +65,7 @@ def read(filename: str, fmt: Format) -> Tensor:
if not isinstance(fmt, Format) or fmt.is_dense():
raise ValueError(f"Expected a sparse Format object: {fmt}.")
- with open(filename, "r") as file:
- return (_read_mtx(file, fmt) if filename.endswith(_MTX_FILENAME_SUFFIX) else
- _read_tns(file, fmt))
+ return Tensor.from_file(filename, fmt, DType(Type.FLOAT64))
def write(filename: str, tensor: Tensor) -> None:
@@ -202,5 +88,7 @@ def write(filename: str, tensor: Tensor) -> None:
if not isinstance(tensor, Tensor):
raise ValueError(f"Expected a Tensor object: {tensor}.")
+ # TODO: combine the evaluation and the outputting into one step.
+ tensor._sync_value()
with open(filename, "w") as file:
return _write_tns(file, tensor)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
new file mode 100644
index 0000000000000..1466dc841dcd6
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_io.py
@@ -0,0 +1,110 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+from string import Template
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco
+from tools import mlir_pytaco_io
+from tools import mlir_pytaco_utils as pytaco_utils
+
+# Define the aliases to shorten the code.
+_COMPRESSED = mlir_pytaco.ModeFormat.COMPRESSED
+_DENSE = mlir_pytaco.ModeFormat.DENSE
+
+
+def _run(f):
+ print(f.__name__)
+ f()
+ return f
+
+
+_FORMAT = mlir_pytaco.Format([_COMPRESSED, _COMPRESSED])
+_MTX_DATA_TEMPLATE = Template(
+ """%%MatrixMarket matrix coordinate real $general_or_symmetry
+3 3 3
+3 1 3
+1 2 2
+3 2 4
+""")
+
+
+def _get_mtx_data(value):
+ mtx_data = _MTX_DATA_TEMPLATE
+ return mtx_data.substitute(general_or_symmetry=value)
+
+
+# CHECK-LABEL: test_read_mtx_matrix_general
+@_run
+def test_read_mtx_matrix_general():
+ with tempfile.TemporaryDirectory() as test_dir:
+ file_name = os.path.join(test_dir, "data.mtx")
+ with open(file_name, "w") as file:
+ file.write(_get_mtx_data("general"))
+ a = mlir_pytaco_io.read(file_name, _FORMAT)
+ passed = 0
+ # The value of a is stored as an MLIR sparse tensor.
+ passed += (not a.is_unpacked())
+ a.unpack()
+ passed += (a.is_unpacked())
+ coords, values = a.get_coordinates_and_values()
+ passed += np.allclose(coords, [[0, 1], [2, 0], [2, 1]])
+ passed += np.allclose(values, [2.0, 3.0, 4.0])
+ # CHECK: 4
+ print(passed)
+
+
+# CHECK-LABEL: test_read_mtx_matrix_symmetry
+@_run
+def test_read_mtx_matrix_symmetry():
+ with tempfile.TemporaryDirectory() as test_dir:
+ file_name = os.path.join(test_dir, "data.mtx")
+ with open(file_name, "w") as file:
+ file.write(_get_mtx_data("symmetric"))
+ a = mlir_pytaco_io.read(file_name, _FORMAT)
+ passed = 0
+ # The value of a is stored as an MLIR sparse tensor.
+ passed += (not a.is_unpacked())
+ a.unpack()
+ passed += (a.is_unpacked())
+ coords, values = a.get_coordinates_and_values()
+ print(coords)
+ print(values)
+ passed += np.allclose(coords,
+ [[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]])
+ passed += np.allclose(values, [2.0, 3.0, 2.0, 4.0, 3.0, 4.0])
+ # CHECK: 4
+ print(passed)
+
+
+_TNS_DATA = """2 3
+3 2
+3 1 3
+1 2 2
+3 2 4
+"""
+
+
+# CHECK-LABEL: test_read_tns
+@_run
+def test_read_tns():
+ with tempfile.TemporaryDirectory() as test_dir:
+ file_name = os.path.join(test_dir, "data.tns")
+ with open(file_name, "w") as file:
+ file.write(_TNS_DATA)
+ a = mlir_pytaco_io.read(file_name, _FORMAT)
+ passed = 0
+ # The value of a is stored as an MLIR sparse tensor.
+ passed += (not a.is_unpacked())
+ a.unpack()
+ passed += (a.is_unpacked())
+ coords, values = a.get_coordinates_and_values()
+ passed += np.allclose(coords, [[0, 1], [2, 0], [2, 1]])
+ passed += np.allclose(values, [2.0, 3.0, 4.0])
+ # CHECK: 4
+ print(passed)
More information about the Mlir-commits
mailing list