[Mlir-commits] [mlir] c8ae8cf - [mlir][sparse][taco] Add support for float32.
Bixia Zheng
llvmlistbot at llvm.org
Wed Feb 23 18:24:29 PST 2022
Author: Bixia Zheng
Date: 2022-02-23T18:24:22-08:00
New Revision: c8ae8cfb5d53217f8ee11af666c3d5a51a3e3230
URL: https://github.com/llvm/llvm-project/commit/c8ae8cfb5d53217f8ee11af666c3d5a51a3e3230
DIFF: https://github.com/llvm/llvm-project/commit/c8ae8cfb5d53217f8ee11af666c3d5a51a3e3230.diff
LOG: [mlir][sparse][taco] Add support for float32.
Previously, we only supported float64. We now support float32 and float64. When
constructing a tensor without providing a data type, the default is float32.
Fix the tests for data type consistency. All PyTACO application tests now use
float32 to match the default data type of TACO. Other tests may use float32 or
float64.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D120356
Added:
Modified:
mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py
Removed:
################################################################################
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
index 1e35a85755382..4e6fc1136d487 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
@@ -30,8 +30,8 @@
# These two lines have been modified from the original program to use static
# data to support result comparison.
-C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
-D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float32))
+D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float32))
# Declare the result to be a dense matrix.
A = pt.tensor([B.shape[0], 25], rm)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
index 9f017ad157783..1001490240169 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
@@ -15,8 +15,8 @@
i, j, k = pt.get_index_vars(3)
# Set up dense matrices.
-A = pt.from_array(np.full((8, 8), 2.0))
-B = pt.from_array(np.full((8, 8), 3.0))
+A = pt.from_array(np.full((8, 8), 2.0, dtype=np.float32))
+B = pt.from_array(np.full((8, 8), 3.0, dtype=np.float32))
# Set up sparse matrices.
S = pt.tensor([8, 8], pt.format([pt.compressed, pt.compressed]))
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
index 41ee71fab4310..a0ddb6229f6a9 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
@@ -31,8 +31,8 @@
# These two lines have been modified from the original program to use static
# data to support result comparison.
-x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
-z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
+x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float32))
+z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float32))
# Declare the result to be a dense vector
y = pt.tensor([A.shape[0]], dv)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
index 6055e7947102c..3d3d4c0d39170 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_true_dense_tensor_algebra.py
@@ -14,7 +14,7 @@
B = pt.from_array(np.full([2,3], 2, dtype=np.float64))
# Define the result tensor as a true dense tensor. The parameter is_dense=True
# is an MLIR-PyTACO extension.
-C = pt.tensor([2, 3], is_dense=True)
+C = pt.tensor([2, 3], dtype=pt.float64, is_dense=True)
C[i, j] = A[i, j] + B[i, j]
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
index c44a84e25a25d..59063c003ab17 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -96,7 +96,7 @@ class DType:
kind: A Type enum representing the data type.
value: The numpy data type for the TACO data type.
"""
- kind: Type = Type.FLOAT64
+ kind: Type = Type.FLOAT32
def is_float(self) -> bool:
"""Returns whether the data type represents a floating point value."""
@@ -112,6 +112,30 @@ def value(self) -> _AnyRuntimeType:
return self.kind.value
+def _dtype_to_mlir_str(dtype: DType) -> str:
+ """Returns the MLIR string for the given dtype."""
+ dtype_to_str = {
+ Type.INT16: "i16",
+ Type.INT32: "i32",
+ Type.INT64: "i64",
+ Type.FLOAT32: "f32",
+ Type.FLOAT64: "f64"
+ }
+ return dtype_to_str[dtype.kind]
+
+
+def _nptype_to_taco_type(ty: np.dtype) -> DType:
+ """Returns the TACO type for the given numpy type."""
+ nptype_to_dtype = {
+ np.int16: Type.INT16,
+ np.int32: Type.INT32,
+ np.int64: Type.INT64,
+ np.float32: Type.FLOAT32,
+ np.float64: Type.FLOAT64
+ }
+ return DType(nptype_to_dtype[ty])
+
+
def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
"""Returns the MLIR type corresponding to the given TACO type."""
dtype_to_irtype = {
@@ -123,7 +147,6 @@ def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
}
return dtype_to_irtype[dtype.kind]
-
def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer:
"""Returns the ctype pointer for the given numpy array."""
return ctypes.pointer(
@@ -632,7 +655,7 @@ def __init__(self,
"""
# Take care of the argument default values common to both sparse tensors
# and dense tensors.
- dtype = dtype or DType(Type.FLOAT64)
+ dtype = dtype or DType(Type.FLOAT32)
self._name = name or self._get_unique_name()
self._assignment = None
self._sparse_value_location = _SparseValueInfo._UNPACKED
@@ -688,7 +711,7 @@ def unpack(self) -> None:
# Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
# values and verify the values.
rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
- self._packed_sparse_value, np.float64)
+ self._packed_sparse_value, self._dtype.value)
assert rank == self.order
assert np.allclose(self.shape, shape)
assert nse == len(values)
@@ -757,7 +780,8 @@ def to_array(self) -> np.ndarray:
def from_array(array: np.ndarray) -> "Tensor":
"""Returns a dense tensor with the value copied from the input array.
- We currently only support the conversion of float64 numpy arrays to Tensor.
+ We currently only support the conversion of float32 and float64 numpy arrays
+ to Tensor.
Args:
array: The numpy array that provides the data type, shape and value for
@@ -767,11 +791,14 @@ def from_array(array: np.ndarray) -> "Tensor":
A Tensor object.
Raises:
- ValueError if the data type of the numpy array is not float64.
+ ValueError if the data type of the numpy array is not supported.
"""
- if array.dtype != np.float64:
- raise ValueError(f"Expected float64 value type: {array.dtype}.")
- tensor = Tensor(array.shape, is_dense=True)
+ if array.dtype != np.float32 and array.dtype != np.float64:
+ raise ValueError(f"Expected floating point value type: {array.dtype}.")
+ tensor = Tensor(
+ array.shape,
+ dtype=_nptype_to_taco_type(array.dtype.type),
+ is_dense=True)
tensor._dense_storage = np.copy(array)
return tensor
@@ -808,7 +835,7 @@ def from_coo(
# The size of each dimension is one more that such a maximum coordinate
# value.
shape = [c + 1 for c in max_coordinate]
- tensor = Tensor(shape, fmt)
+ tensor = Tensor(shape, fmt, dtype=dtype)
tensor._coords = coordinates
tensor._values = values
@@ -833,8 +860,9 @@ def from_file(
value is stored as an MLIR sparse tensor.
"""
sparse_tensor, shape = utils.create_sparse_tensor(filename,
- fmt.format_pack.formats)
- tensor = Tensor(shape.tolist(), fmt)
+ fmt.format_pack.formats,
+ _dtype_to_mlir_str(dtype))
+ tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
tensor._set_packed_sparse_tensor(sparse_tensor)
return tensor
@@ -862,7 +890,8 @@ def to_file(self, filename: str) -> None:
"supported.")
utils.output_sparse_tensor(self._packed_sparse_value, filename,
- self._format.format_pack.formats)
+ self._format.format_pack.formats,
+ _dtype_to_mlir_str(self._dtype))
@property
def dtype(self) -> DType:
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
index 94cb740a006fe..e6a7d8e1b4b85 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
@@ -31,7 +31,8 @@
_TNS_FILENAME_SUFFIX = ".tns"
-def read(filename: str, fmt: Format) -> Tensor:
+def read(filename: str, fmt: Format,
+ dtype: DType = DType(Type.FLOAT32)) -> Tensor:
"""Inputs a tensor from a given file.
The name suffix of the file specifies the format of the input tensor. We
@@ -40,6 +41,7 @@ def read(filename: str, fmt: Format) -> Tensor:
Args:
filename: A string input filename.
fmt: The storage format of the tensor.
+ dtype: The data type, default to float32.
Raises:
ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an
@@ -52,7 +54,7 @@ def read(filename: str, fmt: Format) -> Tensor:
f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: "
f"{filename}.")
- return Tensor.from_file(filename, fmt, DType(Type.FLOAT64))
+ return Tensor.from_file(filename, fmt, dtype)
def write(filename: str, tensor: Tensor) -> None:
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
index 62aa98ee8aaf8..3272a71b6c92b 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
@@ -4,7 +4,7 @@
# This file contains the utilities to process sparse tensor outputs.
-from typing import Sequence, Tuple
+from typing import Callable, Dict, Sequence, Tuple
import ctypes
import functools
import numpy as np
@@ -18,6 +18,10 @@
from mlir.dialects import sparse_tensor
from mlir.passmanager import PassManager
+# Type aliases for type annotation.
+_SupportFunc = Callable[..., None]
+_SupportFuncLocator = Callable[[np.dtype], Tuple[_SupportFunc, _SupportFunc]]
+
# The name for the environment variable that provides the full path for the
# supporting library.
_SUPPORTLIB_ENV_VAR = "SUPPORTLIB"
@@ -36,15 +40,28 @@ def _get_support_lib_name() -> str:
return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)
+def _record_support_funcs(
+ ty: np.dtype, to_func: _SupportFunc, from_func: _SupportFunc,
+ ty_to_funcs: Dict[np.dtype, Tuple[_SupportFunc, _SupportFunc]]) -> None:
+ """Records the two supporting functions for a given data type."""
+ to_func.restype = ctypes.c_void_p
+ from_func.restype = ctypes.c_void_p
+ ty_to_funcs[ty] = (to_func, from_func)
+
+
@functools.lru_cache()
-def _get_c_shared_lib() -> ctypes.CDLL:
- """Loads the supporting C shared library with the needed routines.
+def _get_support_func_locator() -> _SupportFuncLocator:
+ """Constructs a function to locate the supporting functions for a data type.
+
+ Loads the supporting C shared library with the needed routines. Constructs a
+ dictionary from the supported data types to the routines for the data types,
+ and then a function to look up the dictionary for a given data type.
The name of the supporting C shared library is either provided by an
an environment variable or a default value.
Returns:
- The supporting C shared library.
+ The function to look up the supporting functions for a given data type.
Raises:
OSError: If there is any problem in loading the shared library.
@@ -54,19 +71,25 @@ def _get_c_shared_lib() -> ctypes.CDLL:
# library.
c_lib = ctypes.CDLL(_get_support_lib_name())
+ type_to_funcs = {}
try:
- c_lib.convertToMLIRSparseTensorF64.restype = ctypes.c_void_p
+ _record_support_funcs(np.float32, c_lib.convertToMLIRSparseTensorF32,
+ c_lib.convertFromMLIRSparseTensorF32, type_to_funcs)
except Exception as e:
- raise ValueError("Missing function convertToMLIRSparseTensorF64 from "
- f"the supporting C shared library: {e} ") from e
+ raise ValueError(f"Missing supporting function: {e}") from e
try:
- c_lib.convertFromMLIRSparseTensorF64.restype = ctypes.c_void_p
+ _record_support_funcs(np.float64, c_lib.convertToMLIRSparseTensorF64,
+ c_lib.convertFromMLIRSparseTensorF64, type_to_funcs)
except Exception as e:
- raise ValueError("Missing function convertFromMLIRSparseTensorF64 from "
- f"the C shared library: {e} ") from e
+ raise ValueError(f"Missing supporting function: {e}") from e
+
+ def get_support_funcs(ty: np.dtype):
+ funcs = type_to_funcs[ty]
+ assert funcs is not None
+ return funcs
- return c_lib
+ return get_support_funcs
def sparse_tensor_to_coo_tensor(
@@ -93,17 +116,14 @@ def sparse_tensor_to_coo_tensor(
OSError: If there is any problem in loading the shared library.
ValueError: If the shared library doesn't contain the needed routines.
"""
- c_lib = _get_c_shared_lib()
-
+ convert_from = _get_support_func_locator()(dtype)[1]
rank = ctypes.c_ulonglong(0)
nse = ctypes.c_ulonglong(0)
shape = ctypes.POINTER(ctypes.c_ulonglong)()
values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
indices = ctypes.POINTER(ctypes.c_ulonglong)()
- c_lib.convertFromMLIRSparseTensorF64(sparse_tensor, ctypes.byref(rank),
- ctypes.byref(nse), ctypes.byref(shape),
- ctypes.byref(values),
- ctypes.byref(indices))
+ convert_from(sparse_tensor, ctypes.byref(rank), ctypes.byref(nse),
+ ctypes.byref(shape), ctypes.byref(values), ctypes.byref(indices))
# Convert the returned values to the corresponding numpy types.
shape = np.ctypeslib.as_array(shape, shape=[rank.value])
@@ -138,8 +158,8 @@ def coo_tensor_to_sparse_tensor(np_shape: np.ndarray, np_values: np.ndarray,
ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype)))
indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
- c_lib = _get_c_shared_lib()
- ptr = c_lib.convertToMLIRSparseTensorF64(rank, nse, shape, values, indices)
+ convert_to = _get_support_func_locator()(np_values.dtype.type)[0]
+ ptr = convert_to(rank, nse, shape, values, indices)
assert ptr is not None, "Problem with calling convertToMLIRSparseTensorF64"
return ptr
@@ -171,11 +191,11 @@ class _SparseTensorDescriptor(ctypes.Structure):
]
-def _output_one_dim(dim: int, rank: int, shape: str) -> str:
+def _output_one_dim(dim: int, rank: int, shape: str, type: str) -> str:
"""Produces the MLIR text code to output the size for the given dimension."""
return f"""
%c{dim} = arith.constant {dim} : index
- %d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}xf64, #enc>
+ %d{dim} = tensor.dim %t, %c{dim} : tensor<{shape}x{type}, #enc>
memref.store %d{dim}, %b[%c{dim}] : memref<{rank}xindex>
"""
@@ -187,7 +207,7 @@ def _output_one_dim(dim: int, rank: int, shape: str) -> str:
# (2) Use scf.for instead of an unrolled loop to write out the dimension sizes
# when tensor.dim supports non-constant dimension value.
def _get_create_sparse_tensor_kernel(
- sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
+ sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
"""Creates an MLIR text kernel to contruct a sparse tensor from a file.
The kernel returns a _SparseTensorDescriptor structure.
@@ -203,7 +223,7 @@ def _get_create_sparse_tensor_kernel(
# Get the MLIR text code to write the dimension sizes to the output buffer.
output_dims = "\n".join(
- map(lambda d: _output_one_dim(d, rank, shape), range(rank)))
+ map(lambda d: _output_one_dim(d, rank, shape, type), range(rank)))
# Return the MLIR text kernel.
return f"""
@@ -211,18 +231,18 @@ def _get_create_sparse_tensor_kernel(
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
-func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}xf64, #enc>, memref<{rank}xindex>)
+func @{_ENTRY_NAME}(%filename: !Ptr) -> (tensor<{shape}x{type}, #enc>, memref<{rank}xindex>)
attributes {{ llvm.emit_c_interface }} {{
- %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}xf64, #enc>
+ %t = sparse_tensor.new %filename : !Ptr to tensor<{shape}x{type}, #enc>
%b = memref.alloc() : memref<{rank}xindex>
{output_dims}
- return %t, %b : tensor<{shape}xf64, #enc>, memref<{rank}xindex>
+ return %t, %b : tensor<{shape}x{type}, #enc>, memref<{rank}xindex>
}}"""
-def create_sparse_tensor(
- filename: str, sparsity: Sequence[sparse_tensor.DimLevelType]
-) -> Tuple[ctypes.c_void_p, np.ndarray]:
+def create_sparse_tensor(filename: str,
+ sparsity: Sequence[sparse_tensor.DimLevelType],
+ type: str) -> Tuple[ctypes.c_void_p, np.ndarray]:
"""Creates an MLIR sparse tensor from the input file.
Args:
@@ -241,7 +261,7 @@ def create_sparse_tensor(
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
- module = _get_create_sparse_tensor_kernel(sparsity)
+ module = _get_create_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)
@@ -265,7 +285,7 @@ def create_sparse_tensor(
# by using Python code to generate the kernel instead of doing MLIR text code
# stitching.
def _get_output_sparse_tensor_kernel(
- sparsity_codes: Sequence[sparse_tensor.DimLevelType]) -> str:
+ sparsity_codes: Sequence[sparse_tensor.DimLevelType], type: str) -> str:
"""Creates an MLIR text kernel to output a sparse tensor to a file.
The kernel returns void.
@@ -285,16 +305,16 @@ def _get_output_sparse_tensor_kernel(
#enc = #sparse_tensor.encoding<{{
dimLevelType = [ {sparsity} ]
}}>
-func @{_ENTRY_NAME}(%t: tensor<{shape}xf64, #enc>, %filename: !Ptr)
+func @{_ENTRY_NAME}(%t: tensor<{shape}x{type}, #enc>, %filename: !Ptr)
attributes {{ llvm.emit_c_interface }} {{
- sparse_tensor.out %t, %filename : tensor<{shape}xf64, #enc>, !Ptr
+ sparse_tensor.out %t, %filename : tensor<{shape}x{type}, #enc>, !Ptr
std.return
}}"""
-def output_sparse_tensor(
- tensor: ctypes.c_void_p, filename: str,
- sparsity: Sequence[sparse_tensor.DimLevelType]) -> None:
+def output_sparse_tensor(tensor: ctypes.c_void_p, filename: str,
+ sparsity: Sequence[sparse_tensor.DimLevelType],
+ type: str) -> None:
"""Outputs an MLIR sparse tensor to the given file.
Args:
@@ -303,13 +323,14 @@ def output_sparse_tensor(
a COO-flavored format.
sparsity: A sequence of DimLevelType values, one for each dimension of the
tensor.
+ type: The MLIR string for the data type.
Raises:
OSError: If there is any problem in loading the supporting C shared library.
ValueError: If the shared library doesn't contain the needed routine.
"""
with ir.Context() as ctx, ir.Location.unknown():
- module = _get_output_sparse_tensor_kernel(sparsity)
+ module = _get_output_sparse_tensor_kernel(sparsity, type)
module = ir.Module.parse(module)
engine = compile_and_build_engine(module)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py
index 273b913b3a205..b3c05335052f5 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/unit_test_tensor_utils.py
@@ -75,7 +75,7 @@ def _implement_read_tns_test(
# Read the data from the file and construct an MLIR sparse tensor.
sparse_tensor, o_shape = pytaco_utils.create_sparse_tensor(
- file_name, sparsity_codes)
+ file_name, sparsity_codes, "f64")
passed = 0
More information about the Mlir-commits
mailing list