[Mlir-commits] [mlir] 3a42296 - [mlir][sparse][taco] Reorder a class.
Bixia Zheng
llvmlistbot at llvm.org
Tue Mar 15 08:52:05 PDT 2022
Author: Bixia Zheng
Date: 2022-03-15T08:51:22-07:00
New Revision: 3a4229696df39665c260d0d94f10ef2ef23d3370
URL: https://github.com/llvm/llvm-project/commit/3a4229696df39665c260d0d94f10ef2ef23d3370
DIFF: https://github.com/llvm/llvm-project/commit/3a4229696df39665c260d0d94f10ef2ef23d3370.diff
LOG: [mlir][sparse][taco] Reorder a class.
Define IndexExpr before IndexVar. This is to prepare for the next change
to support the use of index values in tensor expressions.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D121649
Added:
Modified:
mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
Removed:
################################################################################
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
index 2dce759bfca05..b0cb216944833 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -365,1201 +365,1202 @@ def _make_format(formats: List[ModeFormat],
return Format(ModeFormatPack(formats), ModeOrdering(ordering))
-class _AtomicCounter:
- """An atomic counter."""
+class IndexExpr(abc.ABC):
+ """The index notation base class.
- def __init__(self):
- self._counter = 0
- self._counter_lock = threading.Lock()
+ We support the TACO API index_expression class with an alias of this class.
+ """
- def increment(self) -> int:
- """Increments the counter by one and returns the old value."""
- with self._counter_lock:
- old_value = self._counter
- self._counter = self._counter + 1
- return old_value
+ def _verify_operand_and_build_expr(self, rhs, op: _BinaryOp) -> "_BinaryExpr":
+ """Verifies the RHS operand and returns a binary expression.
+ Args:
+ rhs: The RHS of the binary operation, which could be any Python object
+ from user inputs.
+ op: A _BinaryOp object representing the binary operator.
-class IndexVar:
- """The tensor index class.
+ Raises:
+ ValueError: If rhs is not an IndexExpr.
+ """
+ if not isinstance(rhs, IndexExpr):
+ raise ValueError(f"Expected IndexExpr: {rhs}")
+ return _BinaryExpr(op, self, rhs)
- We support the TACO API index_var class with an alias of this class.
+ def _build_unary_expr(self, op: _UnaryOp) -> "_UnaryExpr":
+ """Build a unary expression.
- An IndexVar object represents an index variable in tensor index notation.
+ Args:
+ op: A _UnaryOp object representing the unary operation.
+ """
+ return _UnaryExpr(op, self)
- Attributes:
- name: A unique string name of the IndexVar.
- """
- _counter = _AtomicCounter()
+ def __add__(self, rhs) -> "_BinaryExpr":
+ """Defines the operator +.
- def __init__(self):
- id = self._counter.increment()
- self._name = f"{_TACO_INDEX_PREFIX}{id}"
+ Args:
+ rhs: The value being added, which could be any Python object from user
+ inputs.
- def __repr__(self) -> str:
- return f"IndexVar(name={repr(self._name)})"
+ Returns:
+ A _BinaryExpr object representing the operation.
- @property
- def name(self) -> str:
- """Returns the name of the IndexVar."""
- return self._name
+ Raises:
+ ValueError: If rhs is not an IndexExpr.
+ """
+ return self._verify_operand_and_build_expr(rhs, operator.add)
+ def __mul__(self, rhs) -> "_BinaryExpr":
+ """Defines the operator *.
-def get_index_vars(n: int) -> List[IndexVar]:
- """Returns a list of n IndexVar.
+ Args:
+ rhs: The value being multiplied, which could be any Python object from
+ user inputs.
- This routine is defined by the TACO API.
+ Returns:
+ A _BinaryExpr object representing the operation.
- Args:
- n: An integer representing the number of IndexVar to get.
+ Raises:
+ ValueError: If rhs is not an IndexExpr.
+ """
+ return self._verify_operand_and_build_expr(rhs, operator.mul)
- Returns:
- A list of IndexVar.
+ def __abs__(self) -> "_UnaryExpr":
+ """Defines the operator abs.
- Raises:
- ValueError: if n is not a positive integer.
- """
- if not isinstance(n, int) or n <= 0:
- raise ValueError(f"Expected an integer: {n}.")
- # If lock contention ever becomes an issue, we could implement a bulk getter
- # that returns a range by only claiming the lock once.
- return [IndexVar() for _ in range(n)]
+ Returns:
+ A _UnaryExpr object representing the operation.
+ """
+ return self._build_unary_expr(operator.abs)
+ def __neg__(self) -> "_UnaryExpr":
+ """Defines the operator neg.
-def _mlir_symbols_from_index_vars(
- index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.SymbolDef, ...]:
- """Returns a tuple of MLIR symbols for the given tuple of index_var."""
- return tuple(getattr(lang.S, i.name) for i in index_vars)
+ Returns:
+ A _UnaryExpr object representing the operation.
+ """
+ return self._build_unary_expr(operator.neg)
+ def __sub__(self, rhs) -> "_BinaryExpr":
+ """Defines the operator -.
-def _mlir_dimensions_from_index_vars(
- index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.DimDef, ...]:
- """Returns a tuple of MLIR dimensions for the given tuple of index_var."""
- return tuple(getattr(lang.D, i.name) for i in index_vars)
+ Args:
+ rhs: The value being subtracted, which could be any Python object from
+ user inputs.
+ Returns:
+ A _BinaryExpr object representing the operation.
-def _mlir_tensor_type(
- dtype: DType, shape: Tuple[int, ...],
- attr: Optional[sparse_tensor.EncodingAttr]) -> ir.RankedTensorType:
- """Returns an MLIR tensor type.
+ Raises:
+ ValueError: If rhs is not an IndexExpr.
+ """
+ return self._verify_operand_and_build_expr(rhs, operator.sub)
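The operator overloads above are what let user code build tensor index
notation: arithmetic on expression objects returns an expression tree rather
than a computed value. A minimal standalone sketch of the pattern
(hypothetical names, not the classes in this module):

  import operator

  class Expr:
    def __add__(self, rhs):
      return BinaryExpr(operator.add, self, rhs)

    def __mul__(self, rhs):
      return BinaryExpr(operator.mul, self, rhs)

  class Leaf(Expr):
    def __init__(self, name):
      self.name = name

  class BinaryExpr(Expr):
    def __init__(self, op, lhs, rhs):
      self.op, self.lhs, self.rhs = op, lhs, rhs

  a, b, c = Leaf("a"), Leaf("b"), Leaf("c")
  tree = a + b * c  # BinaryExpr(add, a, BinaryExpr(mul, b, c))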
- Args:
- dtype: An DType object for the element data type of the tensor.
- shape: A tuple of integer for the shape of the tensor.
- attr: An optional MLIR sparse tensor attribute, only provided if the tensor
- is a sparse tensor.
+ @abc.abstractmethod
+ def _visit(self,
+ func: _ExprVisitor,
+ args,
+ *,
+ leaf_checker: _SubtreeLeafChecker = None) -> None:
+ """A post-order visitor.
- Returns:
- An MLIR ranked tensor type.
- """
- ir_type = _mlir_type_from_taco_type(dtype)
- return ir.RankedTensorType.get(shape, ir_type, attr)
+ Args:
+ func: A callable applied to each node in the expression tree.
+ args: The variable-length arguments passed to the callable. These
+ arguments are grouped as an iterable and will be unpacked before being
+ passed to the callable. This is to enable the keyword-only argument
+ syntax after this argument.
+ leaf_checker: A callable object to identify nodes that should be treated
+ as leaf nodes to support partial tree visiting.
+ """
+ pass
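For reference, a minimal sketch of this post-order visiting scheme with a
leaf checker (hypothetical names, not the module's API):

  def post_order_visit(node, func, args, *, leaf_checker=None):
    # Descend into children unless the checker says to treat this node as
    # a leaf of the (partial) visit.
    if leaf_checker is None or not leaf_checker(node, *args):
      for child in getattr(node, "children", ()):
        post_order_visit(child, func, args, leaf_checker=leaf_checker)
    func(node, *args)

  class Node:
    def __init__(self, name, *children):
      self.name, self.children = name, children

  out = []
  root = Node("add", Node("a"), Node("mul", Node("b"), Node("c")))
  post_order_visit(root, lambda n, acc: acc.append(n.name), (out,))
  assert out == ["a", "b", "c", "mul", "add"]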
+ @abc.abstractmethod
+ def _emit_expression(
+ self,
+ expr_to_opnd: Dict["IndexExpr", lang.OperandDef],
+ expr_to_info: _ExprInfoDict,
+ ) -> lang.ScalarExpression:
+ """Emits MLIR for the expression tree.
-@dataclasses.dataclass(frozen=True)
-class _StructOpInfo:
- """Information for generating a structured op in the linalg dialect.
+ Args:
+ expr_to_opnd: A dictionary for looking up structured op input operands for
+ the input nodes of the structured op.
+ expr_to_info: A dictionary for looking up code generation information for
+ expressions.
- This information is associated with an expression node that serves as the
- root for an expression subtree implemented with a structured op.
+ Returns:
+ A linalg dialect ScalarExpression for the expression.
+ """
+ pass
- Attributes:
- dst_indices: A tuple of IndexVar, representing the result dimensions of the
- structured op. This is used to construct the temporary variable for the
- tensor to hold the structured op result.
- dst_dims: A tuple of int, representing the result shape of the structured
- op.
- dst_dtype: A DType representing the data type of the structured op result.
- dst_name: A string representing the name of the structured op result.
- dst_format: An optional Format object representing the destination tensor
- format. None represents a true dense tensor.
- """
- dst_indices: Tuple[IndexVar, ...]
- dst_dims: Tuple[int, ...]
- dst_dtype: DType
- dst_name: str
- dst_format: Optional[Format]
+ @abc.abstractmethod
+ def dtype(self) -> DType:
+ """Returns the data type for the result of the expression."""
+ pass
- def __post_init__(self) -> None:
- """Verifies the integrity of the attribute values."""
- assert len(self.dst_indices) == len(self.dst_dims)
+ def _emit_structured_op(self, expr_to_info: _ExprInfoDict) -> None:
+ """Emits a structured op in the linalg dialect for the expression tree.
- def emit_tensor_init(self) -> ir.RankedTensorType:
- """Returns an initialization for the destination tensor."""
- if self.dst_format is None or self.dst_format.rank() == 0:
- # Initialize the dense tensor.
- ir_type = _mlir_type_from_taco_type(self.dst_dtype)
- tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
- zero = arith.ConstantOp(ir_type, 0.0)
- return linalg.fill(zero, outs=[tensor])
+ We define a lang.DefinedOpCallable in the domain-specific language for
+ the linalg dialect and execute the callable to generate the structured
+ op. Self is the root of the expression tree for the structured op.
- # Initialize the sparse tensor.
- mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
- self.dst_format.mlir_tensor_attr())
- index_type = ir.IndexType.get()
- dims = [arith.ConstantOp(index_type, d).result for d in mlir_type.shape]
- return sparse_tensor.InitOp(mlir_type, dims)
+ Args:
+ expr_to_info: A dictionary for looking up code generation information for
+ expressions.
+ """
+ op_info = expr_to_info[self].structop_info
+ op_name = op_info.dst_name
+ op_def = lang.LinalgOpDef(name=op_name)
+ op_callable = lang.DefinedOpCallable(op_name, op_def)
+ # Collect the input expression nodes for the structured op.
+ expr_inputs = []
+ self._visit(
+ _gather_structured_op_input,
+ (self, expr_to_info, expr_inputs),
+ leaf_checker=_is_structured_op_leaf,
+ )
-class _Stats:
- """Information to describe how a tensor expression is implemented.
+ # Create a linalg structured op operand for each input expression node and
+ # build a dictionary for looking up the information.
+ expr_to_input_opnd = {
+ e: _emit_structured_op_input(e, expr_to_info, op_def)
+ for e in expr_inputs
+ }
- Currently, we only record the temporary tensors introduced for splitting the
- original expression.
- """
+ # Emit the expression tree, which produces the value assigned to the
+ # destination tensor.
+ value = self._emit_expression(expr_to_input_opnd, expr_to_info)
+ # Emit the structured op representation for the destination tensor.
+ dst_opnd = _emit_operand(op_def, op_info.dst_indices, op_info.dst_name,
+ lang.OperandKind.OUTPUT_TENSOR)
+ dst_dim_syms = _mlir_dimensions_from_index_vars(op_info.dst_indices)
+ dst_use = lang.TensorUse(dst_opnd, dst_dim_syms)
- def __init__(self):
- self._temps = []
+ expr_info = expr_to_info[self]
+ # If the structured op reduces some indices, explicitly represent the
+ # reduction. This is done by generating a ReduceFn for the dimensions being
+ # reduced in the linalg dialect and calling the function with the value
+ # being reduced. We only support add reduction currently.
+ if expr_info.reduce_indices:
+ reduce_dims = _mlir_dimensions_from_index_vars(expr_info.reduce_indices)
+ value = lang.ReduceFn.add[reduce_dims](value)
- def __repr__(self) -> str:
- return f"_Stats({repr(self._temps)})"
-
- def add_element(self, structop: _StructOpInfo):
- """Adds a temporary tensor."""
- self._temps.append(structop)
-
- def get_total(self) -> int:
- """Gets the total number of temporary tensors."""
- return len(self._temps)
-
- def _get_element(self, idx: int) -> _StructOpInfo:
- """Gets the ith temporary tensor."""
- assert idx < self.get_total()
- return self._temps[idx]
-
- def get_dimensions(self, idx: int) -> Tuple[int]:
- """Gets the dimensions for the ith temporary tensor."""
- return self._get_element(idx).dst_dims
-
- def get_formats(self, idx: int) -> Tuple[ModeFormat]:
- """Gets the ModeFormats for the ith temporary tensor."""
- return tuple(self._get_element(idx).dst_format.format_pack.formats)
+ # Emit the assignment as a comprehension in the linalg dialect.
+ comp = lang.Comprehension((dst_use, value))
+ op_def.comprehensions.append(comp)
+ # The structured op in the linalg dialect requires an explicit
+ # initialization for the destination tensor. Emit MLIR to initialize the
+ # destination tensor.
+ init = op_info.emit_tensor_init()
-class _SparseValueInfo(enum.Enum):
- """Describes how a sparse tensor value is stored.
- _UNPACKED: The sparse tensor value is stored as (coordinates, values) in
- Python.
- _PACKED: The sparse tensor value is stored as a C pointer to a packed MLIR
- sparse tensor.
- """
- _UNPACKED = 0
- _PACKED = 1
+ # Collect MLIR values for the linalg input operands, with the assumption
+ # that the dictionary preserves the insertion order.
+ args = [
+ expr_to_info[expr].mlir_value
+ for expr, opnd in expr_to_input_opnd.items()
+ ]
+ # Execute the DefinedOpCallable object for the linalg dialect operation to
+ # emit MLIR for the linalg structured op.
+ expr_info.mlir_value = op_callable(*args, outs=[init])
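As a usage-level sketch of the add reduction (assuming the API defined in
this file is in scope), a matrix-vector product reduces the index that
appears only on the right-hand side:

  i, j = get_index_vars(2)
  A = Tensor([4, 5])                 # sparse, ModeFormat.COMPRESSED default
  x = Tensor([5], ModeFormat.DENSE)
  y = Tensor([4], ModeFormat.DENSE)
  # j does not appear on the destination, so the generated structured op
  # reduces over it with lang.ReduceFn.add: y[i] = sum over j of A[i,j]*x[j].
  y[i] = A[i, j] * x[j]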
+ def _identify_structured_ops(
+ self,
+ expr_to_info: _ExprInfoDict,
+ dst: "Tensor",
+ dst_indices: Tuple["IndexVar", ...],
+ ) -> List["IndexExpr"]:
+ """Returns expression nodes for the roots of the identified structured ops.
-@dataclasses.dataclass(frozen=True)
-class _Assignment:
- """Records an assignment to a tensor T as T[indices] = expression."""
- indices: Tuple["IndexVar", ...]
- expression: "IndexExpr"
+ A structured op in the linalg dialect only supports reductions performed
+ on the whole expression. If the expression tree contains reductions that
+ are performed on parts of the expression tree, the tree needs to be
+ implemented with multiple structured ops. This routine identifies all the
+ expression nodes that contain reductions and marks them as the roots of
+ structured ops in the linalg dialect.
+ Args:
+ expr_to_info: A dictionary for looking up code generation information for
+ expressions.
+ dst: A destination Tensor that accepts the value of the expression tree.
+ dst_indices: The indices used by the destination index expression.
-class Tensor:
- """The tensor class.
+ Returns:
+ An ordered list of IndexExpr for the root expressions of the structured
+ ops, where child expressions go before parent expressions that use their
+ results.
+ """
+ reduce_indices = tuple(
+ set(expr_to_info[self].src_indices) - set(dst_indices))
+ for reduce_index in reduce_indices:
+ _mark_structured_op_root(self, reduce_index, expr_to_info)
- We support the TACO API tensor class with an alias of this class.
+ self._visit(_accumulate_reduce_indices, (expr_to_info,))
+ structop_roots = []
+ self._visit(_gather_structured_op, (expr_to_info, structop_roots))
- This class is part of the TACO API with the following methods:
- insert: Inserts a value at the given coordinate in the tensor.
- to_array: Returns a numpy ndarray for the tensor.
+ # Handle the root of the top level expression.
+ if not structop_roots or structop_roots[-1] != self:
+ # The top level expression is not a reduction. Add the top level
+ # expression as a structured op root.
+ structop_roots.append(self)
- The TACO API also defines the following attributes for the class:
- dtype: A dtype object representing the data type of the tensor.
- format: A format object representing the storage format of the tensor.
- name: A string object representing the name of the tensor.
- order: An integral rank of the tensor.
- shape: A list of integers representing the shape of the tensor.
+ # Use user specified information for the destination tensor to build an
+ # _StructOpInfo for the top level expression.
+ expr_to_info[self].structop_info = _StructOpInfo(dst_indices,
+ tuple(dst.shape),
+ self.dtype(), dst.name,
+ dst.format)
- We currently ignore the tensor dimension ordering for dense tensors.
- """
- _counter = _AtomicCounter()
+ return structop_roots
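The core of the root identification is the set difference computed above. A
toy illustration of that rule (standalone, hypothetical helper name):

  def reduce_indices(src_indices, dst_indices):
    # Indices used by a node's sources but absent from the destination.
    return set(src_indices) - set(dst_indices)

  # For D[i] = A[i,j] * B[j] + C[i], the product A[i,j] * B[j] uses {i, j}
  # while the destination uses only {i}; j must be reduced at the product
  # node, which makes that node the root of its own structured op.
  assert reduce_indices({"i", "j"}, {"i"}) == {"j"}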
- def _get_unique_name(self) -> str:
- """Returns a unique name for creating a new Tensor."""
- return f"{_TACO_TENSOR_PREFIX}{self._counter.increment()}"
+ def _validate_and_collect_expr_info(
+ self,
+ dst: "Tensor",
+ dst_indices: Tuple["IndexVar", ...],
+ ) -> _ExprInfoDict:
+ """Propagates expression information for validation.
- def _init_format(self, fmt: Union[ModeFormat, List[ModeFormat],
- Format]) -> None:
- """Process the fmt argument for the Tensor constructor.
+ Propagates the indices used by child expression nodes to parent expression
+ nodes. Also collects and validates the sizes for the dimensions
+ corresponding to the indices.
Args:
- fmt: This argument can be a ModeFormat, List[ModeFormat], or Format. If
- this argument is a ModeFormat, uses this ModeFormat for all the tensor
- dimensions. If this argument is a list of ModeFormat, the length of the
- list should equal the rank of the tensor. If this argument is a
- Format, uses it for the format of the tensor.
+ dst: A destination Tensor that accepts the value of the expression tree.
+ dst_indices: The indices used by the destination index expression.
Raises:
- ValueError: If fmt is not one of the expected types or is inconsistent
- with the rank of the tensor. This is checked because fmt could come
- from user input.
- """
- if isinstance(fmt, ModeFormat):
- self._format = _make_format([fmt] * self.order)
- elif isinstance(fmt, list):
- if len(fmt) == self.order and isinstance(fmt[0], ModeFormat):
- self._format = _make_format(fmt)
- else:
- raise ValueError("Inconsistent shape and format: "
- f"{self._shape}, {fmt}.")
- elif isinstance(fmt, Format):
- if fmt.rank() != self.order:
- raise ValueError("Inconsistent shape and format: "
- f"{self._shape}, {fmt}.")
- else:
- self._format = fmt
- else:
- raise ValueError(f"Invalid format argument: {fmt}.")
+ ValueError: If there is any inconsistency in indices or dimension
+ values.
- def __init__(self,
- value_or_shape: Optional[Union[List[int], Tuple[int, ...], float,
- int]] = None,
- fmt: Optional[Union[ModeFormat, List[ModeFormat],
- Format]] = None,
- dtype: Optional[DType] = None,
- name: Optional[str] = None,
- is_dense: bool = False):
- """The tensor constructor interface defined by TACO API.
+ Returns:
+ A dictionary of (IndexExpr, _ExprInfo).
+ """
+ expr_to_info = {}
+ # Validate the expression tree and construct expression information.
+ self._visit(_validate_and_collect_expr_info, (expr_to_info,))
- Args:
- value_or_shape: This argument is optional and can be int, float,
- List[int], or Tuple[int, ...]. If this argument is an int or float,
- creates a scalar tensor and initializes it with the value. If this
- argument is a list or tuple of int, uses it as the shape to create a
- tensor.
- fmt: This argument can be a ModeFormat, List[ModeFormat], or Format. If
- this argument is a ModeFormat, uses this ModeFormat for all the tensor
- dimensions. If this argument is a list of ModeFormat, the length of the
- list should equal the rank of the tensor. If this argument is a
- Format, uses it for the format of the tensor.
- dtype: An object of dtype, representing the data type of the tensor.
- name: A string name of the tensor. If a name is not given, creates a
- unique name for the tensor.
- is_dense: A boolean variable to indicate whether the tensor is a dense
- tensor without any sparsity annotation.
+ # Validate the destination dimension information.
+ info = expr_to_info[self]
+ index_to_dim_info = {i: d for i, d in zip(info.src_indices, info.dim_infos)}
+ for i, d in zip(dst_indices, dst.shape):
+ if i not in index_to_dim_info:
+ raise ValueError("Destination IndexVar not used in the "
+ f"source expression: {i}")
+ else:
+ if d != index_to_dim_info[i].dim:
+ raise ValueError(f"Inconsistent destination dimension for {i}: "
+ f"{d} vs {index_to_dim_info[i].dim}")
- Raises:
- ValueError: If there is any inconsistency among the input arguments.
- """
- # Take care of the argument default values common to both sparse tensors
- # and dense tensors.
- dtype = dtype or DType(Type.FLOAT32)
- self._name = name or self._get_unique_name()
- self._assignment = None
- self._engine = None
- self._sparse_value_location = _SparseValueInfo._UNPACKED
- self._dense_storage = None
- self._dtype = dtype
+ return expr_to_info
- if is_dense:
- assert (fmt is None)
- assert (isinstance(value_or_shape, tuple) or isinstance(
- value_or_shape, list)) and _all_instance_of(value_or_shape, int)
- self._shape = value_or_shape
- self._format = None
- return
+ def _emit_assignment(
+ self,
+ module: ir.Module,
+ dst: "Tensor",
+ dst_indices: Tuple["IndexVar", ...],
+ expr_to_info: _ExprInfoDict,
+ input_accesses: List["Access"],
+ ) -> None:
+ """Emits an MLIR function for assigning the expression to a tensor."""
+ input_types = [a.tensor.mlir_tensor_type() for a in input_accesses]
- fmt = fmt or ModeFormat.COMPRESSED
- # We currently use _coords and _values to host the sparse tensor value with
- # COO format, and _dense_storage to host the dense tensor value. We don't
- # support the conversion between the two storages.
- self._coords = []
- self._values = []
- self._stats = _Stats()
- if value_or_shape is None or isinstance(value_or_shape, int) or isinstance(
- value_or_shape, float):
- # Create a scalar tensor and ignore the fmt parameter.
- self._shape = []
- self._format = _make_format([], [])
- if value_or_shape is not None:
- self._dense_storage = np.array(value_or_shape, dtype=self._dtype.value)
- elif (isinstance(value_or_shape, tuple) or isinstance(
- value_or_shape, list)) and _all_instance_of(value_or_shape, int):
- # Create a tensor with the specified shape and format.
- self._shape = list(value_or_shape)
- self._init_format(fmt)
- else:
- raise ValueError("Invalid first argument. "
- "Must be a tuple or list for a shape or a single value"
- f"if initializing a scalar tensor: {value_or_shape}.")
+ # Build the kernel for the operations.
+ with ir.InsertionPoint(module.body):
- def _set_packed_sparse_tensor(self, pointer: ctypes.c_void_p) -> None:
- """Records the MLIR sparse tensor pointer."""
- self._sparse_value_location = _SparseValueInfo._PACKED
- self._packed_sparse_value = pointer
+ @builtin.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME)
+ def linalg_funcop(*args):
+ # Set up the mapping from the Access nodes to their MLIR values.
+ for e, mlir in zip(input_accesses, args):
+ expr_to_info[e].mlir_value = mlir
- def is_unpacked(self) -> bool:
- """Returns true if the tensor value is not packed as MLIR sparse tensor."""
- return (self._sparse_value_location == _SparseValueInfo._UNPACKED)
+ # Emit structured ops in the linalg dialect to implement the assignment.
+ for structop_root in self._identify_structured_ops(
+ expr_to_info, dst, dst_indices):
+ structop_root._emit_structured_op(expr_to_info)
+ dst._record_stats(expr_to_info[structop_root].structop_info)
- def unpack(self) -> None:
- """Unpacks the MLIR sparse tensor representation."""
- if self.is_dense() or self.is_unpacked():
- return
+ # The function returns the MLIR value of the root expression.
+ return expr_to_info[self].mlir_value
- # Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
- # values and verify the values.
- rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
- self._packed_sparse_value, self._dtype.value)
- assert rank == self.order
- assert np.array_equal(self.shape, shape)
- assert nse == len(values)
- self._coords = indices
- self._values = values
- self._sparse_value_location = _SparseValueInfo._UNPACKED
+ linalg_funcop.func_op.attributes[
+ "llvm.emit_c_interface"] = ir.UnitAttr.get()
- def __repr__(self) -> str:
- self._sync_value()
- self.unpack()
- value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else
- f"{repr(self._coords)} {repr(self._values)})")
- return (f"Tensor(_name={repr(self._name)} "
- f"_dtype={repr(self._dtype)} : ") + value_str
+ def get_input_accesses(self) -> List["Access"]:
+ """Compute the list of input accesses for the expression."""
+ input_accesses = []
+ self._visit(_gather_input_accesses_index_vars, (input_accesses,))
+ return input_accesses
- def insert(self, coords: List[int], val: Union[float, int]) -> None:
- """Inserts a value to the given coordinate.
+ def compile(
+ self,
+ dst: "Tensor",
+ dst_indices: Tuple["IndexVar", ...],
+ ) -> execution_engine.ExecutionEngine:
+ """Compiles the tensor assignment dst[dst_indices] = expression.
Args:
- coords: A list of integer coordinates. The length of the list must be the
- same as the rank of the tensor.
- val: A value being inserted. It is either an integral or a floating point
- value. This value will be converted to the data type of the tensor.
+ dst: The destination tensor.
+ dst_indices: The tuple of IndexVar used to access the destination tensor.
+
+ Returns:
+ The execution engine for the tensor assignment.
Raises:
- ValueError: When there is any problem in the parameters.
+ ValueError: If the expression is not proper or not supported.
"""
- if self.is_dense():
- raise ValueError("Insert method is not supported for dense tensors.")
- if self._assignment is not None or not self.is_unpacked():
- raise ValueError("Can't use insert for a tensor with a pending "
- "assignment or a packed MLIR value, such as a tensor "
- "constructed from a file.")
- if not isinstance(coords, list):
- raise ValueError(f"Non list coordinate detected: {coords}.")
- if not _all_instance_of(coords, int):
- raise ValueError(f"Non integer coordinate detected: {coords}.")
- if (len(coords) != self.order or
- any([c < 0 or c >= self._shape[i] for i, c in enumerate(coords)])):
- raise ValueError("Invalid coordinate for rank: "
- f"{self.order}, {coords}.")
+ expr_to_info = self._validate_and_collect_expr_info(dst, dst_indices)
+ input_accesses = self.get_input_accesses()
- if not isinstance(val, int) and not isinstance(val, float):
- raise ValueError(f"Value is neither int nor float: {val}.")
+ # Build and compile the module to produce the execution engine.
+ with ir.Context(), ir.Location.unknown():
+ module = ir.Module.create()
+ self._emit_assignment(module, dst, dst_indices, expr_to_info,
+ input_accesses)
+ engine = utils.compile_and_build_engine(module)
+
+ return engine
+
+
+class _AtomicCounter:
+ """An atomic counter."""
+
+ def __init__(self):
+ self._counter = 0
+ self._counter_lock = threading.Lock()
- self._coords.append(tuple(coords))
- self._values.append(self._dtype.value(val))
+ def increment(self) -> int:
+ """Increments the counter by one and returns the old value."""
+ with self._counter_lock:
+ old_value = self._counter
+ self._counter = self._counter + 1
+ return old_value
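With the read and the increment both performed under the lock, concurrent
callers are guaranteed distinct return values. A small sketch exercising the
class defined above:

  import threading

  counter = _AtomicCounter()
  seen = []

  def worker():
    seen.append(counter.increment())

  threads = [threading.Thread(target=worker) for _ in range(8)]
  for t in threads:
    t.start()
  for t in threads:
    t.join()
  assert sorted(seen) == list(range(8))  # every call saw a distinct value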
- def is_dense(self) -> bool:
- """Returns true if the tensor doesn't have sparsity annotation."""
- return self.order == 0 or self._format is None
- def to_array(self) -> np.ndarray:
- """Returns the numpy array for the Tensor.
+class IndexVar:
+ """The tensor index class.
- This is currently only implemented for dense Tensors.
- """
- if not self.is_dense():
- raise ValueError("Conversion from non-dense Tensor "
- "to numpy array not supported yet.")
+ We support the TACO API index_var class with an alias of this class.
- self._sync_value()
+ An IndexVar object represents an index variable in tensor index notation.
- return self._dense_storage
+ Attributes:
+ name: A unique string name of the IndexVar.
+ """
+ _counter = _AtomicCounter()
- @staticmethod
- def from_array(array: np.ndarray) -> "Tensor":
- """Returns a dense tensor with the value copied from the input array.
+ def __init__(self):
+ id = self._counter.increment()
+ self._name = f"{_TACO_INDEX_PREFIX}{id}"
- We currently only support the conversion of float32 and float64 numpy arrays
- to Tensor.
+ def __repr__(self) -> str:
+ return f"IndexVar(name={repr(self._name)})"
- Args:
- array: The numpy array that provides the data type, shape and value for
- the tensor.
+ @property
+ def name(self) -> str:
+ """Returns the name of the IndexVar."""
+ return self._name
- Returns:
- A Tensor object.
- Raises:
- ValueError: If the data type of the numpy array is not supported.
- """
- if array.dtype != np.float32 and array.dtype != np.float64:
- raise ValueError(f"Expected floating point value type: {array.dtype}.")
- tensor = Tensor(
- array.shape,
- dtype=_nptype_to_taco_type(array.dtype.type),
- is_dense=True)
- tensor._dense_storage = np.copy(array)
- return tensor
+def get_index_vars(n: int) -> List[IndexVar]:
+ """Returns a list of n IndexVar.
- @staticmethod
- def from_coo(
- coordinates: List[Tuple[int, ...]],
- values: List[_AnyRuntimeType],
- fmt: Format,
- dtype: DType,
- ) -> "Tensor":
- """Converts coordinates and values to a sparse tensor representation.
+ This routine is defined by the TACO API.
- Args:
- coordinates: A list of coordinates with non-zero values.
- values: The non-zero values.
- fmt: The tensor storage format.
- dtype: The tensor element data type.
+ Args:
+ n: An integer representing the number of IndexVar to get.
- Returns:
- A tensor with the given non-zero values and storage format. The shape of
- the tensor has the minimum size for each dimension to make the given
- coordinates valid.
- """
- assert (isinstance(coordinates, List) and
- _all_instance_of(coordinates, Tuple))
- assert (isinstance(values, List) and _all_instance_of(values, dtype.value))
- assert isinstance(fmt, Format)
+ Returns:
+ A list of IndexVar.
- rank = fmt.rank()
- assert all(len(c) == rank and _all_instance_of(c, int) for c in coordinates)
+ Raises:
+ ValueError: if n is not a positive integer.
+ """
+ if not isinstance(n, int) or n <= 0:
+ raise ValueError(f"Expected an integer: {n}.")
+ # If lock contention ever becomes an issue, we could implement a bulk getter
+ # that returns a range by only claiming the lock once.
+ return [IndexVar() for _ in range(n)]
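For example:

  i, j, k = get_index_vars(3)
  assert len({i.name, j.name, k.name}) == 3  # names are globally unique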
- # Find the maximum coordinate value for each dimension.
- max_coordinate = list(map(max, zip(*coordinates)))
- # The size of each dimension is one more than the maximum coordinate
- # value in that dimension.
- shape = [c + 1 for c in max_coordinate]
- tensor = Tensor(shape, fmt, dtype=dtype)
- tensor._coords = coordinates
- tensor._values = values
- return tensor
+def _mlir_symbols_from_index_vars(
+ index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.SymbolDef, ...]:
+ """Returns a tuple of MLIR symbols for the given tuple of index_var."""
+ return tuple(getattr(lang.S, i.name) for i in index_vars)
- @staticmethod
- def from_file(
- filename: str,
- fmt: Format,
- dtype: DType,
- ) -> "Tensor":
- """Constructs a sparse tensor using the COO-flavored values from a file.
- Args:
- filename: A string for the name of the file that contains the sparse
- tensor data.
- fmt: The tensor storage format.
- dtype: The tensor element data type.
+def _mlir_dimensions_from_index_vars(
+ index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.DimDef, ...]:
+ """Returns a tuple of MLIR dimensions for the given tuple of index_var."""
+ return tuple(getattr(lang.D, i.name) for i in index_vars)
- Returns:
- A tensor with the given non-zero values and storage format. The tensor
- value is stored as an MLIR sparse tensor.
- """
- sparse_tensor, shape = utils.create_sparse_tensor(filename,
- fmt.format_pack.formats,
- _dtype_to_mlir_str(dtype))
- tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
- tensor._set_packed_sparse_tensor(sparse_tensor)
- return tensor
+def _mlir_tensor_type(
+ dtype: DType, shape: Tuple[int, ...],
+ attr: Optional[sparse_tensor.EncodingAttr]) -> ir.RankedTensorType:
+ """Returns an MLIR tensor type.
- def to_file(self, filename: str) -> None:
- """Output the tensor value to a file.
+ Args:
+ dtype: An DType object for the element data type of the tensor.
+ shape: A tuple of integer for the shape of the tensor.
+ attr: An optional MLIR sparse tensor attribute, only provided if the tensor
+ is a sparse tensor.
- This method evaluates any pending assignment to the tensor and outputs the
- tensor value.
+ Returns:
+ An MLIR ranked tensor type.
+ """
+ ir_type = _mlir_type_from_taco_type(dtype)
+ return ir.RankedTensorType.get(shape, ir_type, attr)
- Args:
- filename: A string file name.
- Raises:
- ValueError: If the tensor is dense, or an unpacked sparse tensor.
- """
- self._sync_value()
+@dataclasses.dataclass(frozen=True)
+class _StructOpInfo:
+ """Information for generating a structured op in the linalg dialect.
- if self.is_dense():
- raise ValueError("Writing dense tensors without sparsity annotation to "
- "file is not supported.")
+ This information is associated with an expression node that serves as the
+ root for an expression subtree implemented with a structured op.
- if self.is_unpacked():
- raise ValueError("Writing unpacked sparse tensors to file is not "
- "supported.")
+ Attributes:
+ dst_indices: A tuple of IndexVar, representing the result dimensions of the
+ structured op. This is used to construct the temporary variable for the
+ tensor to hold the structured op result.
+ dst_dims: A tuple of int, representing the result shape of the structured
+ op.
+ dst_dtype: A DType representing the data type of the structured op result.
+ dst_name: A string representing the name of the structured op result.
+ dst_format: An optional Format object representing the destination tensor
+ format. None represents a true dense tensor.
+ """
+ dst_indices: Tuple[IndexVar, ...]
+ dst_dims: Tuple[int, ...]
+ dst_dtype: DType
+ dst_name: str
+ dst_format: Optional[Format]
- utils.output_sparse_tensor(self._packed_sparse_value, filename,
- self._format.format_pack.formats,
- _dtype_to_mlir_str(self._dtype))
+ def __post_init__(self) -> None:
+ """Verifies the integrity of the attribute values."""
+ assert len(self.dst_indices) == len(self.dst_dims)
- @property
- def dtype(self) -> DType:
- """Returns the data type for the Tensor."""
- return self._dtype
+ def emit_tensor_init(self) -> ir.RankedTensorType:
+ """Returns an initialization for the destination tensor."""
+ if self.dst_format is None or self.dst_format.rank() == 0:
+ # Initialize the dense tensor.
+ ir_type = _mlir_type_from_taco_type(self.dst_dtype)
+ tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
+ zero = arith.ConstantOp(ir_type, 0.0)
+ return linalg.fill(zero, outs=[tensor])
- @property
- def format(self) -> Format:
- """Returns the storage format for the Tensor."""
- return self._format
+ # Initialize the sparse tensor.
+ mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
+ self.dst_format.mlir_tensor_attr())
+ index_type = ir.IndexType.get()
+ dims = [arith.ConstantOp(index_type, d).result for d in mlir_type.shape]
+ return sparse_tensor.InitOp(mlir_type, dims)
- @property
- def name(self) -> str:
- """Returns the name for the Tensor."""
- return self._name
- @property
- def order(self) -> int:
- """Returns the rank of the Tensor."""
- return len(self._shape)
+class _Stats:
+ """Information to describe how a tensor expression is implemented.
- @property
- def shape(self) -> List[int]:
- """Returns the shape of the Tensor."""
- return self._shape
+ Currently, we only record the temporary tensors introduced for splitting the
+ original expression.
+ """
- def _verify_and_normalize_indices(self, indices) -> Tuple[IndexVar, ...]:
- """Verifies and normalizes the indices to access the tensor.
+ def __init__(self):
+ self._temps = []
- Args:
- indices: The index expression used to access a tensor, which could be any
- Python object from user inputs.
+ def __repr__(self) -> str:
+ return f"_Stats({repr(self._temps)})"
- Returns:
- A tuple of IndexVar.
+ def add_element(self, structop: _StructOpInfo):
+ """Adds a temporary tensor."""
+ self._temps.append(structop)
- Raises:
- ValueError: If indices is not 0 for scalar tensors, or not an IndexVar or
- a tuple of IndexVar for other tensors.
- """
- if self.order == 0:
- if not isinstance(indices, int) or indices != 0:
- raise ValueError(f"Expected 0 to index scalar tensors: {indices}")
- return ()
+ def get_total(self) -> int:
+ """Gets the total number of temporary tensors."""
+ return len(self._temps)
- if isinstance(indices, IndexVar):
- return (indices,)
- elif isinstance(indices, tuple) and _all_instance_of(indices, IndexVar):
- return indices
+ def _get_element(self, idx: int) -> _StructOpInfo:
+ """Gets the ith temporary tensor."""
+ assert idx < self.get_total()
+ return self._temps[idx]
- raise ValueError(f"Expected IndexVars: {indices}")
+ def get_dimensions(self, idx: int) -> Tuple[int]:
+ """Gets the dimensions for the ith temporary tensor."""
+ return self._get_element(idx).dst_dims
- def __getitem__(self, key) -> "Access":
- """Verifies and processes a tensor access.
+ def get_formats(self, idx: int) -> Tuple[ModeFormat]:
+ """Gets the ModeFormats for the ith temporary tensor."""
+ return tuple(self._get_element(idx).dst_format.format_pack.formats)
- In the tensor index notation, a tensor access T[i, j] is represented as
- retrieving a value with key (i, j) from the tensor object T in Python. This
- routine verifies the key for the tensor access and returns a tensor access
- object.
- Args:
- key: The key used to access the tensor, which could be any Python object
- from user inputs.
+class _SparseValueInfo(enum.Enum):
+ """Describes how a sparse tensor value is stored.
+ _UNPACKED: The sparse tensor value is stored as (coordinates, values) in
+ Python.
+ _PACKED: The sparse tensor value is stored as a C pointer to a packed MLIR
+ sparse tensor.
+ """
+ _UNPACKED = 0
+ _PACKED = 1
- Returns:
- The corresponding tensor access object.
- Raises:
- ValueError: If key is not an IndexVar or a tuple of IndexVar.
- """
- indices = self._verify_and_normalize_indices(key)
- return Access(self, indices)
+@dataclasses.dataclass(frozen=True)
+class _Assignment:
+ """Records an assignment to a tensor T as T[indices] = expression."""
+ indices: Tuple["IndexVar", ...]
+ expression: "IndexExpr"
- def __setitem__(self, key, value) -> None:
- """Verifies and processes a tensor assignment.
- In the tensor index notation, a tensor assignment "T[i, j] = ..." is
- represented as setting a value for a tensor object T via key (i, j) in
- Python. This routine verifies the key, evaluates the value, and assigns the
- value to the tensor.
+class Tensor:
+ """The tensor class.
- We only support assignment of dense tensor currently.
+ We support the TACO API tensor class with an alias of this class.
- Args:
- key: The key used to access the tensor, which could be any Python object
- from user inputs.
- value: The value assigned to the tensor, which could be any Python object
- from user inputs.
+ This class is part of the TACO API with the following methods:
+ insert: Inserts a value at the given coordinate in the tensor.
+ to_array: Returns a numpy ndarray for the tensor.
- Raises:
- ValueError: If tensor is not a dense tensor, or the key is not an IndexVar
- or a tuple of IndexVar, or the length of the indices is not the same as
- the rank of the tensor.
- """
- indices = self._verify_and_normalize_indices(key)
- if len(indices) != self.order:
- raise ValueError("Mismatch between indices and tensor rank: "
- f"len({indices}) != {self.order}.")
+ The TACO API also defines the following attributes for the class:
+ dtype: A dtype object representing the data type of the tensor.
+ format: A format object representing the storage format of the tensor.
+ name: A string object representing the name of the tensor.
+ order: An integral rank of the tensor.
+ shape: A list of integers representing the shape of the tensor.
- self._assignment = _Assignment(indices, value)
- self._engine = None
+ We currently ignore the tensor dimension ordering for dense tensors.
+ """
+ _counter = _AtomicCounter()
- def compile(self, force_recompile: bool = False) -> None:
- """Compiles the tensor assignment to an execution engine.
+ def _get_unique_name(self) -> str:
+ """Returns a unique name for creating a new Tensor."""
+ return f"{_TACO_TENSOR_PREFIX}{self._counter.increment()}"
- Calling compile the second time does not do anything unless
- force_recompile is True.
+ def _init_format(self, fmt: Union[ModeFormat, List[ModeFormat],
+ Format]) -> None:
+ """Process the fmt argument for the Tensor constructor.
Args:
- force_recompile: A boolean value to enable recompilation, such as for the
- purpose of timing.
+ fmt: This argument can be a ModeFormat, List[ModeFormat], or Format. If
+ this argument is a ModeFormat, uses this ModeFormat for all the tensor
+ dimensions. If this argument is a list of ModeFormat, the length of the
+ list should equal the rank of the tensor. If this argument is a
+ Format, uses it for the format of the tensor.
Raises:
- ValueError: If the assignment is not proper or not supported.
+ ValueError: If fmt is not one of the expected types or is inconsistent
+ with the rank of the tensor. This is checked because fmt could come
+ from user input.
"""
- if self._assignment is None or (self._engine is not None and
- not force_recompile):
- return
+ if isinstance(fmt, ModeFormat):
+ self._format = _make_format([fmt] * self.order)
+ elif isinstance(fmt, list):
+ if len(fmt) == self.order and isinstance(fmt[0], ModeFormat):
+ self._format = _make_format(fmt)
+ else:
+ raise ValueError("Inconsistent shape and format: "
+ f"{self._shape}, {fmt}.")
+ elif isinstance(fmt, Format):
+ if fmt.rank() != self.order:
+ raise ValueError("Inconsistent shape and format: "
+ f"{self._shape}, {fmt}.")
+ else:
+ self._format = fmt
+ else:
+ raise ValueError(f"Invalid format argument: {fmt}.")
- self._engine = self._assignment.expression.compile(self,
- self._assignment.indices)
+ def __init__(self,
+ value_or_shape: Optional[Union[List[int], Tuple[int, ...], float,
+ int]] = None,
+ fmt: Optional[Union[ModeFormat, List[ModeFormat],
+ Format]] = None,
+ dtype: Optional[DType] = None,
+ name: Optional[str] = None,
+ is_dense: bool = False):
+ """The tensor constructor interface defined by TACO API.
- def compute(self) -> None:
- """Executes the engine for the tensor assignment.
+ Args:
+ value_or_shape: This argument is optional and can be int, float,
+ List[int], or Tuple[int, ...]. If this argument is an int or float,
+ creates a scalar tensor and initializes it with the value. If this
+ argument is a list or tuple of int, uses it as the shape to create a
+ tensor.
+ fmt: This argument can be a ModeFormat, List[ModeFormat], or Format. If
+ this argument is a ModeFormat, uses this ModeFormat for all the tensor
+ dimensions. If this argument is a list of ModeFormat, the length of the
+ list should equal the rank of the tensor. If this argument is a
+ Format, uses it for the format of the tensor.
+ dtype: An object of dtype, representing the data type of the tensor.
+ name: A string name of the tensor. If a name is not given, creates a
+ unique name for the tensor.
+ is_dense: A boolean variable to indicate whether the tensor is a dense
+ tensor without any sparsity annotation.
Raises:
- ValueError: If the assignment hasn't been compiled yet.
+ ValueError: If there is any inconsistency among the input arguments.
"""
- if self._assignment is None:
- return
+ # Take care of the argument default values common to both sparse tensors
+ # and dense tensors.
+ dtype = dtype or DType(Type.FLOAT32)
+ self._name = name or self._get_unique_name()
+ self._assignment = None
+ self._engine = None
+ self._sparse_value_location = _SparseValueInfo._UNPACKED
+ self._dense_storage = None
+ self._dtype = dtype
- if self._engine is None:
- raise ValueError("Need to invoke compile() before invoking compute().")
+ if is_dense:
+ assert (fmt is None)
+ assert (isinstance(value_or_shape, tuple) or isinstance(
+ value_or_shape, list)) and _all_instance_of(value_or_shape, int)
+ self._shape = value_or_shape
+ self._format = None
+ return
- input_accesses = self._assignment.expression.get_input_accesses()
- # Gather the pointers for the input buffers.
- input_pointers = [a.tensor.ctype_pointer() for a in input_accesses]
- if self.is_dense():
- # The pointer to receive dense output is the first argument to the
- # execution engine.
- arg_pointers = [self.dense_dst_ctype_pointer()] + input_pointers
+ fmt = fmt or ModeFormat.COMPRESSED
+ # We currently use _coords and _values to host the sparse tensor value with
+ # COO format, and _dense_storage to host the dense tensor value. We don't
+ # support the conversion between the two storages.
+ self._coords = []
+ self._values = []
+ self._stats = _Stats()
+ if value_or_shape is None or isinstance(value_or_shape, int) or isinstance(
+ value_or_shape, float):
+ # Create a scalar tensor and ignore the fmt parameter.
+ self._shape = []
+ self._format = _make_format([], [])
+ if value_or_shape is not None:
+ self._dense_storage = np.array(value_or_shape, dtype=self._dtype.value)
+ elif (isinstance(value_or_shape, tuple) or isinstance(
+ value_or_shape, list)) and _all_instance_of(value_or_shape, int):
+ # Create a tensor with the specified shape and format.
+ self._shape = list(value_or_shape)
+ self._init_format(fmt)
else:
- # The pointer to receive the sparse tensor output is the last argument
- # to the execution engine and is a pointer to pointer of char.
- arg_pointers = input_pointers + [
- ctypes.pointer(ctypes.pointer(ctypes.c_char(0)))
- ]
+ raise ValueError("Invalid first argument. "
+ "Must be a tuple or list for a shape or a single value"
+ f"if initializing a scalar tensor: {value_or_shape}.")
- # Invoke the execution engine to run the module.
- self._engine.invoke(_ENTRY_NAME, *arg_pointers)
+ def _set_packed_sparse_tensor(self, pointer: ctypes.c_void_p) -> None:
+ """Records the MLIR sparse tensor pointer."""
+ self._sparse_value_location = _SparseValueInfo._PACKED
+ self._packed_sparse_value = pointer
- # Retrieve the result.
- if self.is_dense():
- result = runtime.ranked_memref_to_numpy(arg_pointers[0][0])
- assert isinstance(result, np.ndarray)
- self._dense_storage = result
- else:
- self._set_packed_sparse_tensor(arg_pointers[-1][0])
+ def is_unpacked(self) -> bool:
+ """Returns true if the tensor value is not packed as MLIR sparse tensor."""
+ return (self._sparse_value_location == _SparseValueInfo._UNPACKED)
- self._assignment = None
- self._engine = None
+ def unpack(self) -> None:
+ """Unpacks the MLIR sparse tensor representation."""
+ if self.is_dense() or self.is_unpacked():
+ return
- def evaluate(self) -> None:
- """Evaluates the tensor assignment."""
- self.compile()
- self.compute()
+ # Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
+ # values and verify the values.
+ rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
+ self._packed_sparse_value, self._dtype.value)
+ assert rank == self.order
+ assert np.array_equal(self.shape, shape)
+ assert nse == len(values)
+ self._coords = indices
+ self._values = values
+ self._sparse_value_location = _SparseValueInfo._UNPACKED
- def _sync_value(self) -> None:
- """Updates the tensor value by evaluating the pending assignment."""
- if self._assignment is not None:
- self.evaluate()
+ def __repr__(self) -> str:
+ self._sync_value()
+ self.unpack()
+ value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else
+ f"{repr(self._coords)} {repr(self._values)})")
+ return (f"Tensor(_name={repr(self._name)} "
+ f"_dtype={repr(self._dtype)} : ") + value_str
- def mlir_tensor_type(self) -> ir.RankedTensorType:
- """Returns the MLIR type for the tensor."""
- mlir_attr = (None if (self._format is None or self.order == 0) else
- self._format.mlir_tensor_attr())
- return _mlir_tensor_type(self._dtype, tuple(self._shape), mlir_attr)
+ def insert(self, coords: List[int], val: Union[float, int]) -> None:
+ """Inserts a value to the given coordinate.
- def dense_dst_ctype_pointer(self) -> ctypes.pointer:
- """Returns the ctypes pointer for the pointer to an MemRefDescriptor.
+ Args:
+ coords: A list of integer coordinates. The length of the list must be the
+ same as the rank of the tensor.
+ val: A value being inserted. It is either an integral or a floating point
+ value. This value will be converted to the data type of the tensor.
- For a dense tensor output, the MLIR compiler allocates the storage for
- the tensor. This routine returns the pointer to an MLIR MemRefDescriptor for
- receiving the tensor.
+ Raises:
+ ValueError: When there is any problem in the parameters.
"""
- assert self.is_dense()
- mem_ref_desc = runtime.make_nd_memref_descriptor(
- self.order, np.ctypeslib.as_ctypes_type(self.dtype.value))()
- return ctypes.pointer(ctypes.pointer(mem_ref_desc))
-
- def ctype_pointer(self) -> ctypes.pointer:
- """Returns the ctypes pointer for the pointer to the input tensor."""
if self.is_dense():
- if self._dense_storage is None:
- self._dense_storage = np.zeros(self._shape, self._dtype.value)
- return _ctype_pointer_from_array(self._dense_storage)
+ raise ValueError("Insert method is not supported for dense tensors.")
+ if self._assignment is not None or not self.is_unpacked():
+ raise ValueError("Can't use insert for a tensor with a pending "
+ "assignment or a packed MLIR value, such as a tensor "
+ "constructed from a file.")
+ if not isinstance(coords, list):
+ raise ValueError(f"Non list coordinate detected: {coords}.")
+ if not _all_instance_of(coords, int):
+ raise ValueError(f"Non integer coordinate detected: {coords}.")
+ if (len(coords) != self.order or
+ any([c < 0 or c >= self._shape[i] for i, c in enumerate(coords)])):
+ raise ValueError("Invalid coordinate for rank: "
+ f"{self.order}, {coords}.")
- if self.is_unpacked():
- shape = np.array(self._shape, np.int64)
- indices = np.array(self._coords, np.int64)
- values = np.array(self._values, self._dtype.value)
- perm, sparse = self.format.get_permutation_and_sparsity()
- ptr = utils.coo_tensor_to_sparse_tensor(shape, values, indices, perm,
- sparse)
- else:
- ptr = self._packed_sparse_value
+ if not isinstance(val, int) and not isinstance(val, float):
+ raise ValueError(f"Value is neither int nor float: {val}.")
- return ctypes.pointer(ctypes.cast(ptr, ctypes.c_void_p))
+ self._coords.append(tuple(coords))
+ self._values.append(self._dtype.value(val))
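A small usage sketch, assuming the surrounding module's API is in scope:

  t = Tensor([2, 3])       # sparse, ModeFormat.COMPRESSED by default
  t.insert([0, 1], 10.0)
  t.insert([1, 2], 20.0)
  # The coordinates and values are staged in Python until the tensor is
  # packed for an MLIR computation.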
- def get_scalar_value(self) -> _AnyRuntimeType:
- """Returns the value for the scalar tensor.
+ def is_dense(self) -> bool:
+ """Returns true if the tensor doesn't have sparsity annotation."""
+ return self.order == 0 or self._format is None
- This method also evaluates the assignment to the tensor.
+ def to_array(self) -> np.ndarray:
+ """Returns the numpy array for the Tensor.
- Raises:
- ValueError: If the tensor is not a scalar.
+ This is currently only implemented for dense Tensors.
"""
- if self.order != 0:
- raise ValueError(f"Expected a scalar tensor, got: rank={self.order}")
+ if not self.is_dense():
+ raise ValueError("Conversion from non-dense Tensor "
+ "to numpy array not supported yet.")
self._sync_value()
+
return self._dense_storage
+ @staticmethod
+ def from_array(array: np.ndarray) -> "Tensor":
+ """Returns a dense tensor with the value copied from the input array.
- def get_coordinates_and_values(
- self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]:
- """Returns the coordinates and values for the non-zero elements.
+ We currently only support the conversion of float32 and float64 numpy arrays
+ to Tensor.
- This method also evaluates the assignment to the tensor and unpacks the
- sparse tensor.
+ Args:
+ array: The numpy array that provides the data type, shape and value for
+ the tensor.
+
+ Returns:
+ A Tensor object.
+
+ Raises:
+ ValueError: If the data type of the numpy array is not supported.
"""
- self._sync_value()
+ if array.dtype != np.float32 and array.dtype != np.float64:
+ raise ValueError(f"Expected floating point value type: {array.dtype}.")
+ tensor = Tensor(
+ array.shape,
+ dtype=_nptype_to_taco_type(array.dtype.type),
+ is_dense=True)
+ tensor._dense_storage = np.copy(array)
+ return tensor
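A round-trip sketch for the dense conversion above:

  import numpy as np

  a = np.arange(6, dtype=np.float32).reshape(2, 3)
  t = Tensor.from_array(a)  # dense tensor; the array value is copied
  assert np.array_equal(t.to_array(), a)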
- if not self.is_dense():
- self.unpack()
- return (self._coords, self._values)
+ @staticmethod
+ def from_coo(
+ coordinates: List[Tuple[int, ...]],
+ values: List[_AnyRuntimeType],
+ fmt: Format,
+ dtype: DType,
+ ) -> "Tensor":
+ """Converts coordinates and values to a sparse tensor representation.
- if self.order == 0:
- return ([], self._dense_storage)
+ Args:
+ coordinates: A list of coordinates with non-zero values.
+ values: The non-zero values.
+ fmt: The tensor storage format.
+ dtype: The tensor element data type.
- # Coordinates for non-zero elements, grouped by dimensions.
- coords_by_dims = self._dense_storage.nonzero()
- # Coordinates for non-zero elements, grouped by elements.
- coords = np.transpose(coords_by_dims)
- values = self._dense_storage[coords_by_dims]
- return (coords, values)
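In plain numpy terms, the dense path above groups coordinates by dimension
with nonzero and regroups them by element with transpose:

  import numpy as np

  a = np.array([[0.0, 1.0], [2.0, 0.0]])
  coords_by_dims = a.nonzero()           # (array([0, 1]), array([1, 0]))
  coords = np.transpose(coords_by_dims)  # [[0, 1], [1, 0]], one per element
  values = a[coords_by_dims]             # array([1., 2.])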
+ Returns:
+ A tensor with the given non-zero values and storage format. The shape of
+ the tensor has the minimum size for each dimension to make the given
+ coordinates valid.
+ """
+ assert (isinstance(coordinates, List) and
+ _all_instance_of(coordinates, Tuple))
+ assert (isinstance(values, List) and _all_instance_of(values, dtype.value))
+ assert isinstance(fmt, Format)
- def _record_stats(self, structop: "_StructOpInfo"):
- """Collects information for temporary tensors."""
- # Exclude user specified destination tensors.
- if structop.dst_name == self.name:
- return
+ rank = fmt.rank()
+ assert all(len(c) == rank and _all_instance_of(c, int) for c in coordinates)
- self._stats.add_element(structop)
+ # Find the maximum coordinate value for each dimension.
+ max_coordinate = list(map(max, zip(*coordinates)))
+ # The size of each dimension is one more than the maximum coordinate
+ # value in that dimension.
+ shape = [c + 1 for c in max_coordinate]
+ tensor = Tensor(shape, fmt, dtype=dtype)
+ tensor._coords = coordinates
+ tensor._values = values
+ return tensor
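A usage sketch for from_coo; the values must already be instances of the
numpy scalar type carried by the DType (Type.FLOAT64 is assumed to exist,
matching the float32/float64 support mentioned elsewhere in this file):

  import numpy as np

  coords = [(0, 0), (1, 1)]
  values = [np.float64(10.0), np.float64(20.0)]
  fmt = _make_format([ModeFormat.DENSE, ModeFormat.COMPRESSED])
  t = Tensor.from_coo(coords, values, fmt, DType(Type.FLOAT64))
  # The shape is inferred as [2, 2]: one more than the maximum coordinate
  # in each dimension.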
-def _emit_operand(op_def: lang.LinalgOpDef, indices: Tuple[IndexVar, ...],
- name: str, kind: lang.OperandKind) -> lang.OperandDef:
- """Emits an operand for a tensor access in the current linalg operation.
+ @staticmethod
+ def from_file(
+ filename: str,
+ fmt: Format,
+ dtype: DType,
+ ) -> "Tensor":
+ """Constructs a sparse tensor using the COO-flavored values from a file.
- Args:
- op_def: A LinalgOpDef representing the current linalg dialect operation.
- indices: A tuple of IndexVar used to access the tensor.
- name: A unique string name of the tensor.
- kind: An OperandKind for the operand.
+ Args:
+ filename: A string for the name of the file that contains the sparse
+ tensor data.
+ fmt: The tensor storage format.
+ dtype: The tensor element data type.
- Returns:
- An OperandDef representing the operand.
- """
- dim_sym = _mlir_symbols_from_index_vars(indices)
- opnd = lang.OperandDef(kind, lang.T, dim_sym)
- op_def.add_operand(name, opnd)
- return opnd
+ Returns:
+ A tensor with the given non-zero values and storage format. The tensor
+ value is stored as an MLIR sparse tensor.
+ """
+ sparse_tensor, shape = utils.create_sparse_tensor(filename,
+ fmt.format_pack.formats,
+ _dtype_to_mlir_str(dtype))
+ tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
+ tensor._set_packed_sparse_tensor(sparse_tensor)
+ return tensor
-@dataclasses.dataclass(frozen=True)
-class _DimInfo:
- """Information for an operand dimension.
+ def to_file(self, filename: str) -> None:
+ """Output the tensor value to a file.
- Attributes:
- dim: An integer for the size of the dimension.
- mode_format: A ModeFormat for the dimension sparsity.
- """
- dim: int
- mode_format: ModeFormat
+ This method evaluates any pending assignment to the tensor and outputs the
+ tensor value.
+ Args:
+ filename: A string file name.
-@dataclasses.dataclass()
-class _ExprInfo:
- """Expression information for validation and code generation.
+ Raises:
+ ValueError: If the tensor is dense, or an unpacked sparse tensor.
+ """
+ self._sync_value()
- Attributes:
- src_indices: A tuple of IndexVar for the indices used by the tensors in the
- expression tree.
- dim_infos: A tuple of _DimInfo, representing the dimension information
- corresponding to the src_indices.
- reduce_indices: A set of IndexVar for the indices reduced by the expression.
- acc_reduce_indices: An accumulated set of IndexVar for the indices reduced
- by the expression and its children.
- structop_info: Information to support the code generation for a structured
- op in the linalg dialect, if the corresponding expression node is the root
- of a subtree for a structured op.
- mlir_value: The MLIR value generated for the structured op.
- """
- src_indices: Tuple[IndexVar, ...]
- dim_infos: Tuple[_DimInfo, ...]
- reduce_indices: Optional[Set[IndexVar]] = None
- acc_reduce_indices: Optional[Set[IndexVar]] = None
- structop_info: Optional[_StructOpInfo] = None
- mlir_value: Optional[ir.Value] = None
+ if self.is_dense():
+ raise ValueError("Writing dense tensors without sparsity annotation to "
+ "file is not supported.")
- def __post_init__(self) -> None:
- """Verifies and fix up attribute values.
+ if self.is_unpacked():
+ raise ValueError("Writing unpacked sparse tensors to file is not "
+ "supported.")
- Verifies the consistency of the attributes and modifies the default values
- to support convenient initializer syntax.
- """
- assert len(self.src_indices) == len(self.dim_infos)
- self.reduce_indices = self.reduce_indices or set()
- self.acc_reduce_indices = self.acc_reduce_indices or set()
+ utils.output_sparse_tensor(self._packed_sparse_value, filename,
+ self._format.format_pack.formats,
+ _dtype_to_mlir_str(self._dtype))
+
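A matching write-back sketch under the same assumptions; to_file() first evaluates any pending assignment, and only packed sparse tensors can be written:

    a.to_file("result.tns")  # placeholder file name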
+ @property
+ def dtype(self) -> DType:
+ """Returns the data type for the Tensor."""
+ return self._dtype
+
+ @property
+ def format(self) -> Format:
+ """Returns the storage format for the Tensor."""
+ return self._format
+ @property
+ def name(self) -> str:
+ """Returns the name for the Tensor."""
+ return self._name
-class IndexExpr(abc.ABC):
- """The index notation base class.
+ @property
+ def order(self) -> int:
+ """Returns the rank of the Tensor."""
+ return len(self._shape)
- We support the TACO API index_expression class with an alias of this class.
- """
+ @property
+ def shape(self) -> List[int]:
+ """Returns the shape of the Tensor."""
+ return self._shape
- def _verify_operand_and_build_expr(self, rhs, op: _BinaryOp) -> "_BinaryExpr":
- """Verifies the RHS operand and returns a binary expression.
+ def _verify_and_normalize_indices(self, indices) -> Tuple[IndexVar, ...]:
+ """Verifies and normalizes the indices to access the tensor.
Args:
- rhs: The RHS of the binary operation, which could be any Python object
- from user inputs.
- op: A _BinaryOp object representing the binary operator.
+ indices: The index expression used to access a tensor, which could be any
+ Python object from user inputs.
+
+ Returns:
+ A tuple of IndexVar.
Raises:
- ValueError: If rhs is not an IndexExpr.
+ ValueError: If indices is not 0 for scalar tensors, or not an IndexVar or
+ a tuple of IndexVar for other tensors.
"""
- if not isinstance(rhs, IndexExpr):
- raise ValueError(f"Expected IndexExpr: {rhs}")
- return _BinaryExpr(op, self, rhs)
+ if self.order == 0:
+ if not isinstance(indices, int) or indices != 0:
+ raise ValueError(f"Expected 0 to index scalar tensors: {indices}")
+ return ()
- def _build_unary_expr(self, op: _UnaryOp) -> "_UnaryExpr":
- """Build a unary expression.
+ if isinstance(indices, IndexVar):
+ return (indices,)
+ elif isinstance(indices, tuple) and _all_instance_of(indices, IndexVar):
+ return indices
- Args:
- op: A _UnaryOp object representing the unary operation.
- """
- return _UnaryExpr(op, self)
+ raise ValueError(f"Expected IndexVars: {indices}")
- def __add__(self, rhs) -> "_BinaryExpr":
- """Defines the operator +.
+ def __getitem__(self, key) -> "Access":
+ """Verifies and processes a tensor access.
+
+ In the tensor index notation, a tensor access T[i, j] is represented as
+ retrieving a value with key (i, j) from the tensor object T in Python. This
+ routine verifies the key for the tensor access and returns a tensor access
+ object.
Args:
- rhs: The value being added, which could be any Python object from user
- inputs.
+ key: The key used to access the tensor, which could be any Python object
+ from user inputs.
Returns:
- A _BinaryExpr object representing the operation.
+ The corresponding tensor access object.
Raises:
- ValueError: If rhs is not an IndexExpr.
+ ValueError: If key is not an IndexVar or a tuple of IndexVar.
"""
- return self._verify_operand_and_build_expr(rhs, operator.add)
-
- def __mul__(self, rhs) -> "_BinaryExpr":
- """Defines the operator *.
+ indices = self._verify_and_normalize_indices(key)
+ return Access(self, indices)
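A sketch of tensor access, assuming get_index_vars from this module, a hypothetical rank-2 tensor A, and a rank-0 tensor s:

    i, j = get_index_vars(2)
    expr = A[i, j]   # Python passes the tuple (i, j); yields an Access
    sexpr = s[0]     # a scalar tensor must be indexed with the integer 0
    # Both results are Access objects (IndexExpr), not element values.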
- Args:
- rhs: The value being multiplied, which could be any Python object from
- user inputs.
+ def __setitem__(self, key, value) -> None:
+ """Verifies and processes a tensor assignment.
- Returns:
- A _BinaryExpr object representing the operation.
+ In the tensor index notation, a tensor assignment "T[i, j] = ..." is
+ represented as setting a value for a tensor object T via key (i, j) in
+ Python. This routine verifies the key, evaluates the value, and assigns the
+ value to the tensor.
- Raises:
- ValueError: If rhs is not an IndexExpr.
- """
- return self._verify_operand_and_build_expr(rhs, operator.mul)
+ We currently only support assignment to dense tensors.
- def __abs__(self) -> "_UnaryExpr":
- """Defines the operator abs.
+ Args:
+ key: The key used to access the tensor, which could be any Python object
+ from user inputs.
+ value: The value assigned to the tensor, which could be any Python object
+ from user inputs.
- Returns:
- A _UnaryExpr object representing the operation.
+ Raises:
+ ValueError: If the tensor is not a dense tensor, or the key is not an IndexVar
+ or a tuple of IndexVar, or the length of the indices is not the same as
+ the rank of the tensor.
"""
- return self._build_unary_expr(operator.abs)
+ indices = self._verify_and_normalize_indices(key)
+ if len(indices) != self.order:
+ raise ValueError("Mismatch between indices and tensor rank: "
+ f"len({indices}) != {self.order}.")
- def __neg__(self) -> "_UnaryExpr":
- """Defines the operator neg.
+ self._assignment = _Assignment(indices, value)
+ self._engine = None
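Assignment is lazy: the sketch below, with hypothetical tensors A and B and a dense destination C of matching shapes, only records an _Assignment on C and clears any previously compiled engine; nothing executes yet:

    i, j = get_index_vars(2)
    C[i, j] = A[i, j] + B[i, j]  # recorded; evaluated later on demand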
- Returns:
- A _UnaryExpr object representing the operation.
- """
- return self._build_unary_expr(operator.neg)
+ def compile(self, force_recompile: bool = False) -> None:
+ """Compiles the tensor assignment to an execution engine.
- def __sub__(self, rhs) -> "_BinaryExpr":
- """Defines the operator -.
+ Calling compile a second time does nothing unless
+ force_recompile is True.
Args:
- rhs: The value being subtracted, which could be any Python object from
- user inputs.
-
- Returns:
- A _BinaryExpr object representing the operation.
+ force_recompile: A boolean value to enable recompilation, such as for the
+ purpose of timing.
Raises:
- ValueError: If rhs is not an IndexExpr.
- """
- return self._verify_operand_and_build_expr(rhs, operator.sub)
-
- @abc.abstractmethod
- def _visit(self,
- func: _ExprVisitor,
- args,
- *,
- leaf_checker: _SubtreeLeafChecker = None) -> None:
- """A post-order visitor.
-
- Args:
- func: A callable applied to each node in the expression tree.
- args: The variable-length arguments passed to the callable. These
- arguments are grouped as an iterable and will be unpacked before passing
- to the callable. This is to enable the keyword argument only syntax
- after this argument.
- leaf_checker: A callable object to identify nodes that should be treated
- as leaf nodes to support partial tree visiting.
+ ValueError: If the assignment is not proper or not supported.
"""
- pass
+ if self._assignment is None or (self._engine is not None and
+ not force_recompile):
+ return
- @abc.abstractmethod
- def _emit_expression(
- self,
- expr_to_opnd: Dict["IndexExpr", lang.OperandDef],
- expr_to_info: _ExprInfoDict,
- ) -> lang.ScalarExpression:
- """Emits MLIR for the expression tree.
+ self._engine = self._assignment.expression.compile(self,
+ self._assignment.indices)
- Args:
- expr_to_opnd: A dictionary for looking up structured op input operands for
- the input nodes of the structured op.
- expr_to_info: A dictionary for looking up code generation information for
- expressions.
+ def compute(self) -> None:
+ """Executes the engine for the tensor assignment.
- Returns:
- A linalg dialect ScalarExpression for the expression.
+ Raises:
+ ValueError: If the assignment hasn't been compiled yet.
"""
- pass
-
- @abc.abstractmethod
- def dtype(self) -> DType:
- """Returns the data type for the result of the expression."""
- pass
+ if self._assignment is None:
+ return
- def _emit_structured_op(self, expr_to_info: _ExprInfoDict) -> None:
- """Emits a structured op in the linalg dialect for the expression tree.
+ if self._engine is None:
+ raise ValueError("Need to invoke compile() before invoking compute().")
- We define a DefinedOpCallable in the domain-specific language for the linalg
- dialect and execute the callable to generate the structured op. Self is the
- root of the expression tree for the structured op.
+ input_accesses = self._assignment.expression.get_input_accesses()
+ # Gather the pointers for the input buffers.
+ input_pointers = [a.tensor.ctype_pointer() for a in input_accesses]
+ if self.is_dense():
+ # The pointer to receive dense output is the first argument to the
+ # execution engine.
+ arg_pointers = [self.dense_dst_ctype_pointer()] + input_pointers
+ else:
+ # The pointer to receive the sparse tensor output is the last argument
+ # to the execution engine and is a pointer to a pointer of char.
+ arg_pointers = input_pointers + [
+ ctypes.pointer(ctypes.pointer(ctypes.c_char(0)))
+ ]
- Args:
- expr_to_info: A dictionary for looking up code generation information for
- expressions.
- """
- op_info = expr_to_info[self].structop_info
- op_name = op_info.dst_name
- op_def = lang.LinalgOpDef(name=op_name)
- op_callable = lang.DefinedOpCallable(op_name, op_def)
+ # Invoke the execution engine to run the module.
+ self._engine.invoke(_ENTRY_NAME, *arg_pointers)
- # Collect the input expression nodes for the structured op.
- expr_inputs = []
- self._visit(
- _gather_structured_op_input,
- (self, expr_to_info, expr_inputs),
- leaf_checker=_is_structured_op_leaf,
- )
+ # Retrieve the result.
+ if self.is_dense():
+ result = runtime.ranked_memref_to_numpy(arg_pointers[0][0])
+ assert isinstance(result, np.ndarray)
+ self._dense_storage = result
+ else:
+ self._set_packed_sparse_tensor(arg_pointers[-1][0])
- # Create a linalg structured op operand for each input expression node and
- # build a dictionary for looking up the information.
- expr_to_input_opnd = {
- e: _emit_structured_op_input(e, expr_to_info, op_def)
- for e in expr_inputs
- }
+ self._assignment = None
+ self._engine = None
- # Emit the expression tree, which produces the value assigned to the
- # destination tensor.
- value = self._emit_expression(expr_to_input_opnd, expr_to_info)
- # Emit the structured op representation for the destination tensor.
- dst_opnd = _emit_operand(op_def, op_info.dst_indices, op_info.dst_name,
- lang.OperandKind.OUTPUT_TENSOR)
- dst_dim_syms = _mlir_dimensions_from_index_vars(op_info.dst_indices)
- dst_use = lang.TensorUse(dst_opnd, dst_dim_syms)
+ def evaluate(self) -> None:
+ """Evaluates the tensor assignment."""
+ self.compile()
+ self.compute()
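Putting the pieces together for the hypothetical assignment above (force_recompile exists to rebuild the engine, e.g. when timing compilation):

    C.compile()   # builds and caches the execution engine
    C.compute()   # runs the engine and stores the result into C
    # C.evaluate() is the one-step equivalent; note that compute() clears
    # the pending assignment, so a repeated run requires re-assigning.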
- expr_info = expr_to_info[self]
- # If the structured op reduces some indices, explicitly represent the
- # reduction. This is done by generating a ReduceFn for the dimensions being
- # reduced in the linalg dialect and calling the function with the value
- # being reduced. We only support add reduction currently.
- if expr_info.reduce_indices:
- reduce_dims = _mlir_dimensions_from_index_vars(expr_info.reduce_indices)
- value = lang.ReduceFn.add[reduce_dims](value)
+ def _sync_value(self) -> None:
+ """Updates the tensor value by evaluating the pending assignment."""
+ if self._assignment is not None:
+ self.evaluate()
- # Emit the assignment as a comprehension in the linalg dialect.
- comp = lang.Comprehension((dst_use, value))
- op_def.comprehensions.append(comp)
+ def mlir_tensor_type(self) -> ir.RankedTensorType:
+ """Returns the MLIR type for the tensor."""
+ mlir_attr = (None if (self._format is None or self.order == 0) else
+ self._format.mlir_tensor_attr())
+ return _mlir_tensor_type(self._dtype, tuple(self._shape), mlir_attr)
- # The structured op in the linalg dialect requires an explicit
- # initialization for the destination tensor. Emit MLIR to initialize the
- # destination tensor.
- init = op_info.emit_tensor_init()
+ def dense_dst_ctype_pointer(self) -> ctypes.pointer:
+ """Returns the ctypes pointer for the pointer to an MemRefDescriptor.
- # Collect MLIR values for the linalg input operands, with the assumption
- # that the dictionary preserves insertion order.
- args = [
- expr_to_info[expr].mlir_value
- for expr, opnd in expr_to_input_opnd.items()
- ]
- # Execute the DefinedOpCallable object for the linalg dialect operation to
- # emit MLIR for the linalg structured op.
- expr_info.mlir_value = op_callable(*args, outs=[init])
+ For a dense tensor output, the MLIR compiler allocates the storage for
+ the tensor. This routine returns the pointer to an MLIR MemRefDescriptor for
+ receiving the tensor.
+ """
+ assert self.is_dense()
+ mem_ref_desc = runtime.make_nd_memref_descriptor(
+ self.order, np.ctypeslib.as_ctypes_type(self.dtype.value))()
+ return ctypes.pointer(ctypes.pointer(mem_ref_desc))
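The descriptor plumbing above can be sketched standalone, assuming the mlir.runtime helpers behave as they are called in this module:

    import ctypes
    import numpy as np
    from mlir import runtime

    # An empty rank-2 f64 MemRef descriptor, wrapped as a pointer to a
    # pointer, mirroring dense_dst_ctype_pointer().
    desc = runtime.make_nd_memref_descriptor(
        2, np.ctypeslib.as_ctypes_type(np.float64))()
    arg = ctypes.pointer(ctypes.pointer(desc))
    # After the engine fills it in, the numpy view is recovered with:
    #   runtime.ranked_memref_to_numpy(arg[0])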
- def _identify_structured_ops(
- self,
- expr_to_info: _ExprInfoDict,
- dst: Tensor,
- dst_indices: Tuple[IndexVar, ...],
- ) -> List["IndexExpr"]:
- """Returns expression nodes for the roots of the identified structured ops.
+ def ctype_pointer(self) -> ctypes.pointer:
+ """Returns the ctypes pointer for the pointer to the input tensor."""
+ if self.is_dense():
+ if self._dense_storage is None:
+ self._dense_storage = np.zeros(self._shape, self._dtype.value)
+ return _ctype_pointer_from_array(self._dense_storage)
- A structured op in the linalg dialect only supports reductions performed
- on the whole expression. If the expression tree contains reductions that
- are performed on only part of the tree, the expression needs to be
- implemented with multiple structured ops. This routine identifies each
- expression node that contains such a reduction as the root of a
- structured op in the linalg dialect.
+ if self.is_unpacked():
+ shape = np.array(self._shape, np.int64)
+ indices = np.array(self._coords, np.int64)
+ values = np.array(self._values, self._dtype.value)
+ perm, sparse = self.format.get_permutation_and_sparsity()
+ ptr = utils.coo_tensor_to_sparse_tensor(shape, values, indices, perm,
+ sparse)
+ else:
+ ptr = self._packed_sparse_value
- Args:
- expr_to_info: A dictionary for looking up code generation information for
- expressions.
- dst: A destination Tensor that accepts the value of the expression tree.
- dst_indices: The indices used by the destination index expression.
+ return ctypes.pointer(ctypes.cast(ptr, ctypes.c_void_p))
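For the unpacked branch above, the arrays handed to utils.coo_tensor_to_sparse_tensor can be sketched with made-up data; perm and sparse come from Format.get_permutation_and_sparsity() as in the code:

    import numpy as np

    shape = np.array([5, 3], np.int64)                      # tensor dimensions
    indices = np.array([[0, 0], [1, 2], [4, 1]], np.int64)  # one row per entry
    values = np.array([1.0, 2.0, 3.0], np.float64)          # matching values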
- Returns:
- An ordered list of IndexExpr for the root expressions of the structured
- ops, where child expressions go before parent expressions that use their
- results.
- """
- reduce_indices = tuple(
- set(expr_to_info[self].src_indices) - set(dst_indices))
- for reduce_index in reduce_indices:
- _mark_structured_op_root(self, reduce_index, expr_to_info)
+ def get_scalar_value(self) -> _AnyRuntimeType:
+ """Returns the value for the scalar tensor.
- self._visit(_accumulate_reduce_indices, (expr_to_info,))
- structop_roots = []
- self._visit(_gather_structured_op, (expr_to_info, structop_roots))
+ This method also evaluates the assignment to the tensor.
- # Handle the root of the top level expression.
- if not structop_roots or structop_roots[-1] != self:
- # The top level expression is not a reduction. Add the top level
- # expression as a structured op root.
- structop_roots.append(self)
+ Raises:
+ ValueError: If the tensor is not a scalar.
+ """
+ if self.order != 0:
+ raise ValueError(f"Expected a scalar tensor, got: rank={self.order}")
- # Use user specified information for the destination tensor to build an
- # _StructOpInfo for the top level expression.
- expr_to_info[self].structop_info = _StructOpInfo(dst_indices,
- tuple(dst.shape),
- self.dtype(), dst.name,
- dst.format)
+ self._sync_value()
+ return self._dense_storage
- return structop_roots
- def _validate_and_collect_expr_info(
- self,
- dst: Tensor,
- dst_indices: Tuple[IndexVar, ...],
- ) -> _ExprInfoDict:
- """Propagates expression information for validation.
+ def get_coordinates_and_values(
+ self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]:
+ """Returns the coordinates and values for the non-zero elements.
- Propagates the indices used by child expression nodes to parent expression
- nodes. Also collects and validates the sizes for the dimensions
- corresponding to the indices.
+ This method also evaluates the assignment to the tensor and unpacks the
+ sparse tensor.
+ """
+ self._sync_value()
- Args:
- dst: A destination Tensor that accepts the value of the expression tree.
- dst_indices: The indices used by the destination index expression.
+ if not self.is_dense():
+ self.unpack()
+ return (self._coords, self._values)
- Raises:
- ValueError: If there is any inconsistency in indices or dimension
- values.
+ if self.order == 0:
+ return ([], self._dense_storage)
- Returns:
- A dictionary of (IndexExpr, _ExprInfo).
- """
- expr_to_info = {}
- # Validate the expression tree and construct expression information.
- self._visit(_validate_and_collect_expr_info, (expr_to_info,))
+ # Coordinates for non-zero elements, grouped by dimensions.
+ coords_by_dims = self._dense_storage.nonzero()
+ # Coordinates for non-zero elements, grouped by elements.
+ coords = np.transpose(coords_by_dims)
+ values = self._dense_storage[coords_by_dims]
+ return (coords, values)
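The dense branch above leans on two numpy behaviors worth spelling out; a standalone sketch:

    import numpy as np

    a = np.array([[0.0, 5.0], [7.0, 0.0]])
    by_dim = a.nonzero()           # (array([0, 1]), array([1, 0]))
    coords = np.transpose(by_dim)  # [[0, 1], [1, 0]]: one row per element
    values = a[by_dim]             # array([5., 7.])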
- # Validate the destination dimension information.
- info = expr_to_info[self]
- index_to_dim_info = {i: d for i, d in zip(info.src_indices, info.dim_infos)}
- for i, d in zip(dst_indices, dst.shape):
- if i not in index_to_dim_info:
- raise ValueError("Destination IndexVar not used in the "
- f"source expression: {i}")
- else:
- if d != index_to_dim_info[i].dim:
- raise ValueError(f"Inconsistent destination dimension for {i}: "
- f"{d} vs {index_to_dim_info[i].dim}")
+ def _record_stats(self, structop: "_StructOpInfo"):
+ """Collects information for temporary tensors."""
+ # Exclude user specified destination tensors.
+ if structop.dst_name == self.name:
+ return
- return expr_to_info
+ self._stats.add_element(structop)
- def _emit_assignment(
- self,
- module: ir.Module,
- dst: Tensor,
- dst_indices: Tuple[IndexVar, ...],
- expr_to_info: _ExprInfoDict,
- input_accesses: List["Access"],
- ) -> None:
- """Emits an MLIR function for assigning the expression to a tensor."""
- input_types = [a.tensor.mlir_tensor_type() for a in input_accesses]
- # Build the kernel for the operations.
- with ir.InsertionPoint(module.body):
+def _emit_operand(op_def: lang.LinalgOpDef, indices: Tuple[IndexVar, ...],
+ name: str, kind: lang.OperandKind) -> lang.OperandDef:
+ """Emits an operand for a tensor access in the current linalg operation.
- @builtin.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME)
- def linalg_funcop(*args):
- # Set up the mapping from the Access nodes to their MLIR values.
- for e, mlir in zip(input_accesses, args):
- expr_to_info[e].mlir_value = mlir
+ Args:
+ op_def: A LinalgOpDef representing the current linalg dialect operation.
+ indices: A tuple of IndexVar used to access the tensor.
+ name: A unique string name of the tensor.
+ kind: An OperandKind for the operand.
- # Emit structured ops in the linalg dialect to implement the assignment.
- for structop_root in self._identify_structured_ops(
- expr_to_info, dst, dst_indices):
- structop_root._emit_structured_op(expr_to_info)
- dst._record_stats(expr_to_info[structop_root].structop_info)
+ Returns:
+ An OperandDef representing the operand.
+ """
+ dim_sym = _mlir_symbols_from_index_vars(indices)
+ opnd = lang.OperandDef(kind, lang.T, dim_sym)
+ op_def.add_operand(name, opnd)
+ return opnd
- # The function returns the MLIR value of the root expression.
- return expr_to_info[self].mlir_value
- linalg_funcop.func_op.attributes[
- "llvm.emit_c_interface"] = ir.UnitAttr.get()
+@dataclasses.dataclass(frozen=True)
+class _DimInfo:
+ """Information for an operand dimension.
- def get_input_accesses(self) -> List["Access"]:
- """Compute the list of input accesses for the expression."""
- input_accesses = []
- self._visit(_gather_input_accesses_index_vars, (input_accesses,))
- return input_accesses
+ Attributes:
+ dim: An integer for the size of the dimension.
+ mode_format: A ModeFormat for the dimension sparsity.
+ """
+ dim: int
+ mode_format: ModeFormat
- def compile(
- self,
- dst: Tensor,
- dst_indices: Tuple[IndexVar, ...],
- ) -> execution_engine.ExecutionEngine:
- """Compiles the tensor assignment dst[dst_indices] = expression.
- Args:
- dst: The destination tensor.
- dst_indices: The tuple of IndexVar used to access the destination tensor.
+@dataclasses.dataclass()
+class _ExprInfo:
+ """Expression information for validation and code generation.
- Returns:
- The execution engine for the tensor assignment.
+ Attributes:
+ src_indices: A tuple of IndexVar for the indices used by the tensors in the
+ expression tree.
+ dim_infos: A tuple of _DimInfo, representing the dimension information
+ corresponding to the src_indices.
+ reduce_indices: A set of IndexVar for the indices reduced by the expression.
+ acc_reduce_indices: An accumulated set of IndexVar for the indices reduced
+ by the expression and its children.
+ structop_info: Information to support the code generation for a structured
+ op in the linalg dialect, if the corresponding expression node is the root
+ of a subtree for a structured op.
+ mlir_value: The MLIR value generated for the structured op.
+ """
+ src_indices: Tuple[IndexVar, ...]
+ dim_infos: Tuple[_DimInfo, ...]
+ reduce_indices: Optional[Set[IndexVar]] = None
+ acc_reduce_indices: Optional[Set[IndexVar]] = None
+ structop_info: Optional[_StructOpInfo] = None
+ mlir_value: Optional[ir.Value] = None
- Raises:
- ValueError: If the expression is not proper or not supported.
- """
- expr_to_info = self._validate_and_collect_expr_info(dst, dst_indices)
- input_accesses = self.get_input_accesses()
+ def __post_init__(self) -> None:
+ """Verifies and fix up attribute values.
- # Build and compile the module to produce the execution engine.
- with ir.Context(), ir.Location.unknown():
- module = ir.Module.create()
- self._emit_assignment(module, dst, dst_indices, expr_to_info,
- input_accesses)
- engine = utils.compile_and_build_engine(module)
+ Verifies the consistency of the attributes and modifies the default values
+ to support convenient initializer syntax.
+ """
+ assert len(self.src_indices) == len(self.dim_infos)
+ self.reduce_indices = self.reduce_indices or set()
+ self.acc_reduce_indices = self.acc_reduce_indices or set()
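The Optional-plus-__post_init__ pattern above exists because dataclasses reject mutable defaults such as set(); a minimal standalone sketch:

    import dataclasses
    from typing import Optional, Set

    @dataclasses.dataclass()
    class _Example:
      # `tags: Set[str] = set()` would raise ValueError when the class is
      # created, so default to None and normalize in __post_init__.
      tags: Optional[Set[str]] = None

      def __post_init__(self) -> None:
        self.tags = self.tags or set()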
- return engine
@dataclasses.dataclass(frozen=True)
class Access(IndexExpr):