[Mlir-commits] [mlir] 1f07853 - [mlir][sparse] introduce sparse_tensor.pack operation

Fri Feb 3 14:30:58 PST 2023

Author: Peiming Liu
Date: 2023-02-03T22:30:52Z
New Revision: 1f07853f2bc5278a3d73dfdd583e7607c82d8be1

URL: https://github.com/llvm/llvm-project/commit/1f07853f2bc5278a3d73dfdd583e7607c82d8be1
DIFF: https://github.com/llvm/llvm-project/commit/1f07853f2bc5278a3d73dfdd583e7607c82d8be1.diff

LOG: [mlir][sparse] introduce sparse_tensor.pack operation

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D143224

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
    mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
    mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
    mlir/test/Dialect/SparseTensor/invalid.mlir
    mlir/test/Dialect/SparseTensor/roundtrip.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
index 777a5b40d6119..3f8862581bd49 100644

--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -63,6 +63,12 @@ bool isUniqueCOOType(RankedTensorType tp);
 /// of the tensor.
 unsigned getCOOStart(SparseTensorEncodingAttr enc);
 
+/// Helpers to set up a COO type.
+RankedTensorType getCOOFromTypeWithOrdering(RankedTensorType src,
+                                            AffineMap ordering, bool ordered);
+
+RankedTensorType getCOOFromType(RankedTensorType src, bool ordered);
+
 //
 // Dimension level types.
 //

diff  --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index 521df943d657c..395015d23d8cd 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -58,6 +58,47 @@ def SparseTensor_NewOp : SparseTensor_Op<"new", [Pure]>,
   let hasVerifier = 1;
 }
 
+def SparseTensor_PackOp : SparseTensor_Op<"pack">,
+    Arguments<(ins AnyRankedTensor:$data,
+                   AnyRankedTensor:$indices)>,
+    Results<(outs AnySparseTensor: $result)> {
+  let summary = "Returns a sparse tensor from the given (data, indices) pair";
+
+  let description = [{
+    Packs the data/indices into a COO sparse tensor. The coordinates in `indices`
+    shall not exceed the dimension sizes of the returned sparse tensor.
+    Note that the returned tensor must be statically
+    shaped because it is impossible to infer the shape from sparse coordinates.
+
+    `$indices`: stored via a 2-D tensor of integer elements with shape [N, ndims],
+    which specifies the indices of the elements in the sparse tensor that contain
+    non-zero values.
+
+    `$data`: stored via a 1-D tensor with shape [N], that supplies the corresponding
+    values for the indices.
+
+    The operation can be used to materialize a sparse tensor from external sources,
+    e.g., when data and indices are passed in from Python as two numpy arrays.
+
+    Example:
+    ```mlir
+    %data    = arith.constant dense<[ 1.0   , 5.0   ]> : tensor<2xf64>
+    %indices = arith.constant dense<[[0,  0],[1,  2]]> : tensor<2x2xindex>
+
+    %st = sparse_tensor.pack %data, %indices : tensor<2xf64>, tensor<2x2xindex>
+                                            to tensor<3x4xf64, #COO>
+    // %st = [[1, 0, 0, 0],
+    //        [0, 0, 5, 0],
+    //        [0, 0, 0, 0]]
+    ```
+  }];
+
+  let assemblyFormat = "$data `,` $indices attr-dict `:` type($data) `,` type($indices)"
+                                                   "`to` type($result)";
+
+  let hasVerifier = 1;
+}
+
 def SparseTensor_ConvertOp : SparseTensor_Op<"convert",
   [Pure, SameOperandsAndResultElementType]>,
     Arguments<(ins AnyTensor:$source)>,

diff  --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 364c7e7d45962..f61e1f52ce8f9 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -15,6 +15,7 @@
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
+#include "mlir/IR/PatternMatch.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 
@@ -440,6 +441,44 @@ unsigned mlir::sparse_tensor::getCOOStart(SparseTensorEncodingAttr enc) {
   return rank;
 }
 
+// Helpers to set up a COO type.
+RankedTensorType sparse_tensor::getCOOFromTypeWithOrdering(RankedTensorType src,
+                                                           AffineMap ordering,
+                                                           bool ordered) {
+  auto *ctx = src.getContext();
+  auto rank = src.getRank();
+  SmallVector<DimLevelType> dims;
+
+  // A non-unique compressed dim at beginning, ordered iff `ordered` is set.
+  // If this is also the last dimension, then it is unique.
+  dims.push_back(*getDimLevelType(LevelFormat::Compressed, ordered, rank == 1));
+  if (rank > 1) {
+    // TODO: it is actually ordered at the level for ordered input.
+    // Followed by n-2 non-unique singleton levels (ordered iff `ordered`).
+    std::fill_n(std::back_inserter(dims), rank - 2,
+                *getDimLevelType(LevelFormat::Singleton, ordered, false));
+    // Ends by a unique singleton level unless the tensor rank is 1.
+    dims.push_back(*getDimLevelType(LevelFormat::Singleton, ordered, true));
+  }
+
+  SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(src);
+  // TODO: Maybe pick the bitwidth based on input/output tensors (probably the
+  // largest one among them) in the original operation instead of using the
+  // default value.
+  unsigned pointerBitWidth = encSrc ? encSrc.getPointerBitWidth() : 0;
+  unsigned indexBitWidth = encSrc ? encSrc.getIndexBitWidth() : 0;
+  auto enc = SparseTensorEncodingAttr::get(ctx, dims, ordering, AffineMap(),
+                                           pointerBitWidth, indexBitWidth);
+  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
+}
+
+RankedTensorType sparse_tensor::getCOOFromType(RankedTensorType src,
+                                               bool ordered) {
+  return getCOOFromTypeWithOrdering(
+      src, AffineMap::getMultiDimIdentityMap(src.getRank(), src.getContext()),
+      ordered);
+}
+
 uint64_t mlir::sparse_tensor::toOrigDim(SparseTensorEncodingAttr enc,
                                         uint64_t d) {
   if (enc) {
@@ -575,6 +614,42 @@ LogicalResult NewOp::verify() {
   return success();
 }
 
+LogicalResult PackOp::verify() {
+  TensorType dataTp = getData().getType(), idxTp = getIndices().getType();
+  TensorType retTp = getResult().getType();
+
+  if (!isUniqueCOOType(retTp.cast<RankedTensorType>()))
+    return emitError("must be packed into a COO tensor");
+
+  if (!retTp.hasStaticShape() || !dataTp.hasStaticShape() ||
+      !idxTp.hasStaticShape())
+    return emitError("all input types must be statically shaped");
+
+  if (dataTp.getRank() != 1 || idxTp.getRank() != 2) {
+    return emitError(
+        "requires rank 1 tensor for value and rank 2 tensor for indices");
+  }
+
+  auto enc = getSparseTensorEncoding(retTp);
+  if (idxTp.getElementType() != enc.getIndexType() ||
+      dataTp.getElementType() != retTp.getElementType())
+    return emitError("unmatched type between input and output");
+
+  auto dNOE = dataTp.getShape()[0];
+  auto iNOE = idxTp.getShape()[0];
+  if (!ShapedType::isDynamic(dNOE) && !ShapedType::isDynamic(iNOE) &&
+      dNOE != iNOE)
+    return emitError("unmatched number of elements in data and indices");
+
+  // A tensor<?xNxi32> for indices means the input COO is rank N
+  auto inRank = idxTp.getShape()[1];
+  auto ouRank = retTp.getRank();
+  if (!ShapedType::isDynamic(inRank) && inRank != ouRank)
+    return emitError("unmatched rank between input and output");
+
+  return success();
+}
+
 LogicalResult ConvertOp::verify() {
   if (auto tp1 = getSource().getType().dyn_cast<RankedTensorType>()) {
     if (auto tp2 = getDest().getType().dyn_cast<RankedTensorType>()) {

diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 9cd2331c24d19..27fef5cf0c8d4 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -151,40 +151,13 @@ static void sizesForTensor(OpBuilder &builder, SmallVectorImpl<Value> &sizes,
 
 // TODO: The dim level property of the COO type relies on input tensors, the
 // shape relies on the output tensor
-// Helpers to setup a COO type.
 static RankedTensorType
 getUnorderedCOOFromTypeWithOrdering(RankedTensorType src, AffineMap ordering) {
-  auto *ctx = src.getContext();
-  auto rank = src.getRank();
-  SmallVector<DimLevelType> dims;
-
-  // An unordered and non-unique compressed dim at beginning.
-  dims.push_back(DimLevelType::CompressedNuNo);
-
-  if (rank > 1) {
-    // TODO: it is actually ordered at the level for ordered input.
-    // Followed by unordered non-unique n-2 singleton levels.
-    std::fill_n(std::back_inserter(dims), rank - 2,
-                DimLevelType::SingletonNuNo);
-    // TODO: only if all the inputs (for concatentate) are unique at the last
-    // level should the COO has a unique level at the end. Ends by a unordered
-    // unique singleton level unless the tensor rank is 1.
-    dims.push_back(DimLevelType::SingletonNo);
-  }
-  SparseTensorEncodingAttr encSrc = getSparseTensorEncoding(src);
-  // TODO: Maybe pick the bitwidth based on input/output tensors (probably the
-  // largest one among them) in the original operation instead of using the
-  // default value.
-  unsigned pointerBitWidth = encSrc ? encSrc.getPointerBitWidth() : 0;
-  unsigned indexBitWidth = encSrc ? encSrc.getIndexBitWidth() : 0;
-  auto enc = SparseTensorEncodingAttr::get(ctx, dims, ordering, AffineMap(),
-                                           pointerBitWidth, indexBitWidth);
-  return RankedTensorType::get(src.getShape(), src.getElementType(), enc);
+  return getCOOFromTypeWithOrdering(src, ordering, false);
 }
 
 static RankedTensorType getUnorderedCOOFromType(RankedTensorType src) {
-  return getUnorderedCOOFromTypeWithOrdering(
-      src, AffineMap::getMultiDimIdentityMap(src.getRank(), src.getContext()));
+  return getCOOFromType(src, false);
 }
 
 /// Collects the dynamic dimension sizes for `tp` with the assumption that

diff  --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index feb45e184d67a..68d231447c13b 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -8,6 +8,66 @@ func.func @invalid_new_dense(%arg0: !llvm.ptr<i8>) -> tensor<32xf32> {
 
 // -----
 
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @non_static_pack_ret(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<?xf64, #SparseVector> {
+  // expected-error at +1 {{all input types must be statically shaped}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<?xf64, #SparseVector>
+  return %0 : tensor<?xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_data(%data: tensor<6x1xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<100xf64, #SparseVector> {
+  // expected-error at +1 {{requires rank 1 tensor for value and rank 2 tensor for indices}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6x1xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<100xf32, #SparseVector> {
+  // expected-error at +1 {{unmatched type between input and output}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<100xf32, #SparseVector>
+  return %0 : tensor<100xf32, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<5xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<100xf64, #SparseVector> {
+  // expected-error at +1 {{unmatched number of elements in data and indices}}
+  %0 = sparse_tensor.pack %data, %index : tensor<5xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_pack_type(%data: tensor<6xf64>, %index: tensor<6x2xi32>)
+                            -> tensor<100xf64, #SparseVector> {
+  // expected-error at +1 {{unmatched rank between input and output}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x2xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
 func.func @invalid_pointers_dense(%arg0: tensor<128xf64>) -> memref<?xindex> {
   // expected-error at +1 {{'sparse_tensor.pointers' op operand #0 must be sparse tensor of any type values, but got 'tensor<128xf64>'}}
   %0 = sparse_tensor.pointers %arg0 { dimension = 0 : index } : tensor<128xf64> to memref<?xindex>

diff  --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 1f48953f95fce..95358f9594d40 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -13,6 +13,22 @@ func.func @sparse_new(%arg0: !llvm.ptr<i8>) -> tensor<128xf64, #SparseVector> {
 
 // -----
 
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+// CHECK-LABEL: func @sparse_pack(
+// CHECK-SAME: %[[D:.*]]: tensor<6xf64>,
+// CHECK-SAME: %[[I:.*]]: tensor<6x1xi32>)
+//       CHECK: %[[R:.*]] = sparse_tensor.pack %[[D]], %[[I]]
+//       CHECK: return %[[R]] : tensor<100xf64, #{{.*}}>
+func.func @sparse_pack(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<100xf64, #SparseVector> {
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #SparseVector>
+  return %0 : tensor<100xf64, #SparseVector>
+}
+
+// -----
+
 #SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
 
 // CHECK-LABEL: func @sparse_new_symmetry(