[Mlir-commits] [mlir] 1b43465 - [mlir][sparse] add more dimension level types and properties

Tue Aug 30 10:38:02 PDT 2022

Author: Aart Bik
Date: 2022-08-30T10:37:49-07:00
New Revision: 1b434652c56704be90d01039f4329ea9320bc742

URL: https://github.com/llvm/llvm-project/commit/1b434652c56704be90d01039f4329ea9320bc742
DIFF: https://github.com/llvm/llvm-project/commit/1b434652c56704be90d01039f4329ea9320bc742.diff

LOG: [mlir][sparse] add more dimension level types and properties

We recently removed the singleton dimension level type (see the revision
https://reviews.llvm.org/D131002) since it was not only unimplemented but
also incomplete (properties were missing). This revision adds singleton back
as an extra dimension level type, together with the properties
ordered/not-ordered and unique/not-unique. Even though these are still not
lowered to actual code, this provides a complete way of defining many more
sparse storage schemes (in the long run, we want to support even more
dimension level types and properties using the additional extensions
proposed in [Chou]).

Note that the current solution of using suffixes for the properties is not
ideal, but keeps the extension relatively simple with respect to parsing and
printing. Furthermore, it is rather consistent with the TACO implementation
which uses things like Compressed-Unique as well. Nevertheless, we probably
want to separate dimension level types from properties when we add more types
and properties.

Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D132897

Added: 
    

Modified: 
    mlir/include/mlir-c/Dialect/SparseTensor.h
    mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
    mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
    mlir/lib/Bindings/Python/DialectSparseTensor.cpp
    mlir/lib/CAPI/Dialect/SparseTensor.cpp
    mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
    mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
    mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir-c/Dialect/SparseTensor.h b/mlir/include/mlir-c/Dialect/SparseTensor.h
index ac2b8b60fe1fa..9465f36c368ac 100644

--- a/mlir/include/mlir-c/Dialect/SparseTensor.h
+++ b/mlir/include/mlir-c/Dialect/SparseTensor.h
@@ -19,11 +19,8 @@ extern "C" {
 
 MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor);
 
-/// Dimension level types that define sparse tensors:
-///   - MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE - dimension is dense, every
-///   entry is stored
-///   - MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED - dimension is sparse,
-///   only nonzeros are stored (no duplicates).
+/// Dimension level types (and properties) that define sparse tensors.
+/// See the documentation in SparseTensorAttrDefs.td for their meaning.
 ///
 /// These correspond to SparseTensorEncodingAttr::DimLevelType in the C++ API.
 /// If updating, keep them in sync and update the static_assert in the impl
@@ -31,6 +28,13 @@ MLIR_DECLARE_CAPI_DIALECT_REGISTRATION(SparseTensor, sparse_tensor);
 enum MlirSparseTensorDimLevelType {
   MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE,
   MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO,
+  MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO,
 };
 
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
index d9c743ea109ac..b2c27df434d88 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.td
@@ -16,7 +16,7 @@ include "mlir/IR/TensorEncoding.td"
 // All of the Tensor attributes will extend this class.
 class SparseTensor_Attr<string name,
                         list<Trait> traits = []>
-	: AttrDef<SparseTensor_Dialect, name, traits>;
+    : AttrDef<SparseTensor_Dialect, name, traits>;
 
 // Sparse tensor encoding attribute.
 def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
@@ -34,9 +34,21 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     The attribute consists of the following fields.
     - Dimension level type for each dimension of a tensor type:
         - **dense** : dimension is dense, all entries along this dimension
-	  are stored.
+          are stored
         - **compressed** : dimension is sparse, only nonzeros along this dimensions
-	  are stored, without duplicates, i.e., compressed (unique).
+          are stored
+        - **singleton** : dimension stores individual indices with no siblings
+      By default, each dimension level type has the property of being unique
+      (no duplicates at that level) and ordered (indices appear sorted at that
+      level). The following two suffixes can be used to make the last two
+      dimension level types not-unique (duplicates may appear) and not-ordered
+      (indices may appear unsorted).
+        - **-nu** : not unique
+        - **-no** : not ordered
+      Currently, these suffixes, if present, should appear in this order.
+      In the future, we may introduce many more dimension level types and
+      properties, and separate specifying the two completely rather than
+      using this suffix mechanism.
     - Dimension ordering on the indices of this tensor type. Unlike dense
       storage, most sparse storage schemes do not provide fast random access.
       This affine map specifies the order of dimensions that should be supported
@@ -62,6 +74,12 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
     }>
     ... tensor<?xf32, #SparseVector> ...
 
+    // Sorted Coordinate Scheme.
+    #SortedCOO = #sparse_tensor.encoding<{
+      dimLevelType = [ "compressed-nu", "singleton" ]
+    }>
+    ... tensor<?x?xf64, #SortedCOO> ...
+
     // Doubly compressed sparse column storage with specific bitwidths.
     #DCSC = #sparse_tensor.encoding<{
       dimLevelType = [ "compressed", "compressed" ],
@@ -76,10 +94,10 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
   // Data in sparse tensor encoding.
   let parameters = (
     ins
-    // A dimension level type for each dimension of a tensor type.
+    // A dimension level type for each dimension of the tensor type.
     ArrayRefParameter<
       "SparseTensorEncodingAttr::DimLevelType",
-      "Per-dimension level type (dense or compressed)"
+      "per dimension level type"
       >: $dimLevelType,
     // A dimension order on the indices of this tensor type.
     // TODO: block structure with higher-dim inputs
@@ -94,9 +112,16 @@ def SparseTensorEncodingAttr : SparseTensor_Attr<"SparseTensorEncoding",
   let hasCustomAssemblyFormat = 1;
 
   let extraClassDeclaration = [{
-    // Dimension level types.
+    // Dimension level types. By default, each type has the unique and
+    // ordered properties. Alternative properties are indicated by
+    // Nu (not-unique) and No (not-ordered).
+    //
+    // TODO: separate type and property in encoding
+    //
     enum class DimLevelType {
-      Dense, Compressed
+      Dense,
+      Compressed, CompressedNu, CompressedNo, CompressedNuNo,
+      Singleton, SingletonNu, SingletonNo, SingletonNuNo,
     };
   }];
 }

diff  --git a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
index 2ed0002246544..4ea6697cfeb55 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h
@@ -109,7 +109,7 @@ enum class Action : uint32_t {
   kSparseToSparse = 3,
   kEmptyCOO = 4,
   kToCOO = 5,
-  kToIterator = 6
+  kToIterator = 6,
 };
 
 /// This enum mimics `SparseTensorEncodingAttr::DimLevelType` for
@@ -118,7 +118,13 @@ enum class Action : uint32_t {
 enum class DimLevelType : uint8_t {
   kDense = 0,
   kCompressed = 1,
-  kSingleton = 2
+  kCompressedNu = 2,
+  kCompressedNo = 3,
+  kCompressedNuNo = 4,
+  kSingleton = 5,
+  kSingletonNu = 6,
+  kSingletonNo = 7,
+  kSingletonNuNo = 8,
 };
 
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
index 49b4a8998053e..ae9cfbb674264 100644
--- a/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
+++ b/mlir/lib/Bindings/Python/DialectSparseTensor.cpp
@@ -18,7 +18,14 @@ using namespace mlir::python::adaptors;
 static void populateDialectSparseTensorSubmodule(const py::module &m) {
   py::enum_<MlirSparseTensorDimLevelType>(m, "DimLevelType", py::module_local())
       .value("dense", MLIR_SPARSE_TENSOR_DIM_LEVEL_DENSE)
-      .value("compressed", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED);
+      .value("compressed", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED)
+      .value("compressed-nu", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU)
+      .value("compressed-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO)
+      .value("compressed-nu-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO)
+      .value("singleton", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON)
+      .value("singleton-nu", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU)
+      .value("singleton-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO)
+      .value("singleton-nu-no", MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO);
 
   mlir_attribute_subclass(m, "EncodingAttr",
                           mlirAttributeIsASparseTensorEncodingAttr)

diff  --git a/mlir/lib/CAPI/Dialect/SparseTensor.cpp b/mlir/lib/CAPI/Dialect/SparseTensor.cpp
index b7b2fd5d8e5c2..e3e32900c0a28 100644
--- a/mlir/lib/CAPI/Dialect/SparseTensor.cpp
+++ b/mlir/lib/CAPI/Dialect/SparseTensor.cpp
@@ -25,7 +25,28 @@ static_assert(
             static_cast<int>(SparseTensorEncodingAttr::DimLevelType::Dense) &&
         static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED) ==
             static_cast<int>(
-                SparseTensorEncodingAttr::DimLevelType::Compressed),
+                SparseTensorEncodingAttr::DimLevelType::Compressed) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNu) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_COMPRESSED_NU_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::CompressedNuNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::Singleton) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNu) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNo) &&
+        static_cast<int>(MLIR_SPARSE_TENSOR_DIM_LEVEL_SINGLETON_NU_NO) ==
+            static_cast<int>(
+                SparseTensorEncodingAttr::DimLevelType::SingletonNuNo),
     "MlirSparseTensorDimLevelType (C-API) and DimLevelType (C++) mismatch");
 
 bool mlirAttributeIsASparseTensorEncodingAttr(MlirAttribute attr) {

diff  --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 2ff7e1367061a..8691b94351f9f 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -72,6 +72,20 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
           dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Dense);
         } else if (strVal == "compressed") {
           dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Compressed);
+        } else if (strVal == "compressed-nu") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNu);
+        } else if (strVal == "compressed-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNo);
+        } else if (strVal == "compressed-nu-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::CompressedNuNo);
+        } else if (strVal == "singleton") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::Singleton);
+        } else if (strVal == "singleton-nu") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNu);
+        } else if (strVal == "singleton-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNo);
+        } else if (strVal == "singleton-nu-no") {
+          dlt.push_back(SparseTensorEncodingAttr::DimLevelType::SingletonNuNo);
         } else {
           parser.emitError(parser.getNameLoc(),
                            "unexpected dimension level type: ")
@@ -125,6 +139,27 @@ void SparseTensorEncodingAttr::print(AsmPrinter &printer) const {
     case DimLevelType::Compressed:
       printer << "\"compressed\"";
       break;
+    case DimLevelType::CompressedNu:
+      printer << "\"compressed-nu\"";
+      break;
+    case DimLevelType::CompressedNo:
+      printer << "\"compressed-no\"";
+      break;
+    case DimLevelType::CompressedNuNo:
+      printer << "\"compressed-nu-no\"";
+      break;
+    case DimLevelType::Singleton:
+      printer << "\"singleton\"";
+      break;
+    case DimLevelType::SingletonNu:
+      printer << "\"singleton-nu\"";
+      break;
+    case DimLevelType::SingletonNo:
+      printer << "\"singleton-no\"";
+      break;
+    case DimLevelType::SingletonNuNo:
+      printer << "\"singleton-nu-no\"";
+      break;
     }
     if (i != e - 1)
       printer << ", ";

diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index d30a81dd4eaa5..9f9bd918c9c82 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -148,6 +148,20 @@ DimLevelType mlir::sparse_tensor::dimLevelTypeEncoding(
     return DimLevelType::kDense;
   case SparseTensorEncodingAttr::DimLevelType::Compressed:
     return DimLevelType::kCompressed;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNu:
+    return DimLevelType::kCompressedNu;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNo:
+    return DimLevelType::kCompressedNo;
+  case SparseTensorEncodingAttr::DimLevelType::CompressedNuNo:
+    return DimLevelType::kCompressedNuNo;
+  case SparseTensorEncodingAttr::DimLevelType::Singleton:
+    return DimLevelType::kSingleton;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNu:
+    return DimLevelType::kSingletonNu;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNo:
+    return DimLevelType::kSingletonNo;
+  case SparseTensorEncodingAttr::DimLevelType::SingletonNuNo:
+    return DimLevelType::kSingletonNuNo;
   }
   llvm_unreachable("Unknown SparseTensorEncodingAttr::DimLevelType");
 }

diff  --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index 5bd10e89caa95..7fab9b34fa47b 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -466,6 +466,8 @@ static bool canUseDirectConversion(
       if (alreadyCompressed)
         return false; // Dense after Compressed not yet supported.
       break;
+    default: // TODO: investigate
+      return false;
     }
   }
   return true;

diff  --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
index 33cec89cde756..39faa506fed86 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir
@@ -42,3 +42,23 @@ func.func private @sparse_csc(tensor<?x?xf32, #CSC>)
 // CHECK-LABEL: func private @sparse_dcsc(
 // CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ], dimOrdering = affine_map<(d0, d1) -> (d1, d0)>, indexBitWidth = 64 }>>)
 func.func private @sparse_dcsc(tensor<?x?xf32, #DCSC>)
+
+// -----
+
+#COO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu-no", "singleton-no" ]
+}>
+
+// CHECK-LABEL: func private @sparse_coo(
+// CHECK-SAME: tensor<?x?xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu-no", "singleton-no" ] }>>)
+func.func private @sparse_coo(tensor<?x?xf32, #COO>)
+
+// -----
+
+#SortedCOO = #sparse_tensor.encoding<{
+  dimLevelType = [ "compressed-nu", "singleton" ]
+}>
+
+// CHECK-LABEL: func private @sparse_sorted_coo(
+// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>>)
+func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>)