[Mlir-commits] [mlir] [mlir][sparse] update doc of sparse tensor storage-layout/descriptor (PR #71249)

Fri Nov 3 16:08:15 PDT 2023

https://github.com/aartbik created https://github.com/llvm/llvm-project/pull/71249

This prepares actual "direct IR codegen" for loose compressed and 2:4. Also bit of cleanup of stale TODOs

>From a1555584f11fb1df7387ec6041fe85dd35bb1008 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Fri, 3 Nov 2023 16:06:22 -0700
Subject: [PATCH] [mlir][sparse] update doc of sparse tensor
 storage-layout/descriptor

This prepares actual "direct IR codegen" for loose compressed and 2:4.
Also bit of cleanup of stale TODOs
---
 .../IR/SparseTensorStorageLayout.h            | 54 +++++++++----------
 .../Transforms/SparseTensorDescriptor.cpp     |  2 +-
 .../Transforms/SparseTensorDescriptor.h       | 17 +-----
 3 files changed, 29 insertions(+), 44 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h
index b7ce44d877eb29c..493038a6623c3a1 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorStorageLayout.h
@@ -21,19 +21,22 @@ namespace sparse_tensor {
 
 ///===----------------------------------------------------------------------===//
 /// The sparse tensor storage scheme for a tensor is organized as a single
-/// compound type with the following fields. Note that every memref with ? size
-/// actually behaves as a "vector", i.e. the stored size is the capacity and the
-/// used size resides in the storage_specifier struct.
+/// compound type with the following fields. Note that every memref with `?`
+/// size actually behaves as a "vector", i.e. the stored size is the capacity
+/// and the used size resides in the storage_specifier struct.
 ///
 /// struct {
 ///   ; per-level l:
 ///   ;  if dense:
 ///        <nothing>
-///   ;  if compresed:
-///        memref<? x pos>  positions-l   ; positions for sparse level l
-///        memref<? x crd>  coordinates-l ; coordinates for sparse level l
-///   ;  if singleton:
-///        memref<? x crd>  coordinates-l ; coordinates for singleton level l
+///   ;  if compressed:
+///        memref<? x pos>  positions   ; positions for level l
+///        memref<? x crd>  coordinates ; coordinates for level l
+///   ;  if loose-compressed:
+///        memref<? x pos>  positions   ; lo/hi position pairs for level l
+///        memref<? x crd>  coordinates ; coordinates for level l
+///   ;  if singleton/2-out-of-4:
+///        memref<? x crd>  coordinates ; coordinates for level l
 ///
 ///   memref<? x eltType> values        ; values
 ///
@@ -59,25 +62,25 @@ namespace sparse_tensor {
 /// Examples.
 ///
 /// #CSR storage of 2-dim matrix yields
-///   memref<?xindex>                           ; positions-1
-///   memref<?xindex>                           ; coordinates-1
-///   memref<?xf64>                             ; values
-///   struct<(array<2 x i64>, array<3 x i64>)>) ; lvl0, lvl1, 3xsizes
+///  memref<?xindex>                           ; positions-1
+///  memref<?xindex>                           ; coordinates-1
+///  memref<?xf64>                             ; values
+///  struct<(array<2 x i64>, array<3 x i64>)>) ; lvl0, lvl1, 3xsizes
 ///
 /// #COO storage of 2-dim matrix yields
-///   memref<?xindex>,                          ; positions-0, essentially
-///   [0,sz] memref<?xindex>                           ; AOS coordinates storage
-///   memref<?xf64>                             ; values
-///   struct<(array<2 x i64>, array<3 x i64>)>) ; lvl0, lvl1, 3xsizes
+///  memref<?xindex>,                          ; positions-0, essentially [0,sz]
+///  memref<?xindex>                           ; AOS coordinates storage
+///  memref<?xf64>                             ; values
+///  struct<(array<2 x i64>, array<3 x i64>)>) ; lvl0, lvl1, 3xsizes
 ///
 /// Slice on #COO storage of 2-dim matrix yields
-///   ;; Inherited from the original sparse tensors
-///   memref<?xindex>,                          ; positions-0, essentially
-///   [0,sz] memref<?xindex>                           ; AOS coordinates storage
-///   memref<?xf64>                             ; values
-///   struct<(array<2 x i64>, array<3 x i64>,   ; lvl0, lvl1, 3xsizes
-///   ;; Extra slicing-metadata
-///           array<2 x i64>, array<2 x i64>)>) ; dim offset, dim stride.
+///  ;; Inherited from the original sparse tensors
+///  memref<?xindex>,                          ; positions-0, essentially   [0,sz]
+///  memref<?xindex>                           ; AOS coordinates storage
+///  memref<?xf64>                             ; values
+///  struct<(array<2 x i64>, array<3 x i64>,   ; lvl0, lvl1, 3xsizes
+///  ;; Extra slicing-metadata
+///          array<2 x i64>, array<2 x i64>)>) ; dim offset, dim stride.
 ///
 ///===----------------------------------------------------------------------===//
 
@@ -107,9 +110,6 @@ using FieldIndex = unsigned;
 /// encoding.
 class StorageLayout {
 public:
-  // TODO: Functions/methods marked with [NUMFIELDS] should use
-  // `FieldIndex` for their return type, via the same reasoning for why
-  // `Dimension`/`Level` are used both for identifiers and ranks.
   explicit StorageLayout(const SparseTensorType &stt)
       : StorageLayout(stt.getEncoding()) {}
   explicit StorageLayout(SparseTensorEncodingAttr enc) : enc(enc) {
@@ -154,12 +154,10 @@ class StorageLayout {
 // Wrapper functions to invoke StorageLayout-related method.
 //
 
-// See note [NUMFIELDS].
 inline unsigned getNumFieldsFromEncoding(SparseTensorEncodingAttr enc) {
   return StorageLayout(enc).getNumFields();
 }
 
-// See note [NUMFIELDS].
 inline unsigned getNumDataFieldsFromEncoding(SparseTensorEncodingAttr enc) {
   return StorageLayout(enc).getNumDataFields();
 }
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.cpp
index 5c363b0c781d5b9..455538954ab8f42 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.cpp
@@ -1,4 +1,4 @@
-//===- SparseTensorStorageLayout.cpp --------------------------------------===//
+//===- SparseTensorDescriptor.cpp -----------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.h b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.h
index cf7532a522fa7ad..4bd700eef522e04 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorDescriptor.h
@@ -1,4 +1,4 @@
-//===- SparseTensorDescriptor.h ------------------------------*- C++ -*-===//
+//===- SparseTensorDescriptor.h ---------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -21,13 +21,6 @@
 namespace mlir {
 namespace sparse_tensor {
 
-//===----------------------------------------------------------------------===//
-// SparseTensorDescriptor and helpers that manage the sparse tensor memory
-// layout scheme during "direct code generation" (i.e. when sparsification
-// generates the buffers as part of actual IR, in constrast with the library
-// approach where data structures are hidden behind opaque pointers).
-//===----------------------------------------------------------------------===//
-
 class SparseTensorSpecifier {
 public:
   explicit SparseTensorSpecifier(Value specifier)
@@ -57,9 +50,6 @@ class SparseTensorSpecifier {
 template <typename ValueArrayRef>
 class SparseTensorDescriptorImpl {
 protected:
-  // TODO: Functions/methods marked with [NUMFIELDS] might should use
-  // `FieldIndex` for their return type, via the same reasoning for why
-  // `Dimension`/`Level` are used both for identifiers and ranks.
   SparseTensorDescriptorImpl(SparseTensorType stt, ValueArrayRef fields)
       : rType(stt), fields(fields), layout(stt) {
     assert(layout.getNumFields() == getNumFields());
@@ -76,7 +66,6 @@ class SparseTensorDescriptorImpl {
     return layout.getMemRefFieldIndex(kind, lvl);
   }
 
-  // TODO: See note [NUMFIELDS].
   unsigned getNumFields() const { return fields.size(); }
 
   ///
@@ -140,8 +129,7 @@ class SparseTensorDescriptorImpl {
   }
 
   ValueRange getMemRefFields() const {
-    // Drop the last metadata fields.
-    return fields.drop_back();
+    return fields.drop_back(); // drop the last metadata fields
   }
 
   std::pair<FieldIndex, unsigned> getCrdMemRefIndexAndStride(Level lvl) const {
@@ -173,7 +161,6 @@ class SparseTensorDescriptor : public SparseTensorDescriptorImpl<ValueRange> {
   Value getCrdMemRefOrView(OpBuilder &builder, Location loc, Level lvl) const;
 };
 
-/// Uses SmallVectorImpl<Value> & for mutable descriptors.
 /// Using SmallVector for mutable descriptor allows users to reuse it as a
 /// tmp buffers to append value for some special cases, though users should
 /// be responsible to restore the buffer to legal states after their use. It