[Mlir-commits] [mlir] 4c9ee14 - [mlir][sparse] refactoring SparseTensorUtils: (2 of 4) reordering
wren romano
llvmlistbot at llvm.org
Thu Sep 29 14:42:20 PDT 2022
Author: wren romano
Date: 2022-09-29T14:42:07-07:00
New Revision: 4c9ee145c03cd87ad2f838d19919c254907172ed
URL: https://github.com/llvm/llvm-project/commit/4c9ee145c03cd87ad2f838d19919c254907172ed
DIFF: https://github.com/llvm/llvm-project/commit/4c9ee145c03cd87ad2f838d19919c254907172ed.diff
LOG: [mlir][sparse] refactoring SparseTensorUtils: (2 of 4) reordering
Previously, the SparseTensorUtils.cpp library contained a C++ core implementation but hid it in an anonymous namespace, exposing only a C-API for accessing it. Now we are factoring that C++ core out into a standalone C++ library so that it can be used directly by downstream clients (at the request of one such client). This refactoring has been decomposed into a stack of differentials to simplify the code review process; however, the full stack of changes should be considered together.
* D133462: Part 1: split one file into several
* (this): Part 2: Reorder chunks within files
* D133831: Part 3: General code cleanup
* D133833: Part 4: Update documentation
This part moves chunks of code within files but otherwise aims to make no changes. Many of these moves are part of a stylistic shift toward ordering the components of class definitions as follows: data members, ctors/factories, getters, other public methods, private methods. (A minimal sketch of this layout convention appears below, just before the file list.)
Depends On D133462
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D133830
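
To make the layout convention concrete, here is a minimal sketch. The `Example` class below is hypothetical (it is not part of this commit); it only illustrates the member ordering described above.

#include <cstdint>
#include <vector>

template <typename V>
class Example final {
  // 1. Data members (private by default in a `class`).
  const std::vector<uint64_t> dimSizes;
  std::vector<V> values;

public:
  // 2. Ctors/factories.
  explicit Example(const std::vector<uint64_t> &dimSizes)
      : dimSizes(dimSizes) {}
  static Example<V> *newExample(const std::vector<uint64_t> &dimSizes) {
    return new Example<V>(dimSizes);
  }

  // 3. Getters.
  uint64_t getRank() const { return dimSizes.size(); }
  const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }

  // 4. Other public methods.
  void add(V val) { values.push_back(val); }

private:
  // 5. Private methods.
  bool invariantsHold() const { return !dimSizes.empty(); }
};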
Added:
Modified:
mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
index 93cf92c8fd3e3..85c57b6c613d2 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/COO.h
@@ -54,7 +54,7 @@ using ElementConsumer =
/// by indices before passing it back to the client (most packed storage
/// formats require the elements to appear in lexicographic index order).
template <typename V>
-struct SparseTensorCOO final {
+class SparseTensorCOO final {
public:
SparseTensorCOO(const std::vector<uint64_t> &dimSizes, uint64_t capacity)
: dimSizes(dimSizes) {
@@ -64,6 +64,33 @@ struct SparseTensorCOO final {
}
}
+ /// Factory method. Permutes the original dimensions according to
+ /// the given ordering and expects subsequent add() calls to honor
+ /// that same ordering for the given indices. The result is a
+ /// fully permuted coordinate scheme.
+ ///
+ /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
+ static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
+ const uint64_t *dimSizes,
+ const uint64_t *perm,
+ uint64_t capacity = 0) {
+ std::vector<uint64_t> permsz(rank);
+ for (uint64_t r = 0; r < rank; r++) {
+ assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
+ permsz[perm[r]] = dimSizes[r];
+ }
+ return new SparseTensorCOO<V>(permsz, capacity);
+ }
+
+ /// Get the rank of the tensor.
+ uint64_t getRank() const { return dimSizes.size(); }
+
+ /// Getter for the dimension-sizes array.
+ const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
+
+ /// Getter for the elements array.
+ const std::vector<Element<V>> &getElements() const { return elements; }
+
/// Adds element as indices and value.
void add(const std::vector<uint64_t> &ind, V val) {
assert(!iteratorLocked && "Attempt to add() after startIterator()");
@@ -107,15 +134,6 @@ struct SparseTensorCOO final {
});
}
- /// Get the rank of the tensor.
- uint64_t getRank() const { return dimSizes.size(); }
-
- /// Getter for the dimension-sizes array.
- const std::vector<uint64_t> &getDimSizes() const { return dimSizes; }
-
- /// Getter for the elements array.
- const std::vector<Element<V>> &getElements() const { return elements; }
-
/// Switch into iterator mode.
void startIterator() {
iteratorLocked = true;
@@ -131,24 +149,6 @@ struct SparseTensorCOO final {
return nullptr;
}
- /// Factory method. Permutes the original dimensions according to
- /// the given ordering and expects subsequent add() calls to honor
- /// that same ordering for the given indices. The result is a
- /// fully permuted coordinate scheme.
- ///
- /// Precondition: `dimSizes` and `perm` must be valid for `rank`.
- static SparseTensorCOO<V> *newSparseTensorCOO(uint64_t rank,
- const uint64_t *dimSizes,
- const uint64_t *perm,
- uint64_t capacity = 0) {
- std::vector<uint64_t> permsz(rank);
- for (uint64_t r = 0; r < rank; r++) {
- assert(dimSizes[r] > 0 && "Dimension size zero has trivial storage");
- permsz[perm[r]] = dimSizes[r];
- }
- return new SparseTensorCOO<V>(permsz, capacity);
- }
-
private:
const std::vector<uint64_t> dimSizes; // per-dimension sizes
std::vector<Element<V>> elements; // all COO elements
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 281ea2aa1cb69..462a945eedeef 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -41,31 +41,6 @@
namespace mlir {
namespace sparse_tensor {
-namespace detail {
-
-// TODO: try to unify this with `SparseTensorFile::assertMatchesShape`
-// which is used by `openSparseTensorCOO`. It's easy enough to resolve
-// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier
-// to resolve the presence/absence of `perm` (without introducing extra
-// overhead), so perhaps the code duplication is unavoidable.
-//
-/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping
-/// semantic-order to target-order) are a refinement of the desired `shape`
-/// (in semantic-order).
-///
-/// Precondition: `perm` and `shape` must be valid for `rank`.
-inline void assertPermutedSizesMatchShape(const std::vector<uint64_t> &dimSizes,
- uint64_t rank, const uint64_t *perm,
- const uint64_t *shape) {
- assert(perm && shape);
- assert(rank == dimSizes.size() && "Rank mismatch");
- for (uint64_t r = 0; r < rank; r++)
- assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) &&
- "Dimension size mismatch");
-}
-
-} // namespace detail
-
// This forward decl is sufficient to split `SparseTensorStorageBase` into
// its own header, but isn't sufficient for `SparseTensorStorage` to join it.
template <typename V>
@@ -78,6 +53,16 @@ class SparseTensorEnumeratorBase;
/// specialization, which the C-API relies on to catch type errors
/// arising from our use of opaque pointers.
class SparseTensorStorageBase {
+protected:
+ // Since this class is virtual, we must disallow public copying in
+ // order to avoid "slicing". Since this class has data members,
+ // that means making copying protected.
+ // <https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rc-copy-virtual>
+ SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
+ // Copy-assignment would be implicitly deleted (because `dimSizes`
+ // is const), so we explicitly delete it for clarity.
+ SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
+
public:
/// Constructs a new storage object. The `perm` maps the tensor's
/// semantic-ordering of dimensions to this object's storage-order.
@@ -206,16 +191,6 @@ class SparseTensorStorageBase {
/// Finishes insertion.
virtual void endInsert() = 0;
-protected:
- // Since this class is virtual, we must disallow public copying in
- // order to avoid "slicing". Since this class has data members,
- // that means making copying protected.
- // <https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Rc-copy-virtual>
- SparseTensorStorageBase(const SparseTensorStorageBase &) = default;
- // Copy-assignment would be implicitly deleted (because `dimSizes`
- // is const), so we explicitly delete it for clarity.
- SparseTensorStorageBase &operator=(const SparseTensorStorageBase &) = delete;
-
private:
const std::vector<uint64_t> dimSizes;
std::vector<uint64_t> rev;
@@ -254,47 +229,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// Precondition: `perm` and `sparsity` must be valid for `dimSizes.size()`.
SparseTensorStorage(const std::vector<uint64_t> &dimSizes,
const uint64_t *perm, const DimLevelType *sparsity,
- SparseTensorCOO<V> *coo)
- : SparseTensorStorage(dimSizes, perm, sparsity) {
- // Provide hints on capacity of pointers and indices.
- // TODO: needs much fine-tuning based on actual sparsity; currently
- // we reserve pointer/index space based on all previous dense
- // dimensions, which works well up to first sparse dim; but
- // we should really use nnz and dense/sparse distribution.
- bool allDense = true;
- uint64_t sz = 1;
- for (uint64_t r = 0, rank = getRank(); r < rank; r++) {
- if (isCompressedDim(r)) {
- // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply
- // `sz` by that before reserving. (For now we just use 1.)
- pointers[r].reserve(sz + 1);
- pointers[r].push_back(0);
- indices[r].reserve(sz);
- sz = 1;
- allDense = false;
- } else if (isSingletonDim(r)) {
- indices[r].reserve(sz);
- sz = 1;
- allDense = false;
- } else { // Dense dimension.
- assert(isDenseDim(r));
- sz = detail::checkedMul(sz, getDimSizes()[r]);
- }
- }
- // Then assign contents from coordinate scheme tensor if provided.
- if (coo) {
- // Ensure both preconditions of `fromCOO`.
- assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch");
- coo->sort();
- // Now actually insert the `elements`.
- const std::vector<Element<V>> &elements = coo->getElements();
- uint64_t nnz = elements.size();
- values.reserve(nnz);
- fromCOO(elements, 0, nnz, 0);
- } else if (allDense) {
- values.resize(sz, 0);
- }
- }
+ SparseTensorCOO<V> *coo);
/// Constructs a sparse tensor storage scheme with the given dimensions,
/// permutation, and per-dimension dense/sparse annotations, using
@@ -307,6 +242,29 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
const uint64_t *perm, const DimLevelType *sparsity,
const SparseTensorStorageBase &tensor);
+ /// Factory method. Constructs a sparse tensor storage scheme with the given
+ /// dimensions, permutation, and per-dimension dense/sparse annotations,
+ /// using the coordinate scheme tensor for the initial contents if provided.
+ /// In the latter case, the coordinate scheme must respect the same
+ /// permutation as is desired for the new sparse tensor storage.
+ ///
+ /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`.
+ static SparseTensorStorage<P, I, V> *
+ newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+ const DimLevelType *sparsity, SparseTensorCOO<V> *coo);
+
+ /// Factory method. Constructs a sparse tensor storage scheme with
+ /// the given dimensions, permutation, and per-dimension dense/sparse
+ /// annotations, using the sparse tensor for the initial contents.
+ ///
+ /// Preconditions:
+ /// * `shape`, `perm`, and `sparsity` must be valid for `rank`.
+ /// * The `tensor` must have the same value type `V`.
+ static SparseTensorStorage<P, I, V> *
+ newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+ const DimLevelType *sparsity,
+ const SparseTensorStorageBase *source);
+
~SparseTensorStorage() final = default;
/// Partially specialize these getter methods based on template types.
@@ -396,55 +354,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
return coo;
}
- /// Factory method. Constructs a sparse tensor storage scheme with the given
- /// dimensions, permutation, and per-dimension dense/sparse annotations,
- /// using the coordinate scheme tensor for the initial contents if provided.
- /// In the latter case, the coordinate scheme must respect the same
- /// permutation as is desired for the new sparse tensor storage.
- ///
- /// Precondition: `shape`, `perm`, and `sparsity` must be valid for `rank`.
- static SparseTensorStorage<P, I, V> *
- newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
- const DimLevelType *sparsity, SparseTensorCOO<V> *coo) {
- SparseTensorStorage<P, I, V> *n = nullptr;
- if (coo) {
- const auto &coosz = coo->getDimSizes();
- detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape);
- n = new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
- } else {
- std::vector<uint64_t> permsz(rank);
- for (uint64_t r = 0; r < rank; r++) {
- assert(shape[r] > 0 && "Dimension size zero has trivial storage");
- permsz[perm[r]] = shape[r];
- }
- // We pass the null `coo` to ensure we select the intended constructor.
- n = new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, coo);
- }
- return n;
- }
-
- /// Factory method. Constructs a sparse tensor storage scheme with
- /// the given dimensions, permutation, and per-dimension dense/sparse
- /// annotations, using the sparse tensor for the initial contents.
- ///
- /// Preconditions:
- /// * `shape`, `perm`, and `sparsity` must be valid for `rank`.
- /// * The `tensor` must have the same value type `V`.
- static SparseTensorStorage<P, I, V> *
- newSparseTensor(uint64_t rank, const uint64_t *shape, const uint64_t *perm,
- const DimLevelType *sparsity,
- const SparseTensorStorageBase *source) {
- assert(source && "Got nullptr for source");
- SparseTensorEnumeratorBase<V> *enumerator;
- source->newEnumerator(&enumerator, rank, perm);
- const auto &permsz = enumerator->permutedSizes();
- detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape);
- auto *tensor =
- new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, *source);
- delete enumerator;
- return tensor;
- }
-
private:
/// Appends an arbitrary new position to `pointers[d]`. This method
/// checks that `pos` is representable in the `P` type; however, it
@@ -820,6 +729,115 @@ class SparseTensorNNZ final {
std::vector<std::vector<uint64_t>> nnz;
};
+//===----------------------------------------------------------------------===//
+// Definitions of the ctors and factories of `SparseTensorStorage<P,I,V>`.
+
+namespace detail {
+
+// TODO: try to unify this with `SparseTensorFile::assertMatchesShape`
+// which is used by `openSparseTensorCOO`. It's easy enough to resolve
+// the `std::vector` vs pointer mismatch for `dimSizes`; but it's trickier
+// to resolve the presence/absence of `perm` (without introducing extra
+// overhead), so perhaps the code duplication is unavoidable.
+//
+/// Asserts that the `dimSizes` (in target-order) under the `perm` (mapping
+/// semantic-order to target-order) are a refinement of the desired `shape`
+/// (in semantic-order).
+///
+/// Precondition: `perm` and `shape` must be valid for `rank`.
+inline void assertPermutedSizesMatchShape(const std::vector<uint64_t> &dimSizes,
+ uint64_t rank, const uint64_t *perm,
+ const uint64_t *shape) {
+ assert(perm && shape);
+ assert(rank == dimSizes.size() && "Rank mismatch");
+ for (uint64_t r = 0; r < rank; r++)
+ assert((shape[r] == 0 || shape[r] == dimSizes[perm[r]]) &&
+ "Dimension size mismatch");
+}
+
+} // namespace detail
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V> *SparseTensorStorage<P, I, V>::newSparseTensor(
+ uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+ const DimLevelType *sparsity, SparseTensorCOO<V> *coo) {
+ SparseTensorStorage<P, I, V> *n = nullptr;
+ if (coo) {
+ const auto &coosz = coo->getDimSizes();
+ detail::assertPermutedSizesMatchShape(coosz, rank, perm, shape);
+ n = new SparseTensorStorage<P, I, V>(coosz, perm, sparsity, coo);
+ } else {
+ std::vector<uint64_t> permsz(rank);
+ for (uint64_t r = 0; r < rank; r++) {
+ assert(shape[r] > 0 && "Dimension size zero has trivial storage");
+ permsz[perm[r]] = shape[r];
+ }
+ // We pass the null `coo` to ensure we select the intended constructor.
+ n = new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, coo);
+ }
+ return n;
+}
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V> *SparseTensorStorage<P, I, V>::newSparseTensor(
+ uint64_t rank, const uint64_t *shape, const uint64_t *perm,
+ const DimLevelType *sparsity, const SparseTensorStorageBase *source) {
+ assert(source && "Got nullptr for source");
+ SparseTensorEnumeratorBase<V> *enumerator;
+ source->newEnumerator(&enumerator, rank, perm);
+ const auto &permsz = enumerator->permutedSizes();
+ detail::assertPermutedSizesMatchShape(permsz, rank, perm, shape);
+ auto *tensor =
+ new SparseTensorStorage<P, I, V>(permsz, perm, sparsity, *source);
+ delete enumerator;
+ return tensor;
+}
+
+template <typename P, typename I, typename V>
+SparseTensorStorage<P, I, V>::SparseTensorStorage(
+ const std::vector<uint64_t> &dimSizes, const uint64_t *perm,
+ const DimLevelType *sparsity, SparseTensorCOO<V> *coo)
+ : SparseTensorStorage(dimSizes, perm, sparsity) {
+ // Provide hints on capacity of pointers and indices.
+ // TODO: needs much fine-tuning based on actual sparsity; currently
+ // we reserve pointer/index space based on all previous dense
+ // dimensions, which works well up to first sparse dim; but
+ // we should really use nnz and dense/sparse distribution.
+ bool allDense = true;
+ uint64_t sz = 1;
+ for (uint64_t r = 0, rank = getRank(); r < rank; r++) {
+ if (isCompressedDim(r)) {
+ // TODO: Take a parameter between 1 and `dimSizes[r]`, and multiply
+ // `sz` by that before reserving. (For now we just use 1.)
+ pointers[r].reserve(sz + 1);
+ pointers[r].push_back(0);
+ indices[r].reserve(sz);
+ sz = 1;
+ allDense = false;
+ } else if (isSingletonDim(r)) {
+ indices[r].reserve(sz);
+ sz = 1;
+ allDense = false;
+ } else { // Dense dimension.
+ assert(isDenseDim(r));
+ sz = detail::checkedMul(sz, getDimSizes()[r]);
+ }
+ }
+ // Then assign contents from coordinate scheme tensor if provided.
+ if (coo) {
+ // Ensure both preconditions of `fromCOO`.
+ assert(coo->getDimSizes() == getDimSizes() && "Tensor size mismatch");
+ coo->sort();
+ // Now actually insert the `elements`.
+ const std::vector<Element<V>> &elements = coo->getElements();
+ uint64_t nnz = elements.size();
+ values.reserve(nnz);
+ fromCOO(elements, 0, nnz, 0);
+ } else if (allDense) {
+ values.resize(sz, 0);
+ }
+}
+
template <typename P, typename I, typename V>
SparseTensorStorage<P, I, V>::SparseTensorStorage(
const std::vector<uint64_t> &dimSizes, const uint64_t *perm,
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index cb23c57a2efd8..8498586175702 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -12,34 +12,6 @@
// operates on sparse tensors. The provided functionality is **not** part
// of core MLIR, however.
//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/ExecutionEngine/SparseTensorUtils.h"
-#include "mlir/ExecutionEngine/SparseTensor/COO.h"
-#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h"
-#include "mlir/ExecutionEngine/SparseTensor/File.h"
-#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
-
-#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
-
-#include <algorithm>
-#include <cassert>
-#include <cctype>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <fstream>
-#include <functional>
-#include <iostream>
-#include <limits>
-#include <numeric>
-
-using namespace mlir::sparse_tensor;
-
-//===----------------------------------------------------------------------===//
-//
-// Internal support for storing and reading sparse tensors.
-//
// The following memory-resident sparse storage schemes are supported:
//
// (a) A coordinate scheme for temporarily storing and lexicographically
@@ -72,6 +44,28 @@ using namespace mlir::sparse_tensor;
//
//===----------------------------------------------------------------------===//
+#include "mlir/ExecutionEngine/SparseTensorUtils.h"
+#include "mlir/ExecutionEngine/SparseTensor/COO.h"
+#include "mlir/ExecutionEngine/SparseTensor/ErrorHandling.h"
+#include "mlir/ExecutionEngine/SparseTensor/File.h"
+#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
+
+#ifdef MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
+
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <numeric>
+
+using namespace mlir::sparse_tensor;
+
namespace {
/// Initializes sparse tensor from an external COO-flavored format.
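
For orientation, here is a minimal usage sketch of the two relocated factory methods. The driver below is hypothetical and not part of this commit; the `DimLevelType` enumerator names (`kDense`, `kCompressed`) are assumptions inferred from the `isDenseDim`/`isCompressedDim` usage in the diff, and the ownership of `coo` after `newSparseTensor` is not specified by this diff.

#include <cstdint>

#include "mlir/ExecutionEngine/SparseTensor/COO.h"
#include "mlir/ExecutionEngine/SparseTensor/Storage.h"

using namespace mlir::sparse_tensor;

int main() {
  const uint64_t rank = 2;
  const uint64_t dimSizes[] = {2, 3};
  const uint64_t perm[] = {0, 1}; // identity permutation
  // Build a COO tensor with two nonzeros via the factory.
  auto *coo = SparseTensorCOO<double>::newSparseTensorCOO(rank, dimSizes, perm,
                                                          /*capacity=*/2);
  coo->add({0, 0}, 1.0);
  coo->add({1, 2}, 2.0);
  // Per-dimension annotations; the enumerator names are assumptions here.
  const DimLevelType sparsity[] = {DimLevelType::kDense,
                                   DimLevelType::kCompressed};
  // Hand the (sorted) contents over to packed storage.
  auto *tensor =
      SparseTensorStorage<uint64_t, uint64_t, double>::newSparseTensor(
          rank, dimSizes, perm, sparsity, coo);
  delete tensor; // NB: cleanup of `coo` itself is not shown in this diff.
  return 0;
}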