[Mlir-commits] [mlir] [mlir][sparse] introduce MapRef, unify conversion/codegen for reader (PR #68360)
llvmlistbot at llvm.org
Thu Oct 5 15:04:10 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-mlir-sparse
<details>
<summary>Changes</summary>
This revision introduces a MapRef, which will support a future generalization beyond permutations (e.g., block sparsity). This revision also unifies the conversion/codegen paths for the `sparse_tensor.new` operation when reading from file (i.e., the readers). Note that more unification is planned, as well as support for general affine dim2lvl and lvl2dim maps (all marked with TODOs).
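As a quick orientation for reviewers, here is a minimal usage sketch of the new `MapRef`, written against the declaration in `MapRef.h` shown in the diff below. It is not part of the patch: it assumes the constructor (defined in the added `MapRef.cpp`, which is not visible in the truncated diff) classifies the given arrays as a permutation, and that the snippet is compiled and linked against the MLIR sparse tensor runtime.

```cpp
#include <cassert>
#include <cstdint>

#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"

using mlir::sparse_tensor::MapRef;

int main() {
  // A rank-2 transposition: dim 0 maps to lvl 1 and dim 1 maps to lvl 0.
  // For a permutation, lvl2dim is simply the inverse array of dim2lvl.
  const uint64_t dim2lvl[] = {1, 0};
  const uint64_t lvl2dim[] = {1, 0};
  MapRef map(/*dimRank=*/2, /*lvlRank=*/2, dim2lvl, lvl2dim);

  // Push dimension coordinates forward to level coordinates; this is what
  // the reader loops do for every element parsed from the file.
  const uint64_t dimCoords[2] = {3, 7};
  uint64_t lvlCoords[2];
  map.pushforward(dimCoords, lvlCoords);
  assert(lvlCoords[0] == 7 && lvlCoords[1] == 3);

  // Push the level coordinates back to dimension coordinates (round trip).
  uint64_t backCoords[2];
  map.pushbackward(lvlCoords, backCoords);
  assert(backCoords[0] == 3 && backCoords[1] == 7);
  return 0;
}
```

The updated `readCOOLoop` and `readToBuffersLoop` in `File.h` follow exactly this pattern: parse the dim coordinates of an element, then `pushforward` into the level-coordinate buffer.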
---
Patch is 74.82 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/68360.diff
13 Files Affected:
- (modified) mlir/include/mlir/ExecutionEngine/SparseTensor/File.h (+58-98)
- (added) mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h (+96)
- (modified) mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h (+7-101)
- (modified) mlir/include/mlir/ExecutionEngine/SparseTensorRuntime.h (-8)
- (modified) mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp (+89)
- (modified) mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h (+18)
- (modified) mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp (+20-53)
- (modified) mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp (+16-95)
- (modified) mlir/lib/ExecutionEngine/SparseTensor/CMakeLists.txt (+1)
- (added) mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp (+52)
- (modified) mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp (+21-39)
- (modified) mlir/test/Dialect/SparseTensor/codegen.mlir (+88-84)
- (modified) mlir/test/Dialect/SparseTensor/conversion.mlir (+9-9)
``````````diff
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
index 78c1a0544e3a521..9157bfa7e773239 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -20,6 +20,7 @@
#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
#define MLIR_EXECUTIONENGINE_SPARSETENSOR_FILE_H
+#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
#include "mlir/ExecutionEngine/SparseTensor/Storage.h"
#include <fstream>
@@ -75,6 +76,10 @@ inline V readValue(char **linePtr, bool isPattern) {
} // namespace detail
+//===----------------------------------------------------------------------===//
+//
+// Reader class.
+//
//===----------------------------------------------------------------------===//
/// This class abstracts over the information stored in file headers,
@@ -132,6 +137,7 @@ class SparseTensorReader final {
/// Reads and parses the file's header.
void readHeader();
+ /// Returns the stored value kind.
ValueKind getValueKind() const { return valueKind_; }
/// Checks if a header has been successfully read.
@@ -185,58 +191,37 @@ class SparseTensorReader final {
/// valid after parsing the header.
void assertMatchesShape(uint64_t rank, const uint64_t *shape) const;
- /// Reads a sparse tensor element from the next line in the input file and
- /// returns the value of the element. Stores the coordinates of the element
- /// to the `dimCoords` array.
- template <typename V>
- V readElement(uint64_t dimRank, uint64_t *dimCoords) {
- assert(dimRank == getRank() && "rank mismatch");
- char *linePtr = readCoords(dimCoords);
- return detail::readValue<V>(&linePtr, isPattern());
- }
-
- /// Allocates a new COO object for `lvlSizes`, initializes it by reading
- /// all the elements from the file and applying `dim2lvl` to their
- /// dim-coordinates, and then closes the file. Templated on V only.
- template <typename V>
- SparseTensorCOO<V> *readCOO(uint64_t lvlRank, const uint64_t *lvlSizes,
- const uint64_t *dim2lvl);
-
/// Allocates a new sparse-tensor storage object with the given encoding,
/// initializes it by reading all the elements from the file, and then
/// closes the file. Templated on P, I, and V.
template <typename P, typename I, typename V>
SparseTensorStorage<P, I, V> *
readSparseTensor(uint64_t lvlRank, const uint64_t *lvlSizes,
- const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
- const uint64_t *dim2lvl) {
- auto *lvlCOO = readCOO<V>(lvlRank, lvlSizes, dim2lvl);
+ const DimLevelType *lvlTypes, const uint64_t *dim2lvl,
+ const uint64_t *lvl2dim) {
+ const uint64_t dimRank = getRank();
+ MapRef map(dimRank, lvlRank, dim2lvl, lvl2dim);
+ auto *coo = readCOO<V>(map, lvlSizes);
auto *tensor = SparseTensorStorage<P, I, V>::newFromCOO(
- getRank(), getDimSizes(), lvlRank, lvlTypes, lvl2dim, *lvlCOO);
- delete lvlCOO;
+ dimRank, getDimSizes(), lvlRank, lvlTypes, lvl2dim, *coo);
+ delete coo;
return tensor;
}
/// Reads the COO tensor from the file, stores the coordinates and values to
/// the given buffers, returns a boolean value to indicate whether the COO
/// elements are sorted.
- /// Precondition: the buffers should have enough space to hold the elements.
template <typename C, typename V>
bool readToBuffers(uint64_t lvlRank, const uint64_t *dim2lvl,
- C *lvlCoordinates, V *values);
+ const uint64_t *lvl2dim, C *lvlCoordinates, V *values);
private:
- /// Attempts to read a line from the file. Is private because there's
- /// no reason for client code to call it.
+ /// Attempts to read a line from the file.
void readLine();
/// Reads the next line of the input file and parses the coordinates
/// into the `dimCoords` argument. Returns the position in the `line`
- /// buffer where the element's value should be parsed from. This method
- /// has been factored out from `readElement` to minimize code bloat
- /// for the generated library.
- ///
- /// Precondition: `dimCoords` is valid for `getRank()`.
+ /// buffer where the element's value should be parsed from.
template <typename C>
char *readCoords(C *dimCoords) {
readLine();
@@ -251,24 +236,20 @@ class SparseTensorReader final {
return linePtr;
}
- /// The internal implementation of `readCOO`. We template over
- /// `IsPattern` in order to perform LICM without needing to duplicate the
- /// source code.
- //
- // TODO: We currently take the `dim2lvl` argument as a `PermutationRef`
- // since that's what `readCOO` creates. Once we update `readCOO` to
- // functionalize the mapping, then this helper will just take that
- // same function.
+ /// Reads all the elements from the file while applying the given map.
+ template <typename V>
+ SparseTensorCOO<V> *readCOO(const MapRef &map, const uint64_t *lvlSizes);
+
+  /// The implementation of `readCOO` that is templated over `IsPattern` in
+  /// order to perform LICM without needing to duplicate the source code.
template <typename V, bool IsPattern>
- void readCOOLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
- SparseTensorCOO<V> *lvlCOO);
+ void readCOOLoop(const MapRef &map, SparseTensorCOO<V> *coo);
- /// The internal implementation of `readToBuffers`. We template over
- /// `IsPattern` in order to perform LICM without needing to duplicate the
- /// source code.
+ /// The internal implementation of `readToBuffers`. We template over
+ /// `IsPattern` in order to perform LICM without needing to duplicate
+ /// the source code.
template <typename C, typename V, bool IsPattern>
- bool readToBuffersLoop(uint64_t lvlRank, detail::PermutationRef dim2lvl,
- C *lvlCoordinates, V *values);
+ bool readToBuffersLoop(const MapRef &map, C *lvlCoordinates, V *values);
/// Reads the MME header of a general sparse matrix of type real.
void readMMEHeader();
@@ -288,96 +269,76 @@ class SparseTensorReader final {
char line[kColWidth];
};
+//===----------------------------------------------------------------------===//
+//
+// Reader class methods.
+//
//===----------------------------------------------------------------------===//
template <typename V>
-SparseTensorCOO<V> *SparseTensorReader::readCOO(uint64_t lvlRank,
- const uint64_t *lvlSizes,
- const uint64_t *dim2lvl) {
+SparseTensorCOO<V> *SparseTensorReader::readCOO(const MapRef &map,
+ const uint64_t *lvlSizes) {
assert(isValid() && "Attempt to readCOO() before readHeader()");
- const uint64_t dimRank = getRank();
- assert(lvlRank == dimRank && "Rank mismatch");
- detail::PermutationRef d2l(dimRank, dim2lvl);
// Prepare a COO object with the number of stored elems as initial capacity.
- auto *lvlCOO = new SparseTensorCOO<V>(lvlRank, lvlSizes, getNSE());
- // Do some manual LICM, to avoid assertions in the for-loop.
- const bool IsPattern = isPattern();
- if (IsPattern)
- readCOOLoop<V, true>(lvlRank, d2l, lvlCOO);
+ auto *coo = new SparseTensorCOO<V>(map.getLvlRank(), lvlSizes, getNSE());
+ // Enter the reading loop.
+ if (isPattern())
+ readCOOLoop<V, true>(map, coo);
else
- readCOOLoop<V, false>(lvlRank, d2l, lvlCOO);
+ readCOOLoop<V, false>(map, coo);
// Close the file and return the COO.
closeFile();
- return lvlCOO;
+ return coo;
}
template <typename V, bool IsPattern>
-void SparseTensorReader::readCOOLoop(uint64_t lvlRank,
- detail::PermutationRef dim2lvl,
- SparseTensorCOO<V> *lvlCOO) {
- const uint64_t dimRank = getRank();
+void SparseTensorReader::readCOOLoop(const MapRef &map,
+ SparseTensorCOO<V> *coo) {
+ const uint64_t dimRank = map.getDimRank();
+ const uint64_t lvlRank = map.getLvlRank();
+ assert(dimRank == getRank());
std::vector<uint64_t> dimCoords(dimRank);
std::vector<uint64_t> lvlCoords(lvlRank);
- for (uint64_t nse = getNSE(), k = 0; k < nse; ++k) {
- // We inline `readElement` here in order to avoid redundant
- // assertions, since they're guaranteed by the call to `isValid()`
- // and the construction of `dimCoords` above.
+ for (uint64_t k = 0, nse = getNSE(); k < nse; k++) {
char *linePtr = readCoords(dimCoords.data());
const V value = detail::readValue<V, IsPattern>(&linePtr);
- dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoords.data());
- // TODO: <https://github.com/llvm/llvm-project/issues/54179>
- lvlCOO->add(lvlCoords, value);
+ map.pushforward(dimCoords.data(), lvlCoords.data());
+ coo->add(lvlCoords, value);
}
}
template <typename C, typename V>
bool SparseTensorReader::readToBuffers(uint64_t lvlRank,
const uint64_t *dim2lvl,
+ const uint64_t *lvl2dim,
C *lvlCoordinates, V *values) {
assert(isValid() && "Attempt to readCOO() before readHeader()");
- // Construct a `PermutationRef` for the `pushforward` below.
- // TODO: This specific implementation does not generalize to arbitrary
- // mappings, but once we functionalize the `dim2lvl` argument we can
- // simply use that function instead.
- const uint64_t dimRank = getRank();
- assert(lvlRank == dimRank && "Rank mismatch");
- detail::PermutationRef d2l(dimRank, dim2lvl);
- // Do some manual LICM, to avoid assertions in the for-loop.
+ MapRef map(getRank(), lvlRank, dim2lvl, lvl2dim);
bool isSorted =
- isPattern()
- ? readToBuffersLoop<C, V, true>(lvlRank, d2l, lvlCoordinates, values)
- : readToBuffersLoop<C, V, false>(lvlRank, d2l, lvlCoordinates,
- values);
-
- // Close the file and return isSorted.
+ isPattern() ? readToBuffersLoop<C, V, true>(map, lvlCoordinates, values)
+ : readToBuffersLoop<C, V, false>(map, lvlCoordinates, values);
closeFile();
return isSorted;
}
template <typename C, typename V, bool IsPattern>
-bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
- detail::PermutationRef dim2lvl,
- C *lvlCoordinates, V *values) {
- const uint64_t dimRank = getRank();
+bool SparseTensorReader::readToBuffersLoop(const MapRef &map, C *lvlCoordinates,
+ V *values) {
+ const uint64_t dimRank = map.getDimRank();
+ const uint64_t lvlRank = map.getLvlRank();
const uint64_t nse = getNSE();
+ assert(dimRank == getRank());
std::vector<C> dimCoords(dimRank);
- // Read the first element with isSorted=false as a way to avoid accessing its
- // previous element.
bool isSorted = false;
char *linePtr;
- // We inline `readElement` here in order to avoid redundant assertions,
- // since they're guaranteed by the call to `isValid()` and the construction
- // of `dimCoords` above.
const auto readNextElement = [&]() {
linePtr = readCoords<C>(dimCoords.data());
- dim2lvl.pushforward(dimRank, dimCoords.data(), lvlCoordinates);
+ map.pushforward(dimCoords.data(), lvlCoordinates);
*values = detail::readValue<V, IsPattern>(&linePtr);
if (isSorted) {
- // Note that isSorted was set to false while reading the first element,
+ // Note that isSorted is set to false when reading the first element,
// to guarantee the safeness of using prevLvlCoords.
C *prevLvlCoords = lvlCoordinates - lvlRank;
- // TODO: define a new CoordsLT which is like ElementLT but doesn't have
- // the V parameter, and use it here.
for (uint64_t l = 0; l < lvlRank; ++l) {
if (prevLvlCoords[l] != lvlCoordinates[l]) {
if (prevLvlCoords[l] > lvlCoordinates[l])
@@ -393,7 +354,6 @@ bool SparseTensorReader::readToBuffersLoop(uint64_t lvlRank,
isSorted = true;
for (uint64_t n = 1; n < nse; ++n)
readNextElement();
-
return isSorted;
}
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
new file mode 100644
index 000000000000000..1c155568802e579
--- /dev/null
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -0,0 +1,96 @@
+//===- MapRef.h - A dim2lvl/lvl2dim map encoding ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A dim2lvl/lvl2dim map encoding class, with utility methods.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+#define MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
+
+#include <cinttypes>
+
+#include <cassert>
+
+namespace mlir {
+namespace sparse_tensor {
+
+/// A class for capturing the sparse tensor type map with a compact encoding.
+///
+/// Currently, the following situations are supported:
+/// (1) map is an identity
+/// (2) map is a permutation
+/// (3) map has affine ops (restricted set)
+///
+/// The pushforward/backward operations are fast for (1) and (2) but
+/// incur some obvious overhead for situation (3).
+///
+class MapRef final {
+public:
+ MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);
+
+ //
+ // Push forward maps from dimensions to levels.
+ //
+
+ template <typename T> inline void pushforward(const T *in, T *out) const {
+ switch (kind) {
+ case MapKind::kIdentity:
+ for (uint64_t i = 0; i < dimRank; ++i)
+ out[i] = in[i]; // TODO: optimize with in == out ?
+ break;
+ case MapKind::kPermutation:
+ for (uint64_t i = 0; i < dimRank; ++i)
+ out[dim2lvl[i]] = in[i];
+ break;
+ case MapKind::kAffine:
+ assert(0 && "coming soon");
+ break;
+ }
+ }
+
+ //
+ // Push backward maps from levels to dimensions.
+ //
+
+ template <typename T> inline void pushbackward(const T *in, T *out) const {
+ switch (kind) {
+ case MapKind::kIdentity:
+ for (uint64_t i = 0; i < lvlRank; ++i)
+ out[i] = in[i];
+ break;
+ case MapKind::kPermutation:
+ for (uint64_t i = 0; i < lvlRank; ++i)
+ out[lvl2dim[i]] = in[i];
+ break;
+ case MapKind::kAffine:
+ assert(0 && "coming soon");
+ break;
+ }
+ }
+
+ uint64_t getDimRank() const { return dimRank; }
+ uint64_t getLvlRank() const { return lvlRank; }
+
+private:
+ enum class MapKind { kIdentity, kPermutation, kAffine };
+
+ bool isIdentity() const;
+ bool isPermutation() const;
+
+ MapKind kind;
+ const uint64_t dimRank;
+ const uint64_t lvlRank;
+ const uint64_t *const dim2lvl; // non-owning pointer
+ const uint64_t *const lvl2dim; // non-owning pointer
+};
+
+} // namespace sparse_tensor
+} // namespace mlir
+
+#endif // MLIR_EXECUTIONENGINE_SPARSETENSOR_MAPREF_H
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index 28c28c28109c3c7..37ad3c1b042313c 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -49,103 +49,6 @@ class SparseTensorEnumeratorBase;
template <typename P, typename C, typename V>
class SparseTensorEnumerator;
-namespace detail {
-
-/// Checks whether the `perm` array is a permutation of `[0 .. size)`.
-inline bool isPermutation(uint64_t size, const uint64_t *perm) {
- assert(perm && "Got nullptr for permutation");
- std::vector<bool> seen(size, false);
- for (uint64_t i = 0; i < size; ++i) {
- const uint64_t j = perm[i];
- if (j >= size || seen[j])
- return false;
- seen[j] = true;
- }
- for (uint64_t i = 0; i < size; ++i)
- if (!seen[i])
- return false;
- return true;
-}
-
-/// Wrapper around `isPermutation` to ensure consistent error messages.
-inline void assertIsPermutation(uint64_t size, const uint64_t *perm) {
-#ifndef NDEBUG
- if (!isPermutation(size, perm))
- MLIR_SPARSETENSOR_FATAL("Not a permutation of [0..%" PRIu64 ")\n", size);
-#endif
-}
-
-/// A class for capturing the knowledge that `isPermutation` is true.
-class PermutationRef final {
-public:
- /// Asserts `isPermutation` and returns the witness to that being true.
- explicit PermutationRef(uint64_t size, const uint64_t *perm)
- : permSize(size), perm(perm) {
- assertIsPermutation(size, perm);
- }
-
- uint64_t size() const { return permSize; }
-
- const uint64_t *data() const { return perm; }
-
- const uint64_t &operator[](uint64_t i) const {
- assert(i < permSize && "index is out of bounds");
- return perm[i];
- }
-
- /// Constructs a pushforward array of values. This method is the inverse
- /// of `permute` in the sense that for all `p` and `xs` we have:
- /// * `p.permute(p.pushforward(xs)) == xs`
- /// * `p.pushforward(p.permute(xs)) == xs`
- template <typename T>
- inline std::vector<T> pushforward(const std::vector<T> &values) const {
- return pushforward(values.size(), values.data());
- }
-
- template <typename T>
- inline std::vector<T> pushforward(uint64_t size, const T *values) const {
- std::vector<T> out(permSize);
- pushforward(size, values, out.data());
- return out;
- }
-
- template <typename T>
- inline void pushforward(uint64_t size, const T *values, T *out) const {
- assert(size == permSize && "size mismatch");
- for (uint64_t i = 0; i < permSize; ++i)
- out[perm[i]] = values[i];
- }
-
- /// Constructs a permuted array of values. This method is the inverse
- /// of `pushforward` in the sense that for all `p` and `xs` we have:
- /// * `p.permute(p.pushforward(xs)) == xs`
- /// * `p.pushforward(p.permute(xs)) == xs`
- template <typename T>
- inline std::vector<T> permute(const std::vector<T> &values) const {
- return permute(values.size(), values.data());
- }
-
- template <typename T>
- inline std::vector<T> permute(uint64_t size, const T *values) const {
- std::vector<T> out(permSize);
- permute(size, values, out.data());
- return out;
- }
-
- template <typename T>
- inline void permute(uint64_t size, const T *values, T *out) const {
- assert(size == permSize && "size mismatch");
- for (uint64_t i = 0; i < permSize; ++i)
- out[i] = values[perm[i]];
- }
-
-private:
- const uint64_t permSize;
- const uint64_t *const perm; // non-owning pointer.
-};
-
-} // namespace detail
-
/// Abstract base class for `SparseTensorStorage<P,C,V>`. This class
/// takes responsibility for all the `<P,C,V>`-independent aspects
/// of the tensor (e.g., shape, sparsity, permutation). In addition,
@@ -263,7 +166,7 @@ class SparseTensorStorageBase {
bool isUniqueLvl(uint64_t l) const { return isUniqueDLT(getLvlType(l)); }
/// Allocates a new enumerator. Callers must make sure to delete
- /// the enumerator when they're done with it. The first argument
+ /// the enumerator when they're done with it. The first argument
/// is the out-parameter for storing the newly allocated enumerator;
/// all other arguments are passed along to the `SparseTensorEnumerator`
/// ctor and must satisfy the preconditions/assertions thereof.
@@ -326,6 +229,7 @@ class SparseTensorStorageBase {
const std::vector<uint64_t> lvl2dim;
};
+
/// A memory-resident sparse tensor using a storage scheme based on
/// per-level sparse/dense annotations. This data structure provides
/// a bufferized form of a sparse tensor type. In contrast to generating
@@ -401,7 +305,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
const DimLevelType *lvlTypes, const uint64_t *lvl2dim,
const intptr_t *lvlBufs);
- /// Allocates a new empty sparse tensor. The preconditions/assertions
+ /// Allocates a new empty sparse tensor. The preconditions/assertions
//...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/68360