[Mlir-commits] [mlir] [mlir][sparse] support 2:4 structured sparsity and loose compressed (PR #69968)
Aart Bik
llvmlistbot at llvm.org
Mon Oct 23 13:46:45 PDT 2023
https://github.com/aartbik created https://github.com/llvm/llvm-project/pull/69968
This adds library support for these two new level formats.
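For readers unfamiliar with the two layouts, below is a minimal, standalone C++ sketch (not the MLIR runtime API; all names are made up for this illustration) of how the 3x8 matrix used by the new sparse_ds.mlir test is encoded. Loose compressed stores a [lo, hi) position pair per parent entry instead of a single shared boundary, and 2:4 structured sparsity keeps, for each block of four columns, only the two nonzero values plus their in-block offsets. The arrays it computes match the CHECK lines of the test.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // The 3x8 matrix from mlir/test/Integration/data/ds.mtx.
  const double A[3][8] = {{0, 0, 1, 2, 0, 3, 0, 4},
                          {0, 5, 6, 0, 7, 0, 0, 8},
                          {9, 0, 10, 0, 11, 12, 0, 0}};

  // Loose compressed: a [lo, hi) position pair per row, so segments may
  // leave slack between them (none here, hence hi(i) == lo(i+1)).
  std::vector<uint64_t> loosePos, looseCrd;
  for (uint64_t i = 0; i < 3; i++) {
    loosePos.push_back(looseCrd.size()); // lo of row i
    for (uint64_t j = 0; j < 8; j++)
      if (A[i][j] != 0)
        looseCrd.push_back(j);
    loosePos.push_back(looseCrd.size()); // hi of row i
  }
  // loosePos is now {0, 4, 4, 8, 8, 12}, as checked in sparse_ds.mlir.

  // 2:4 structured sparsity: every block of 4 columns holds exactly 2
  // nonzeros; only the in-block offsets (0..3) and the values are stored.
  std::vector<uint8_t> nvCrd;
  std::vector<double> nvVal;
  for (uint64_t i = 0; i < 3; i++)
    for (uint64_t j = 0; j < 8; j++)
      if (A[i][j] != 0) {
        nvCrd.push_back(static_cast<uint8_t>(j % 4)); // offset within block
        nvVal.push_back(A[i][j]);
      }
  // nvCrd is now {2, 3, 1, 3, 1, 2, 0, 3, 0, 2, 0, 1}; nvVal is 1.0..12.0.

  for (uint64_t p : loosePos)
    std::cout << p << " ";
  std::cout << "\n";
  for (unsigned c : nvCrd)
    std::cout << c << " ";
  std::cout << "\n";
  return 0;
}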
From 0227052c68bff354d1d53a0b6ea7a57636d24ba9 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Mon, 23 Oct 2023 13:15:56 -0700
Subject: [PATCH] [mlir][sparse] support 2:4 structured sparsity and loose
compressed
This adds library support for these two new level formats.
---
.../mlir/Dialect/SparseTensor/IR/Enums.h | 8 +-
.../ExecutionEngine/SparseTensor/Storage.h | 115 +++++++++--------
.../ExecutionEngine/SparseTensor/Storage.cpp | 7 +-
.../Dialect/SparseTensor/CPU/sparse_ds.mlir | 120 ++++++++++++++++++
mlir/test/Integration/data/ds.mtx | 14 ++
5 files changed, 208 insertions(+), 56 deletions(-)
create mode 100644 mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir
create mode 100755 mlir/test/Integration/data/ds.mtx
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index c65a27567d59d9a..1e9aa2bdf45dbdb 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -277,7 +277,7 @@ constexpr bool isCompressedDLT(DimLevelType dlt) {
static_cast<uint8_t>(DimLevelType::Compressed);
}
-/// Check if the `DimLevelType` is compressed (regardless of properties).
+/// Check if the `DimLevelType` is loose compressed (regardless of properties).
constexpr bool isLooseCompressedDLT(DimLevelType dlt) {
return (static_cast<uint8_t>(dlt) & ~3) ==
static_cast<uint8_t>(DimLevelType::LooseCompressed);
@@ -289,6 +289,12 @@ constexpr bool isSingletonDLT(DimLevelType dlt) {
static_cast<uint8_t>(DimLevelType::Singleton);
}
+/// Check if the `DimLevelType` is 2OutOf4 (regardless of properties).
+constexpr bool is2OutOf4DLT(DimLevelType dlt) {
+ return (static_cast<uint8_t>(dlt) & ~3) ==
+ static_cast<uint8_t>(DimLevelType::TwoOutOfFour);
+}
+
/// Check if the `DimLevelType` is ordered (regardless of storage format).
constexpr bool isOrderedDLT(DimLevelType dlt) {
return !(static_cast<uint8_t>(dlt) & 2);
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
index ad92ee1f89fc153..460549726356370 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/Storage.h
@@ -115,11 +115,19 @@ class SparseTensorStorageBase {
return isCompressedDLT(getLvlType(l));
}
+ /// Safely checks if the level uses loose compressed storage.
+ bool isLooseCompressedLvl(uint64_t l) const {
+ return isLooseCompressedDLT(getLvlType(l));
+ }
+
/// Safely checks if the level uses singleton storage.
bool isSingletonLvl(uint64_t l) const {
return isSingletonDLT(getLvlType(l));
}
+ /// Safely checks if the level uses 2 out of 4 storage.
+ bool is2OutOf4Lvl(uint64_t l) const { return is2OutOf4DLT(getLvlType(l)); }
+
/// Safely checks if the level is ordered.
bool isOrderedLvl(uint64_t l) const { return isOrderedDLT(getLvlType(l)); }
@@ -138,9 +146,6 @@ class SparseTensorStorageBase {
MLIR_SPARSETENSOR_FOREVERY_FIXED_O(DECL_GETCOORDINATES)
#undef DECL_GETCOORDINATES
- /// Gets the coordinate-value stored at the given level and position.
- virtual uint64_t getCrd(uint64_t lvl, uint64_t pos) const = 0;
-
/// Gets primary storage.
#define DECL_GETVALUES(VNAME, V) virtual void getValues(std::vector<V> **);
MLIR_SPARSETENSOR_FOREVERY_V(DECL_GETVALUES)
@@ -280,13 +285,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
*out = &values;
}
- /// Returns coordinate at given position.
- uint64_t getCrd(uint64_t lvl, uint64_t pos) const final {
- assert(isCompressedDLT(getLvlType(lvl)) || isSingletonDLT(getLvlType(lvl)));
- assert(pos < coordinates[lvl].size());
- return coordinates[lvl][pos]; // Converts the stored `C` into `uint64_t`.
- }
-
/// Partially specialize forwarding insertions based on template types.
void forwardingInsert(const uint64_t *dimCoords, V val) final {
assert(dimCoords && coo);
@@ -302,7 +300,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
if (allDense) {
uint64_t lvlRank = getLvlRank();
uint64_t valIdx = 0;
- // Linearize the address
+ // Linearize the address.
for (uint64_t lvl = 0; lvl < lvlRank; lvl++)
valIdx = valIdx * getLvlSize(lvl) + lvlCoords[lvl];
values[valIdx] = val;
@@ -441,16 +439,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
}
private:
- /// Appends an arbitrary new position to `positions[lvl]`. This method
- /// checks that `pos` is representable in the `P` type; however, it
- /// does not check that `pos` is semantically valid (i.e., larger than
- /// the previous position and smaller than `coordinates[lvl].capacity()`).
- void appendPos(uint64_t lvl, uint64_t pos, uint64_t count = 1) {
- assert(isCompressedLvl(lvl));
- positions[lvl].insert(positions[lvl].end(), count,
- detail::checkOverflowCast<P>(pos));
- }
-
/// Appends coordinate `crd` to level `lvl`, in the semantically
/// general sense. For non-dense levels, that means appending to the
/// `coordinates[lvl]` array, checking that `crd` is representable in
@@ -461,11 +449,11 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// `full` is the number of "entries" already written to `values` for this
/// segment (aka one after the highest coordinate previously appended).
void appendCrd(uint64_t lvl, uint64_t full, uint64_t crd) {
- const auto dlt = getLvlType(lvl); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt) || isSingletonDLT(dlt)) {
+ if (!isDenseLvl(lvl)) {
+ assert(isCompressedLvl(lvl) || isLooseCompressedLvl(lvl) ||
+ isSingletonLvl(lvl) || is2OutOf4Lvl(lvl));
coordinates[lvl].push_back(detail::checkOverflowCast<C>(crd));
} else { // Dense level.
- assert(isDenseDLT(dlt));
assert(crd >= full && "Coordinate was already filled");
if (crd == full)
return; // Short-circuit, since it'll be a nop.
@@ -482,15 +470,13 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// storage, as opposed to "level-sizes" which are the cardinality
/// of possible coordinates for that level.
uint64_t assembledSize(uint64_t parentSz, uint64_t l) const {
- const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt))
+ if (isCompressedLvl(l))
return positions[l][parentSz];
- if (isSingletonDLT(dlt))
+ if (isSingletonLvl(l))
return parentSz; // New size is same as the parent.
- if (isDenseDLT(dlt))
- return parentSz * getLvlSize(l);
- MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n",
- static_cast<uint8_t>(dlt));
+ // TODO: support levels assignment for loose/2:4?
+ assert(isDenseLvl(l));
+ return parentSz * getLvlSize(l);
}
/// Initializes sparse tensor storage scheme from a memory-resident sparse
@@ -514,7 +500,7 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
uint64_t seg = lo + 1;
if (isUniqueLvl(l))
while (seg < hi && lvlElements[seg].coords[l] == c)
- ++seg;
+ seg++;
// Handle segment in interval for sparse or dense level.
appendCrd(l, full, c);
full = c + 1;
@@ -529,14 +515,22 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
/// Finalizes the sparse position structure at this level.
void finalizeSegment(uint64_t l, uint64_t full = 0, uint64_t count = 1) {
if (count == 0)
- return; // Short-circuit, since it'll be a nop.
- const auto dlt = getLvlType(l); // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt)) {
- appendPos(l, coordinates[l].size(), count);
- } else if (isSingletonDLT(dlt)) {
+ return; // Short-circuit, since it'll be a nop.
+ if (isCompressedLvl(l)) {
+ uint64_t pos = coordinates[l].size();
+ positions[l].insert(positions[l].end(), count,
+ detail::checkOverflowCast<P>(pos));
+ } else if (isLooseCompressedLvl(l)) {
+ // Finish this level, and push pairs for the empty ones, and one
+ // more for next level. Note that this always leaves one extra
+ // unused element at the end.
+ uint64_t pos = coordinates[l].size();
+ positions[l].insert(positions[l].end(), 2 * count,
+ detail::checkOverflowCast<P>(pos));
+ } else if (isSingletonLvl(l) || is2OutOf4Lvl(l)) {
return; // Nothing to finalize.
} else { // Dense dimension.
- assert(isDenseDLT(dlt));
+ assert(isDenseLvl(l));
const uint64_t sz = getLvlSizes()[l];
assert(sz >= full && "Segment is overfull");
count = detail::checkedMul(count, sz - full);
@@ -589,7 +583,6 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
(crd < cur && !isOrderedLvl(l))) {
return l;
}
-
if (crd < cur) {
assert(false && "non-lexicographic insertion");
return -1u;
@@ -609,27 +602,37 @@ class SparseTensorStorage final : public SparseTensorStorageBase {
return;
}
if (isCompressedLvl(l)) {
- // Look up the bounds of the `l`-level segment determined by the
- // `(l - 1)`-level position `parentPos`.
const std::vector<P> &positionsL = positions[l];
assert(parentPos + 1 < positionsL.size());
const uint64_t pstart = static_cast<uint64_t>(positionsL[parentPos]);
const uint64_t pstop = static_cast<uint64_t>(positionsL[parentPos + 1]);
- // Loop-invariant code for looking up the `l`-level coordinates.
const std::vector<C> &coordinatesL = coordinates[l];
assert(pstop <= coordinatesL.size());
- for (uint64_t pos = pstart; pos < pstop; ++pos) {
+ for (uint64_t pos = pstart; pos < pstop; pos++) {
lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
toCOO(pos, l + 1, dimCoords);
}
- } else if (isSingletonLvl(l)) {
- lvlCursor[l] = getCrd(l, parentPos);
+ } else if (isLooseCompressedLvl(l)) {
+ const std::vector<P> &positionsL = positions[l];
+ assert(2 * parentPos + 1 < positionsL.size());
+ const uint64_t pstart = static_cast<uint64_t>(positionsL[2 * parentPos]);
+ const uint64_t pstop =
+ static_cast<uint64_t>(positionsL[2 * parentPos + 1]);
+ const std::vector<C> &coordinatesL = coordinates[l];
+ assert(pstop <= coordinatesL.size());
+ for (uint64_t pos = pstart; pos < pstop; pos++) {
+ lvlCursor[l] = static_cast<uint64_t>(coordinatesL[pos]);
+ toCOO(pos, l + 1, dimCoords);
+ }
+ } else if (isSingletonLvl(l) || is2OutOf4Lvl(l)) {
+ assert(parentPos < coordinates[l].size());
+ lvlCursor[l] = static_cast<uint64_t>(coordinates[l][parentPos]);
toCOO(parentPos, l + 1, dimCoords);
} else { // Dense level.
assert(isDenseLvl(l));
const uint64_t sz = getLvlSizes()[l];
const uint64_t pstart = parentPos * sz;
- for (uint64_t c = 0; c < sz; ++c) {
+ for (uint64_t c = 0; c < sz; c++) {
lvlCursor[l] = c;
toCOO(pstart + c, l + 1, dimCoords);
}
@@ -706,19 +709,30 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
bool allDense = true;
uint64_t sz = 1;
for (uint64_t l = 0; l < lvlRank; l++) {
- const DimLevelType dlt = lvlTypes[l]; // Avoid redundant bounds checking.
- if (isCompressedDLT(dlt)) {
+ if (isCompressedLvl(l)) {
positions[l].reserve(sz + 1);
positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
allDense = false;
- } else if (isSingletonDLT(dlt)) {
+ } else if (isLooseCompressedLvl(l)) {
+ positions[l].reserve(2 * sz + 1); // last one unused
+ positions[l].push_back(0);
coordinates[l].reserve(sz);
sz = 1;
allDense = false;
+ } else if (isSingletonLvl(l)) {
+ coordinates[l].reserve(sz);
+ sz = 1;
+ allDense = false;
+ } else if (is2OutOf4Lvl(l)) {
+ assert(allDense && l == lvlRank - 1 && "unexpected 2:4 usage");
+ sz = detail::checkedMul(sz, lvlSizes[l]) / 2;
+ coordinates[l].reserve(sz);
+ values.reserve(sz);
+ allDense = false;
} else { // Dense level.
- assert(isDenseDLT(dlt));
+ assert(isDenseLvl(l));
sz = detail::checkedMul(sz, lvlSizes[l]);
}
}
@@ -773,6 +787,7 @@ SparseTensorStorage<P, C, V>::SparseTensorStorage(
positions[l].assign(posPtr, posPtr + parentSz + 1);
coordinates[l].assign(crdPtr, crdPtr + positions[l][parentSz]);
} else {
+ // TODO: support levels assignment for loose/2:4?
assert(isDenseLvl(l));
}
parentSz = assembledSize(parentSz, l);
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index 40805a179d4b385..ea7e3125b7f47d9 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -36,11 +36,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
assert(lvlRank > 0 && "Trivial shape is unsupported");
for (uint64_t l = 0; l < lvlRank; ++l) {
assert(lvlSizes[l] > 0 && "Level size zero has trivial storage");
- const auto dlt = lvlTypes[l];
- if (!(isDenseDLT(dlt) || isCompressedDLT(dlt) || isSingletonDLT(dlt))) {
- MLIR_SPARSETENSOR_FATAL("unsupported level type: %d\n",
- static_cast<uint8_t>(dlt));
- }
+ assert(isDenseLvl(l) || isCompressedLvl(l) || isLooseCompressedLvl(l) ||
+ isSingletonLvl(l) || is2OutOf4Lvl(l));
}
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir
new file mode 100644
index 000000000000000..773c34e1f3dabca
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_ds.mlir
@@ -0,0 +1,120 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/ds.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+!Filename = !llvm.ptr<i8>
+
+#CSR = #sparse_tensor.encoding<{
+ map = (i, j) -> ( i : dense, j : compressed)
+}>
+
+#CSR_hi = #sparse_tensor.encoding<{
+ map = (i, j) -> ( i : dense, j : loose_compressed)
+}>
+
+#NV_24 = #sparse_tensor.encoding<{
+ map = ( i, j ) -> ( i : dense,
+ j floordiv 4 : dense,
+ j mod 4 : block2_4),
+ crdWidth = 8
+}>
+
+module {
+
+ func.func private @getTensorFilename(index) -> (!Filename)
+
+ //
+ // Input matrix:
+ //
+ // [[0.0, 0.0, 1.0, 2.0, 0.0, 3.0, 0.0, 4.0],
+ // [0.0, 5.0, 6.0, 0.0, 7.0, 0.0, 0.0, 8.0],
+ // [9.0, 0.0, 10.0, 0.0, 11.0, 12.0, 0.0, 0.0]]
+ //
+ func.func @entry() {
+ %u0 = arith.constant 0 : i8
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f64
+
+ %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+ %A1 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #CSR>
+ %A2 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #CSR_hi>
+ %A3 = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #NV_24>
+
+ //
+ // CSR:
+ //
+ // CHECK: ( 0, 4, 8, 12 )
+ // CHECK-NEXT: ( 2, 3, 5, 7, 1, 2, 4, 7, 0, 2, 4, 5 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %pos1 = sparse_tensor.positions %A1 {level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+ %vecp1 = vector.transfer_read %pos1[%c0], %c0 : memref<?xindex>, vector<4xindex>
+ vector.print %vecp1 : vector<4xindex>
+ %crd1 = sparse_tensor.coordinates %A1 {level = 1 : index } : tensor<?x?xf64, #CSR> to memref<?xindex>
+ %vecc1 = vector.transfer_read %crd1[%c0], %c0 : memref<?xindex>, vector<12xindex>
+ vector.print %vecc1 : vector<12xindex>
+ %val1 = sparse_tensor.values %A1 : tensor<?x?xf64, #CSR> to memref<?xf64>
+ %vecv1 = vector.transfer_read %val1[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv1 : vector<12xf64>
+
+ //
+ // CSR_hi:
+ //
+ // CHECK-NEXT: ( 0, 4, 4, 8, 8, 12 )
+ // CHECK-NEXT: ( 2, 3, 5, 7, 1, 2, 4, 7, 0, 2, 4, 5 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %pos2 = sparse_tensor.positions %A2 {level = 1 : index } : tensor<?x?xf64, #CSR_hi> to memref<?xindex>
+ %vecp2 = vector.transfer_read %pos2[%c0], %c0 : memref<?xindex>, vector<6xindex>
+ vector.print %vecp2 : vector<6xindex>
+ %crd2 = sparse_tensor.coordinates %A2 {level = 1 : index } : tensor<?x?xf64, #CSR_hi> to memref<?xindex>
+ %vecc2 = vector.transfer_read %crd2[%c0], %c0 : memref<?xindex>, vector<12xindex>
+ vector.print %vecc2 : vector<12xindex>
+ %val2 = sparse_tensor.values %A2 : tensor<?x?xf64, #CSR_hi> to memref<?xf64>
+ %vecv2 = vector.transfer_read %val2[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv2 : vector<12xf64>
+
+ //
+ // NV_24
+ //
+ // CHECK-NEXT: ( 2, 3, 1, 3, 1, 2, 0, 3, 0, 2, 0, 1 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 )
+ //
+ %crd3 = sparse_tensor.coordinates %A3 {level = 2 : index } : tensor<?x?xf64, #NV_24> to memref<?xi8>
+ %vecc3 = vector.transfer_read %crd3[%c0], %u0 : memref<?xi8>, vector<12xi8>
+ vector.print %vecc3 : vector<12xi8>
+ %val3 = sparse_tensor.values %A3 : tensor<?x?xf64, #NV_24> to memref<?xf64>
+ %vecv3 = vector.transfer_read %val3[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv3 : vector<12xf64>
+
+ // Release the resources.
+ bufferization.dealloc_tensor %A1: tensor<?x?xf64, #CSR>
+ bufferization.dealloc_tensor %A2: tensor<?x?xf64, #CSR_hi>
+ bufferization.dealloc_tensor %A3: tensor<?x?xf64, #NV_24>
+
+ return
+ }
+}
diff --git a/mlir/test/Integration/data/ds.mtx b/mlir/test/Integration/data/ds.mtx
new file mode 100755
index 000000000000000..8acc2ce081b6b35
--- /dev/null
+++ b/mlir/test/Integration/data/ds.mtx
@@ -0,0 +1,14 @@
+%%MatrixMarket matrix coordinate real general
+3 8 12
+1 3 1.0
+1 4 2.0
+1 6 3.0
+1 8 4.0
+2 2 5.0
+2 3 6.0
+2 5 7.0
+2 8 8.0
+3 1 9.0
+3 3 10.0
+3 5 11.0
+3 6 12.0