[Mlir-commits] [mlir] [mlir][sparse] implement non-permutation MapRef encoding (PR #69406)
Aart Bik
llvmlistbot at llvm.org
Tue Oct 17 18:13:28 PDT 2023
https://github.com/aartbik created https://github.com/llvm/llvm-project/pull/69406
This enables reading block-sparse tensors from file using libgen! (and soon also via direct IR codegen)
From 20f86596420141e6d1867b7cda44625cdd83009b Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik at google.com>
Date: Tue, 17 Oct 2023 18:11:45 -0700
Subject: [PATCH] [mlir][sparse] implement non-permutation MapRef encoding
This enables reading block-sparse tensors from file using libgen!
(and soon also via direct IR codegen)
---
.../mlir/Dialect/SparseTensor/IR/Enums.h | 23 +++++
.../ExecutionEngine/SparseTensor/MapRef.h | 56 +++++++++---
.../SparseTensor/Transforms/CodegenUtils.cpp | 83 ++++++++++++++----
.../ExecutionEngine/SparseTensor/MapRef.cpp | 44 ++++++++--
.../ExecutionEngine/SparseTensor/Storage.cpp | 4 +-
.../ExecutionEngine/SparseTensorRuntime.cpp | 8 +-
.../Dialect/SparseTensor/CPU/block.mlir | 85 +++++++++++++++++++
mlir/test/Integration/data/block.mtx | 10 +++
8 files changed, 273 insertions(+), 40 deletions(-)
create mode 100755 mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
create mode 100755 mlir/test/Integration/data/block.mtx
diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
index 08887abcd0f1055..b59b7c8998eb26c 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/Enums.h
@@ -443,6 +443,29 @@ static_assert((isUniqueDLT(DimLevelType::Dense) &&
!isUniqueDLT(DimLevelType::LooseCompressedNuNo)),
"isUniqueDLT definition is broken");
+/// Bit manipulations for the compact affine encoding used by MapRef:
+/// a 2-bit tag in bits 62-63 (00 = plain index, 01 = floordiv, 10 = mod,
+/// 11 = mul), followed by an optional constant and/or secondary index in
+/// the middle bits, with the main index in the lower 32 bits.
+constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
+ assert(i <= 0xffffffffu && cf <= 0x3fffffffu && cm <= 0x3fffffffu);
+ if (cf != 0)
+ return (0x1ULL << 62) | (cf << 32) | i;
+ if (cm != 0)
+ return (0x2ULL << 62) | (cm << 32) | i;
+ return i;
+}
+constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
+ assert(i <= 0xffffffffu && c <= 0xffffu && ii <= 0x3fffu);
+ if (c != 0)
+ return (0x3ULL << 62) | (c << 32) | (ii << 48) | i;
+ return i;
+}
+constexpr bool isEncodedFloor(uint64_t v) { return (v >> 62) == 0x01; }
+constexpr bool isEncodedMod(uint64_t v) { return (v >> 62) == 0x02; }
+constexpr bool isEncodedMul(uint64_t v) { return (v >> 62) == 0x03; }
+constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xffffffffu; }
+constexpr uint64_t decodeConst(uint64_t v) { return (v >> 32) & 0x3fffffffu; }
+constexpr uint64_t decodeMulc(uint64_t v) { return (v >> 32) & 0xffffu; }
+constexpr uint64_t decodeMuli(uint64_t v) { return (v >> 48) & 0x3fffu; }
+
} // namespace sparse_tensor
} // namespace mlir
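
[Editorial note: a quick standalone sketch, not part of the patch, showing how the new helpers round-trip an encoded affine operation. The relevant helpers are re-inlined with the asserts dropped so the snippet compiles on its own.]

#include <cstdint>
#include <cstdio>

constexpr uint64_t encodeDim(uint64_t i, uint64_t cf, uint64_t cm) {
  if (cf != 0)
    return (0x1ULL << 62) | (cf << 32) | i; // i floordiv cf
  if (cm != 0)
    return (0x2ULL << 62) | (cm << 32) | i; // i mod cm
  return i;                                 // plain i
}
constexpr bool isEncodedFloor(uint64_t v) { return (v >> 62) == 0x01; }
constexpr bool isEncodedMod(uint64_t v) { return (v >> 62) == 0x02; }
constexpr uint64_t decodeIndex(uint64_t v) { return v & 0xffffffffu; }
constexpr uint64_t decodeConst(uint64_t v) { return (v >> 32) & 0x3fffffffu; }

int main() {
  // Encode "d0 floordiv 2" (the first level of the 2x2 BSR map below).
  constexpr uint64_t e = encodeDim(/*i=*/0, /*cf=*/2, /*cm=*/0);
  static_assert(isEncodedFloor(e) && decodeIndex(e) == 0 && decodeConst(e) == 2);
  // Encode "d1 mod 2" (the last level of the same map).
  constexpr uint64_t m = encodeDim(/*i=*/1, /*cf=*/0, /*cm=*/2);
  static_assert(isEncodedMod(m) && decodeIndex(m) == 1 && decodeConst(m) == 2);
  std::printf("floor: 0x%llx mod: 0x%llx\n", (unsigned long long)e,
              (unsigned long long)m);
  return 0;
}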
diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
index 22ae70a61d95eff..5631b59abe9c8fb 100644
--- a/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
+++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/MapRef.h
@@ -30,6 +30,23 @@ namespace sparse_tensor {
/// The pushforward/backward operations are fast for (1) but incur some obvious
/// overhead for situation (2).
///
+/// Note that because the indices in the mappings refer to dimensions
+/// and levels (and *not* the sizes of these dimensions and levels), the
+/// 64-bit encoding gives ample room for a compact encoding of affine
/// operations in the upper 32 bits.
+///
+/// The compact encoding is as follows:
+///
+/// | 00 | main index | e.g. i
+/// | 01 floor | constant | 32-bit for main index | e.g. i floor c
+/// | 10 mod | constant | 32-bit for main index | e.g. i mod c
+/// | 11 mul | index/constant | 32-bit for main index | e.g. i + 2 * ii
+///
/// This encoding provides sufficient generality for the currently supported
/// sparse tensor types. Generalizing it further will require a broader
/// encoding scheme for affine functions. Also, the library encoding may be
/// replaced with pure "direct-IR" code in the future.
+///
class MapRef final {
public:
MapRef(uint64_t d, uint64_t l, const uint64_t *d2l, const uint64_t *l2d);
@@ -38,13 +55,22 @@ class MapRef final {
// Push forward maps from dimensions to levels.
//
- template <typename T>
- inline void pushforward(const T *in, T *out) const {
+ // Maps a dimRank-sized input to a lvlRank-sized output.
+ template <typename T> inline void pushforward(const T *in, T *out) const {
if (isPermutation) {
- for (uint64_t i = 0; i < lvlRank; ++i)
- out[i] = in[lvl2dim[i]];
+ for (uint64_t l = 0; l < lvlRank; l++) {
+ out[l] = in[dim2lvl[l]];
+ }
} else {
- assert(0 && "coming soon");
+ uint64_t i, c;
+ for (uint64_t l = 0; l < lvlRank; l++)
+ if (isFloor(l, i, c)) {
+ out[l] = in[i] / c;
+ } else if (isMod(l, i, c)) {
+ out[l] = in[i] % c;
+ } else {
+ out[l] = in[dim2lvl[l]];
+ }
}
}
@@ -52,13 +78,19 @@ class MapRef final {
// Push backward maps from levels to dimensions.
//
- template <typename T>
- inline void pushbackward(const T *in, T *out) const {
+ // Maps a lvlRank-sized input to a dimRank-sized output.
+ template <typename T> inline void pushbackward(const T *in, T *out) const {
if (isPermutation) {
- for (uint64_t i = 0; i < dimRank; ++i)
- out[i] = in[dim2lvl[i]];
+ for (uint64_t d = 0; d < dimRank; d++)
+ out[d] = in[lvl2dim[d]];
} else {
- assert(0 && "coming soon");
+ uint64_t i, c, ii;
+ for (uint64_t d = 0; d < dimRank; d++)
+ if (isMul(d, i, c, ii)) {
+ out[d] = in[i] + c * in[ii];
+ } else {
+ out[d] = in[lvl2dim[d]];
+ }
}
}
@@ -68,6 +100,10 @@ class MapRef final {
private:
bool isPermutationMap() const;
+ bool isFloor(uint64_t l, uint64_t &i, uint64_t &c) const;
+ bool isMod(uint64_t l, uint64_t &i, uint64_t &c) const;
+ bool isMul(uint64_t d, uint64_t &i, uint64_t &c, uint64_t &ii) const;
+
const uint64_t dimRank;
const uint64_t lvlRank;
const uint64_t *const dim2lvl; // non-owning pointer
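
[Editorial note: for intuition about the non-permutation paths above, here is what pushforward and pushbackward compute for the 2x2 BSR map used by the new integration test. This is a hand-expanded sketch for that fixed map, not the generic MapRef code.]

// dim2lvl: (i, j) -> (i floordiv 2, j floordiv 2, i mod 2, j mod 2)
// lvl2dim: (b0, b1, i0, i1) -> (2 * b0 + i0, 2 * b1 + i1)
#include <cassert>
#include <cstdint>

static void pushforward(const uint64_t in[2], uint64_t out[4]) {
  out[0] = in[0] / 2; // i floordiv 2 (the "floor" case)
  out[1] = in[1] / 2; // j floordiv 2
  out[2] = in[0] % 2; // i mod 2 (the "mod" case)
  out[3] = in[1] % 2; // j mod 2
}

static void pushbackward(const uint64_t in[4], uint64_t out[2]) {
  out[0] = in[2] + 2 * in[0]; // i = i0 + 2 * b0 (the "mul" case)
  out[1] = in[3] + 2 * in[1]; // j = i1 + 2 * b1
}

int main() {
  uint64_t dims[2] = {3, 4}, lvls[4], back[2];
  pushforward(dims, lvls);  // (3, 4) -> (1, 2, 1, 0)
  assert(lvls[0] == 1 && lvls[1] == 2 && lvls[2] == 1 && lvls[3] == 0);
  pushbackward(lvls, back); // and back to (3, 4)
  assert(back[0] == 3 && back[1] == 4);
  return 0;
}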
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index 298ff098835564d..88677c09615101d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -675,9 +675,9 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
/*out*/ Value &lvl2dimBuffer) {
const Dimension dimRank = stt.getDimRank();
const Level lvlRank = stt.getLvlRank();
- // For an identity mapping, the dim2lvl and lvl2dim mappings are
- // identical as are dimSizes and lvlSizes, so buffers are reused
- // as much as possible.
+ // For an identity mapping, the dim2lvl and lvl2dim mappings are
+ // identical as are dimSizes and lvlSizes, so buffers are reused
+ // as much as possible.
if (stt.isIdentity()) {
assert(dimRank == lvlRank);
SmallVector<Value> iotaValues;
@@ -688,25 +688,72 @@ Value sparse_tensor::genMapBuffers(OpBuilder &builder, Location loc,
return dimSizesBuffer;
}
// Otherwise, some code needs to be generated to set up the buffers.
- // TODO: use the lvl2dim once available and deal with non-permutations!
+ // This code deals with permutations as well as non-permutations that
+ // arise from rank-changing blocking.
const auto dimToLvl = stt.getDimToLvl();
- assert(dimToLvl.isPermutation());
- SmallVector<Value> dim2lvlValues(dimRank);
- SmallVector<Value> lvl2dimValues(lvlRank);
+ SmallVector<Value> dim2lvlValues(lvlRank); // for each lvl, expr in dim vars
+ SmallVector<Value> lvl2dimValues(dimRank); // for each dim, expr in lvl vars
SmallVector<Value> lvlSizesValues(lvlRank);
+ // Generate dim2lvl.
+ assert(lvlRank == dimToLvl.getNumResults());
for (Level l = 0; l < lvlRank; l++) {
- // The `d`th source variable occurs in the `l`th result position.
- Dimension d = dimToLvl.getDimPosition(l);
- Value lvl = constantIndex(builder, loc, l);
- Value dim = constantIndex(builder, loc, d);
- dim2lvlValues[d] = lvl;
- lvl2dimValues[l] = dim;
- if (stt.isDynamicDim(d))
- lvlSizesValues[l] =
- builder.create<memref::LoadOp>(loc, dimSizesBuffer, dim);
- else
- lvlSizesValues[l] = dimShapesValues[d];
+ AffineExpr exp = dimToLvl.getResult(l);
+ // We expect:
+ // (1) l = d
+ // (2) l = d / c
+ // (3) l = d % c
+ Dimension d = 0;
+ uint64_t cf = 0, cm = 0;
+ switch (exp.getKind()) {
+ case AffineExprKind::DimId:
+ d = exp.cast<AffineDimExpr>().getPosition();
+ break;
+ case AffineExprKind::FloorDiv:
+ d = exp.cast<AffineBinaryOpExpr>()
+ .getLHS()
+ .cast<AffineDimExpr>()
+ .getPosition();
+ cf = exp.cast<AffineBinaryOpExpr>()
+ .getRHS()
+ .cast<AffineConstantExpr>()
+ .getValue();
+ break;
+ case AffineExprKind::Mod:
+ d = exp.cast<AffineBinaryOpExpr>()
+ .getLHS()
+ .cast<AffineDimExpr>()
+ .getPosition();
+ cm = exp.cast<AffineBinaryOpExpr>()
+ .getRHS()
+ .cast<AffineConstantExpr>()
+ .getValue();
+ break;
+ default:
+ llvm::report_fatal_error("unsupported dim2lvl in sparse tensor type");
+ }
+ dim2lvlValues[l] = constantIndex(builder, loc, encodeDim(d, cf, cm));
+ lvl2dimValues[d] = constantIndex(builder, loc, l); // FIXME, use lvlToDim
+ // Compute the level sizes.
+ // (1) l = d : size(d)
+ // (2) l = d / c : size(d) / c
+ // (3) l = d % c : c
+ Value lvlSz;
+ if (cm == 0) {
+ lvlSz = dimShapesValues[d];
+ if (stt.isDynamicDim(d))
+ lvlSz = builder.create<memref::LoadOp>(loc, dimSizesBuffer,
+ constantIndex(builder, loc, d));
+ if (cf != 0)
+ lvlSz = builder.create<arith::DivUIOp>(loc, lvlSz,
+ constantIndex(builder, loc, cf));
+ } else {
+ lvlSz = constantIndex(builder, loc, cm);
+ }
+ lvlSizesValues[l] = lvlSz;
}
+ // Return buffers.
dim2lvlBuffer = allocaBuffer(builder, loc, dim2lvlValues);
lvl2dimBuffer = allocaBuffer(builder, loc, lvl2dimValues);
return allocaBuffer(builder, loc, lvlSizesValues);
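
[Editorial note: to make the three level-size cases concrete, for the 4x6 matrix and the 2x2 BSR map in the new test the generated sizes come out as (2, 3, 2, 2). A throwaway sketch of just the arithmetic, not the generated code:]

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t dimSizes[2] = {4, 6};
  uint64_t lvlSizes[4];
  lvlSizes[0] = dimSizes[0] / 2; // case (2): i floordiv 2 -> 4 / 2 = 2
  lvlSizes[1] = dimSizes[1] / 2; // case (2): j floordiv 2 -> 6 / 2 = 3
  lvlSizes[2] = 2;               // case (3): i mod 2 -> the constant 2
  lvlSizes[3] = 2;               // case (3): j mod 2 -> the constant 2
  for (uint64_t s : lvlSizes)
    std::printf("%llu ", (unsigned long long)s); // prints: 2 3 2 2
  std::printf("\n");
  return 0;
}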
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
index ee4d6fa0d34b491..ace6ac8152a29cb 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/MapRef.cpp
@@ -7,14 +7,15 @@
//===----------------------------------------------------------------------===//
#include "mlir/ExecutionEngine/SparseTensor/MapRef.h"
+#include "mlir/Dialect/SparseTensor/IR/Enums.h"
mlir::sparse_tensor::MapRef::MapRef(uint64_t d, uint64_t l, const uint64_t *d2l,
const uint64_t *l2d)
: dimRank(d), lvlRank(l), dim2lvl(d2l), lvl2dim(l2d),
isPermutation(isPermutationMap()) {
if (isPermutation) {
- for (uint64_t i = 0; i < dimRank; i++)
- assert(lvl2dim[dim2lvl[i]] == i);
+ for (uint64_t l = 0; l < lvlRank; l++)
+ assert(lvl2dim[dim2lvl[l]] == l);
}
}
@@ -22,11 +23,42 @@ bool mlir::sparse_tensor::MapRef::isPermutationMap() const {
if (dimRank != lvlRank)
return false;
std::vector<bool> seen(dimRank, false);
- for (uint64_t i = 0; i < dimRank; i++) {
- const uint64_t j = dim2lvl[i];
- if (j >= dimRank || seen[j])
+ for (uint64_t l = 0; l < lvlRank; l++) {
+ const uint64_t d = dim2lvl[l];
+ if (d >= dimRank || seen[d])
return false;
- seen[j] = true;
+ seen[d] = true;
}
return true;
}
+
+bool mlir::sparse_tensor::MapRef::isFloor(uint64_t l, uint64_t &i,
+ uint64_t &c) const {
+ if (isEncodedFloor(dim2lvl[l])) {
+ i = decodeIndex(dim2lvl[l]);
+ c = decodeConst(dim2lvl[l]);
+ return true;
+ }
+ return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMod(uint64_t l, uint64_t &i,
+ uint64_t &c) const {
+ if (isEncodedMod(dim2lvl[l])) {
+ i = decodeIndex(dim2lvl[l]);
+ c = decodeConst(dim2lvl[l]);
+ return true;
+ }
+ return false;
+}
+
+bool mlir::sparse_tensor::MapRef::isMul(uint64_t d, uint64_t &i, uint64_t &c,
+ uint64_t &ii) const {
+ if (isEncodedMul(lvl2dim[d])) {
+ i = decodeIndex(lvl2dim[d]);
+ c = decodeMulc(lvl2dim[d]);
+ ii = decodeMuli(lvl2dim[d]);
+ return true;
+ }
+ return false;
+}
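
[Editorial note: a small standalone sketch of the lvl2dim side of the encoding. This part is hypothetical: the patch still carries a FIXME in CodegenUtils.cpp for generating encoded lvl2dim entries, so these are the values it would produce for the 2x2 BSR map.]

#include <cstdint>
#include <cstdio>

// encodeLvl as in Enums.h above (asserts dropped for brevity).
constexpr uint64_t encodeLvl(uint64_t i, uint64_t c, uint64_t ii) {
  if (c != 0)
    return (0x3ULL << 62) | (c << 32) | (ii << 48) | i;
  return i;
}

int main() {
  // pushbackward computes out[d] = in[i] + c * in[ii] for "mul" entries.
  const uint64_t lvl2dim[2] = {
      encodeLvl(2, 2, 0), // d0 = l2 + 2 * l0
      encodeLvl(3, 2, 1), // d1 = l3 + 2 * l1
  };
  for (uint64_t v : lvl2dim)
    std::printf("0x%llx\n", (unsigned long long)v);
  return 0;
}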
diff --git a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
index f5890ebb6f3ff6f..40805a179d4b385 100644
--- a/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensor/Storage.cpp
@@ -24,8 +24,8 @@ SparseTensorStorageBase::SparseTensorStorageBase( // NOLINT
: dimSizes(dimSizes, dimSizes + dimRank),
lvlSizes(lvlSizes, lvlSizes + lvlRank),
lvlTypes(lvlTypes, lvlTypes + lvlRank),
- dim2lvlVec(dim2lvl, dim2lvl + dimRank),
- lvl2dimVec(lvl2dim, lvl2dim + lvlRank),
+ dim2lvlVec(dim2lvl, dim2lvl + lvlRank),
+ lvl2dimVec(lvl2dim, lvl2dim + dimRank),
map(dimRank, lvlRank, dim2lvlVec.data(), lvl2dimVec.data()) {
assert(dimSizes && lvlSizes && lvlTypes && dim2lvl && lvl2dim);
// Validate dim-indexed parameters.
diff --git a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
index 36d888a08de6d60..7a6756e689b27be 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorRuntime.cpp
@@ -185,8 +185,8 @@ void *_mlir_ciface_newSparseTensor( // NOLINT
const uint64_t dimRank = MEMREF_GET_USIZE(dimSizesRef);
const uint64_t lvlRank = MEMREF_GET_USIZE(lvlSizesRef);
ASSERT_USIZE_EQ(lvlTypesRef, lvlRank);
- ASSERT_USIZE_EQ(dim2lvlRef, dimRank);
- ASSERT_USIZE_EQ(lvl2dimRef, lvlRank);
+ ASSERT_USIZE_EQ(dim2lvlRef, lvlRank);
+ ASSERT_USIZE_EQ(lvl2dimRef, dimRank);
const index_type *dimSizes = MEMREF_GET_PAYLOAD(dimSizesRef);
const index_type *lvlSizes = MEMREF_GET_PAYLOAD(lvlSizesRef);
const DimLevelType *lvlTypes = MEMREF_GET_PAYLOAD(lvlTypesRef);
@@ -423,10 +423,10 @@ void _mlir_ciface_getSparseTensorReaderDimSizes(
ASSERT_NO_STRIDE(cref); \
ASSERT_NO_STRIDE(vref); \
const uint64_t dimRank = reader.getRank(); \
- const uint64_t lvlRank = MEMREF_GET_USIZE(lvl2dimRef); \
+ const uint64_t lvlRank = MEMREF_GET_USIZE(dim2lvlRef); \
const uint64_t cSize = MEMREF_GET_USIZE(cref); \
const uint64_t vSize = MEMREF_GET_USIZE(vref); \
- ASSERT_USIZE_EQ(dim2lvlRef, dimRank); \
+ ASSERT_USIZE_EQ(lvl2dimRef, dimRank); \
assert(cSize >= lvlRank * vSize); \
assert(vSize >= reader.getNSE() && "Not enough space in buffers"); \
(void)dimRank; \
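
[Editorial note on the rank swap in the two runtime files above: with the affine encoding, dim2lvl stores one encoded expression per level result (lvlRank entries) and lvl2dim one per dimension result (dimRank entries), the opposite of what the old permutation-only code assumed. For the 2x2 BSR map in the test below, dim2lvl has four entries and lvl2dim has two.]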
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
new file mode 100755
index 000000000000000..d0b5e77bd4a724e
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir
@@ -0,0 +1,85 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx"
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// TODO: enable!
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// R_UN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#BSR = #sparse_tensor.encoding<{
+ map = (i, j) ->
+ ( i floordiv 2 : dense
+ , j floordiv 2 : compressed
+ , i mod 2 : dense
+ , j mod 2 : dense
+ )
+}>
+
+!Filename = !llvm.ptr<i8>
+
+//
+// Example 2x2 block storage:
+//
+// +-----+-----+-----+ +-----+-----+-----+
+// | 1 2 | . . | 4 . | | 1 2 | | 4 0 |
+// | . 3 | . . | . 5 | | 0 3 | | 0 5 |
+// +-----+-----+-----+ => +-----+-----+-----+
+// | . . | 6 7 | . . | | | 6 7 | |
+// | . . | 8 . | . . | | | 8 0 | |
+// +-----+-----+-----+ +-----+-----+-----+
+//
+// Stored as:
+//
+// positions[1] : 0 2 3
+// coordinates[1] : 0 2 1
+// values : 1.000000 2.000000 0.000000 3.000000 4.000000 0.000000 0.000000 5.000000 6.000000 7.000000 8.000000 0.000000
+//
+module {
+
+ func.func private @getTensorFilename(index) -> (!Filename)
+
+ func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f64
+
+ %fileName = call @getTensorFilename(%c0) : (index) -> (!Filename)
+ %A = sparse_tensor.new %fileName : !Filename to tensor<?x?xf64, #BSR>
+
+ // CHECK: ( 0, 2, 3 )
+ // CHECK-NEXT: ( 0, 2, 1 )
+ // CHECK-NEXT: ( 1, 2, 0, 3, 4, 0, 0, 5, 6, 7, 8, 0 )
+ %pos = sparse_tensor.positions %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+ %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<3xindex>
+ vector.print %vecp : vector<3xindex>
+ %crd = sparse_tensor.coordinates %A {level = 1 : index } : tensor<?x?xf64, #BSR> to memref<?xindex>
+ %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<3xindex>
+ vector.print %vecc : vector<3xindex>
+ %val = sparse_tensor.values %A : tensor<?x?xf64, #BSR> to memref<?xf64>
+ %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<12xf64>
+ vector.print %vecv : vector<12xf64>
+
+ // Release the resources.
+ bufferization.dealloc_tensor %A: tensor<?x?xf64, #BSR>
+
+ return
+ }
+}
diff --git a/mlir/test/Integration/data/block.mtx b/mlir/test/Integration/data/block.mtx
new file mode 100755
index 000000000000000..9bb3ea7d50a104b
--- /dev/null
+++ b/mlir/test/Integration/data/block.mtx
@@ -0,0 +1,10 @@
+%%MatrixMarket matrix coordinate real general
+4 6 8
+1 1 1.0
+1 2 2.0
+1 5 4.0
+2 2 3.0
+2 6 5.0
+3 3 6.0
+3 4 7.0
+4 3 8.0
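
[Editorial note: as a cross-check of the expected output, the following throwaway sketch, not part of the patch, assembles the eight block.mtx entries into 2x2 BSR form and prints the same positions, coordinates, and values as the CHECK lines in block.mlir.]

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  struct Entry { uint64_t i, j; double v; };
  // The block.mtx triples, converted to 0-based indices.
  const Entry coo[] = {{0, 0, 1.0}, {0, 1, 2.0}, {0, 4, 4.0}, {1, 1, 3.0},
                       {1, 5, 5.0}, {2, 2, 6.0}, {2, 3, 7.0}, {3, 2, 8.0}};
  // Group entries by (block row, block col); std::map keeps the blocks in
  // exactly the (i floordiv 2, j floordiv 2) level order.
  std::map<std::pair<uint64_t, uint64_t>, std::vector<double>> blocks;
  for (const Entry &e : coo) {
    auto &blk = blocks.try_emplace({e.i / 2, e.j / 2},
                                   std::vector<double>(4, 0.0))
                    .first->second;
    blk[(e.i % 2) * 2 + (e.j % 2)] = e.v; // dense position inside 2x2 block
  }
  // Emit CSR-style positions/coordinates over the two block rows.
  std::vector<uint64_t> pos{0}, crd;
  std::vector<double> val;
  for (uint64_t br = 0; br < 2; br++) {
    for (const auto &[key, blk] : blocks) {
      if (key.first != br)
        continue;
      crd.push_back(key.second);
      val.insert(val.end(), blk.begin(), blk.end());
    }
    pos.push_back(crd.size());
  }
  for (uint64_t p : pos) std::printf("%llu ", (unsigned long long)p);
  std::printf("\n"); // 0 2 3
  for (uint64_t c : crd) std::printf("%llu ", (unsigned long long)c);
  std::printf("\n"); // 0 2 1
  for (double v : val) std::printf("%g ", v);
  std::printf("\n"); // 1 2 0 3 4 0 0 5 6 7 8 0
  return 0;
}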