[Mlir-commits] [mlir] 160d483 - [mlir][sparse] implement loose-compressed/2:4 on direct IR codegen path (#71461)
llvmlistbot at llvm.org
Mon Nov 6 17:31:00 PST 2023
Author: Aart Bik
Date: 2023-11-06T17:30:56-08:00
New Revision: 160d483b1f3bda51a7ef28fae85920edcc43298e
URL: https://github.com/llvm/llvm-project/commit/160d483b1f3bda51a7ef28fae85920edcc43298e
DIFF: https://github.com/llvm/llvm-project/commit/160d483b1f3bda51a7ef28fae85920edcc43298e.diff
LOG: [mlir][sparse] implement loose-compressed/2:4 on direct IR codegen path (#71461)
Fills in the missing cases for direct IR codegen.
Note that non-permutation handling is still TBD.
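For context, a loose-compressed level keeps an explicit (lo, hi) position pair per parent entry rather than sharing one boundary between consecutive entries; that is why the codegen below doubles both the linear count and the parent position before appending to the positions memref. A minimal sketch of the encoding, mirroring the integration test added by this patch:

    #CSR_hi = #sparse_tensor.encoding<{
      map = (i, j) -> ( i : dense, j : loose_compressed)
    }>
    // positions[1] then holds a (lo, hi) pair per row; the nonzeros of
    // row i live in coordinates[1]/values at the half-open range [lo, hi).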
Added:
mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
index db969436a30712d..d5c9ee41215ae97 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -633,8 +633,8 @@ void sparse_tensor::fillDimShape(OpBuilder &builder, Location loc,
SmallVectorImpl<Value> &out) {
out.clear();
out.reserve(stt.getDimRank());
- for (const Size sh : stt.getDimShape()) {
- const auto s = ShapedType::isDynamic(sh) ? 0 : sh;
+ for (const Size sz : stt.getDimShape()) {
+ const auto s = ShapedType::isDynamic(sz) ? 0 : sz;
out.push_back(constantIndex(builder, loc, s));
}
}
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
index 0364c9ac908c219..08c38394a46343a 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -18,8 +18,6 @@
#include "CodegenUtils.h"
#include "SparseTensorDescriptor.h"
-#include "llvm/Support/FormatVariadic.h"
-
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -116,31 +114,36 @@ static void allocSchemeForRank(OpBuilder &builder, Location loc,
const SparseTensorType stt(desc.getRankedTensorType());
Value linear = constantIndex(builder, loc, 1);
const Level lvlRank = stt.getLvlRank();
- for (Level l = startLvl; l < lvlRank; l++) {
- const auto dlt = stt.getLvlType(l);
- if (isCompressedDLT(dlt)) {
+ for (Level lvl = startLvl; lvl < lvlRank; lvl++) {
+ const auto dlt = stt.getLvlType(lvl);
+ if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt)) {
// Append linear x positions, initialized to zero. Since each compressed
// dimension initially already has a single zero entry, this maintains
- // the desired "linear + 1" length property at all times.
+ // the desired "linear + 1" length property at all times. For loose
+ // compression, we multiply linear by two in order to append both the
+ // lo/hi positions.
Value posZero = constantZero(builder, loc, stt.getPosType());
- createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, l,
- posZero, linear);
+ if (isLooseCompressedDLT(dlt)) {
+ Value two = constantIndex(builder, loc, 2);
+ linear = builder.create<arith::MulIOp>(loc, linear, two);
+ }
+ createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
+ /*value=*/posZero, /*repeat=*/linear);
return;
- }
- if (isSingletonDLT(dlt)) {
+ } else if (isSingletonDLT(dlt) || is2OutOf4DLT(dlt)) {
return; // nothing to do
}
// Keep compounding the size, but nothing needs to be initialized
// at this level. We will eventually reach a compressed level or
// otherwise the values array for the from-here "all-dense" case.
assert(isDenseDLT(dlt));
- Value size = desc.getLvlSize(builder, loc, l);
+ Value size = desc.getLvlSize(builder, loc, lvl);
linear = builder.create<arith::MulIOp>(loc, linear, size);
}
// Reached values array so prepare for an insertion.
Value valZero = constantZero(builder, loc, stt.getElementType());
createPushback(builder, loc, desc, SparseTensorFieldKind::ValMemRef,
- std::nullopt, valZero, linear);
+ std::nullopt, /*value=*/valZero, /*repeat=*/linear);
}
/// Creates allocation operation.
@@ -157,12 +160,9 @@ static Value createAllocation(OpBuilder &builder, Location loc,
}
/// Creates allocation for each field in sparse tensor type. Note that
-/// for all dynamic memrefs, the memory size is really the capacity of
-/// the "vector", while the actual size resides in the sizes array.
-///
-/// TODO: for efficiency, we will need heuristics to make educated guesses
-/// on the required capacities (see heuristic variable).
-///
+/// for all dynamic memrefs in the sparse tensor storage layout, the
+/// memory size is really the capacity of the "vector", while the actual
+/// size resides in the sizes array.
static void createAllocFields(OpBuilder &builder, Location loc,
SparseTensorType stt, ValueRange dynSizes,
bool enableInit, SmallVectorImpl<Value> &fields,
@@ -206,6 +206,8 @@ static void createAllocFields(OpBuilder &builder, Location loc,
constantIndex(builder, loc, 16);
}
+ // Initializes all fields. An initial storage specifier and allocated
+ // positions/coordinates/values memrefs (with heuristic capacity).
foreachFieldAndTypeInSparseTensor(
stt,
[&builder, &fields, stt, loc, posHeuristic, crdHeuristic, valHeuristic,
@@ -218,14 +220,16 @@ static void createAllocFields(OpBuilder &builder, Location loc,
field = SparseTensorSpecifier::getInitValue(builder, loc, stt);
break;
case SparseTensorFieldKind::PosMemRef:
+ field = createAllocation(builder, loc, cast<MemRefType>(fType),
+ posHeuristic, enableInit);
+ break;
case SparseTensorFieldKind::CrdMemRef:
+ field = createAllocation(builder, loc, cast<MemRefType>(fType),
+ crdHeuristic, enableInit);
+ break;
case SparseTensorFieldKind::ValMemRef:
- field = createAllocation(
- builder, loc, cast<MemRefType>(fType),
- (fKind == SparseTensorFieldKind::PosMemRef) ? posHeuristic
- : (fKind == SparseTensorFieldKind::CrdMemRef) ? crdHeuristic
- : valHeuristic,
- enableInit);
+ field = createAllocation(builder, loc, cast<MemRefType>(fType),
+ valHeuristic, enableInit);
break;
}
assert(field);
@@ -234,21 +238,19 @@ static void createAllocFields(OpBuilder &builder, Location loc,
return true;
});
+ // Initialize the storage scheme to an empty tensor. Sets the lvlSizes
+ // and gives all position fields an initial zero entry, so that it is
+ // easier to maintain the "linear + 1" length property.
MutSparseTensorDescriptor desc(stt, fields);
-
- // Initialize the storage scheme to an empty tensor. Initialized memSizes
- // to all zeros, sets the dimSizes to known values and gives all position
- // fields an initial zero entry, so that it is easier to maintain the
- // "linear + 1" length property.
Value posZero = constantZero(builder, loc, stt.getPosType());
- for (Level lvlRank = stt.getLvlRank(), l = 0; l < lvlRank; l++) {
- // Fills dim sizes array.
+ for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) {
// FIXME: `toOrigDim` is deprecated.
- desc.setLvlSize(builder, loc, l, dimSizes[toOrigDim(stt.getEncoding(), l)]);
- // Pushes a leading zero to positions memref.
- if (stt.isCompressedLvl(l))
- createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, l,
- posZero);
+ desc.setLvlSize(builder, loc, lvl,
+ dimSizes[toOrigDim(stt.getEncoding(), lvl)]);
+ const auto dlt = stt.getLvlType(lvl);
+ if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt))
+ createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
+ /*value=*/posZero);
}
allocSchemeForRank(builder, loc, desc, /*rank=*/0);
}
@@ -347,7 +349,7 @@ static Value genCompressed(OpBuilder &builder, Location loc,
Value mszp1 = builder.create<arith::AddIOp>(loc, msz, one);
genStore(builder, loc, mszp1, positionsAtLvl, pp1);
createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef, lvl,
- lvlCoords[lvl]);
+ /*value=*/lvlCoords[lvl]);
// Prepare the next level "as needed".
if ((lvl + 1) < lvlRank)
allocSchemeForRank(builder, loc, desc, lvl + 1);
@@ -371,8 +373,6 @@ static void genEndInsert(OpBuilder &builder, Location loc,
const Level lvlRank = stt.getLvlRank();
for (Level l = 0; l < lvlRank; l++) {
const auto dlt = stt.getLvlType(l);
- if (isLooseCompressedDLT(dlt))
- llvm_unreachable("TODO: Not yet implemented");
if (isCompressedDLT(dlt)) {
// Compressed dimensions need a position cleanup for all entries
// that were not visited during the insertion pass.
@@ -407,7 +407,8 @@ static void genEndInsert(OpBuilder &builder, Location loc,
builder.setInsertionPointAfter(loop);
}
} else {
- assert(isDenseDLT(dlt) || isSingletonDLT(dlt));
+ assert(isDenseDLT(dlt) || isLooseCompressedDLT(dlt) ||
+ isSingletonDLT(dlt) || is2OutOf4DLT(dlt));
}
}
}
@@ -483,33 +484,37 @@ class SparseInsertGenerator
Value value = args.back();
Value parentPos = constantZero(builder, loc, builder.getIndexType());
// Generate code for every level.
- for (Level l = 0; l < lvlRank; l++) {
- const auto dlt = stt.getLvlType(l);
- if (isCompressedDLT(dlt)) {
+ for (Level lvl = 0; lvl < lvlRank; lvl++) {
+ const auto dlt = stt.getLvlType(lvl);
+ if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt)) {
// Create:
// if (!present) {
- // coordinates[l].push_back(coords[l])
- // <update positions and prepare level l + 1>
+ // coordinates[lvl].push_back(coords[lvl])
+ // <update positions and prepare level lvl + 1>
// }
- // positions[l] = coordinates.size() - 1
- // <insert @ positions[l] at next level l + 1>
+ // positions[lvl] = coordinates.size() - 1
+ // <insert @ positions[lvl] at next level lvl + 1>
+ if (isLooseCompressedDLT(dlt)) {
+ Value two = constantIndex(builder, loc, 2);
+ parentPos = builder.create<arith::MulIOp>(loc, parentPos, two);
+ }
parentPos =
- genCompressed(builder, loc, desc, coords, value, parentPos, l);
- } else if (isSingletonDLT(dlt)) {
+ genCompressed(builder, loc, desc, coords, value, parentPos, lvl);
+ } else if (isSingletonDLT(dlt) || is2OutOf4DLT(dlt)) {
// Create:
- // coordinates[l].push_back(coords[l])
- // positions[l] = positions[l-1]
- // <insert @ positions[l] at next level l + 1>
- createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef, l,
- coords[l]);
+ // coordinates[lvl].push_back(coords[lvl])
+ // positions[lvl] = positions[lvl-1]
+ // <insert @ positions[lvl] at next level lvl + 1>
+ createPushback(builder, loc, desc, SparseTensorFieldKind::CrdMemRef,
+ lvl, /*value=*/coords[lvl]);
} else {
assert(isDenseDLT(dlt));
// Construct the new position as:
- // positions[l] = size * positions[l-1] + coords[l]
- // <insert @ positions[l] at next level l + 1>
- Value size = desc.getLvlSize(builder, loc, l);
+ // positions[lvl] = size * positions[lvl-1] + coords[lvl]
+ // <insert @ positions[lvl] at next level lvl + 1>
+ Value size = desc.getLvlSize(builder, loc, lvl);
Value mult = builder.create<arith::MulIOp>(loc, size, parentPos);
- parentPos = builder.create<arith::AddIOp>(loc, mult, coords[l]);
+ parentPos = builder.create<arith::AddIOp>(loc, mult, coords[lvl]);
}
}
// Reached the actual value append/insert.
@@ -526,7 +531,6 @@ class SparseInsertGenerator
// <namePrefix>_<DLT>_<shape>_<ordering>_<eltType>_<crdWidth>_<posWidth>
constexpr const char kInsertFuncNamePrefix[] = "_insert_";
const SparseTensorType stt(llvm::cast<RankedTensorType>(rtp));
-
SmallString<32> nameBuffer;
llvm::raw_svector_ostream nameOstream(nameBuffer);
nameOstream << kInsertFuncNamePrefix;
@@ -543,8 +547,8 @@ class SparseInsertGenerator
// Static dim sizes are used in the generated code while dynamic sizes are
// loaded from the dimSizes buffer. This is the reason for adding the shape
// to the function name.
- for (const auto sh : stt.getDimShape())
- nameOstream << sh << "_";
+ for (const auto sz : stt.getDimShape())
+ nameOstream << sz << "_";
// Permutation information is also used in generating insertion.
if (!stt.isIdentity())
nameOstream << stt.getDimToLvl() << "_";
@@ -607,7 +611,6 @@ class SparseCallConverter : public OpConversionPattern<func::CallOp> {
assert(retOffset < newCall.getNumResults());
auto retType = ret.getType();
if (failed(typeConverter->convertType(retType, sparseFlat)))
- // This should never happen.
llvm_unreachable("Failed to convert type in sparse tensor codegen");
// Converted types can not be empty when the type conversion succeed.
@@ -755,9 +758,7 @@ class SparseTensorAllocConverter
const auto resType = getSparseTensorType(op);
if (!resType.hasEncoding())
return failure();
-
- // Construct allocation for each field.
- const Location loc = op.getLoc();
+ Location loc = op.getLoc();
if (op.getCopy()) {
auto desc = getDescriptorFromTensorTuple(adaptor.getCopy());
SmallVector<Value> fields;
@@ -778,18 +779,18 @@ class SparseTensorAllocConverter
return success();
}
- const Value sizeHint = op.getSizeHint();
- const ValueRange dynSizes = adaptor.getDynamicSizes();
+ // Construct allocation for each field.
+ Value sizeHint = op.getSizeHint();
+ ValueRange dynSizes = adaptor.getDynamicSizes();
const size_t found = dynSizes.size();
const int64_t expected = resType.getNumDynamicDims();
if (found != static_cast<size_t>(expected))
- return rewriter.notifyMatchFailure(
- op, llvm::formatv(
- "Got wrong number of dynamic sizes: Found={0}, Expected={1}",
- found, expected));
+ return rewriter.notifyMatchFailure(op,
+ "Got wrong number of dynamic sizes");
SmallVector<Value> fields;
createAllocFields(rewriter, loc, resType, dynSizes,
enableBufferInitialization, fields, sizeHint);
+
// Replace operation with resulting memrefs.
rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
return success();
@@ -817,19 +818,18 @@ class SparseTensorEmptyConverter : public OpConversionPattern<tensor::EmptyOp> {
return failure();
// Construct allocation for each field.
- const Location loc = op.getLoc();
- const Value sizeHint; // none
+ Location loc = op.getLoc();
+ Value sizeHint; // none
const ValueRange dynSizes = adaptor.getDynamicSizes();
const size_t found = dynSizes.size();
const int64_t expected = resType.getNumDynamicDims();
if (found != static_cast<size_t>(expected))
- return rewriter.notifyMatchFailure(
- op, llvm::formatv(
- "Got wrong number of dynamic sizes: Found={0}, Expected={1}",
- found, expected));
+ return rewriter.notifyMatchFailure(op,
+ "Got wrong number of dynamic sizes");
SmallVector<Value> fields;
createAllocFields(rewriter, loc, resType, dynSizes,
enableBufferInitialization, fields, sizeHint);
+
// Replace operation with resulting memrefs.
rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
return success();
@@ -1496,7 +1496,6 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
SmallVector<Value> fields;
createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false,
fields, nse);
- MutSparseTensorDescriptor desc(dstTp, fields);
// Now construct the dim2lvl and lvl2dim buffers.
Value dim2lvlBuffer;
@@ -1505,6 +1504,7 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
dim2lvlBuffer, lvl2dimBuffer);
// Read the COO tensor data.
+ MutSparseTensorDescriptor desc(dstTp, fields);
Value xs = desc.getAOSMemRef();
Value ys = desc.getValMemRef();
const Type boolTp = rewriter.getIntegerType(1);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index e9d4005feaee389..4fe9c59d8c320a7 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -380,14 +380,13 @@ class SparseTensorAllocConverter
LogicalResult
matchAndRewrite(bufferization::AllocTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override {
- if (op.getCopy())
- return rewriter.notifyMatchFailure(op,
- "sparse tensor copy not implemented");
- Location loc = op.getLoc();
const auto stt = getSparseTensorType(op);
if (!stt.hasEncoding())
return failure();
+ if (op.getCopy())
+ return rewriter.notifyMatchFailure(op, "alloc copy not implemented");
// Gather all dimension sizes as SSA values.
+ Location loc = op.getLoc();
const Dimension dimRank = stt.getDimRank();
SmallVector<Value> dimSizes;
dimSizes.reserve(dimRank);
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir
new file mode 100755
index 000000000000000..39d9b40439bcfb0
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_loose.mlir
@@ -0,0 +1,61 @@
+//--------------------------------------------------------------------------------------------------
+// WHEN CREATING A NEW TEST, PLEASE JUST COPY & PASTE WITHOUT EDITS.
+//
+// Set-up that's shared across all tests in this directory. In principle, this
+// config could be moved to lit.local.cfg. However, there are downstream users that
+// do not use these LIT config files. Hence why this is kept inline.
+//
+// DEFINE: %{sparse_compiler_opts} = enable-runtime-library=true
+// DEFINE: %{sparse_compiler_opts_sve} = enable-arm-sve=true %{sparse_compiler_opts}
+// DEFINE: %{compile} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts}"
+// DEFINE: %{compile_sve} = mlir-opt %s --sparse-compiler="%{sparse_compiler_opts_sve}"
+// DEFINE: %{run_libs} = -shared-libs=%mlir_c_runner_utils,%mlir_runner_utils
+// DEFINE: %{run_opts} = -e entry -entry-point-result=void
+// DEFINE: %{run} = mlir-cpu-runner %{run_opts} %{run_libs}
+// DEFINE: %{run_sve} = %mcr_aarch64_cmd --march=aarch64 --mattr="+sve" %{run_opts} %{run_libs}
+//
+// DEFINE: %{env} =
+//--------------------------------------------------------------------------------------------------
+
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+//
+// Do the same run, but now with direct IR generation.
+// REDEFINE: %{sparse_compiler_opts} = enable-runtime-library=false
+// RUN: %{compile} | env %{env} %{run} | FileCheck %s
+
+#CSR_hi = #sparse_tensor.encoding<{
+ map = (i, j) -> ( i : dense, j : loose_compressed)
+}>
+
+module {
+ func.func @entry() {
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f64
+ %d = arith.constant dense<[[ 1.0, 2.0, 3.0, 4.0 ],
+ [ 5.0, 6.0, 7.0, 8.0 ],
+ [ 0.0, 0.0, 5.5, 0.0 ],
+ [ 9.0, 10.0, 11.0, 12.0 ],
+ [13.0, 14.0, 15.0, 16.0 ]]> : tensor<5x4xf64>
+ %s = sparse_tensor.convert %d : tensor<5x4xf64> to tensor<5x4xf64, #CSR_hi>
+
+ //
+ // CHECK: ( 0, 4, 4, 8, 8, 9, 9, 13 )
+ // CHECK-NEXT: ( 0, 1, 2, 3, 0, 1, 2, 3, 2, 0, 1, 2, 3, 0, 1, 2, 3 )
+ // CHECK-NEXT: ( 1, 2, 3, 4, 5, 6, 7, 8, 5.5, 9, 10, 11, 12, 13, 14, 15, 16 )
+ //
+ %pos = sparse_tensor.positions %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref<?xindex>
+ %vecp = vector.transfer_read %pos[%c0], %c0 : memref<?xindex>, vector<8xindex>
+ vector.print %vecp : vector<8xindex>
+ %crd = sparse_tensor.coordinates %s {level = 1 : index } : tensor<5x4xf64, #CSR_hi> to memref<?xindex>
+ %vecc = vector.transfer_read %crd[%c0], %c0 : memref<?xindex>, vector<17xindex>
+ vector.print %vecc : vector<17xindex>
+ %val = sparse_tensor.values %s : tensor<5x4xf64, #CSR_hi> to memref<?xf64>
+ %vecv = vector.transfer_read %val[%c0], %f0 : memref<?xf64>, vector<17xf64>
+ vector.print %vecv : vector<17xf64>
+
+ // Release the resources.
+ bufferization.dealloc_tensor %s: tensor<5x4xf64, #CSR_hi>
+
+ return
+ }
+}
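A quick reading of the CHECK lines above, assuming the usual (lo, hi) pairing of loose-compressed positions: the printed prefix ( 0, 4, 4, 8, 8, 9, 9, 13 ) decodes as row 0 -> [0, 4), row 1 -> [4, 8), row 2 -> [8, 9), and row 3 -> [9, 13); the last row's pair (13, 17) simply falls outside the 8-wide vector read.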