[Mlir-commits] [mlir] a7bf2e5 - [mlir][sparse] refactoring isAdmissibleTensorExp into codegen
Peiming Liu
llvmlistbot at llvm.org
Fri Jan 27 11:37:29 PST 2023
Author: Peiming Liu
Date: 2023-01-27T19:37:23Z
New Revision: a7bf2e558feb954396c93fd3fa740775944a179b
URL: https://github.com/llvm/llvm-project/commit/a7bf2e558feb954396c93fd3fa740775944a179b
DIFF: https://github.com/llvm/llvm-project/commit/a7bf2e558feb954396c93fd3fa740775944a179b.diff
LOG: [mlir][sparse] refactoring isAdmissibleTensorExp into codegen
This patch moves some utils into the CodegenEnv class. This should make the code easier to follow, and it eliminates several indirect value assignments that use `ptr**`.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D142040
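For readers skimming the diff, here is a minimal standalone sketch of the pattern this refactoring follows. The types and function bodies are hypothetical simplifications, not the real MLIR signatures: the old free function reported its results through `OpOperand **` and `unsigned *` output parameters, while the new CodegenEnv member functions record the same results as class state and return a plain success flag.

    #include <cassert>
    #include <cstdio>

    // Hypothetical stand-in; the real class is mlir::OpOperand.
    struct OpOperand {};

    // Old style: results escape through pointer(-to-pointer) out-parameters.
    static bool isAdmissibleTensorExpOld(OpOperand *lhs, OpOperand **sparseOut,
                                         unsigned *outerParNest) {
      // ...admissibility analysis elided...
      *sparseOut = lhs;   // indirect assignment through ptr**
      *outerParNest = 1;
      return true;
    }

    // New style: the environment owns the state and the queries fill it in.
    class CodegenEnvSketch {
    public:
      bool isAdmissibleTensorExp(OpOperand *lhs) {
        // ...admissibility analysis elided...
        sparseOut = lhs;
        return true;
      }
      bool isAdmissibleTopoOrder() {
        // ...loop-nest analysis elided...
        outerParNest = 1;
        return true;
      }
      void startEmit() const {
        assert(sparseOut && "analyses must have populated the state");
      }

    private:
      OpOperand *sparseOut = nullptr;
      unsigned outerParNest = -1u;
    };

    int main() {
      OpOperand op;

      OpOperand *sparseOut = nullptr;
      unsigned outerParNest = -1u;
      bool okOld = isAdmissibleTensorExpOld(&op, &sparseOut, &outerParNest);

      CodegenEnvSketch env;
      bool okNew = env.isAdmissibleTensorExp(&op) && env.isAdmissibleTopoOrder();
      if (okNew)
        env.startEmit();

      std::printf("old: %d new: %d\n", okOld, okNew);
      return 0;
    }

With the state inside the environment, the driver in Sparsification.cpp no longer needs to thread `sparseOut`/`outerParNest` through the analysis calls, which is the main simplification visible in the diff below.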
Added:
Modified:
mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
index d844999f553f..4a8fbf76bd56 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp
@@ -7,11 +7,25 @@
//===----------------------------------------------------------------------===//
#include "CodegenEnv.h"
+
+#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Linalg/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include <optional>
using namespace mlir;
using namespace mlir::sparse_tensor;
+//===----------------------------------------------------------------------===//
+// Code generation environment helper functions
+//===----------------------------------------------------------------------===//
+
+/// Returns true if tensor materializes uninitialized into the computation.
+static bool isMaterializing(Value val) {
+ return val.getDefiningOp<tensor::EmptyOp>() ||
+ val.getDefiningOp<bufferization::AllocTensorOp>();
+}
+
//===----------------------------------------------------------------------===//
// Code generation environment constructor and general methods
//===----------------------------------------------------------------------===//
@@ -25,11 +39,18 @@ CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts,
expFilled(), expAdded(), expCount(), redVal(), redExp(-1u),
redCustom(-1u) {}
-void CodegenEnv::startEmit(OpOperand *so, unsigned lv) {
- assert(sparseOut == nullptr && insChain == nullptr &&
- "must only start emitting once");
- sparseOut = so;
- outerParNest = lv;
+LogicalResult CodegenEnv::initTensorExp() {
+ // Builds the tensor expression for the Linalg operation in SSA form.
+ std::optional<unsigned> optExp = latticeMerger.buildTensorExpFromLinalg(op());
+ if (!optExp || !isAdmissibleTensorExp(*optExp))
+ return failure();
+
+ tensorExp = *optExp;
+ return success();
+}
+
+void CodegenEnv::startEmit() {
+ assert(insChain == nullptr && "must only start emitting once");
if (sparseOut) {
insChain = sparseOut->get();
latticeMerger.setHasSparseOut(true);
@@ -66,6 +87,71 @@ std::optional<Operation *> CodegenEnv::genLoopBoundary(
return r;
}
+//===----------------------------------------------------------------------===//
+// Code generation environment verify functions.
+//===----------------------------------------------------------------------===//
+
+bool CodegenEnv::isAdmissibleTensorExp(unsigned exp) {
+ // We reject any expression that makes a reduction from `-outTensor`, as those
+ // expressions create a dependency between the current iteration (i) and the
+ // previous iteration (i-1). It would require iterating over the whole
+ // coordinate space, which prevents exploiting sparsity for faster code.
+ for (utils::IteratorType it : linalgOp.getIteratorTypesArray()) {
+ if (it == utils::IteratorType::reduction) {
+ if (latticeMerger.hasNegateOnOut(exp))
+ return false;
+ break;
+ }
+ }
+
+ OpOperand *lhs = linalgOp.getDpsInitOperand(0);
+ unsigned tensor = lhs->getOperandNumber();
+ auto enc = getSparseTensorEncoding(lhs->get().getType());
+ // A non-annotated output tensor is assumed dense, and becomes a random
+ // access n-dim memref. Admissible since insertions cannot occur.
+ if (!enc || enc.isAllDense())
+ return true;
+
+ // A tensor expression with a sparse output tensor that changes its values
+ // but not its nonzero structure, an operation called "simply dynamic" in
+ // [Bik96,Ch9], is also admissible without special env.
+ if (latticeMerger.isSingleCondition(tensor, exp))
+ return true;
+
+ // Accept "truly dynamic" if the output tensor materializes uninitialized
+ // into the computation and insertions occur in lexicographic index order.
+ sparseOut = lhs;
+ return isMaterializing(lhs->get());
+}
+
+bool CodegenEnv::isAdmissibleTopoOrder() {
+ if (!hasSparseOutput())
+ return true;
+
+ OpOperand *lhs = linalgOp.getDpsInitOperand(0);
+ // Accept "truly dynamic" if the output tensor materializes uninitialized
+ // into the computation and insertions occur in lexicographic index order.
+ unsigned nest = 0;
+ auto iteratorTypes = linalgOp.getIteratorTypesArray();
+ for (unsigned i = 0, e = latticeMerger.getNumLoops(); i < e; i++) {
+ if (!latticeMerger.isFilterLoop(topSortAt(i))) {
+ // We only count non-filter loops as filter loops should be considered
+ // as a special type of parallel loops.
+ if (linalg::isReductionIterator(iteratorTypes[topSortAt(i)]))
+ break; // terminate at first reduction
+ nest++;
+ }
+ }
+ // Determine admissible dynamic insertion situations:
+ // (1) fully injective, since there are no reductions,
+ // (2) admissible 1-d expansion in innermost dimension.
+ if (nest >= linalgOp.getRank(lhs) - 1) {
+ outerParNest = nest;
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Code generation environment topological sort methods
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
index 853ee4dc51fd..6f16d8f8daef 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h
@@ -44,12 +44,15 @@ class CodegenEnv {
// General methods.
//
+ LogicalResult initTensorExp();
+ unsigned getTensorExp() const { return tensorExp; }
+
linalg::GenericOp op() const { return linalgOp; }
const SparsificationOptions &options() const { return sparseOptions; }
Merger &merger() { return latticeMerger; }
LoopEmitter &emitter() { return loopEmitter; }
- void startEmit(OpOperand *so, unsigned lv);
+ void startEmit();
/// Generates loop boundary statements (entering/exiting loops). The function
/// passes and updates the passed-in parameters.
@@ -72,6 +75,18 @@ class CodegenEnv {
return latticeMerger.getDimLevelType(b);
}
+ //
+ // Code generation environment verify functions.
+ //
+
+ /// Whether the tensor expression is admissible for codegen.
+ /// It also sets the sparseOut if the output tensor is sparse.
+ bool isAdmissibleTensorExp(unsigned exp);
+
+ /// Whether the iteration graph is sorted in admissible topoOrder.
+ /// Sets outerParNest on success when there is a sparse output.
+ bool isAdmissibleTopoOrder();
+
//
// Topological delegate and sort methods.
//
@@ -156,6 +171,9 @@ class CodegenEnv {
Value redVal;
unsigned redExp;
unsigned redCustom;
+
+ // The root tensor expression of the kernel.
+ unsigned tensorExp;
};
} // namespace sparse_tensor
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index c31f20e4582f..72063a16c7f0 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -250,6 +250,16 @@ static unsigned getNumCompoundAffineOnSparseDims(linalg::GenericOp op) {
return num;
}
+static bool hasCompoundAffineOnSparseOut(linalg::GenericOp op) {
+ OpOperand *out = op.getDpsInitOperand(0);
+ auto enc = getSparseTensorEncoding(out->get().getType());
+ if (!enc || enc.isAllDense())
+ return false;
+
+ return getNumCompoundAffineOnSparseDims(op.getMatchingIndexingMap(out),
+ out->get());
+}
+
/// Helper method to inspect sparse encodings in the tensor types.
/// Fills the per-dimension sparsity information for all tensors.
/// Returns true if the sparse annotations and affine subscript
@@ -496,7 +506,7 @@ static bool computeIterationGraph(CodegenEnv &env, unsigned mask,
// will be skipped more often.
if (mask & SortMask::kIncludeUndef) {
unsigned tensor = t.getOperandNumber();
- for (unsigned i = 0; i < n; i++)
+ for (unsigned i = 0; i < n; i++) {
if (isCompressedDLT(env.dlt(tensor, i)) ||
isSingletonDLT(env.dlt(tensor, i))) {
for (unsigned j = 0; j < n; j++)
@@ -508,6 +518,7 @@ static bool computeIterationGraph(CodegenEnv &env, unsigned mask,
assert(isDenseDLT(env.dlt(tensor, i)) ||
isUndefDLT(env.dlt(tensor, i)));
}
+ }
}
}
// Topologically sort the iteration graph to determine loop order.
@@ -516,92 +527,6 @@ static bool computeIterationGraph(CodegenEnv &env, unsigned mask,
return topSortOptimal(env, n, iteratorTypes, inDegree, adjM);
}
-/// Returns true if tensor materializes uninitialized into the computation.
-static bool isMaterializing(Value val) {
- return val.getDefiningOp<tensor::EmptyOp>() ||
- val.getDefiningOp<bufferization::AllocTensorOp>();
-}
-
-/// Returns true when the tensor expression is admissible for codegen.
-/// Since all sparse input tensors are admissible, we just need to check
-/// whether the out tensor in the tensor expression codegen is admissible.
-/// Sets `sparseOut` to the tensor and `outerParNest` to the outer injective
-/// nesting depth when a "truly dynamic" sparse tensor output occurs.
-static bool isAdmissibleTensorExp(CodegenEnv &env, unsigned exp,
- OpOperand **sparseOut,
- unsigned *outerParNest) {
- // We reject any expression that makes a reduction from `-outTensor`, as those
- // expressions create a dependency between the current iteration (i) and the
- // previous iteration (i-1). It would require iterating over the whole
- // coordinate space, which prevent exploiting sparsity for faster code.
- for (utils::IteratorType it : env.op().getIteratorTypesArray()) {
- if (it == utils::IteratorType::reduction) {
- if (env.merger().hasNegateOnOut(exp))
- return false;
- break;
- }
- }
-
- OpOperand *lhs = env.op().getDpsInitOperand(0);
- unsigned tensor = lhs->getOperandNumber();
- auto enc = getSparseTensorEncoding(lhs->get().getType());
- // An non-annotated output tensor is assumed dense, and becomes a random
- // access n-dim memref. Admissible since insertions cannot occur.
- if (!enc)
- return true;
- // An all-dense annotated "sparse" output tensor becomes a linearized random
- // access 1-dim memref. Also admissible since insertions cannot occur.
- bool allDense = true;
- unsigned numLoops =
- env.merger().getNumLoops(); // numNativeLoops + numFilterLoops
- for (unsigned i = 0; i < env.merger().getNumLoops(); i++)
- if (isCompressedDLT(env.dlt(tensor, i)) ||
- isSingletonDLT(env.dlt(tensor, i))) {
- allDense = false;
- break;
- } else {
- assert(isDenseDLT(env.dlt(tensor, i)) || isUndefDLT(env.dlt(tensor, i)));
- }
- if (allDense)
- return true;
-
- // TODO: support compound affine expression on sparse output.
- if (getNumCompoundAffineOnSparseDims(env.op().getMatchingIndexingMap(lhs),
- lhs->get()) != 0)
- return false;
-
- // A tensor expression with a sparse output tensor that changes its values
- // but not its nonzero structure, an operation called "simply dynamic" in
- // [Bik96,Ch9], is also admissible without special env.
- if (env.merger().isSingleCondition(tensor, exp))
- return true;
-
- // Accept "truly dynamic" if the output tensor materializes uninitialized
- // into the computation and insertions occur in lexicographic index order.
- if (isMaterializing(lhs->get())) {
- auto iteratorTypes = env.op().getIteratorTypesArray();
- unsigned nest = 0;
- for (unsigned i = 0; i < numLoops; i++) {
- if (!env.merger().isFilterLoop(env.topSortAt(i))) {
- // We only count non-filter loops as filter loops should be considered
- // as a special type of parallel loops.
- if (linalg::isReductionIterator(iteratorTypes[env.topSortAt(i)]))
- break; // terminate at first reduction
- nest++;
- }
- }
- // Determine admissible dynamic insertion situations:
- // (1) fully injective, since there are no reductions,
- // (2) admissible 1-d expansion in innermost dimension.
- if (nest >= env.op().getRank(lhs) - 1) {
- *sparseOut = lhs;
- *outerParNest = nest;
- return true;
- }
- }
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Sparse compiler synthesis methods (statements and expressions).
//===----------------------------------------------------------------------===//
@@ -1271,72 +1196,73 @@ static bool translateBitsToTidDimPairs(
unsigned numloopCond = 0;
// Converts bits to array + dim pair
- env.merger().foreachTidDimPairInBits(all, [&,
- idx](unsigned b, unsigned tid,
- std::optional<unsigned> dim,
- DimLevelType dlt) {
- if (simple.test(b)) {
- if (isUndefDLT(dlt)) {
- // An undefined dlt in the lattices, we probably mean to iterate based
- // on the dim of output tensor.
- // E.g., this could be a synthetic tensor (for invariants and sparse
- // output tensor).
- // out[i][j] = invariant; or a broadcast
- // out[i][j] = in[i] (j is undef for input)
- tid = env.merger().getOutTensorID();
- dim = env.merger().getDimNum(tid, idx);
- // Skips invalid dim (e.g., when this is a zero ranked tensor).
- if (!dim)
- return;
- }
- tids.push_back(tid);
- dims.push_back(*dim);
- numloopCond++;
- } else if (isDenseDLT(dlt)) {
- tids.push_back(tid);
- dims.push_back(*dim);
- } else {
- assert(isUndefDLT(dlt));
- linalg::GenericOp op = env.op();
- if (tid >= op.getNumDpsInputs())
- // We only handle affine expression on input tensors (for now).
- return;
- OpOperand *operand = &op->getOpOperand(tid);
- auto enc = getSparseTensorEncoding(operand->get().getType());
- // Non-annotated dense tensors requires no special handling.
- if (!enc)
- return;
-
- ArrayRef<AffineExpr> affines =
- op.getMatchingIndexingMap(operand).getResults();
- assert(affines.size() == enc.getDimLevelType().size());
- for (unsigned i = 0, e = affines.size(); i < e; i++) {
- AffineExpr exp = affines[toOrigDim(enc, i)];
- // Skip simple affine expression and non dense dimensions (which has
- // it own filter loop).
- if (exp.isa<AffineDimExpr>() || !isDenseDLT(getDimLevelType(enc, i)))
- continue;
-
- // Constant affine expression are handled in genLoop
- if (!exp.isa<AffineConstantExpr>()) {
- bool atLevel = false;
- if (isInvariantAffine(env, exp, idx, atLevel) && atLevel) {
- // If the compound affine is invariant and we are right at the
- // level. We need to generate the address according to the affine
- // expression. This is also the best place we can do it to avoid
- // putting it inside inner loops.
- // NOTE: It assumes that the levels of the input tensor are
- // initialized in order (and it is also currently guaranteed by
- // computeIterationGraph), another more admissible approach might be
- // accepting out-of-order access between consecutive dense levels.
- affineTids.push_back(tid);
- affineDims.push_back(i);
- exps.push_back(exp);
+ env.merger().foreachTidDimPairInBits(
+ all, [&, idx](unsigned b, unsigned tid, std::optional<unsigned> dim,
+ DimLevelType dlt) {
+ if (simple.test(b)) {
+ if (isUndefDLT(dlt)) {
+ // An undefined dlt in the lattices, we probably mean to iterate
+ // based on the dim of output tensor.
+ // E.g., this could be a synthetic tensor (for invariants and sparse
+ // output tensor).
+ // out[i][j] = invariant; or a broadcast
+ // out[i][j] = in[i] (j is undef for input)
+ tid = env.merger().getOutTensorID();
+ dim = env.merger().getDimNum(tid, idx);
+ // Skips invalid dim (e.g., when this is a zero ranked tensor).
+ if (!dim)
+ return;
+ }
+ tids.push_back(tid);
+ dims.push_back(*dim);
+ numloopCond++;
+ } else if (isDenseDLT(dlt)) {
+ tids.push_back(tid);
+ dims.push_back(*dim);
+ } else {
+ assert(isUndefDLT(dlt));
+ linalg::GenericOp op = env.op();
+ if (tid >= op.getNumDpsInputs())
+ // We only handle affine expression on input tensors (for now).
+ return;
+ OpOperand *operand = &op->getOpOperand(tid);
+ auto enc = getSparseTensorEncoding(operand->get().getType());
+ // Non-annotated dense tensors require no special handling.
+ if (!enc)
+ return;
+
+ ArrayRef<AffineExpr> affines =
+ op.getMatchingIndexingMap(operand).getResults();
+ assert(affines.size() == enc.getDimLevelType().size());
+ for (unsigned i = 0, e = affines.size(); i < e; i++) {
+ AffineExpr exp = affines[toOrigDim(enc, i)];
+ // Skip simple affine expressions and non-dense dimensions (which have
+ // their own filter loops).
+ if (exp.isa<AffineDimExpr>() ||
+ !isDenseDLT(getDimLevelType(enc, i)))
+ continue;
+
+ // Constant affine expressions are handled in genLoop
+ if (!exp.isa<AffineConstantExpr>()) {
+ bool atLevel = false;
+ if (isInvariantAffine(env, exp, idx, atLevel) && atLevel) {
+ // If the compound affine is invariant and we are right at the
+ // level. We need to generate the address according to the
+ // affine expression. This is also the best place we can do it
+ // to avoid putting it inside inner loops.
+ // NOTE: It assumes that the levels of the input tensor are
+ // initialized in order (and it is also currently guaranteed by
+ // computeIterationGraph), another more admissible approach
+ // might be accepting out-of-order access between consecutive
+ // dense levels.
+ affineTids.push_back(tid);
+ affineDims.push_back(i);
+ exps.push_back(exp);
+ }
+ }
}
}
- }
- }
- });
+ });
if (isDenseDLT(env.dlt(env.merger().getOutTensorID(), idx))) {
// Note that we generate dense indices of the output tensor
@@ -1513,8 +1439,9 @@ struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
LogicalResult matchAndRewrite(linalg::GenericOp op,
PatternRewriter &rewriter) const override {
- // Only accept single output operations.
- if (op.getNumDpsInits() != 1)
+ // Only accept single output operations without affine index on sparse
+ // output.
+ if (op.getNumDpsInits() != 1 || hasCompoundAffineOnSparseOut(op))
return failure();
// Sets up a code generation environment.
@@ -1528,11 +1455,10 @@ struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
if (!findSparseAnnotations(env))
return failure();
- // Builds the tensor expression for the Linalg operation in SSA form.
- std::optional<unsigned> optExp = env.merger().buildTensorExpFromLinalg(op);
- if (!optExp)
+ // Constructs the tensor expression tree from `op`; returns failure if the
+ // tree cannot be built or the tensor expression is inadmissible.
+ if (failed(env.initTensorExp()))
return failure();
- unsigned exp = *optExp;
// Computes a topologically sorted iteration graph to ensure tensors
// are visited in natural index order. Gradually relaxes the considered
@@ -1541,31 +1467,31 @@ struct GenericOpSparsifier : public OpRewritePattern<linalg::GenericOp> {
// to resolve cycles by inserting a conversion.
bool isAdmissible = false;
bool hasCycle = true;
- OpOperand *sparseOut = nullptr;
- unsigned outerParNest = -1u;
+
// An const list of all masks that we used for interation graph
// computation. Must be ordered from more strict to less strict.
const auto allMask = {SortMask::kIncludeAll, SortMask::kIncludeUndef,
SortMask::kIncludeDense, SortMask::kSparseOnly};
- for (auto mask : allMask)
+ for (auto mask : allMask) {
if (computeIterationGraph(env, mask)) {
hasCycle = false;
- if (isAdmissibleTensorExp(env, exp, &sparseOut, &outerParNest)) {
+ if (env.isAdmissibleTopoOrder()) {
isAdmissible = true;
break;
}
// else try a set of less strict constraints.
}
+ }
if (hasCycle)
return resolveCycle(env, rewriter); // one last shot
if (!isAdmissible)
return failure(); // inadmissible expression, reject
// Recursively generates code if admissible.
- env.startEmit(sparseOut, outerParNest);
+ env.startEmit();
genBuffers(env, rewriter);
genInitConstantDenseAddress(env, rewriter);
- genStmt(env, rewriter, exp, 0);
+ genStmt(env, rewriter, env.getTensorExp(), 0);
genResult(env, rewriter);
return success();
}