[Mlir-commits] [mlir] [mlir][sparse] schedule sparse kernels in a separate pass from sparsification. (PR #72423)
Yinying Li
llvmlistbot at llvm.org
Wed Nov 15 10:54:29 PST 2023
================
@@ -0,0 +1,273 @@
+//===- LoopScheduler.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LoopScheduler.h"
+
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
+#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/IR/AffineExprVisitor.h"
+#include "mlir/IR/BuiltinTypes.h"
+
+using namespace mlir;
+using namespace mlir::sparse_tensor;
+
+namespace {
+
+/// A visitor that walks an affine expression and selects a single
+/// AffineDimExpr from it, preferring dimensions whose iterator type (looked up
+/// by dimension position in `iterTypes`) equals the requested iterator type.
+///
+/// Selection rule, applied in visitation order:
+///  * the first dimension visited is always picked;
+///  * any later dimension whose iterator type equals the requested type
+///    replaces the current pick, so with several matches the last visited
+///    match wins;
+///  * consequently, when no dimension matches, the first DimExpr in the
+///    expression is returned.
+class AffineDimFinder : public AffineExprVisitor<AffineDimFinder> {
+public:
+  /// `itTypes` maps a dimension position to its iterator type. Only a view
+  /// (ArrayRef) is stored, so the referenced storage must outlive the finder.
+  explicit AffineDimFinder(ArrayRef<utils::IteratorType> itTypes)
+      : iterTypes(itTypes) {}
+
+  // Overrides method from AffineExprVisitor.
+  void visitDimExpr(AffineDimExpr expr) {
+    // Take the very first dim unconditionally; afterwards only a dim with the
+    // requested iterator type may replace the current pick.
+    if (pickedDim == nullptr || pickIterType == iterTypes[expr.getPosition()])
+      pickedDim = expr;
+  }
+
+  /// Set the desired iterator type that we want to pick.
+  void setPickedIterType(utils::IteratorType iterType) {
+    pickIterType = iterType;
+  }
+
+  /// Get the picked AffineDimExpr. A preceding walk must have visited at
+  /// least one dimension, otherwise there is nothing to cast.
+  AffineDimExpr getDimExpr() const {
+    return llvm::cast<AffineDimExpr>(pickedDim);
+  }
+
+  /// Forgets the previous pick and then walks `expr` in post-order.
+  void walkPostOrder(AffineExpr expr) {
+    pickedDim = nullptr;
+    AffineExprVisitor<AffineDimFinder>::walkPostOrder(expr);
+  }
+
+private:
+  /// The picked AffineDimExpr after visit (null before any dim is visited).
+  AffineExpr pickedDim;
+  /// The iterator type that we want. Initialized so that a walk performed
+  /// before setPickedIterType() never reads an indeterminate value.
+  utils::IteratorType pickIterType = utils::IteratorType::parallel;
+  /// The mapping between dim=>iterator type.
+  ArrayRef<utils::IteratorType> iterTypes;
+};
+
+// Gathers every AffineDimExpr reached while visiting an affine expression,
+// appending each one to `dims` in the order it is visited.
+struct AffineDimCollector : public AffineExprVisitor<AffineDimCollector> {
+  // The dimensions collected so far.
+  SmallVector<AffineDimExpr> dims;
+
+  // Overrides method from AffineExprVisitor.
+  void visitDimExpr(AffineDimExpr expr) { dims.emplace_back(expr); }
+};
+
+} // namespace
+
+/// Returns true when `mask1` and `mask2` have at least one bit in common.
+inline static bool includesAny(SortMask mask1, SortMask mask2) {
+  const unsigned lhsBits = static_cast<unsigned>(mask1);
+  const unsigned rhsBits = static_cast<unsigned>(mask2);
+  return (lhsBits & rhsBits) != 0;
+}
+
+/// Returns true when `mask` has the SortMask::kIncludeDenseInput bit set.
+inline static bool includesDenseInput(SortMask mask) {
+  const SortMask denseInputBit = SortMask::kIncludeDenseInput;
+  return includesAny(mask, denseInputBit);
+}
+
+/// Returns true when `mask` has the SortMask::kIncludeDenseOutput bit set.
+inline static bool includesDenseOutput(SortMask mask) {
+  const SortMask denseOutputBit = SortMask::kIncludeDenseOutput;
+  return includesAny(mask, denseOutputBit);
+}
+
+/// Topologically sorts the loops of the iteration graph. The graph is stored
+/// as an adjacency matrix, so the sort takes O(n^2) time.
+///
+/// Two worklists of ready (zero in-degree) loops are kept, one for reduction
+/// iterators and one for all others. A reduction loop is only scheduled when
+/// no other loop is ready, which pushes reductions as late as possible.
+///
+/// Consumes `inDegree` while sorting. Returns the loop order as a permutation
+/// map, or a null AffineMap when not every loop could be ordered (a cycle).
+AffineMap LoopScheduler::topoSort() {
+  const unsigned loopCount = getNumLoops();
+  std::vector<unsigned> readyReductions; // ready reduction iterators
+  std::vector<unsigned> readyParallels;  // ready non-reduction iterators
+
+  // Files a loop whose in-degree has reached zero under the right worklist.
+  auto markReady = [&](unsigned loop) {
+    const bool isReduction = iterTypes[loop] == utils::IteratorType::reduction;
+    (isReduction ? readyReductions : readyParallels).push_back(loop);
+  };
+
+  for (unsigned loop = 0; loop != loopCount; ++loop)
+    if (inDegree[loop] == 0)
+      markReady(loop);
+
+  SmallVector<unsigned> order;
+  while (!readyParallels.empty() || !readyReductions.empty()) {
+    // Scheduling a reduction loop early might make the loop sequence
+    // inadmissible, so drain the non-reduction worklist first.
+    std::vector<unsigned> &worklist =
+        readyParallels.empty() ? readyReductions : readyParallels;
+    const unsigned src = worklist.back();
+    worklist.pop_back();
+    order.push_back(src);
+
+    // Drop the outgoing edges of `src`; successors left without incoming
+    // edges become ready.
+    for (unsigned dst = 0; dst != loopCount; ++dst) {
+      if (!itGraph[src][dst])
+        continue;
+      if (--inDegree[dst] == 0)
+        markReady(dst);
+    }
+  }
+
+  // Some loop never became ready: cycle detected.
+  if (order.size() != loopCount)
+    return AffineMap();
+
+  return AffineMap::getPermutationMap(order, out.getContext());
+}
+
+/// Builds a LoopScheduler from the operands, indexing maps and iterator types
+/// of `genericOp`. The op is expected to be a demapped sparse kernel with
+/// exactly one init (output) operand; this is only checked by an assertion.
+LoopScheduler LoopScheduler::fromGenericOp(linalg::GenericOp genericOp) {
+  // Must be a demapped sparse kernel.
+  assert(!hasAnyNonIdentityOperandsOrResults(genericOp) &&
+         hasAnySparseOperandOrResult(genericOp) &&
+         genericOp.getNumDpsInits() == 1);
+
+  SmallVector<utils::IteratorType> loopKinds =
+      genericOp.getIteratorTypesArray();
+  Value result = genericOp.getDpsInitOperand(0)->get();
+  SmallVector<Value> operands = genericOp.getDpsInputs();
+
+  // The last indexing map belongs to the single output; the remaining maps
+  // belong to the inputs, in order.
+  SmallVector<AffineMap> inputMaps = genericOp.getIndexingMapsArray();
+  AffineMap resultMap = inputMaps.pop_back_val();
+
+  return LoopScheduler(std::move(operands), std::move(inputMaps), result,
+                       resultMap, std::move(loopKinds));
+}
+
+/// Constructs a scheduler over the given input tensors and their loop-to-level
+/// maps, the output tensor with its map, and the iterator type of every loop.
+/// The vectors are moved into the scheduler. The iteration graph is allocated
+/// as a getNumLoops() x getNumLoops() matrix without edges, together with one
+/// in-degree slot per loop.
+LoopScheduler::LoopScheduler(SmallVector<Value> &&ins,
+                             SmallVector<AffineMap> &&loop2InsLvl, Value out,
+                             AffineMap loop2OutLvl,
+                             SmallVector<utils::IteratorType> &&iterTypes)
+    : ins(std::move(ins)), loop2InsLvl(std::move(loop2InsLvl)), out(out),
+      loop2OutLvl(loop2OutLvl), iterTypes(std::move(iterTypes)) {
+  // NOTE: the parameters shadow the members of the same name and have been
+  // moved from by the initializer list above. The checks below must therefore
+  // go through `this->`; otherwise they would inspect the moved-from vectors
+  // and could never fail.
+
+  // One map per tensor.
+  assert(this->loop2InsLvl.size() == this->ins.size());
+  // All the affine maps have the same number of dimensions (loops).
+  assert(llvm::all_equal(llvm::map_range(
+      this->loop2InsLvl, [](AffineMap m) { return m.getNumDims(); })));
+  // The number of results of the map should match the rank of the tensor.
+  assert(llvm::all_of(llvm::zip(this->loop2InsLvl, this->ins), [](auto mvPair) {
+    auto [m, v] = mvPair;
+    const int64_t numResults = static_cast<int64_t>(m.getNumResults());
+    if (auto shapedType = llvm::dyn_cast<ShapedType>(v.getType())) {
+      // Unranked shaped types have no rank to compare against.
+      return !shapedType.hasRank() || numResults == shapedType.getRank();
+    }
+    // Non-shaped (scalar) operands behave like rank-0 tensors.
+    return numResults == 0;
+  }));
+
+  const unsigned numLoops = getNumLoops();
+  itGraph.resize(numLoops, std::vector<bool>(numLoops, false));
+  inDegree.resize(numLoops);
+}
+
+/// Rebuilds the iteration graph from scratch and returns a loop order for it.
+///
+/// A tensor contributes ordering constraints when it carries a sparse tensor
+/// encoding, or when `mask` asks for dense inputs (respectively the dense
+/// output) to be included. A tensor equal to `ignored` never contributes.
+///
+/// Returns whatever topoSort() produces for the resulting graph.
+AffineMap LoopScheduler::schedule(SortMask mask, Value ignored) {
+  // Reset the iteration graph: no edges, and every in-degree back to zero.
+  for (auto &edges : itGraph)
+    edges.assign(edges.size(), false);
+  std::fill(inDegree.begin(), inDegree.end(), 0);
+
+  // Constraints from the inputs.
+  const bool withDenseInputs = includesDenseInput(mask);
+  for (auto [operand, lvlMap] : llvm::zip(ins, loop2InsLvl)) {
+    const auto encoding = getSparseTensorEncoding(operand.getType());
+    // Dense inputs only take part when requested.
+    const bool wanted = encoding || withDenseInputs;
+    if (wanted && operand != ignored)
+      addConstraints(operand, lvlMap);
+  }
+
+  // Constraints from the output, under the analogous condition.
+  const auto outEncoding = getSparseTensorEncoding(out.getType());
+  const bool outWanted = outEncoding || includesDenseOutput(mask);
+  if (outWanted && out != ignored)
+    addConstraints(out, loop2OutLvl);
+
+  return topoSort();
+}
+
+void LoopScheduler::addConstraints(Value t, AffineMap loop2LvlMap) {
+ auto addIterOrdering = [this](unsigned f, unsigned t) {
+ if (!itGraph[f][t] && f != t) {
+ itGraph[f][t] = true;
+ inDegree[t]++;
+ }
+ };
+
+ AffineDimFinder finder(iterTypes);
+ finder.setPickedIterType(utils::IteratorType::reduction);
+
+ // To compute iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6],
+ // we require that there exist d_x \in {d0, d1, d3} and d_y \in {d4, d5, d6},
----------------
yinying-lisa-li wrote:
require
https://github.com/llvm/llvm-project/pull/72423
More information about the Mlir-commits
mailing list