[Mlir-commits] [mlir] 4e2c978 - [mlir][linalg] Use getUpperBoundForIndex in hoisting (NFC).
Tobias Gysi
llvmlistbot at llvm.org
Wed Nov 10 08:03:20 PST 2021
Author: Tobias Gysi
Date: 2021-11-10T15:55:55Z
New Revision: 4e2c978f440fb791b84258234a0d3b00c36ce6a4
URL: https://github.com/llvm/llvm-project/commit/4e2c978f440fb791b84258234a0d3b00c36ce6a4
DIFF: https://github.com/llvm/llvm-project/commit/4e2c978f440fb791b84258234a0d3b00c36ce6a4.diff
LOG: [mlir][linalg] Use getUpperBoundForIndex in hoisting (NFC).
Replace the custom upper bound computation in hoisting with the new getUpperBoundForIndex method.
Depends On D113546
Reviewed By: nicolasvasilache
Differential Revision: https://reviews.llvm.org/D113547
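
For readers skimming the thread, the core of the change is the new per-loop size computation in getPackedTensorSizes. Below is a condensed sketch, not the verbatim patch: the types, names, and the getUpperBoundForIndex signature are taken from the diff further down, and `b` is assumed to be the ImplicitLocOpBuilder inserting above the outermost enclosing loop.

    // For each packing loop, bound the loop length from above and size the
    // packed tensor dimension by (ub - lb).ceilDiv(step).
    SmallVector<Value> dynamicTensorSizes;
    for (scf::ForOp forOp : packingLoops) {
      // Compute a loop-independent upper bound for the upper bound of `forOp`.
      AffineMap boundMap;
      SmallVector<Value> boundOperands;
      getUpperBoundForIndex(forOp.upperBound(), boundMap, boundOperands);
      Value ubVal = b.createOrFold<AffineMinOp>(boundMap, boundOperands);
      // Maximal trip count of the loop: (ub - lb).ceilDiv(step).
      AffineExpr lb, ub, step;
      bindDims(b.getContext(), lb, ub);
      bindSymbols(b.getContext(), step);
      dynamicTensorSizes.push_back(b.createOrFold<AffineApplyOp>(
          (ub - lb).ceilDiv(step),
          ValueRange{forOp.lowerBound(), ubVal, forOp.step()}));
    }

This replaces the FlatAffineValueConstraints-based machinery removed in the diff, which folded the upper-bound slice into a constraint set and could fail on unsupported operations; the new helper makes the computation infallible from the caller's perspective.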
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 0fc87ba2d7cf..09c48a90327a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -11,9 +11,7 @@
//===----------------------------------------------------------------------===//
#include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
-#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
#include "mlir/Dialect/Affine/Utils.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
@@ -26,7 +24,6 @@
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/Dominance.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
#include "mlir/Transforms/LoopUtils.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
@@ -58,9 +55,8 @@ struct HoistingAnalysis {
bool isValid() { return valid; }
- /// Footprint of the packedTensor, computed from the packingLoops and
- /// `backwardSlice`.
- FailureOr<SmallVector<Value>> getPackedTensorSizes(ImplicitLocOpBuilder &b);
+ /// Footprint of the packedTensor, computed from the packingLoops.
+ SmallVector<Value> getPackedTensorSizes(ImplicitLocOpBuilder &b);
/// The outermost loop, determined by `nLevels` above which `padTensorOp` will
/// be hoisted.
@@ -229,21 +225,20 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
valid = true;
}
-/// Add all index operands of `operation` to `indexEdges`. An index operand is
-/// an operand of type index.
-static void addIndexOperandsToIndexEdges(Operation *operation,
- SetVector<Value> &indexEdges) {
- for (Value operand : operation->getOperands())
- if (operand.getType().isIndex())
- indexEdges.insert(operand);
-}
-
SmallVector<scf::ForOp>
HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
tensor::ExtractSliceOp sliceOp) {
// Set of all values used for index computation.
SetVector<Value> indexEdges;
+ // Add all index operands of `operation` to `indexEdges`. An index operand is
+ // an operand of type index.
+ auto addIndexOperandsToIndexEdges = [&](Operation *operation) {
+ for (Value operand : operation->getOperands())
+ if (operand.getType().isIndex())
+ indexEdges.insert(operand);
+ };
+
// Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index
// type in `backwardSlice`. Add the index operands of an operation to
// `indexEdges` if one of its results is an index edge found so far and store
@@ -268,7 +263,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
// Add the index operands of `padTensorOp` and `sliceOp` to start the
// exploration of the index computation.
if (op == padTensorOp || op == sliceOp) {
- addIndexOperandsToIndexEdges(op, indexEdges);
+ addIndexOperandsToIndexEdges(op);
continue;
}
// Add the index operands of the loop if its induction variable is
@@ -276,7 +271,7 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
// `indexingLoops`
if (auto forOp = dyn_cast<scf::ForOp>(op)) {
if (indexEdges.contains(forOp.getInductionVar())) {
- addIndexOperandsToIndexEdges(op, indexEdges);
+ addIndexOperandsToIndexEdges(op);
indexingLoops.push_back(forOp);
continue;
}
@@ -285,199 +280,46 @@ HoistingAnalysis::getIndexingLoops(PadTensorOp padTensorOp,
// used for index computation.
if (llvm::any_of(op->getResults(),
[&](Value result) { return indexEdges.contains(result); }))
- addIndexOperandsToIndexEdges(op, indexEdges);
+ addIndexOperandsToIndexEdges(op);
}
return indexingLoops;
}
-static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
- return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp<ConstantOp>();
-}
-
-/// For each loop in `loops`, determine the ops involved in the construction of
-/// its upper bound---up to the outerLimit loop--- and fold them as new
-/// inequalities in the constraint set.
-/// This is achieved by computing the backwardSlice of the loop's upper bound
-/// and iteratively folding each op in reverse topological order to guarantee
-/// use-def ordering.
-/// As operations are folded in, their result is projected out of the
-/// constraints set.
-/// The following operations are supported:
-/// - scf::ForOp are simply skipped.
-/// - AffineApplyOp are composed to replace the result by an equality.
-/// - AffineMinOp are composed by adding each entry as an upper bound.
-/// Additionally, the following terminal operations are handled:
-/// - DimOp and ConstantOp are skipped.
-/// If any other operation is met, return failure.
-// TODO: extend on a per-need basis.
-static LogicalResult
-foldUpperBoundsIntoConstraintsSet(FlatAffineValueConstraints &constraints,
- scf::ForOp outerLimit,
- ArrayRef<scf::ForOp> loops) {
- SetVector<Value> toProjectOut;
- for (scf::ForOp loop : loops) {
- auto ub = loop.upperBound();
-
- // Set of all values used for index computation.
- SetVector<Value> indexEdges;
- indexEdges.insert(ub);
-
- // Compute the backward slice `indexSlice` containing the index computation
- // performed to obtain the upper bound `ub`. Starting from `ub` add the
- // index operands of an operation to `indexEdges` if one of its results is
- // an index edge. Otherwise, stop the slice computation. For a loop, check
- // if its induction variable is an index edge.
- //
- // Example:
- // ```
- // %c0 = arith.constant 0
- // scf.for %i = %c0 to ...
- // scf.for %j = %c0 to ...
- // %ub = affine.min #map(%i)
- // scf.for %k = %c0 to %ub
- // ```
- // After computing the backward slice we obtain:
- // indexEdges = [%ub, %i, %c0]
- // indexSlice = [arith.constant 0, scf.for %i, affine.min #map(%i)]
- SetVector<Operation *> indexSlice;
- getBackwardSlice(ub, &indexSlice, [&](Operation *op) {
- // Continue only along the index operands of the ForOp.
- if (auto forOp = dyn_cast<scf::ForOp>(op)) {
- // Consider only loops part of the enclosing loops.
- if (!outerLimit->isAncestor(op))
- return false;
- if (!indexEdges.contains(forOp.getInductionVar()))
- return false;
- addIndexOperandsToIndexEdges(op, indexEdges);
- return true;
- }
- // All supported index operations have one result.
- assert(op->getNumResults() == 1 &&
- "expect operations to have one result");
- if (!indexEdges.contains(op->getResult(0)))
- return false;
- addIndexOperandsToIndexEdges(op, indexEdges);
- return true;
- });
- indexSlice.insert(ub.getDefiningOp());
-
- // Iterate over all ops in the slice and compose them in the constraints.
- for (Operation *op : llvm::reverse(indexSlice)) {
- // All ForOps have previously been added to the constraints and ConstantOp
- // and DimOp are terminals of the index computation.
- if (isa<scf::ForOp, arith::ConstantOp, tensor::DimOp>(op))
- continue;
- // Check all index computation operations are supported.
- if (!isa<AffineApplyOp, AffineMinOp>(op))
- return failure();
- // Ensure there is an id.
- auto ensureIdFailed = [&](Value v) {
- if (constraints.containsId(v)) {
- unsigned pos;
- constraints.findId(v, &pos);
- return pos >= constraints.getNumDimIds();
- }
- constraints.appendDimId(v);
- return false;
- };
-
- // Ensure all ids exist and add results for later projection.
- if (llvm::any_of(op->getResults(), ensureIdFailed) ||
- llvm::any_of(op->getOperands(), ensureIdFailed))
- return failure();
-
- // All supported ops have 1 result.
- // TODO: extend when needed.
- assert(op->getNumResults() == 1 &&
- "expect operations to have one result");
- toProjectOut.insert(op->getResult(0));
-
- // Compose supported ops.
- if (auto affineApplyOp = dyn_cast<AffineApplyOp>(op)) {
- AffineValueMap avm(affineApplyOp.getAffineMap(),
- affineApplyOp.getOperands(),
- affineApplyOp.getResult());
- if (failed(constraints.composeMap(&avm)))
- return failure();
- continue;
- }
- auto affineMinOp = cast<AffineMinOp>(op);
- unsigned pos;
- bool foundMinOp = constraints.findId(affineMinOp.getResult(), &pos);
- (void)foundMinOp;
- assert(foundMinOp);
- AffineMap alignedMap = constraints.computeAlignedMap(
- affineMinOp.getAffineMap(), affineMinOp.getOperands());
- if (failed(
- constraints.addBound(FlatAffineConstraints::UB, pos, alignedMap)))
- return failure();
- }
- }
- for (Value v : toProjectOut)
- constraints.projectOut(v);
- return success();
-}
-
-// Footprint of the packedTensor, computed from the packingLoops and
-// `backwardSlice`.
-FailureOr<SmallVector<Value>>
+SmallVector<Value>
HoistingAnalysis::getPackedTensorSizes(ImplicitLocOpBuilder &b) {
-  // Create the base affine constraints for the packedLoops.
- auto constraints = FlatAffineValueConstraints::getHyperrectangular(
- llvm::to_vector<8>(llvm::map_range(
- packingLoops, [](scf::ForOp op) { return op.getInductionVar(); })),
- llvm::to_vector<8>(llvm::map_range(
- packingLoops, [](scf::ForOp op) { return op.lowerBound(); })),
- llvm::to_vector<8>(llvm::map_range(
- packingLoops, [](scf::ForOp op) { return op.upperBound(); })));
-
- // Iteratively try to fold the upper bounds into the constraints set.
- if (failed(foldUpperBoundsIntoConstraintsSet(
- constraints, outermostEnclosingForOp, packingLoops)))
- return failure();
-
- int nPackedLoops = packingLoops.size();
- SmallVector<AffineMap> lbs(nPackedLoops), ubs(nPackedLoops);
- // Compute the bounds of the first positions, assuming the others are fixed.
- constraints.getSliceBounds(/*pos=*/0, /*num=*/nPackedLoops,
- outermostEnclosingForOp->getContext(), &lbs, &ubs);
-
- SmallVector<Value> allValues;
- constraints.getAllValues(&allValues);
- SmallVector<Value> allNonLoopValues(allValues.begin() + nPackedLoops,
- allValues.end());
-
- // For each packingLoop, create the extent by (ub - lb).ceilDiv(step).
- // IP just before the outermost loop considered that we hoist above.
- assert(nPackedLoops == static_cast<int64_t>(lbs.size()) &&
- "expected matching lb sizes");
- assert(nPackedLoops == static_cast<int64_t>(ubs.size()) &&
- "expected matching ub sizes");
SmallVector<Value> dynamicTensorSizes;
- for (auto it : llvm::zip(packingLoops, lbs, ubs)) {
- scf::ForOp loop = std::get<0>(it);
- AffineMap lbMap = std::get<1>(it);
- AffineMap ubMap = std::get<2>(it);
- SmallVector<Value> lbOperands(allNonLoopValues);
- canonicalizeMapAndOperands(&lbMap, &lbOperands);
- Value lbVal = b.createOrFold<AffineMaxOp>(lbMap, lbOperands);
-
- SmallVector<Value> ubOperands(allNonLoopValues);
- canonicalizeMapAndOperands(&ubMap, &ubOperands);
- Value ubVal = b.createOrFold<AffineMinOp>(ubMap, ubOperands);
+ // Upper bound the packing loop lengths to size the packed tensor. Taking
+ // upper bounds can make the sizes of the packed tensor independent of the
+ // enclosing loops. This independence is a prerequisite for reusing the same
+ // buffer for all enclosing loop iterations and hoisting its allocation out of
+ // the enclosing loops.
+ for (auto forOp : packingLoops) {
+ // Compute an upper bound `ubVal` for the upper bound of `forOp`.
+ AffineMap boundMap;
+ SmallVector<Value> boundOperands;
+ getUpperBoundForIndex(forOp.upperBound(), boundMap, boundOperands);
+ Value ubVal = b.createOrFold<AffineMinOp>(boundMap, boundOperands);
+ // Compute the maximal packing loop length as (ub - lb).ceilDiv(step) and
+ // store the result to `dynamicTensorSizes`.
+ // TODO: instead of using the lower bound of `forOp` directly, implement a
+ // lower bound computation similar to the upper bound computation.
AffineExpr lb, ub, step;
bindDims(b.getContext(), lb, ub);
bindSymbols(b.getContext(), step);
Value res = b.createOrFold<AffineApplyOp>(
(ub - lb).ceilDiv(step),
- ValueRange{lbVal, ubVal, cast<scf::ForOp>(loop).step()});
-
+ ValueRange{forOp.lowerBound(), ubVal, cast<scf::ForOp>(forOp).step()});
dynamicTensorSizes.push_back(res);
}
+
return dynamicTensorSizes;
}
+static bool isDefinedOutsideOrConstant(scf::ForOp outer, Value v) {
+ return outer.isDefinedOutsideOfLoop(v) || v.getDefiningOp<ConstantOp>();
+}
+
/// Return the current iteration number in the loop (iv - lb).ceilDiv(step).
/// The returned Value is guaranteed not to depend on any loop comprised in
/// [`outer`, `forOp`].
@@ -512,10 +354,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
scf::ForOp outer = analysis.outermostEnclosingForOp;
ImplicitLocOpBuilder b(outer->getLoc(), outer);
- auto maybeDynamicTensorSizes = analysis.getPackedTensorSizes(b);
- if (failed(maybeDynamicTensorSizes))
- return failure();
- SmallVector<Value> dynamicTensorSizes = *maybeDynamicTensorSizes;
+ SmallVector<Value> dynamicTensorSizes = analysis.getPackedTensorSizes(b);
// Update actual number of loops, which may be smaller.
int nPackedLoops = analysis.packingLoops.size();
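
To see why taking an upper bound makes the packed tensor sizes loop-independent, consider a hypothetical example in the spirit of the one in the old comments (the map and constants are made up for illustration):

    #map = affine_map<(d0) -> (4, 64 - d0)>
    scf.for %i = %c0 to %c64 step %c4 {
      %ub = affine.min #map(%i)
      scf.for %k = %c0 to %ub step %c1 { ... }
    }

For %i in [0, 64), affine.min(4, 64 - %i) never exceeds 4, so getUpperBoundForIndex can bound the %k loop length by the constant 4. The packed tensor dimension for the %k loop then has extent ceildiv(4 - 0, 1) = 4, independent of %i, which is exactly the prerequisite for reusing one buffer across all %i iterations and hoisting its allocation above the %i loop.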