[Mlir-commits] [mlir] [mlir][TilingInterface] Use `LoopLikeOpInterface` in tiling using SCF to unify tiling with `scf.for` and `scf.forall`. (PR #77874)
Abhishek Varma
llvmlistbot at llvm.org
Fri Jan 12 00:04:07 PST 2024
================
@@ -135,93 +111,231 @@ static Operation *cloneOpAndUpdateDestinationArgs(RewriterBase &rewriter,
return clonedOp;
}
-/// Generate an empty loop nest that represents the tiled loop nest shell.
+/// Type of the call back function used to generate the body of the tiled
+/// loops. The loop generation methods use this callback to generate
+/// the body of the inner-most tile loop. The call back is provided with
+/// - `rewriter`: with insertion point set to the end of the inner most loop
+/// body
+/// - `ivs`: the induction variables for the surrounding loops.
+/// - `regionIterArgs`: the basic block arguments of the inner most loop that
+/// correspond to the init/result of the loop.
+/// - `resultOffsets/Sizes`: The call back returns the result of tiling the
+/// operation, but in `resultOffsets` and `resultSizes` the callback function
+/// is expected to populate the offsets/sizes to use while inserting the
+/// result back into the corresponding `regionIterArgs`. These values
+/// are used by the loop generation methods to create the appropriate yield
+/// values from the inner most loop.
+/// The call back returns the `TilingResult` obtained from tiling the operation.
+using TileLoopBodyFn = std::function<FailureOr<TilingResult>(
+ RewriterBase &rewriter, Location loc, ValueRange ivs,
+ ValueRange regionIterArgs,
+ SmallVector<SmallVector<OpFoldResult>> &resultOffsets,
+ SmallVector<SmallVector<OpFoldResult>> &resultSizes)>;
+
+/// Generate the tile-loop nest using `scf.for` operation.
/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
/// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
-/// - In `offsets` and `sizes` return the multi-dimensional offset and size of
-/// the tile processed within the inner most loop.
-/// Note that this methods adds `scf.yield` operation for all but the innermost
-/// loop. These yield the value returned by the immediately inner loop. The
-/// caller is expected to add the scf.yield operation for the innermost loop.
-static SmallVector<scf::ForOp> generateTileLoopNest(
- OpBuilder &builder, Location loc, ArrayRef<Range> loopRanges,
- ArrayRef<OpFoldResult> tileSizes, SmallVector<OpFoldResult> &offsets,
- SmallVector<OpFoldResult> &sizes, ValueRange destinationTensors = {}) {
- if (loopRanges.empty())
- return {};
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `tileLoopBodyFn` is called to generate the loop body of the inner most
+/// loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+/// populated.
+/// The `TilingResult` returned by calling `tileLoopBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult> generateLoopNestUsingForOp(
+ RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
+ ArrayRef<OpFoldResult> tileSizes, ValueRange destinationTensors,
+ TileLoopBodyFn tileLoopBodyFn, SmallVector<LoopLikeOpInterface> &loops) {
+ assert(!loopRanges.empty() && "unexpected empty loop ranges");
assert(loopRanges.size() == tileSizes.size() &&
"expected as many tile sizes as loop ranges");
- OpBuilder::InsertionGuard guard(builder);
- SmallVector<scf::ForOp> loops;
- offsets.resize(loopRanges.size());
- sizes.resize(loopRanges.size());
-
- for (auto loopRange : llvm::enumerate(loopRanges)) {
- Value offset =
- getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
- Value size =
- getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
- Value tileSize = getValueOrCreateConstantIndexOp(
- builder, loc, tileSizes[loopRange.index()]);
+ OpBuilder::InsertionGuard guard(rewriter);
+ SmallVector<Value> ivs;
+
+ for (auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) {
// No loops if tile size is zero. Set offset and size to the loop
// offset and size.
- if (matchPattern(tileSize, m_Zero())) {
- offsets[loopRange.index()] = offset;
- sizes[loopRange.index()] = size;
+ if (isConstantIntValue(tileSize, 0)) {
continue;
}
- auto loop = builder.create<scf::ForOp>(
- loc, offset, size, tileSize, destinationTensors,
- [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
- ValueRange /*iterArgs*/) {
- sizes[loopRange.index()] =
- getBoundedTileSize(bodyBuilder, bodyLoc, loopRange.value(), iv,
- getAsOpFoldResult(tileSize));
- });
- offsets[loopRange.index()] = loop.getInductionVar();
+ Value lb = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.offset);
+ Value ub = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.size);
+ Value step = getValueOrCreateConstantIndexOp(rewriter, loc, tileSize);
+ auto loop =
+ rewriter.create<scf::ForOp>(loc, lb, ub, step, destinationTensors,
+ [](OpBuilder &bodyBuilder, Location bodyLoc,
+ Value iv, ValueRange /*iterArgs*/) {});
loops.push_back(loop);
- builder.setInsertionPointToEnd(loop.getBody());
+ ivs.push_back(loop.getInductionVar());
+ rewriter.setInsertionPointToEnd(loop.getBody());
destinationTensors = loop.getRegionIterArgs();
}
+ SmallVector<SmallVector<OpFoldResult>> resultOffsets, resultSizes;
+ FailureOr<TilingResult> tilingResult = tileLoopBodyFn(
+ rewriter, loc, ivs, destinationTensors, resultOffsets, resultSizes);
+ if (failed(tilingResult)) {
+ return rewriter.notifyMatchFailure(
+ loc, "failed to generate inner tile loop body");
+ }
+ if (loops.empty()) {
+ return tilingResult;
+ }
+
+ // 6. Yield all the results of the tiled operation.
+ SmallVector<Value> yieldedValues;
+ for (auto [tiledValue, destinationTensor, resultOffset, resultSize] :
+ llvm::zip_equal(tilingResult->tiledValues, destinationTensors,
+ resultOffsets, resultSizes)) {
+ SmallVector<OpFoldResult> resultStride(resultOffset.size(),
+ rewriter.getIndexAttr(1));
+ auto insertSlice = rewriter.create<tensor::InsertSliceOp>(
+ loc, tiledValue, destinationTensor, resultOffset, resultSize,
+ resultStride);
+ yieldedValues.push_back(insertSlice);
+ }
+ rewriter.create<scf::YieldOp>(loc, yieldedValues);
+
// Add the scf.yield operations for all the outer loops.
- if (!loops.empty()) {
- for (auto [outerLoop, innerLoop] :
- llvm::zip_equal(MutableArrayRef(loops).drop_back(),
- MutableArrayRef(loops).drop_front())) {
- builder.setInsertionPointToEnd(outerLoop.getBody());
- builder.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop.getResults());
- }
+ for (auto [outerLoop, innerLoop] :
+ llvm::zip_equal(MutableArrayRef(loops).drop_back(),
+ MutableArrayRef(loops).drop_front())) {
+ rewriter.setInsertionPointToEnd(
+ cast<scf::ForOp>(outerLoop.getOperation()).getBody());
+ rewriter.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop->getResults());
}
- return loops;
+ return tilingResult;
+}
+
+/// Generate the tile-loop nest using `scf.for` operation.
----------------
Abhishek-Varma wrote:
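nit: `scf.for` -> `scf.forall` in the doc comment above (it repeats the wording already used for the `scf.for` loop-nest generator).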
https://github.com/llvm/llvm-project/pull/77874
More information about the Mlir-commits
mailing list