[Mlir-commits] [mlir] [mlir][TilingInterface] Use `LoopLikeOpInterface` in tiling using SCF to unify tiling with `scf.for` and `scf.forall`. (PR #77874)
lorenzo chelini
llvmlistbot at llvm.org
Fri Jan 12 07:42:18 PST 2024
================
@@ -135,93 +111,231 @@ static Operation *cloneOpAndUpdateDestinationArgs(RewriterBase &rewriter,
return clonedOp;
}
-/// Generate an empty loop nest that represents the tiled loop nest shell.
+/// Type of the call back function used to generate the body of the tiled
+/// loops. The loop generation methods use this callback to generate
+/// the body of the inner-most tile loop. The call back is provided with
+/// - `rewriter`: with insertion point set to the end of the inner most loop
+/// body
+/// - `ivs`: the induction variables for the surrounding loops.
+/// - `regionIterArgs`: the basic block arguments of the inner most loop that
+/// correspond to the init/result of the loop.
+/// - `resultOffsets/Sizes`: The call back returns the result of tiling the
+/// operation, but in `resultOffsets` and `resultSizes` the callback function
+/// is expected to populate the offsets/sizes to use while inserting the
+/// result back into the corresponding `regionIterArgs`. These values
+/// are used by the loop generation methods to create the appropriate yield
+/// values from the inner most loop.
+/// The call back returns the `TilingResult` obtained from tiling the operation.
+using TileLoopBodyFn = std::function<FailureOr<TilingResult>(
+ RewriterBase &rewriter, Location loc, ValueRange ivs,
+ ValueRange regionIterArgs,
+ SmallVector<SmallVector<OpFoldResult>> &resultOffsets,
+ SmallVector<SmallVector<OpFoldResult>> &resultSizes)>;
+
+/// Generate the tile-loop nest using `scf.for` operation.
/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
/// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
-/// - In `offsets` and `sizes` return the multi-dimensional offset and size of
-/// the tile processed within the inner most loop.
-/// Note that this methods adds `scf.yield` operation for all but the innermost
-/// loop. These yield the value returned by the immediately inner loop. The
-/// caller is expected to add the scf.yield operation for the innermost loop.
-static SmallVector<scf::ForOp> generateTileLoopNest(
- OpBuilder &builder, Location loc, ArrayRef<Range> loopRanges,
- ArrayRef<OpFoldResult> tileSizes, SmallVector<OpFoldResult> &offsets,
- SmallVector<OpFoldResult> &sizes, ValueRange destinationTensors = {}) {
- if (loopRanges.empty())
- return {};
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `tileLoopBodyFn` is called to generate the loop body of the inner most
+/// loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+/// populated.
+/// The `TilingResult` returned by calling `tileLoopBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult> generateLoopNestUsingForOp(
+ RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
+ ArrayRef<OpFoldResult> tileSizes, ValueRange destinationTensors,
+ TileLoopBodyFn tileLoopBodyFn, SmallVector<LoopLikeOpInterface> &loops) {
+ assert(!loopRanges.empty() && "unexpected empty loop ranges");
assert(loopRanges.size() == tileSizes.size() &&
"expected as many tile sizes as loop ranges");
- OpBuilder::InsertionGuard guard(builder);
- SmallVector<scf::ForOp> loops;
- offsets.resize(loopRanges.size());
- sizes.resize(loopRanges.size());
-
- for (auto loopRange : llvm::enumerate(loopRanges)) {
- Value offset =
- getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().offset);
- Value size =
- getValueOrCreateConstantIndexOp(builder, loc, loopRange.value().size);
- Value tileSize = getValueOrCreateConstantIndexOp(
- builder, loc, tileSizes[loopRange.index()]);
+ OpBuilder::InsertionGuard guard(rewriter);
+ SmallVector<Value> ivs;
+
+ for (auto [loopRange, tileSize] : llvm::zip_equal(loopRanges, tileSizes)) {
// No loops if tile size is zero. Set offset and size to the loop
// offset and size.
- if (matchPattern(tileSize, m_Zero())) {
- offsets[loopRange.index()] = offset;
- sizes[loopRange.index()] = size;
+ if (isConstantIntValue(tileSize, 0)) {
continue;
}
- auto loop = builder.create<scf::ForOp>(
- loc, offset, size, tileSize, destinationTensors,
- [&](OpBuilder &bodyBuilder, Location bodyLoc, Value iv,
- ValueRange /*iterArgs*/) {
- sizes[loopRange.index()] =
- getBoundedTileSize(bodyBuilder, bodyLoc, loopRange.value(), iv,
- getAsOpFoldResult(tileSize));
- });
- offsets[loopRange.index()] = loop.getInductionVar();
+ Value lb = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.offset);
+ Value ub = getValueOrCreateConstantIndexOp(rewriter, loc, loopRange.size);
+ Value step = getValueOrCreateConstantIndexOp(rewriter, loc, tileSize);
+ auto loop =
+ rewriter.create<scf::ForOp>(loc, lb, ub, step, destinationTensors,
+ [](OpBuilder &bodyBuilder, Location bodyLoc,
+ Value iv, ValueRange /*iterArgs*/) {});
loops.push_back(loop);
- builder.setInsertionPointToEnd(loop.getBody());
+ ivs.push_back(loop.getInductionVar());
+ rewriter.setInsertionPointToEnd(loop.getBody());
destinationTensors = loop.getRegionIterArgs();
}
+ SmallVector<SmallVector<OpFoldResult>> resultOffsets, resultSizes;
+ FailureOr<TilingResult> tilingResult = tileLoopBodyFn(
+ rewriter, loc, ivs, destinationTensors, resultOffsets, resultSizes);
+ if (failed(tilingResult)) {
+ return rewriter.notifyMatchFailure(
+ loc, "failed to generate inner tile loop body");
+ }
+ if (loops.empty()) {
+ return tilingResult;
+ }
+
+ // 6. Yield all the results of the tiled operation.
+ SmallVector<Value> yieldedValues;
+ for (auto [tiledValue, destinationTensor, resultOffset, resultSize] :
+ llvm::zip_equal(tilingResult->tiledValues, destinationTensors,
+ resultOffsets, resultSizes)) {
+ SmallVector<OpFoldResult> resultStride(resultOffset.size(),
+ rewriter.getIndexAttr(1));
+ auto insertSlice = rewriter.create<tensor::InsertSliceOp>(
+ loc, tiledValue, destinationTensor, resultOffset, resultSize,
+ resultStride);
+ yieldedValues.push_back(insertSlice);
+ }
+ rewriter.create<scf::YieldOp>(loc, yieldedValues);
+
// Add the scf.yield operations for all the outer loops.
- if (!loops.empty()) {
- for (auto [outerLoop, innerLoop] :
- llvm::zip_equal(MutableArrayRef(loops).drop_back(),
- MutableArrayRef(loops).drop_front())) {
- builder.setInsertionPointToEnd(outerLoop.getBody());
- builder.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop.getResults());
- }
+ for (auto [outerLoop, innerLoop] :
+ llvm::zip_equal(MutableArrayRef(loops).drop_back(),
+ MutableArrayRef(loops).drop_front())) {
+ rewriter.setInsertionPointToEnd(
+ cast<scf::ForOp>(outerLoop.getOperation()).getBody());
+ rewriter.create<scf::YieldOp>(outerLoop.getLoc(), innerLoop->getResults());
}
- return loops;
+ return tilingResult;
+}
+
+/// Generate the tile-loop nest using `scf.forall` operation.
+/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
+/// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `mappingVector` is the mapping attributes to use for loop construction.
+/// Can be empty.
+/// - `tiledBodyFn` is called to generate the loop body of the inner most
+/// loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+/// populated.
+/// The `TilingResult` returned by calling `tiledBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult> generateLoopNestUsingForallOp(
+ RewriterBase &rewriter, Location loc, ArrayRef<Range> loopRanges,
+ ArrayRef<OpFoldResult> tileSizes, ArrayRef<Attribute> mappingVector,
+ ValueRange destinationTensors, TileLoopBodyFn tiledBodyFn,
+ SmallVector<LoopLikeOpInterface> &loops) {
+ SmallVector<OpFoldResult> lbs, ubs, steps;
+ assert(!loopRanges.empty() && "unexpected empty loop ranges");
+ assert(loopRanges.size() == tileSizes.size() &&
+ "expected as many tile sizes as loop ranges");
+ OpBuilder::InsertionGuard guard(rewriter);
+ SmallVector<OpFoldResult> offsets(loopRanges.size()),
+ sizes(loopRanges.size());
+
+ for (auto [tileSize, loopRange] : llvm::zip(tileSizes, loopRanges)) {
+ if (isConstantIntValue(tileSize, 0))
+ continue;
+ lbs.push_back(loopRange.offset);
+ ubs.push_back(loopRange.size);
+ steps.push_back(tileSize);
+ }
+ assert(!lbs.empty() && "Expected at least one loop range");
+
+ std::optional<ArrayAttr> mappingAttr;
+ if (!mappingVector.empty()) {
+ mappingAttr = rewriter.getArrayAttr(mappingVector);
+ }
+
+ auto forallOp = rewriter.create<scf::ForallOp>(
+ loc, lbs, ubs, steps, destinationTensors, mappingAttr);
+ loops.push_back(forallOp);
+
+ rewriter.setInsertionPoint(forallOp.getTerminator());
+ destinationTensors = forallOp.getRegionOutArgs();
+ SmallVector<SmallVector<OpFoldResult>> resultOffsets, resultSizes;
+ FailureOr<TilingResult> tilingResult =
+ tiledBodyFn(rewriter, loc, forallOp.getInductionVars(),
+ destinationTensors, resultOffsets, resultSizes);
+
+ if (failed(tilingResult)) {
+ return rewriter.notifyMatchFailure(loc, "failed to generate loop body");
+ }
+
+ rewriter.setInsertionPointToEnd(forallOp.getTerminator().getBody());
+ for (auto [tiledValue, destinationTensor, resultOffset, resultSize] :
+ llvm::zip_equal(tilingResult->tiledValues, destinationTensors,
+ resultOffsets, resultSizes)) {
+ SmallVector<OpFoldResult> resultStride(resultOffset.size(),
+ rewriter.getIndexAttr(1));
+
+ rewriter.create<tensor::ParallelInsertSliceOp>(
+ loc, tiledValue, destinationTensor, resultOffset, resultSize,
+ resultStride);
+ }
+ return tilingResult;
+}
+
+/// Generate the tile-loop nest using the loop construct specified in `options`.
+/// - `options`: Tiling options specified.
+/// - `loopRanges` specifies the lb, ub and step of the untiled iteration space.
+/// - `tileSizes` is the tile sizes to use. Zero represent untiled loops.
+/// - `destinationTensors` are the init values to use for the outer most loop.
+/// - `tiledBodyFn` is called to generate the loop body of the inner most
+/// loop.
+/// - `loops` is an in-out parameter into which the generated loops are
+/// populated.
+/// The `TilingResult` returned by calling `tiledBodyFn` is returned back
+/// to the caller.
+static FailureOr<TilingResult>
+generateLoopNest(RewriterBase &rewriter, Location loc,
+ const scf::SCFTilingOptions &options,
+ ArrayRef<Range> loopRanges, ArrayRef<OpFoldResult> tileSizes,
+ ValueRange destinationTensors, TileLoopBodyFn tiledBodyFn,
+ SmallVector<LoopLikeOpInterface> &loops) {
+ // If the tile sizes are all zero, no loops are generated. Just call the
+ // callback function to handle untiled case.
+ if (llvm::all_of(tileSizes, isZeroIndex)) {
+ SmallVector<SmallVector<OpFoldResult>> resultOffsets, resultSizes;
+ return tiledBodyFn(rewriter, loc, ValueRange{}, destinationTensors,
+ resultOffsets, resultSizes);
+ }
+ if (options.loopType == scf::SCFTilingOptions::LoopType::ForOp) {
+ return generateLoopNestUsingForOp(rewriter, loc, loopRanges, tileSizes,
+ destinationTensors, tiledBodyFn, loops);
+ } else if (options.loopType == scf::SCFTilingOptions::LoopType::ForallOp) {
----------------
chelini wrote:
Since the preceding `if` branch ends in a `return`, this can be a plain `if` statement rather than `else if`. See: https://clang.llvm.org/extra/clang-tidy/checks/readability/else-after-return.html
https://github.com/llvm/llvm-project/pull/77874
More information about the Mlir-commits
mailing list