[Mlir-commits] [mlir] 93d1108 - [MLIR][LoopOps] Adds the loop unroll transformation for loop::ForOp.
Andy Davis
llvmlistbot at llvm.org
Tue May 5 10:42:44 PDT 2020
Author: Andy Davis
Date: 2020-05-05T10:42:36-07:00
New Revision: 93d1108801ddfe3d5e68296cdc62e44b3382e31e
URL: https://github.com/llvm/llvm-project/commit/93d1108801ddfe3d5e68296cdc62e44b3382e31e
DIFF: https://github.com/llvm/llvm-project/commit/93d1108801ddfe3d5e68296cdc62e44b3382e31e.diff
LOG: [MLIR][LoopOps] Adds the loop unroll transformation for loop::ForOp.
Summary:
Adds the loop unroll transformation for loop::ForOp.
Adds support for promoting the body of single-iteration loop::ForOps into its containing block.
Adds check tests for loop::ForOps with dynamic and static lower/upper bounds and step.
Care was taken to share code (where possible) with the AffineForOp unroll transformation to ease maintenance and a potential future transition to a LoopLike construct on which loop transformations for different loop types can be implemented.
Reviewers: ftynse, nicolasvasilache
Reviewed By: ftynse
Subscribers: bondhugula, mgorny, zzheng, mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, aartbik, liufengdb, Joonsoo, grosul1, frgossen, Kayjukh, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79184
Added:
mlir/test/Dialect/Loops/loop-unroll.mlir
mlir/test/lib/Transforms/TestLoopUnrolling.cpp
Modified:
mlir/include/mlir/Transforms/LoopUtils.h
mlir/lib/Transforms/Utils/LoopUtils.cpp
mlir/test/lib/Transforms/CMakeLists.txt
mlir/tools/mlir-opt/mlir-opt.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 2f38a24236e3..7a07b6db23fc 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -38,8 +38,9 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
/// Unrolls this for operation by the specified unroll factor. Returns failure
/// if the loop cannot be unrolled either due to restrictions or due to invalid
-/// unroll factors.
+/// unroll factors. Requires positive loop bounds and step.
LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor);
+LogicalResult loopUnrollByFactor(loop::ForOp forOp, uint64_t unrollFactor);
/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
@@ -68,9 +69,10 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
uint64_t unrollJamFactor);
-/// Promotes the loop body of a AffineForOp to its containing block if the
-/// AffineForOp was known to have a single iteration.
+/// Promotes the loop body of an AffineForOp/loop::ForOp to its containing
+/// block if the loop was known to have a single iteration.
LogicalResult promoteIfSingleIteration(AffineForOp forOp);
+LogicalResult promoteIfSingleIteration(loop::ForOp forOp);
/// Promotes all single iteration AffineForOp's in the Function, i.e., moves
/// their body into the containing Block.
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 4b0cd6c8eb1d..35581eb2a392 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -24,6 +24,7 @@
#include "mlir/IR/Function.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/MathExtras.h"
#include "mlir/Transforms/RegionUtils.h"
#include "mlir/Transforms/Utils.h"
#include "llvm/ADT/DenseMap.h"
@@ -118,6 +119,34 @@ static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
lb.erase();
}
+// Emits IR that computes the ceiling of `dividend / divisor` for a positive,
+// index-typed `dividend` and a positive compile-time `divisor`, using
+//   ceildiv(a, B) = divis(a + (B - 1), B)
+// where divis is division that rounds toward zero.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             int64_t divisor) {
+  assert(divisor > 0 && "expected positive divisor");
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  // Materialize (B - 1) and B as index constants, then bias and divide.
+  Value bias = builder.create<ConstantIndexOp>(loc, divisor - 1);
+  Value denominator = builder.create<ConstantIndexOp>(loc, divisor);
+  Value biasedDividend = builder.create<AddIOp>(loc, dividend, bias);
+  return builder.create<SignedDivIOp>(loc, biasedDividend, denominator);
+}
+
+// Emits IR that computes the ceiling of `dividend / divisor` when both
+// operands are positive SSA values, using
+//   ceildiv(a, b) = divis(a + (b - 1), b)
+// where divis is division that rounds toward zero.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             Value divisor) {
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  // Compute the bias (b - 1) at runtime, add it to the dividend, then divide.
+  Value one = builder.create<ConstantIndexOp>(loc, 1);
+  Value bias = builder.create<SubIOp>(loc, divisor, one);
+  Value biasedDividend = builder.create<AddIOp>(loc, dividend, bias);
+  return builder.create<SignedDivIOp>(loc, biasedDividend, divisor);
+}
+
/// Promotes the loop body of a forOp to its containing block if the forOp
/// was known to have a single iteration.
// TODO(bondhugula): extend this for arbitrary affine bounds.
@@ -161,6 +190,35 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
return success();
}
+/// Promotes the loop body of a loop::ForOp to its containing block if it can
+/// be determined that the loop has a single iteration.
+///
+/// Returns failure, leaving the IR untouched, when the lower bound, upper
+/// bound or step is not defined by a ConstantIndexOp, when either bound is
+/// negative, when the step is not strictly positive, or when the computed
+/// trip count differs from one. On success the induction variable is replaced
+/// by the lower bound constant, the body (minus its terminator) is moved in
+/// front of the loop, and the loop op is erased.
+LogicalResult mlir::promoteIfSingleIteration(loop::ForOp forOp) {
+  auto lbCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.lowerBound().getDefiningOp());
+  auto ubCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.upperBound().getDefiningOp());
+  auto stepCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.step().getDefiningOp());
+  // The step must be strictly positive: a zero step would make the trip count
+  // computation below divide by zero.
+  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.getValue() < 0 ||
+      ubCstOp.getValue() < 0 || stepCstOp.getValue() <= 0)
+    return failure();
+  int64_t tripCount = mlir::ceilDiv(ubCstOp.getValue() - lbCstOp.getValue(),
+                                    stepCstOp.getValue());
+  if (tripCount != 1)
+    return failure();
+  // The only iteration runs with iv == lower bound.
+  auto iv = forOp.getInductionVar();
+  iv.replaceAllUsesWith(lbCstOp);
+
+  // Move the loop body operations, except for its terminator, to the loop's
+  // containing block.
+  auto *parentBlock = forOp.getOperation()->getBlock();
+  forOp.getBody()->back().erase();
+  parentBlock->getOperations().splice(Block::iterator(forOp),
+                                      forOp.getBody()->getOperations());
+  forOp.erase();
+  return success();
+}
+
/// Promotes all single iteration 'for' ops in `f`, i.e., moves
/// their body into the containing Block.
void mlir::promoteSingleIterationLoops(FuncOp f) {
@@ -416,6 +474,37 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
return loopUnrollByFactor(forOp, unrollFactor);
}
+// Generates unrolled copies of the AffineForOp or loop::ForOp body
+// 'loopBodyBlock', whose induction variable is 'forOpIV', appending
+// 'unrollFactor' - 1 additional copies of the original body in place.
+// 'ivRemapFn' is called as ivRemapFn(i, forOpIV, builder) for copy number i
+// (1-based) and must return the value that replaces 'forOpIV' in that copy.
+// It is only called when 'forOpIV' has uses.
+//
+// Precondition: 'loopBodyBlock' must contain at least one operation besides
+// its terminator, because the last original op is located two positions before
+// end().
+static void generateUnrolledLoop(
+ Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+ function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn) {
+ // Builder to insert unrolled bodies just before the terminator of the body of
+ // 'forOp'.
+ auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
+
+ // Keep a pointer to the last non-terminator operation in the original block
+ // so that we know what to clone (since we are doing this in-place).
+ Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
+
+ // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+ for (unsigned i = 1; i < unrollFactor; i++) {
+ // Fresh mapping per copy so each copy refers to its own cloned values.
+ BlockAndValueMapping operandMap;
+
+ // If the induction variable is used, create a remapping to the value for
+ // this unrolled instance.
+ if (!forOpIV.use_empty()) {
+ Value ivUnroll = ivRemapFn(i, forOpIV, builder);
+ operandMap.map(forOpIV, ivUnroll);
+ }
+
+ // Clone the original body of 'forOp'.
+ // NOTE: std::next(srcBlockEnd) is re-evaluated on every step on purpose.
+ // New ops are inserted before the terminator, i.e. after 'srcBlockEnd', so
+ // the stop position moves from the terminator to the first inserted op.
+ // Hoisting it out of the loop would make the walk run into the clones.
+ for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
+ builder.clone(*it, operandMap);
+ }
+}
+
/// Unrolls this loop by the specified factor. Returns success if the loop
/// is successfully unrolled.
LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
@@ -467,38 +556,114 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
// Scale the step of loop being unrolled by unroll factor.
int64_t step = forOp.getStep();
forOp.setStep(step * unrollFactor);
+ generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+ [&](unsigned i, Value iv, OpBuilder b) {
+ // iv' = iv + i * step
+ auto d0 = b.getAffineDimExpr(0);
+ auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
+ return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap,
+ iv);
+ });
- // Builder to insert unrolled bodies just before the terminator of the body of
- // 'forOp'.
- auto builder = OpBuilder::atBlockTerminator(forOp.getBody());
+ // Promote the loop body up if this has turned into a single iteration loop.
+ promoteIfSingleIteration(forOp);
+ return success();
+}
- // Keep a pointer to the last non-terminator operation in the original block
- // so that we know what to clone (since we are doing this in-place).
- Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
+/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
+LogicalResult mlir::loopUnrollByFactor(loop::ForOp forOp,
+ uint64_t unrollFactor) {
+ assert(unrollFactor > 0 && "expected positive unroll factor");
+ if (unrollFactor == 1)
+ return promoteIfSingleIteration(forOp);
- // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
- auto forOpIV = forOp.getInductionVar();
- for (unsigned i = 1; i < unrollFactor; i++) {
- BlockAndValueMapping operandMap;
+ // Return if the loop body is empty.
+ if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
+ return success();
- // If the induction variable is used, create a remapping to the value for
- // this unrolled instance.
- if (!forOpIV.use_empty()) {
- // iv' = iv + 1/2/3...unrollFactor-1;
- auto d0 = builder.getAffineDimExpr(0);
- auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
- auto ivUnroll =
- builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
- operandMap.map(forOpIV, ivUnroll);
- }
+ // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
+ // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
+ OpBuilder boundsBuilder(forOp);
+ auto loc = forOp.getLoc();
+ auto step = forOp.step();
+ Value upperBoundUnrolled;
+ Value stepUnrolled;
+ bool generateEpilogueLoop = true;
+
+ auto lbCstOp =
+ dyn_cast_or_null<ConstantIndexOp>(forOp.lowerBound().getDefiningOp());
+ auto ubCstOp =
+ dyn_cast_or_null<ConstantIndexOp>(forOp.upperBound().getDefiningOp());
+ auto stepCstOp =
+ dyn_cast_or_null<ConstantIndexOp>(forOp.step().getDefiningOp());
+ if (lbCstOp && ubCstOp && stepCstOp) {
+ // Constant loop bounds computation.
+ int64_t lbCst = lbCstOp.getValue();
+ int64_t ubCst = ubCstOp.getValue();
+ int64_t stepCst = stepCstOp.getValue();
+ assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
+ "expected positive loop bounds and step");
+ int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
+ int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
+ int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
+ assert(upperBoundUnrolledCst <= ubCst);
+ int64_t stepUnrolledCst = stepCst * unrollFactor;
+
+ // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
+ generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
+ if (generateEpilogueLoop)
+ upperBoundUnrolled =
+ boundsBuilder.create<ConstantIndexOp>(loc, upperBoundUnrolledCst);
+ else
+ upperBoundUnrolled = ubCstOp;
+
+ // Create constant for 'stepUnrolled'.
+ stepUnrolled =
+ stepCst == stepUnrolledCst
+ ? step
+ : boundsBuilder.create<ConstantIndexOp>(loc, stepUnrolledCst);
+ } else {
+ // Dynamic loop bounds computation.
+ // TODO(andydavis) Add dynamic asserts for negative lb/ub/step, or
+ // consider using ceilDiv from AffineApplyExpander.
+ auto lowerBound = forOp.lowerBound();
+ auto upperBound = forOp.upperBound();
+ Value diff = boundsBuilder.create<SubIOp>(loc, upperBound, lowerBound);
+ Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
+ Value unrollFactorCst =
+ boundsBuilder.create<ConstantIndexOp>(loc, unrollFactor);
+ Value tripCountRem =
+ boundsBuilder.create<SignedRemIOp>(loc, tripCount, unrollFactorCst);
+ // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
+ Value tripCountEvenMultiple =
+ boundsBuilder.create<SubIOp>(loc, tripCount, tripCountRem);
+ // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
+ upperBoundUnrolled = boundsBuilder.create<AddIOp>(
+ loc, lowerBound,
+ boundsBuilder.create<MulIOp>(loc, tripCountEvenMultiple, step));
+ // Scale 'step' by 'unrollFactor'.
+ stepUnrolled = boundsBuilder.create<MulIOp>(loc, step, unrollFactorCst);
+ }
- // Clone the original body of 'forOp'.
- for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd);
- it++) {
- builder.clone(*it, operandMap);
- }
+ // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
+ if (generateEpilogueLoop) {
+ OpBuilder epilogueBuilder(forOp.getOperation()->getBlock(),
+ std::next(Block::iterator(forOp)));
+ auto epilogueForOp = cast<loop::ForOp>(epilogueBuilder.clone(*forOp));
+ epilogueForOp.setLowerBound(upperBoundUnrolled);
+ promoteIfSingleIteration(epilogueForOp);
}
+ // Create unrolled loop.
+ forOp.setUpperBound(upperBoundUnrolled);
+ forOp.setStep(stepUnrolled);
+ generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+ [&](unsigned i, Value iv, OpBuilder b) {
+ // iv' = iv + step * i;
+ auto stride = b.create<MulIOp>(
+ loc, step, b.create<ConstantIndexOp>(loc, i));
+ return b.create<AddIOp>(loc, iv, stride);
+ });
// Promote the loop body up if this has turned into a single iteration loop.
promoteIfSingleIteration(forOp);
return success();
@@ -1032,34 +1197,6 @@ Loops mlir::tilePerfectlyNested(loop::ForOp rootForOp, ArrayRef<Value> sizes) {
return ::tile(forOps, sizes, forOps.back());
}
-// Build the IR that performs ceil division of a positive value by a constant:
-// ceildiv(a, B) = divis(a + (B-1), B)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
- int64_t divisor) {
- assert(divisor > 0 && "expected positive divisor");
- assert(dividend.getType().isIndex() && "expected index-typed value");
-
- Value divisorMinusOneCst = builder.create<ConstantIndexOp>(loc, divisor - 1);
- Value divisorCst = builder.create<ConstantIndexOp>(loc, divisor);
- Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOneCst);
- return builder.create<SignedDivIOp>(loc, sum, divisorCst);
-}
-
-// Build the IR that performs ceil division of a positive value by another
-// positive value:
-// ceildiv(a, b) = divis(a + (b - 1), b)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
- Value divisor) {
- assert(dividend.getType().isIndex() && "expected index-typed value");
-
- Value cstOne = builder.create<ConstantIndexOp>(loc, 1);
- Value divisorMinusOne = builder.create<SubIOp>(loc, divisor, cstOne);
- Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOne);
- return builder.create<SignedDivIOp>(loc, sum, divisor);
-}
-
// Hoist the ops within `outer` that appear before `inner`.
// Such ops include the ops that have been introduced by parametric tiling.
// Ops that come from triangular loops (i.e. that belong to the program slice
diff --git a/mlir/test/Dialect/Loops/loop-unroll.mlir b/mlir/test/Dialect/Loops/loop-unroll.mlir
new file mode 100644
index 000000000000..fa3ebc173e51
--- /dev/null
+++ b/mlir/test/Dialect/Loops/loop-unroll.mlir
@@ -0,0 +1,250 @@
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
+
+func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3: memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ loop.for %i0 = %arg0 to %arg1 step %arg2 {
+ store %0, %arg3[%i0] : memref<?xf32>
+ }
+ return
+}
+// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll
+// UNROLL-BY-2-SAME: %[[LB:.*0]]: index,
+// UNROLL-BY-2-SAME: %[[UB:.*1]]: index,
+// UNROLL-BY-2-SAME: %[[STEP:.*2]]: index,
+// UNROLL-BY-2-SAME: %[[MEM:.*3]]: memref<?xf32>
+//
+// UNROLL-BY-2-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
+// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index
+// UNROLL-BY-2-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
+// UNROLL-BY-2-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
+// Compute trip count in V3.
+// UNROLL-BY-2-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
+// Store unroll factor in C2.
+// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index
+// UNROLL-BY-2-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C2]] : index
+// UNROLL-BY-2-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
+// UNROLL-BY-2-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
+// Compute upper bound of unrolled loop in V7.
+// UNROLL-BY-2-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
+// Compute step of unrolled loop in V8.
+// UNROLL-BY-2-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index
+// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-BY-2-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
+// UNROLL-BY-2-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
+// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
+// UNROLL-BY-2-NEXT: }
+// UNROLL-BY-2-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-2-NEXT: }
+// UNROLL-BY-2-NEXT: return
+
+// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll
+// UNROLL-BY-3-SAME: %[[LB:.*0]]: index,
+// UNROLL-BY-3-SAME: %[[UB:.*1]]: index,
+// UNROLL-BY-3-SAME: %[[STEP:.*2]]: index,
+// UNROLL-BY-3-SAME: %[[MEM:.*3]]: memref<?xf32>
+//
+// UNROLL-BY-3-DAG: %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
+// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index
+// UNROLL-BY-3-DAG: %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
+// UNROLL-BY-3-DAG: %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
+// Compute trip count in V3.
+// UNROLL-BY-3-DAG: %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
+// Store unroll factor in C3.
+// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index
+// UNROLL-BY-3-DAG: %[[V4:.*]] = remi_signed %[[V3]], %[[C3]] : index
+// UNROLL-BY-3-DAG: %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
+// UNROLL-BY-3-DAG: %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
+// Compute upper bound of unrolled loop in V7.
+// UNROLL-BY-3-DAG: %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
+// Compute step of unrolled loop in V8.
+// UNROLL-BY-3-DAG: %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index
+// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-BY-3-NEXT: %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index
+// UNROLL-BY-3-NEXT: %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V12:.*]] = addi %[[IV]], %[[V11]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: }
+// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: }
+// UNROLL-BY-3-NEXT: return
+
+func @dynamic_loop_unroll_outer_by_2(
+ %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
+ %arg5 : index, %arg6: memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ loop.for %i0 = %arg0 to %arg1 step %arg2 {
+ loop.for %i1 = %arg3 to %arg4 step %arg5 {
+ store %0, %arg6[%i1] : memref<?xf32>
+ }
+ }
+ return
+}
+// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2
+// UNROLL-OUTER-BY-2-SAME: %[[LB0:.*0]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[UB0:.*1]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[STEP0:.*2]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[LB1:.*3]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[UB1:.*4]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[STEP1:.*5]]: index,
+// UNROLL-OUTER-BY-2-SAME: %[[MEM:.*6]]: memref<?xf32>
+//
+// UNROLL-OUTER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
+// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+// UNROLL-OUTER-BY-2-NEXT: }
+// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+// UNROLL-OUTER-BY-2-NEXT: }
+// UNROLL-OUTER-BY-2-NEXT: }
+// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
+// UNROLL-OUTER-BY-2-NEXT: loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+// UNROLL-OUTER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+// UNROLL-OUTER-BY-2-NEXT: }
+// UNROLL-OUTER-BY-2-NEXT: }
+// UNROLL-OUTER-BY-2-NEXT: return
+
+func @dynamic_loop_unroll_inner_by_2(
+ %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
+ %arg5 : index, %arg6: memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ loop.for %i0 = %arg0 to %arg1 step %arg2 {
+ loop.for %i1 = %arg3 to %arg4 step %arg5 {
+ store %0, %arg6[%i1] : memref<?xf32>
+ }
+ }
+ return
+}
+// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2
+// UNROLL-INNER-BY-2-SAME: %[[LB0:.*0]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[UB0:.*1]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[STEP0:.*2]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[LB1:.*3]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[UB1:.*4]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[STEP1:.*5]]: index,
+// UNROLL-INNER-BY-2-SAME: %[[MEM:.*6]]: memref<?xf32>
+//
+// UNROLL-INNER-BY-2: loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+// UNROLL-INNER-BY-2: loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
+// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+// UNROLL-INNER-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-INNER-BY-2-NEXT: %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index
+// UNROLL-INNER-BY-2-NEXT: %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index
+// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-INNER-BY-2-NEXT: }
+// UNROLL-INNER-BY-2-NEXT: loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
+// UNROLL-INNER-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+// UNROLL-INNER-BY-2-NEXT: }
+// UNROLL-INNER-BY-2-NEXT: }
+// UNROLL-INNER-BY-2-NEXT: return
+
+// Test that no epilogue clean-up loop is generated because the trip count is
+// a multiple of the unroll factor.
+func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ %lb = constant 0 : index
+ %ub = constant 20 : index
+ %step = constant 1 : index
+ loop.for %i0 = %lb to %ub step %step {
+ store %0, %arg0[%i0] : memref<?xf32>
+ }
+ return
+}
+// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2
+// UNROLL-BY-2-SAME: %[[MEM:.*0]]: memref<?xf32>
+//
+// UNROLL-BY-2-DAG: %[[C0:.*]] = constant 0 : index
+// UNROLL-BY-2-DAG: %[[C1:.*]] = constant 1 : index
+// UNROLL-BY-2-DAG: %[[C20:.*]] = constant 20 : index
+// UNROLL-BY-2-DAG: %[[C2:.*]] = constant 2 : index
+// UNROLL-BY-2: loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
+// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-2-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-BY-2-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+// UNROLL-BY-2-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+// UNROLL-BY-2-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-BY-2-NEXT: }
+// UNROLL-BY-2-NEXT: return
+
+// Test that epilogue clean up loop is generated (trip count is not
+// a multiple of unroll factor).
+func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ %lb = constant 0 : index
+ %ub = constant 20 : index
+ %step = constant 1 : index
+ loop.for %i0 = %lb to %ub step %step {
+ store %0, %arg0[%i0] : memref<?xf32>
+ }
+ return
+}
+
+// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3
+// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref<?xf32>
+//
+// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index
+// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index
+// UNROLL-BY-3-DAG: %[[C20:.*]] = constant 20 : index
+// UNROLL-BY-3-DAG: %[[C18:.*]] = constant 18 : index
+// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index
+// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index
+// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: }
+// UNROLL-BY-3-NEXT: loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: }
+// UNROLL-BY-3-NEXT: return
+
+// Test that the single iteration epilogue loop body is promoted to the loop's
+// containing block.
+func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
+ %0 = constant 7.0 : f32
+ %lb = constant 0 : index
+ %ub = constant 10 : index
+ %step = constant 1 : index
+ loop.for %i0 = %lb to %ub step %step {
+ store %0, %arg0[%i0] : memref<?xf32>
+ }
+ return
+}
+// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue
+// UNROLL-BY-3-SAME: %[[MEM:.*0]]: memref<?xf32>
+//
+// UNROLL-BY-3-DAG: %[[C0:.*]] = constant 0 : index
+// UNROLL-BY-3-DAG: %[[C1:.*]] = constant 1 : index
+// UNROLL-BY-3-DAG: %[[C10:.*]] = constant 10 : index
+// UNROLL-BY-3-DAG: %[[C9:.*]] = constant 9 : index
+// UNROLL-BY-3-DAG: %[[C3:.*]] = constant 3 : index
+// UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C1_IV:.*]] = constant 1 : index
+// UNROLL-BY-3-NEXT: %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: %[[C2_IV:.*]] = constant 2 : index
+// UNROLL-BY-3-NEXT: %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
+// UNROLL-BY-3-NEXT: %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: }
+// UNROLL-BY-3-NEXT: store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
+// UNROLL-BY-3-NEXT: return
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 248da51bcec7..e7b31b3d0bcf 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_library(MLIRTestTransforms
TestLiveness.cpp
TestLoopMapping.cpp
TestLoopParametricTiling.cpp
+ TestLoopUnrolling.cpp
TestOpaqueLoc.cpp
TestMemRefBoundCheck.cpp
TestMemRefDependenceCheck.cpp
diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
new file mode 100644
index 000000000000..7cd221f37f8c
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
@@ -0,0 +1,68 @@
+//===-------- TestLoopUnrolling.cpp --- loop unrolling test pass ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to unroll loops by a specified unroll factor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LoopOps/LoopOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+/// Returns the number of loop::ForOp operations that enclose `op`, found by
+/// walking the chain of parent operations. `op` itself is not counted.
+static unsigned getNestingDepth(Operation *op) {
+  unsigned depth = 0;
+  for (Operation *ancestor = op->getParentOp(); ancestor;
+       ancestor = ancestor->getParentOp()) {
+    if (isa<loop::ForOp>(ancestor))
+      ++depth;
+  }
+  return depth;
+}
+
+/// Test-only function pass: unrolls every loop::ForOp located at nesting depth
+/// `loop-depth` by `unroll-factor`.
+class TestLoopUnrollingPass
+    : public PassWrapper<TestLoopUnrollingPass, FunctionPass> {
+public:
+  TestLoopUnrollingPass() = default;
+  // Copy construction leaves both options at their declared defaults rather
+  // than copying the values held by the source pass.
+  TestLoopUnrollingPass(const TestLoopUnrollingPass &) {}
+  explicit TestLoopUnrollingPass(uint64_t unrollFactorParam,
+                                 unsigned loopDepthParam) {
+    unrollFactor = unrollFactorParam;
+    loopDepth = loopDepthParam;
+  }
+
+  void runOnFunction() override {
+    // Collect the matching loops first; the unrolling itself happens only
+    // after the walk has finished.
+    SmallVector<loop::ForOp, 4> targets;
+    getFunction().walk([&](loop::ForOp candidate) {
+      if (getNestingDepth(candidate) == loopDepth)
+        targets.push_back(candidate);
+    });
+    for (loop::ForOp target : targets)
+      loopUnrollByFactor(target, unrollFactor);
+  }
+
+  // Factor passed to loopUnrollByFactor for every selected loop.
+  Option<uint64_t> unrollFactor{*this, "unroll-factor",
+                                llvm::cl::desc("Loop unroll factor."),
+                                llvm::cl::init(1)};
+  // Number of enclosing loop::ForOps a loop must have to be selected.
+  Option<unsigned> loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."),
+                             llvm::cl::init(0)};
+};
+} // end namespace
+
+namespace mlir {
+/// Registers TestLoopUnrollingPass under the command-line name
+/// "test-loop-unrolling".
+void registerTestLoopUnrollingPass() {
+ PassRegistration<TestLoopUnrollingPass>(
+ "test-loop-unrolling", "Tests loop unrolling transformation");
+}
+} // namespace mlir
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index c5cc533ab119..9d583dc2a319 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -53,6 +53,7 @@ void registerTestLinalgTransforms();
void registerTestLivenessPass();
void registerTestLoopFusion();
void registerTestLoopMappingPass();
+void registerTestLoopUnrollingPass();
void registerTestMatchers();
void registerTestMemRefDependenceCheck();
void registerTestMemRefStrideCalculation();
@@ -119,6 +120,7 @@ void registerTestPasses() {
registerTestLivenessPass();
registerTestLoopFusion();
registerTestLoopMappingPass();
+ registerTestLoopUnrollingPass();
registerTestMatchers();
registerTestMemRefDependenceCheck();
registerTestMemRefStrideCalculation();
More information about the Mlir-commits
mailing list