[Mlir-commits] [mlir] 93d1108 - [MLIR][LoopOps] Adds the loop unroll transformation for loop::ForOp.

Andy Davis llvmlistbot at llvm.org
Tue May 5 10:42:44 PDT 2020


Author: Andy Davis
Date: 2020-05-05T10:42:36-07:00
New Revision: 93d1108801ddfe3d5e68296cdc62e44b3382e31e

URL: https://github.com/llvm/llvm-project/commit/93d1108801ddfe3d5e68296cdc62e44b3382e31e
DIFF: https://github.com/llvm/llvm-project/commit/93d1108801ddfe3d5e68296cdc62e44b3382e31e.diff

LOG: [MLIR][LoopOps] Adds the loop unroll transformation for loop::ForOp.

Summary:
Adds the loop unroll transformation for loop::ForOp.
Adds support for promoting the body of a single-iteration loop::ForOp into its containing block.
Adds check tests for loop::ForOps with dynamic and static lower/upper bounds and step.
Care was taken to share code (where possible) with the AffineForOp unroll transformation, to ease maintenance and a potential future transition to a LoopLike construct on which loop transformations for different loop types can be implemented.
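
For illustration, a rough before/after sketch of what the new transformation does (the function, buffer, and constants here are hypothetical, modeled on the check tests added below). With constant bounds whose trip count is an even multiple of the unroll factor, the loop step is scaled by the factor and the body is cloned once per additional copy, remapping the induction variable to iv + step * i; when the trip count is not an even multiple, an epilogue loop covering the remaining iterations is emitted after the unrolled loop.

  // Before: trip count 4, to be unrolled by a factor of 2 (no epilogue needed).
  func @unroll_sketch(%buf : memref<?xf32>) {
    %f = constant 7.0 : f32
    %lb = constant 0 : index
    %ub = constant 4 : index
    %step = constant 1 : index
    loop.for %i = %lb to %ub step %step {
      store %f, %buf[%i] : memref<?xf32>
    }
    return
  }

  // After (roughly): the step is scaled to 2 and the body is duplicated, with
  // the second copy addressing iv + step * 1.
  func @unroll_sketch_by_2(%buf : memref<?xf32>) {
    %f = constant 7.0 : f32
    %lb = constant 0 : index
    %ub = constant 4 : index
    %step = constant 1 : index
    %step2 = constant 2 : index
    loop.for %i = %lb to %ub step %step2 {
      store %f, %buf[%i] : memref<?xf32>
      %c1 = constant 1 : index
      %off = muli %step, %c1 : index
      %i1 = addi %i, %off : index
      store %f, %buf[%i1] : memref<?xf32>
    }
    return
  }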

Reviewers: ftynse, nicolasvasilache

Reviewed By: ftynse

Subscribers: bondhugula, mgorny, zzheng, mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, aartbik, liufengdb, Joonsoo, grosul1, frgossen, Kayjukh, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79184

Added: 
    mlir/test/Dialect/Loops/loop-unroll.mlir
    mlir/test/lib/Transforms/TestLoopUnrolling.cpp

Modified: 
    mlir/include/mlir/Transforms/LoopUtils.h
    mlir/lib/Transforms/Utils/LoopUtils.cpp
    mlir/test/lib/Transforms/CMakeLists.txt
    mlir/tools/mlir-opt/mlir-opt.cpp

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index 2f38a24236e3..7a07b6db23fc 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -38,8 +38,9 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
 
 /// Unrolls this for operation by the specified unroll factor. Returns failure
 /// if the loop cannot be unrolled either due to restrictions or due to invalid
-/// unroll factors.
+/// unroll factors. Requires positive loop bounds and step.
 LogicalResult loopUnrollByFactor(AffineForOp forOp, uint64_t unrollFactor);
+LogicalResult loopUnrollByFactor(loop::ForOp forOp, uint64_t unrollFactor);
 
 /// Unrolls this loop by the specified unroll factor or its trip count,
 /// whichever is lower.
@@ -68,9 +69,10 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
 LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
                                       uint64_t unrollJamFactor);
 
-/// Promotes the loop body of a AffineForOp to its containing block if the
-/// AffineForOp was known to have a single iteration.
+/// Promotes the loop body of an AffineForOp/loop::ForOp to its containing
+/// if the loop was known to have a single iteration.
 LogicalResult promoteIfSingleIteration(AffineForOp forOp);
+LogicalResult promoteIfSingleIteration(loop::ForOp forOp);
 
 /// Promotes all single iteration AffineForOp's in the Function, i.e., moves
 /// their body into the containing Block.

diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 4b0cd6c8eb1d..35581eb2a392 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -24,6 +24,7 @@
 #include "mlir/IR/Function.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
@@ -118,6 +119,34 @@ static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
     lb.erase();
 }
 
+// Build the IR that performs ceil division of a positive value by a constant:
+//    ceildiv(a, B) = divis(a + (B-1), B)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             int64_t divisor) {
+  assert(divisor > 0 && "expected positive divisor");
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value divisorMinusOneCst = builder.create<ConstantIndexOp>(loc, divisor - 1);
+  Value divisorCst = builder.create<ConstantIndexOp>(loc, divisor);
+  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOneCst);
+  return builder.create<SignedDivIOp>(loc, sum, divisorCst);
+}
+
+// Build the IR that performs ceil division of a positive value by another
+// positive value:
+//    ceildiv(a, b) = divis(a + (b - 1), b)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             Value divisor) {
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value cstOne = builder.create<ConstantIndexOp>(loc, 1);
+  Value divisorMinusOne = builder.create<SubIOp>(loc, divisor, cstOne);
+  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOne);
+  return builder.create<SignedDivIOp>(loc, sum, divisor);
+}
+
 /// Promotes the loop body of a forOp to its containing block if the forOp
 /// was known to have a single iteration.
 // TODO(bondhugula): extend this for arbitrary affine bounds.
@@ -161,6 +190,35 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
   return success();
 }
 
+/// Promotes the loop body of a loop::ForOp to its containing block if it can
+/// be determined that the loop has a single iteration.
+LogicalResult mlir::promoteIfSingleIteration(loop::ForOp forOp) {
+  auto lbCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.lowerBound().getDefiningOp());
+  auto ubCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.upperBound().getDefiningOp());
+  auto stepCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.step().getDefiningOp());
+  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.getValue() < 0 ||
+      ubCstOp.getValue() < 0 || stepCstOp.getValue() < 0)
+    return failure();
+  int64_t tripCount = mlir::ceilDiv(ubCstOp.getValue() - lbCstOp.getValue(),
+                                    stepCstOp.getValue());
+  if (tripCount != 1)
+    return failure();
+  auto iv = forOp.getInductionVar();
+  iv.replaceAllUsesWith(lbCstOp);
+
+  // Move the loop body operations, except for its terminator, to the loop's
+  // containing block.
+  auto *parentBlock = forOp.getOperation()->getBlock();
+  forOp.getBody()->back().erase();
+  parentBlock->getOperations().splice(Block::iterator(forOp),
+                                      forOp.getBody()->getOperations());
+  forOp.erase();
+  return success();
+}
+
 /// Promotes all single iteration 'for' ops in `f`, i.e., moves
 /// their body into the containing Block.
 void mlir::promoteSingleIterationLoops(FuncOp f) {
@@ -416,6 +474,37 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
   return loopUnrollByFactor(forOp, unrollFactor);
 }
 
+// Generates unrolled copies of the loop body in 'loopBodyBlock' (the body of
+// an AffineForOp or loop::ForOp) by 'unrollFactor', calling 'ivRemapFn' to
+// remap the induction variable 'forOpIV' for each unrolled copy.
+static void generateUnrolledLoop(
+    Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+    function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn) {
+  // Builder to insert unrolled bodies just before the terminator of the body of
+  // 'forOp'.
+  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
+
+  // Keep a pointer to the last non-terminator operation in the original block
+  // so that we know what to clone (since we are doing this in-place).
+  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
+
+  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+  for (unsigned i = 1; i < unrollFactor; i++) {
+    BlockAndValueMapping operandMap;
+
+    // If the induction variable is used, create a remapping to the value for
+    // this unrolled instance.
+    if (!forOpIV.use_empty()) {
+      Value ivUnroll = ivRemapFn(i, forOpIV, builder);
+      operandMap.map(forOpIV, ivUnroll);
+    }
+
+    // Clone the original body of 'forOp'.
+    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
+      builder.clone(*it, operandMap);
+  }
+}
+
 /// Unrolls this loop by the specified factor. Returns success if the loop
 /// is successfully unrolled.
 LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
@@ -467,38 +556,114 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
   // Scale the step of loop being unrolled by unroll factor.
   int64_t step = forOp.getStep();
   forOp.setStep(step * unrollFactor);
+  generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+                       [&](unsigned i, Value iv, OpBuilder b) {
+                         // iv' = iv + i * step
+                         auto d0 = b.getAffineDimExpr(0);
+                         auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
+                         return b.create<AffineApplyOp>(forOp.getLoc(), bumpMap,
+                                                        iv);
+                       });
 
-  // Builder to insert unrolled bodies just before the terminator of the body of
-  // 'forOp'.
-  auto builder = OpBuilder::atBlockTerminator(forOp.getBody());
+  // Promote the loop body up if this has turned into a single iteration loop.
+  promoteIfSingleIteration(forOp);
+  return success();
+}
 
-  // Keep a pointer to the last non-terminator operation in the original block
-  // so that we know what to clone (since we are doing this in-place).
-  Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
+/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
+LogicalResult mlir::loopUnrollByFactor(loop::ForOp forOp,
+                                       uint64_t unrollFactor) {
+  assert(unrollFactor > 0 && "expected positive unroll factor");
+  if (unrollFactor == 1)
+    return promoteIfSingleIteration(forOp);
 
-  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
-  auto forOpIV = forOp.getInductionVar();
-  for (unsigned i = 1; i < unrollFactor; i++) {
-    BlockAndValueMapping operandMap;
+  // Return if the loop body is empty.
+  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
+    return success();
 
-    // If the induction variable is used, create a remapping to the value for
-    // this unrolled instance.
-    if (!forOpIV.use_empty()) {
-      // iv' = iv + 1/2/3...unrollFactor-1;
-      auto d0 = builder.getAffineDimExpr(0);
-      auto bumpMap = AffineMap::get(1, 0, d0 + i * step);
-      auto ivUnroll =
-          builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
-      operandMap.map(forOpIV, ivUnroll);
-    }
+  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
+  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
+  OpBuilder boundsBuilder(forOp);
+  auto loc = forOp.getLoc();
+  auto step = forOp.step();
+  Value upperBoundUnrolled;
+  Value stepUnrolled;
+  bool generateEpilogueLoop = true;
+
+  auto lbCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.lowerBound().getDefiningOp());
+  auto ubCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.upperBound().getDefiningOp());
+  auto stepCstOp =
+      dyn_cast_or_null<ConstantIndexOp>(forOp.step().getDefiningOp());
+  if (lbCstOp && ubCstOp && stepCstOp) {
+    // Constant loop bounds computation.
+    int64_t lbCst = lbCstOp.getValue();
+    int64_t ubCst = ubCstOp.getValue();
+    int64_t stepCst = stepCstOp.getValue();
+    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
+           "expected positive loop bounds and step");
+    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
+    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
+    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
+    assert(upperBoundUnrolledCst <= ubCst);
+    int64_t stepUnrolledCst = stepCst * unrollFactor;
+
+    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
+    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
+    if (generateEpilogueLoop)
+      upperBoundUnrolled =
+          boundsBuilder.create<ConstantIndexOp>(loc, upperBoundUnrolledCst);
+    else
+      upperBoundUnrolled = ubCstOp;
+
+    // Create constant for 'stepUnrolled'.
+    stepUnrolled =
+        stepCst == stepUnrolledCst
+            ? step
+            : boundsBuilder.create<ConstantIndexOp>(loc, stepUnrolledCst);
+  } else {
+    // Dynamic loop bounds computation.
+    // TODO(andydavis) Add dynamic asserts for negative lb/ub/step, or
+    // consider using ceilDiv from AffineApplyExpander.
+    auto lowerBound = forOp.lowerBound();
+    auto upperBound = forOp.upperBound();
+    Value diff = boundsBuilder.create<SubIOp>(loc, upperBound, lowerBound);
+    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
+    Value unrollFactorCst =
+        boundsBuilder.create<ConstantIndexOp>(loc, unrollFactor);
+    Value tripCountRem =
+        boundsBuilder.create<SignedRemIOp>(loc, tripCount, unrollFactorCst);
+    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
+    Value tripCountEvenMultiple =
+        boundsBuilder.create<SubIOp>(loc, tripCount, tripCountRem);
+    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
+    upperBoundUnrolled = boundsBuilder.create<AddIOp>(
+        loc, lowerBound,
+        boundsBuilder.create<MulIOp>(loc, tripCountEvenMultiple, step));
+    // Scale 'step' by 'unrollFactor'.
+    stepUnrolled = boundsBuilder.create<MulIOp>(loc, step, unrollFactorCst);
+  }
 
-    // Clone the original body of 'forOp'.
-    for (auto it = forOp.getBody()->begin(); it != std::next(srcBlockEnd);
-         it++) {
-      builder.clone(*it, operandMap);
-    }
+  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
+  if (generateEpilogueLoop) {
+    OpBuilder epilogueBuilder(forOp.getOperation()->getBlock(),
+                              std::next(Block::iterator(forOp)));
+    auto epilogueForOp = cast<loop::ForOp>(epilogueBuilder.clone(*forOp));
+    epilogueForOp.setLowerBound(upperBoundUnrolled);
+    promoteIfSingleIteration(epilogueForOp);
   }
 
+  // Create unrolled loop.
+  forOp.setUpperBound(upperBoundUnrolled);
+  forOp.setStep(stepUnrolled);
+  generateUnrolledLoop(forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+                       [&](unsigned i, Value iv, OpBuilder b) {
+                         // iv' = iv + step * i;
+                         auto stride = b.create<MulIOp>(
+                             loc, step, b.create<ConstantIndexOp>(loc, i));
+                         return b.create<AddIOp>(loc, iv, stride);
+                       });
   // Promote the loop body up if this has turned into a single iteration loop.
   promoteIfSingleIteration(forOp);
   return success();
@@ -1032,34 +1197,6 @@ Loops mlir::tilePerfectlyNested(loop::ForOp rootForOp, ArrayRef<Value> sizes) {
   return ::tile(forOps, sizes, forOps.back());
 }
 
-// Build the IR that performs ceil division of a positive value by a constant:
-//    ceildiv(a, B) = divis(a + (B-1), B)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             int64_t divisor) {
-  assert(divisor > 0 && "expected positive divisor");
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value divisorMinusOneCst = builder.create<ConstantIndexOp>(loc, divisor - 1);
-  Value divisorCst = builder.create<ConstantIndexOp>(loc, divisor);
-  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOneCst);
-  return builder.create<SignedDivIOp>(loc, sum, divisorCst);
-}
-
-// Build the IR that performs ceil division of a positive value by another
-// positive value:
-//    ceildiv(a, b) = divis(a + (b - 1), b)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             Value divisor) {
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value cstOne = builder.create<ConstantIndexOp>(loc, 1);
-  Value divisorMinusOne = builder.create<SubIOp>(loc, divisor, cstOne);
-  Value sum = builder.create<AddIOp>(loc, dividend, divisorMinusOne);
-  return builder.create<SignedDivIOp>(loc, sum, divisor);
-}
-
 // Hoist the ops within `outer` that appear before `inner`.
 // Such ops include the ops that have been introduced by parametric tiling.
 // Ops that come from triangular loops (i.e. that belong to the program slice

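As a worked example of the constant-bound computation in loopUnrollByFactor(loop::ForOp) above, take lb = 0, ub = 20, step = 1 and unrollFactor = 3 (the values exercised by the static_loop_unroll_by_3 test added below):

  tripCount             = ceilDiv(ub - lb, step)                   = ceilDiv(20, 1) = 20
  tripCountEvenMultiple = tripCount - (tripCount mod unrollFactor) = 20 - 2         = 18
  upperBoundUnrolled    = lb + tripCountEvenMultiple * step        = 0 + 18 * 1     = 18
  stepUnrolled          = step * unrollFactor                      = 1 * 3          = 3

The unrolled loop therefore runs from 0 to 18 with step 3, and the epilogue clean-up loop covers the remaining iterations from 18 to 20 with the original step of 1, which is exactly what the UNROLL-BY-3 checks below verify.
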
diff --git a/mlir/test/Dialect/Loops/loop-unroll.mlir b/mlir/test/Dialect/Loops/loop-unroll.mlir
new file mode 100644
index 000000000000..fa3ebc173e51
--- /dev/null
+++ b/mlir/test/Dialect/Loops/loop-unroll.mlir
@@ -0,0 +1,250 @@
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2' | FileCheck %s --check-prefix UNROLL-BY-2
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=3' | FileCheck %s --check-prefix UNROLL-BY-3
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
+// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
+
+func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
+                          %arg3: memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+    store %0, %arg3[%i0] : memref<?xf32>
+  }
+  return
+}
+// UNROLL-BY-2-LABEL: func @dynamic_loop_unroll
+//  UNROLL-BY-2-SAME:  %[[LB:.*0]]: index,
+//  UNROLL-BY-2-SAME:  %[[UB:.*1]]: index,
+//  UNROLL-BY-2-SAME:  %[[STEP:.*2]]: index,
+//  UNROLL-BY-2-SAME:  %[[MEM:.*3]]: memref<?xf32>
+//
+//   UNROLL-BY-2-DAG:  %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
+//   UNROLL-BY-2-DAG:  %[[C1:.*]] = constant 1 : index
+//   UNROLL-BY-2-DAG:  %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
+//   UNROLL-BY-2-DAG:  %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
+//       Compute trip count in V3.
+//   UNROLL-BY-2-DAG:  %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
+//       Store unroll factor in C2.
+//   UNROLL-BY-2-DAG:  %[[C2:.*]] = constant 2 : index
+//   UNROLL-BY-2-DAG:  %[[V4:.*]] = remi_signed %[[V3]], %[[C2]] : index
+//   UNROLL-BY-2-DAG:  %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
+//   UNROLL-BY-2-DAG:  %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
+//       Compute upper bound of unrolled loop in V7.
+//   UNROLL-BY-2-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
+//       Compute step of unrolled loop in V8.
+//   UNROLL-BY-2-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C2]] : index
+//       UNROLL-BY-2:  loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-BY-2-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
+//  UNROLL-BY-2-NEXT:    %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
+//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
+//  UNROLL-BY-2-NEXT:  }
+//  UNROLL-BY-2-NEXT:  loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-2-NEXT:  }
+//  UNROLL-BY-2-NEXT:  return
+
+// UNROLL-BY-3-LABEL: func @dynamic_loop_unroll
+//  UNROLL-BY-3-SAME:  %[[LB:.*0]]: index,
+//  UNROLL-BY-3-SAME:  %[[UB:.*1]]: index,
+//  UNROLL-BY-3-SAME:  %[[STEP:.*2]]: index,
+//  UNROLL-BY-3-SAME:  %[[MEM:.*3]]: memref<?xf32>
+//
+//   UNROLL-BY-3-DAG:  %[[V0:.*]] = subi %[[UB]], %[[LB]] : index
+//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
+//   UNROLL-BY-3-DAG:  %[[V1:.*]] = subi %[[STEP]], %[[C1]] : index
+//   UNROLL-BY-3-DAG:  %[[V2:.*]] = addi %[[V0]], %[[V1]] : index
+//       Compute trip count in V3.
+//   UNROLL-BY-3-DAG:  %[[V3:.*]] = divi_signed %[[V2]], %[[STEP]] : index
+//       Store unroll factor in C3.
+//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
+//   UNROLL-BY-3-DAG:  %[[V4:.*]] = remi_signed %[[V3]], %[[C3]] : index
+//   UNROLL-BY-3-DAG:  %[[V5:.*]] = subi %[[V3]], %[[V4]] : index
+//   UNROLL-BY-3-DAG:  %[[V6:.*]] = muli %[[V5]], %[[STEP]] : index
+//       Compute upper bound of unrolled loop in V7.
+//   UNROLL-BY-3-DAG:  %[[V7:.*]] = addi %[[LB]], %[[V6]] : index
+//       Compute step of unrolled loop in V8.
+//   UNROLL-BY-3-DAG:  %[[V8:.*]] = muli %[[STEP]], %[[C3]] : index
+//       UNROLL-BY-3:  loop.for %[[IV:.*]] = %[[LB]] to %[[V7]] step %[[V8]] {
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-BY-3-NEXT:    %[[V9:.*]] = muli %[[STEP]], %[[C1_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V10:.*]] = addi %[[IV]], %[[V9]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V10]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
+//  UNROLL-BY-3-NEXT:    %[[V11:.*]] = muli %[[STEP]], %[[C2_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V12:.*]] = addi %[[IV]], %[[V11]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V12]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  }
+//  UNROLL-BY-3-NEXT:  loop.for %[[IV:.*]] = %[[V7]] to %[[UB]] step %[[STEP]] {
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  }
+//  UNROLL-BY-3-NEXT:  return
+
+func @dynamic_loop_unroll_outer_by_2(
+  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
+  %arg5 : index, %arg6: memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+     store %0, %arg6[%i1] : memref<?xf32>
+    }
+  }
+  return
+}
+// UNROLL-OUTER-BY-2-LABEL: func @dynamic_loop_unroll_outer_by_2
+//  UNROLL-OUTER-BY-2-SAME:  %[[LB0:.*0]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[UB0:.*1]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[STEP0:.*2]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[LB1:.*3]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[UB1:.*4]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[STEP1:.*5]]: index,
+//  UNROLL-OUTER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
+//
+//       UNROLL-OUTER-BY-2:  loop.for %[[IV0:.*]] = %[[LB0]] to %{{.*}} step %{{.*}} {
+//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+//  UNROLL-OUTER-BY-2-NEXT:    }
+//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+//  UNROLL-OUTER-BY-2-NEXT:    }
+//  UNROLL-OUTER-BY-2-NEXT:  }
+//  UNROLL-OUTER-BY-2-NEXT:  loop.for %[[IV0:.*]] = %{{.*}} to %[[UB0]] step %[[STEP0]] {
+//  UNROLL-OUTER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-OUTER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+//  UNROLL-OUTER-BY-2-NEXT:    }
+//  UNROLL-OUTER-BY-2-NEXT:  }
+//  UNROLL-OUTER-BY-2-NEXT:  return
+
+func @dynamic_loop_unroll_inner_by_2(
+  %arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index,
+  %arg5 : index, %arg6: memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  loop.for %i0 = %arg0 to %arg1 step %arg2 {
+    loop.for %i1 = %arg3 to %arg4 step %arg5 {
+     store %0, %arg6[%i1] : memref<?xf32>
+    }
+  }
+  return
+}
+// UNROLL-INNER-BY-2-LABEL: func @dynamic_loop_unroll_inner_by_2
+//  UNROLL-INNER-BY-2-SAME:  %[[LB0:.*0]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[UB0:.*1]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[STEP0:.*2]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[LB1:.*3]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[UB1:.*4]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[STEP1:.*5]]: index,
+//  UNROLL-INNER-BY-2-SAME:  %[[MEM:.*6]]: memref<?xf32>
+//
+//       UNROLL-INNER-BY-2:  loop.for %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+//       UNROLL-INNER-BY-2:    loop.for %[[IV1:.*]] = %[[LB1]] to %{{.*}} step %{{.*}} {
+//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+//  UNROLL-INNER-BY-2-NEXT:      %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-INNER-BY-2-NEXT:      %[[V0:.*]] = muli %[[STEP1]], %[[C1_IV]] : index
+//  UNROLL-INNER-BY-2-NEXT:      %[[V1:.*]] = addi %[[IV1]], %[[V0]] : index
+//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+//  UNROLL-INNER-BY-2-NEXT:    }
+//  UNROLL-INNER-BY-2-NEXT:    loop.for %[[IV1:.*]] = %{{.*}} to %[[UB1]] step %[[STEP1]] {
+//  UNROLL-INNER-BY-2-NEXT:      store %{{.*}}, %[[MEM]][%[[IV1]]] : memref<?xf32>
+//  UNROLL-INNER-BY-2-NEXT:    }
+//  UNROLL-INNER-BY-2-NEXT:  }
+//  UNROLL-INNER-BY-2-NEXT:  return
+
+// Test that no epilogue clean-up loop is generated because the trip count is
+// a multiple of the unroll factor.
+func @static_loop_unroll_by_2(%arg0 : memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  %lb = constant 0 : index
+  %ub = constant 20 : index
+  %step = constant 1 : index
+  loop.for %i0 = %lb to %ub step %step {
+    store %0, %arg0[%i0] : memref<?xf32>
+  }
+  return
+}
+// UNROLL-BY-2-LABEL: func @static_loop_unroll_by_2
+//  UNROLL-BY-2-SAME:  %[[MEM:.*0]]: memref<?xf32>
+//
+//   UNROLL-BY-2-DAG:  %[[C0:.*]] = constant 0 : index
+//   UNROLL-BY-2-DAG:  %[[C1:.*]] = constant 1 : index
+//   UNROLL-BY-2-DAG:  %[[C20:.*]] = constant 20 : index
+//   UNROLL-BY-2-DAG:  %[[C2:.*]] = constant 2 : index
+//   UNROLL-BY-2:  loop.for %[[IV:.*]] = %[[C0]] to %[[C20]] step %[[C2]] {
+//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-2-NEXT:    %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-BY-2-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+//  UNROLL-BY-2-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+//  UNROLL-BY-2-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+//  UNROLL-BY-2-NEXT:  }
+//  UNROLL-BY-2-NEXT:  return
+
+// Test that an epilogue clean-up loop is generated (the trip count is not
+// a multiple of the unroll factor).
+func @static_loop_unroll_by_3(%arg0 : memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  %lb = constant 0 : index
+  %ub = constant 20 : index
+  %step = constant 1 : index
+  loop.for %i0 = %lb to %ub step %step {
+    store %0, %arg0[%i0] : memref<?xf32>
+  }
+  return
+}
+
+// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3
+//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
+//
+//   UNROLL-BY-3-DAG:  %[[C0:.*]] = constant 0 : index
+//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
+//   UNROLL-BY-3-DAG:  %[[C20:.*]] = constant 20 : index
+//   UNROLL-BY-3-DAG:  %[[C18:.*]] = constant 18 : index
+//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
+//       UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C18]] step %[[C3]] {
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
+//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  }
+//  UNROLL-BY-3-NEXT:  loop.for %[[IV:.*]] = %[[C18]] to %[[C20]] step %[[C1]] {
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  }
+//  UNROLL-BY-3-NEXT:  return
+
+// Test that the single-iteration epilogue loop body is promoted into the
+// loop's containing block.
+func @static_loop_unroll_by_3_promote_epilogue(%arg0 : memref<?xf32>) {
+  %0 = constant 7.0 : f32
+  %lb = constant 0 : index
+  %ub = constant 10 : index
+  %step = constant 1 : index
+  loop.for %i0 = %lb to %ub step %step {
+    store %0, %arg0[%i0] : memref<?xf32>
+  }
+  return
+}
+// UNROLL-BY-3-LABEL: func @static_loop_unroll_by_3_promote_epilogue
+//  UNROLL-BY-3-SAME:  %[[MEM:.*0]]: memref<?xf32>
+//
+//   UNROLL-BY-3-DAG:  %[[C0:.*]] = constant 0 : index
+//   UNROLL-BY-3-DAG:  %[[C1:.*]] = constant 1 : index
+//   UNROLL-BY-3-DAG:  %[[C10:.*]] = constant 10 : index
+//   UNROLL-BY-3-DAG:  %[[C9:.*]] = constant 9 : index
+//   UNROLL-BY-3-DAG:  %[[C3:.*]] = constant 3 : index
+//       UNROLL-BY-3: loop.for %[[IV:.*]] = %[[C0]] to %[[C9]] step %[[C3]] {
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[IV]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C1_IV:.*]] = constant 1 : index
+//  UNROLL-BY-3-NEXT:    %[[V0:.*]] = muli %[[C1]], %[[C1_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V1:.*]] = addi %[[IV]], %[[V0]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:    %[[C2_IV:.*]] = constant 2 : index
+//  UNROLL-BY-3-NEXT:    %[[V2:.*]] = muli %[[C1]], %[[C2_IV]] : index
+//  UNROLL-BY-3-NEXT:    %[[V3:.*]] = addi %[[IV]], %[[V2]] : index
+//  UNROLL-BY-3-NEXT:    store %{{.*}}, %[[MEM]][%[[V3]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  }
+//  UNROLL-BY-3-NEXT:  store %{{.*}}, %[[MEM]][%[[C9]]] : memref<?xf32>
+//  UNROLL-BY-3-NEXT:  return

diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 248da51bcec7..e7b31b3d0bcf 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_library(MLIRTestTransforms
   TestLiveness.cpp
   TestLoopMapping.cpp
   TestLoopParametricTiling.cpp
+  TestLoopUnrolling.cpp
   TestOpaqueLoc.cpp
   TestMemRefBoundCheck.cpp
   TestMemRefDependenceCheck.cpp

diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
new file mode 100644
index 000000000000..7cd221f37f8c
--- /dev/null
+++ b/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
@@ -0,0 +1,68 @@
+//===-------- TestLoopUnrolling.cpp --- loop unrolling test pass ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass to unroll loops by a specified unroll factor.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/LoopOps/LoopOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+
+static unsigned getNestingDepth(Operation *op) {
+  Operation *currOp = op;
+  unsigned depth = 0;
+  while ((currOp = currOp->getParentOp())) {
+    if (isa<loop::ForOp>(currOp))
+      depth++;
+  }
+  return depth;
+}
+
+class TestLoopUnrollingPass
+    : public PassWrapper<TestLoopUnrollingPass, FunctionPass> {
+public:
+  TestLoopUnrollingPass() = default;
+  TestLoopUnrollingPass(const TestLoopUnrollingPass &) {}
+  explicit TestLoopUnrollingPass(uint64_t unrollFactorParam,
+                                 unsigned loopDepthParam) {
+    unrollFactor = unrollFactorParam;
+    loopDepth = loopDepthParam;
+  }
+
+  void runOnFunction() override {
+    FuncOp func = getFunction();
+    SmallVector<loop::ForOp, 4> loops;
+    func.walk([&](loop::ForOp forOp) {
+      if (getNestingDepth(forOp) == loopDepth)
+        loops.push_back(forOp);
+    });
+    for (auto loop : loops) {
+      loopUnrollByFactor(loop, unrollFactor);
+    }
+  }
+  Option<uint64_t> unrollFactor{*this, "unroll-factor",
+                                llvm::cl::desc("Loop unroll factor."),
+                                llvm::cl::init(1)};
+  Option<unsigned> loopDepth{*this, "loop-depth", llvm::cl::desc("Loop depth."),
+                             llvm::cl::init(0)};
+};
+} // end namespace
+
+namespace mlir {
+void registerTestLoopUnrollingPass() {
+  PassRegistration<TestLoopUnrollingPass>(
+      "test-loop-unrolling", "Tests loop unrolling transformation");
+}
+} // namespace mlir

diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index c5cc533ab119..9d583dc2a319 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -53,6 +53,7 @@ void registerTestLinalgTransforms();
 void registerTestLivenessPass();
 void registerTestLoopFusion();
 void registerTestLoopMappingPass();
+void registerTestLoopUnrollingPass();
 void registerTestMatchers();
 void registerTestMemRefDependenceCheck();
 void registerTestMemRefStrideCalculation();
@@ -119,6 +120,7 @@ void registerTestPasses() {
   registerTestLivenessPass();
   registerTestLoopFusion();
   registerTestLoopMappingPass();
+  registerTestLoopUnrollingPass();
   registerTestMatchers();
   registerTestMemRefDependenceCheck();
   registerTestMemRefStrideCalculation();


        

