[Mlir-commits] [mlir] 68316af - [MLIR][NFC] loop transforms/analysis utils cleanup / modernize
Uday Bondhugula
llvmlistbot at llvm.org
Wed Apr 1 10:11:53 PDT 2020
Author: Uday Bondhugula
Date: 2020-04-01T22:36:25+05:30
New Revision: 68316afb29ca5bda0538bc69f96eebdff3954b48
URL: https://github.com/llvm/llvm-project/commit/68316afb29ca5bda0538bc69f96eebdff3954b48
DIFF: https://github.com/llvm/llvm-project/commit/68316afb29ca5bda0538bc69f96eebdff3954b48.diff
LOG: [MLIR][NFC] loop transforms/analysis utils cleanup / modernize
Modernize/cleanup code in loop transforms utils - a lot of this code was
written prior to the currently available IR support / code style. This
patch also does some variable renames including inst -> op, comment
updates, turns getCleanupLoopLowerBound into a local function.
Differential Revision: https://reviews.llvm.org/D77175
Added:
Modified:
mlir/include/mlir/Analysis/LoopAnalysis.h
mlir/include/mlir/Transforms/LoopUtils.h
mlir/lib/Analysis/LoopAnalysis.cpp
mlir/lib/Transforms/PipelineDataTransfer.cpp
mlir/lib/Transforms/Utils/LoopUtils.cpp
Removed:
################################################################################
diff --git a/mlir/include/mlir/Analysis/LoopAnalysis.h b/mlir/include/mlir/Analysis/LoopAnalysis.h
index 5141df736a9c..7ed19ef99f87 100644
--- a/mlir/include/mlir/Analysis/LoopAnalysis.h
+++ b/mlir/include/mlir/Analysis/LoopAnalysis.h
@@ -82,7 +82,7 @@ bool isVectorizableLoopBody(AffineForOp loop, int *memRefDim,
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence
// violation when we have the support.
-bool isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts);
+bool isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts);
} // end namespace mlir
#endif // MLIR_ANALYSIS_LOOP_ANALYSIS_H
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h
index c18f64572fcd..3a5a6e52c54f 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Transforms/LoopUtils.h
@@ -72,20 +72,11 @@ LogicalResult promoteIfSingleIteration(AffineForOp forOp);
/// their body into the containing Block.
void promoteSingleIterationLoops(FuncOp f);
-/// Computes the cleanup loop lower bound of the loop being unrolled with
-/// the specified unroll factor; this bound will also be upper bound of the main
-/// part of the unrolled loop. Computes the bound as an AffineMap with its
-/// operands or a null map when the trip count can't be expressed as an affine
-/// expression.
-void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
- AffineMap *map, SmallVectorImpl<Value> *operands,
- OpBuilder &builder);
-
-/// Skew the operations in the body of an affine.for operation with the
-/// specified operation-wise shifts. The shifts are with respect to the
-/// original execution order, and are multiplied by the loop 'step' before being
-/// applied. If `unrollPrologueEpilogue` is set, fully unroll the prologue and
-/// epilogue loops when possible.
+/// Skew the operations in an affine.for's body with the specified
+/// operation-wise shifts. The shifts are with respect to the original execution
+/// order, and are multiplied by the loop 'step' before being applied. If
+/// `unrollPrologueEpilogue` is set, fully unroll the prologue and epilogue
+/// loops when possible.
LLVM_NODISCARD
LogicalResult affineForOpBodySkew(AffineForOp forOp, ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue = false);
diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp
index 9b5725188fae..40e176367d18 100644
--- a/mlir/lib/Analysis/LoopAnalysis.cpp
+++ b/mlir/lib/Analysis/LoopAnalysis.cpp
@@ -30,9 +30,6 @@ using namespace mlir;
/// expression is simplified before returning. This method only utilizes map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
-// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
-// pure analysis method relying on FlatAffineConstraints; the latter will also
-// be more powerful (since both inequalities and equalities will be considered).
void mlir::buildTripCountMapAndOperands(
AffineForOp forOp, AffineMap *tripCountMap,
SmallVectorImpl<Value> *tripCountOperands) {
@@ -270,8 +267,8 @@ static bool isContiguousAccess(Value iv, LoadOrStoreOp memoryOp,
return true;
}
-template <typename LoadOrStoreOpPointer>
-static bool isVectorElement(LoadOrStoreOpPointer memoryOp) {
+template <typename LoadOrStoreOp>
+static bool isVectorElement(LoadOrStoreOp memoryOp) {
auto memRefType = memoryOp.getMemRefType();
return memRefType.getElementType().template isa<VectorType>();
}
@@ -351,7 +348,7 @@ bool mlir::isVectorizableLoopBody(AffineForOp loop,
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence violation
// when we have the support.
-bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
+bool mlir::isOpwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
auto *forBody = forOp.getBody();
assert(shifts.size() == forBody->getOperations().size());
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Transforms/PipelineDataTransfer.cpp
index 4ca3620b9d28..4c1030336540 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Transforms/PipelineDataTransfer.cpp
@@ -345,7 +345,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
instShiftMap[&op] = 1;
// Get shifts stored in map.
- std::vector<uint64_t> shifts(forOp.getBody()->getOperations().size());
+ SmallVector<uint64_t, 8> shifts(forOp.getBody()->getOperations().size());
unsigned s = 0;
for (auto &op : forOp.getBody()->without_terminator()) {
assert(instShiftMap.find(&op) != instShiftMap.end());
@@ -358,7 +358,7 @@ void PipelineDataTransfer::runOnAffineForOp(AffineForOp forOp) {
});
}
- if (!isInstwiseShiftValid(forOp, shifts)) {
+ if (!isOpwiseShiftValid(forOp, shifts)) {
// Violates dependences.
LLVM_DEBUG(llvm::dbgs() << "Shifts invalid - unexpected\n";);
return;
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index 90209ecbf89f..235becc92628 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -53,15 +53,14 @@ struct LoopParams {
/// part of the unrolled loop. Computes the bound as an AffineMap with its
/// operands or a null map when the trip count can't be expressed as an affine
/// expression.
-void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
- AffineMap *map,
- SmallVectorImpl<Value> *operands,
- OpBuilder &b) {
+static void getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
+ AffineMap &map,
+ SmallVectorImpl<Value> &operands) {
auto lbMap = forOp.getLowerBoundMap();
// Single result lower bound map only.
if (lbMap.getNumResults() != 1) {
- *map = AffineMap();
+ map = AffineMap();
return;
}
@@ -71,11 +70,11 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
// Sometimes the trip count cannot be expressed as an affine expression.
if (!tripCountMap) {
- *map = AffineMap();
+ map = AffineMap();
return;
}
- unsigned step = forOp.getStep();
+ OpBuilder b(forOp);
auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap,
forOp.getLowerBoundOperands());
@@ -86,6 +85,7 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
// these affine.apply's make up the cleanup loop lower bound.
SmallVector<AffineExpr, 4> bumpExprs(tripCountMap.getNumResults());
SmallVector<Value, 4> bumpValues(tripCountMap.getNumResults());
+ int64_t step = forOp.getStep();
for (unsigned i = 0, e = tripCountMap.getNumResults(); i < e; i++) {
auto tripCountExpr = tripCountMap.getResult(i);
bumpExprs[i] = (tripCountExpr - tripCountExpr % unrollFactor) * step;
@@ -99,19 +99,19 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
for (unsigned i = 0, e = bumpExprs.size(); i < e; i++)
newUbExprs[i] = b.getAffineDimExpr(0) + b.getAffineDimExpr(i + 1);
- operands->clear();
- operands->push_back(lb);
- operands->append(bumpValues.begin(), bumpValues.end());
- *map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs);
+ operands.clear();
+ operands.push_back(lb);
+ operands.append(bumpValues.begin(), bumpValues.end());
+ map = AffineMap::get(1 + tripCountMap.getNumResults(), 0, newUbExprs);
// Simplify the map + operands.
- fullyComposeAffineMapAndOperands(map, operands);
- *map = simplifyAffineMap(*map);
- canonicalizeMapAndOperands(map, operands);
+ fullyComposeAffineMapAndOperands(&map, &operands);
+ map = simplifyAffineMap(map);
+ canonicalizeMapAndOperands(&map, &operands);
// Remove any affine.apply's that became dead from the simplification above.
- for (auto v : bumpValues) {
+ for (auto v : bumpValues)
if (v.use_empty())
v.getDefiningOp()->erase();
- }
+
if (lb.use_empty())
lb.erase();
}
@@ -121,16 +121,15 @@ void mlir::getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
// TODO(bondhugula): extend this for arbitrary affine bounds.
LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
Optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (!tripCount.hasValue() || tripCount.getValue() != 1)
+ if (!tripCount || tripCount.getValue() != 1)
return failure();
- // TODO(mlir-team): there is no builder for a max.
if (forOp.getLowerBoundMap().getNumResults() != 1)
return failure();
// Replaces all IV uses to its single iteration value.
auto iv = forOp.getInductionVar();
- Operation *op = forOp.getOperation();
+ auto *parentBlock = forOp.getOperation()->getBlock();
if (!iv.use_empty()) {
if (forOp.hasConstantLowerBound()) {
OpBuilder topBuilder(forOp.getParentOfType<FuncOp>().getBody());
@@ -140,7 +139,7 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
} else {
auto lbOperands = forOp.getLowerBoundOperands();
auto lbMap = forOp.getLowerBoundMap();
- OpBuilder builder(op->getBlock(), Block::iterator(op));
+ OpBuilder builder(parentBlock, Block::iterator(forOp));
if (lbMap == builder.getDimIdentityMap()) {
// No need of generating an affine.apply.
iv.replaceAllUsesWith(lbOperands[0]);
@@ -151,17 +150,16 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
}
}
}
- // Move the loop body operations, except for terminator, to the loop's
+ // Move the loop body operations, except for its terminator, to the loop's
// containing block.
- auto *block = op->getBlock();
- forOp.getBody()->getOperations().back().erase();
- block->getOperations().splice(Block::iterator(op),
- forOp.getBody()->getOperations());
+ forOp.getBody()->back().erase();
+ parentBlock->getOperations().splice(Block::iterator(forOp),
+ forOp.getBody()->getOperations());
forOp.erase();
return success();
}
-/// Promotes all single iteration for op's in the FuncOp, i.e., moves
+/// Promotes all single iteration 'for' ops in `f`, i.e., moves
/// their body into the containing Block.
void mlir::promoteSingleIterationLoops(FuncOp f) {
// Gathers all innermost loops through a post order pruned walk.
@@ -233,6 +231,8 @@ static AffineForOp generateShiftedLoop(
LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
ArrayRef<uint64_t> shifts,
bool unrollPrologueEpilogue) {
+ assert(forOp.getBody()->getOperations().size() == shifts.size() &&
+ "too few/many shifts");
if (forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
return success();
@@ -247,20 +247,17 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
}
uint64_t tripCount = mayBeConstTripCount.getValue();
- assert(isInstwiseShiftValid(forOp, shifts) &&
+ assert(isOpwiseShiftValid(forOp, shifts) &&
"shifts will lead to an invalid transformation\n");
int64_t step = forOp.getStep();
- unsigned numChildInsts = forOp.getBody()->getOperations().size();
+ unsigned numChildOps = shifts.size();
// Do a linear time (counting) sort for the shifts.
- uint64_t maxShift = 0;
- for (unsigned i = 0; i < numChildInsts; i++) {
- maxShift = std::max(maxShift, shifts[i]);
- }
- // Such large shifts are not the typical use case.
- if (maxShift >= numChildInsts) {
+ uint64_t maxShift = *std::max_element(shifts.begin(), shifts.end());
+ if (maxShift >= numChildOps) {
+ // Large shifts are not the typical use case.
forOp.emitWarning("not shifting because shifts are unrealistically large");
return success();
}
@@ -289,13 +286,13 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
auto origLbMap = forOp.getLowerBoundMap();
uint64_t lbShift = 0;
- OpBuilder b(forOp.getOperation());
+ OpBuilder b(forOp);
for (uint64_t d = 0, e = sortedOpGroups.size(); d < e; ++d) {
// If nothing is shifted by d, continue.
if (sortedOpGroups[d].empty())
continue;
if (!opGroupQueue.empty()) {
- assert(d >= 1 &&
+ assert(d > 0 &&
"Queue expected to be empty when the first block is found");
// The interval for which the loop needs to be generated here is:
// [lbShift, min(lbShift + tripCount, d)) and the body of the
@@ -343,8 +340,7 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp,
if (unrollPrologueEpilogue && prologue)
loopUnrollFull(prologue);
- if (unrollPrologueEpilogue && !epilogue &&
- epilogue.getOperation() != prologue.getOperation())
+ if (unrollPrologueEpilogue && !epilogue && epilogue != prologue)
loopUnrollFull(epilogue);
return success();
@@ -389,9 +385,8 @@ LogicalResult mlir::loopUnrollFull(AffineForOp forOp) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
if (mayBeConstantTripCount.hasValue()) {
uint64_t tripCount = mayBeConstantTripCount.getValue();
- if (tripCount == 1) {
+ if (tripCount == 1)
return promoteIfSingleIteration(forOp);
- }
return loopUnrollByFactor(forOp, tripCount);
}
return failure();
@@ -413,14 +408,14 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
/// is successfully unrolled.
LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
uint64_t unrollFactor) {
- assert(unrollFactor >= 1 && "unroll factor should be >= 1");
+ assert(unrollFactor > 0 && "unroll factor should be positive");
if (unrollFactor == 1)
return promoteIfSingleIteration(forOp);
- if (forOp.getBody()->empty() ||
- forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
- return failure();
+ // Nothing in the loop body other than the terminator.
+ if (has_single_element(forOp.getBody()->getOperations()))
+ return success();
// Loops where the lower bound is a max expression isn't supported for
// unrolling since the trip count can be expressed as an affine function when
@@ -438,20 +433,19 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
return failure();
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
- Operation *op = forOp.getOperation();
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
- OpBuilder builder(op->getBlock(), ++Block::iterator(op));
- auto cleanupForInst = cast<AffineForOp>(builder.clone(*op));
+ OpBuilder builder(forOp.getOperation()->getBlock(),
+ std::next(Block::iterator(forOp)));
+ auto cleanupForOp = cast<AffineForOp>(builder.clone(*forOp));
AffineMap cleanupMap;
SmallVector<Value, 4> cleanupOperands;
- getCleanupLoopLowerBound(forOp, unrollFactor, &cleanupMap, &cleanupOperands,
- builder);
+ getCleanupLoopLowerBound(forOp, unrollFactor, cleanupMap, cleanupOperands);
assert(cleanupMap &&
"cleanup loop lower bound map for single result lower bound maps "
"can always be determined");
- cleanupForInst.setLowerBound(cleanupOperands, cleanupMap);
+ cleanupForOp.setLowerBound(cleanupOperands, cleanupMap);
// Promote the loop body up if this has turned into a single iteration loop.
- promoteIfSingleIteration(cleanupForInst);
+ promoteIfSingleIteration(cleanupForOp);
// Adjust upper bound of the original loop; this is the same as the lower
// bound of the cleanup loop.
@@ -470,7 +464,7 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
// so that we know what to clone (since we are doing this in-place).
Block::iterator srcBlockEnd = std::prev(forOp.getBody()->end(), 2);
- // Unroll the contents of 'forOp' (append unrollFactor-1 additional copies).
+ // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
auto forOpIV = forOp.getInductionVar();
for (unsigned i = 1; i < unrollFactor; i++) {
BlockAndValueMapping operandMap;
@@ -501,7 +495,6 @@ LogicalResult mlir::loopUnrollByFactor(AffineForOp forOp,
LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
uint64_t unrollJamFactor) {
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
-
if (mayBeConstantTripCount.hasValue() &&
mayBeConstantTripCount.getValue() < unrollJamFactor)
return loopUnrollJamByFactor(forOp, mayBeConstantTripCount.getValue());
@@ -524,6 +517,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
for (auto &block : region)
walk(block);
}
+
void walk(Block &block) {
for (auto it = block.begin(), e = std::prev(block.end()); it != e;) {
auto subBlockStart = it;
@@ -531,21 +525,21 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
++it;
if (it != subBlockStart)
subBlocks.push_back({subBlockStart, std::prev(it)});
- // Process all for insts that appear next.
+ // Process all for ops that appear next.
while (it != e && isa<AffineForOp>(&*it))
walk(&*it++);
}
}
};
- assert(unrollJamFactor >= 1 && "unroll jam factor should be >= 1");
+ assert(unrollJamFactor > 0 && "unroll jam factor should be positive");
if (unrollJamFactor == 1)
return promoteIfSingleIteration(forOp);
- if (forOp.getBody()->empty() ||
- forOp.getBody()->begin() == std::prev(forOp.getBody()->end()))
- return failure();
+ // Nothing in the loop body other than the terminator.
+ if (has_single_element(forOp.getBody()->getOperations()))
+ return success();
// Loops where both lower and upper bounds are multi-result maps won't be
// unrolled (since the trip can't be expressed as an affine function in
@@ -559,28 +553,29 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
// If the trip count is lower than the unroll jam factor, no unroll jam.
if (mayBeConstantTripCount.hasValue() &&
- mayBeConstantTripCount.getValue() < unrollJamFactor)
+ mayBeConstantTripCount.getValue() < unrollJamFactor) {
+ LLVM_DEBUG(llvm::dbgs() << "[failed] trip count < unroll-jam factor\n");
return failure();
-
- auto *forInst = forOp.getOperation();
+ }
// Gather all sub-blocks to jam upon the loop being unrolled.
JamBlockGatherer jbg;
- jbg.walk(forInst);
+ jbg.walk(forOp);
auto &subBlocks = jbg.subBlocks;
// Generate the cleanup loop if trip count isn't a multiple of
// unrollJamFactor.
if (getLargestDivisorOfTripCount(forOp) % unrollJamFactor != 0) {
// Insert the cleanup loop right after 'forOp'.
- OpBuilder builder(forInst->getBlock(), std::next(Block::iterator(forInst)));
- auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forInst));
+ OpBuilder builder(forOp.getOperation()->getBlock(),
+ std::next(Block::iterator(forOp)));
+ auto cleanupAffineForOp = cast<AffineForOp>(builder.clone(*forOp));
// Adjust the lower bound of the cleanup loop; its upper bound is the same
// as the original loop's upper bound.
AffineMap cleanupMap;
SmallVector<Value, 4> cleanupOperands;
- getCleanupLoopLowerBound(forOp, unrollJamFactor, &cleanupMap,
- &cleanupOperands, builder);
+ getCleanupLoopLowerBound(forOp, unrollJamFactor, cleanupMap,
+ cleanupOperands);
cleanupAffineForOp.setLowerBound(cleanupOperands, cleanupMap);
// Promote the cleanup loop if it has turned into a single iteration loop.
@@ -599,7 +594,7 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
// Unroll and jam (appends unrollJamFactor - 1 additional copies).
for (unsigned i = unrollJamFactor - 1; i >= 1; --i) {
// Operand map persists across all sub-blocks.
- BlockAndValueMapping operandMapping;
+ BlockAndValueMapping operandMap;
for (auto &subBlock : subBlocks) {
// Builder to insert unroll-jammed bodies. Insert right at the end of
// sub-block.
@@ -612,13 +607,12 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
auto d0 = builder.getAffineDimExpr(0);
auto bumpMap = AffineMap::get(1, 0, {d0 + i * step});
auto ivUnroll =
- builder.create<AffineApplyOp>(forInst->getLoc(), bumpMap, forOpIV);
- operandMapping.map(forOpIV, ivUnroll);
+ builder.create<AffineApplyOp>(forOp.getLoc(), bumpMap, forOpIV);
+ operandMap.map(forOpIV, ivUnroll);
}
// Clone the sub-block being unroll-jammed.
- for (auto it = subBlock.first; it != std::next(subBlock.second); ++it) {
- builder.clone(*it, operandMapping);
- }
+ for (auto it = subBlock.first; it != std::next(subBlock.second); ++it)
+ builder.clone(*it, operandMap);
}
}
@@ -630,8 +624,6 @@ LogicalResult mlir::loopUnrollJamByFactor(AffineForOp forOp,
/// Performs loop interchange on 'forOpA' and 'forOpB', where 'forOpB' is
/// nested within 'forOpA' as the only non-terminator operation in its block.
void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
- auto *forOpAInst = forOpA.getOperation();
-
assert(&*forOpA.getBody()->begin() == forOpB.getOperation());
auto &forOpABody = forOpA.getBody()->getOperations();
auto &forOpBBody = forOpB.getBody()->getOperations();
@@ -639,16 +631,17 @@ void mlir::interchangeLoops(AffineForOp forOpA, AffineForOp forOpB) {
// 1) Splice forOpA's non-terminator operations (which is just forOpB) just
// before forOpA (in ForOpA's parent's block) this should leave 'forOpA's
// body containing only the terminator.
- forOpAInst->getBlock()->getOperations().splice(Block::iterator(forOpAInst),
- forOpABody, forOpABody.begin(),
- std::prev(forOpABody.end()));
+ forOpA.getOperation()->getBlock()->getOperations().splice(
+ Block::iterator(forOpA), forOpABody, forOpABody.begin(),
+ std::prev(forOpABody.end()));
// 2) Splice forOpB's non-terminator operations into the beginning of forOpA's
// body (this leaves forOpB's body containing only the terminator).
forOpABody.splice(forOpABody.begin(), forOpBBody, forOpBBody.begin(),
std::prev(forOpBBody.end()));
// 3) Splice forOpA into the beginning of forOpB's body.
- forOpBBody.splice(forOpBBody.begin(), forOpAInst->getBlock()->getOperations(),
- Block::iterator(forOpAInst));
+ forOpBBody.splice(forOpBBody.begin(),
+ forOpA.getOperation()->getBlock()->getOperations(),
+ Block::iterator(forOpA));
}
// Checks each dependence component against the permutation to see if the
@@ -873,8 +866,8 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
auto scaledStep = originalStep * factor;
forOp.setStep(scaledStep);
- auto *op = forOp.getOperation();
- OpBuilder b(op->getBlock(), ++Block::iterator(op));
+ OpBuilder b(forOp.getOperation()->getBlock(),
+ std::next(Block::iterator(forOp)));
// Lower-bound map creation.
auto lbMap = forOp.getLowerBoundMap();
@@ -1180,7 +1173,6 @@ static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
if (auto stepCst = dyn_cast_or_null<ConstantIndexOp>(step.getDefiningOp()))
isStepOne = stepCst.getValue() == 1;
-
// Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
// assuming the step is strictly positive. Update the bounds and the step
// of the loop to go from 0 to the number of iterations, if necessary.
@@ -1823,16 +1815,16 @@ static LogicalResult generateCopy(
/// Construct the memref region to just include the entire memref. Returns false
/// dynamic shaped memref's for now. `numParamLoopIVs` is the number of
-/// enclosing loop IVs of opInst (starting from the outermost) that the region
+/// enclosing loop IVs of `op` (starting from the outermost) that the region
/// is parametric on.
-static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs,
+static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs,
MemRefRegion *region) {
unsigned rank;
- if (auto loadOp = dyn_cast<AffineLoadOp>(opInst)) {
+ if (auto loadOp = dyn_cast<AffineLoadOp>(op)) {
rank = loadOp.getMemRefType().getRank();
region->memref = loadOp.getMemRef();
region->setWrite(false);
- } else if (auto storeOp = dyn_cast<AffineStoreOp>(opInst)) {
+ } else if (auto storeOp = dyn_cast<AffineStoreOp>(op)) {
rank = storeOp.getMemRefType().getRank();
region->memref = storeOp.getMemRef();
region->setWrite(true);
@@ -1849,7 +1841,7 @@ static bool getFullMemRefAsRegion(Operation *opInst, unsigned numParamLoopIVs,
// Just get the first numSymbols IVs, which the memref region is parametric
// on.
SmallVector<AffineForOp, 4> ivs;
- getLoopIVs(*opInst, &ivs);
+ getLoopIVs(*op, &ivs);
ivs.resize(numParamLoopIVs);
SmallVector<Value, 4> symbols;
extractForInductionVars(ivs, &symbols);
More information about the Mlir-commits
mailing list