[Mlir-commits] [mlir] 817d343 - [MLIR] Fix tilePerfectlyNested utility for handling non-unit step size

Mon Feb 22 11:20:48 PST 2021

Author: Vivek
Date: 2021-02-23T00:50:04+05:30
New Revision: 817d343fb048b330b77a7b4ef0af0dba7450dfb2

URL: https://github.com/llvm/llvm-project/commit/817d343fb048b330b77a7b4ef0af0dba7450dfb2
DIFF: https://github.com/llvm/llvm-project/commit/817d343fb048b330b77a7b4ef0af0dba7450dfb2.diff

LOG: [MLIR] Fix tilePerfectlyNested utility for handling non-unit step size

The current implementation of tilePerfectlyNested utility doesn't handle
the non-unit step size. We have added support to perform tiling
correctly even if the step size of the loop to be tiled is non-unit.
Fixes https://bugs.llvm.org/show_bug.cgi?id=49188.

Differential Revision: https://reviews.llvm.org/D97037

Added: 
    

Modified: 
    mlir/lib/Transforms/Utils/LoopUtils.cpp
    mlir/test/Dialect/Affine/loop-tiling.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
index a8c32c84d1241..77d24fb0c1617 100644

--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -848,7 +848,9 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
     OperandRange newUbOperands = origLoops[i].getUpperBoundOperands();
     newLoops[i].setLowerBound(newLbOperands, origLoops[i].getLowerBoundMap());
     newLoops[i].setUpperBound(newUbOperands, origLoops[i].getUpperBoundMap());
-    newLoops[i].setStep(tileSizes[i]);
+    // If the step size of original loop is x and tileSize is y then after
+    // tiling the tile space loops' step size becomes x*y.
+    newLoops[i].setStep(tileSizes[i] * origLoops[i].getStep());
   }
   // Bounds for intra-tile loops.
   for (unsigned i = 0; i < width; i++) {
@@ -858,20 +860,23 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
     AffineMap lbMap = b.getDimIdentityMap();
     newLoops[width + i].setLowerBound(
         /*operands=*/newLoops[i].getInductionVar(), lbMap);
+    // The step sizes of intra-tile loops is just the original loops' step size.
+    newLoops[width + i].setStep(origLoops[i].getStep());
 
     // Set the upper bound.
     if (mayBeConstantCount && mayBeConstantCount.getValue() < tileSizes[i]) {
       // Trip count is less than the tile size: upper bound is lower bound +
-      // trip count.
-      AffineMap ubMap =
-          b.getSingleDimShiftAffineMap(mayBeConstantCount.getValue());
+      // trip count * stepSize.
+      AffineMap ubMap = b.getSingleDimShiftAffineMap(
+          mayBeConstantCount.getValue() * origLoops[i].getStep());
       newLoops[width + i].setUpperBound(
           /*operands=*/newLoops[i].getInductionVar(), ubMap);
     } else if (largestDiv % tileSizes[i] != 0) {
-      // Intra-tile loop ii goes from i to min(i + tileSize, ub_i).
+      // Intra-tile loop ii goes from i to min(i + tileSize * stepSize, ub_i).
       // Construct the upper bound map; the operands are the original operands
       // with 'i' (tile-space loop) appended to it. The new upper bound map is
-      // the original one with an additional expression i + tileSize appended.
+      // the original one with an additional expression i + tileSize * stepSize
+      // appended.
 
       // Add dim operands from original upper bound.
       SmallVector<Value, 4> ubOperands;
@@ -892,8 +897,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
       boundExprs.reserve(1 + origUbMap.getNumResults());
       AffineExpr dim = b.getAffineDimExpr(origUbMap.getNumDims());
       // The new upper bound map is the original one with an additional
-      // expression i + tileSize appended.
-      boundExprs.push_back(dim + tileSizes[i]);
+      // expression i + tileSize * stepSize (of original loop) appended.
+      boundExprs.push_back(dim + tileSizes[i] * origLoops[i].getStep());
       boundExprs.append(origUbMap.getResults().begin(),
                         origUbMap.getResults().end());
       AffineMap ubMap =
@@ -903,7 +908,8 @@ constructTiledIndexSetHyperRect(MutableArrayRef<AffineForOp> origLoops,
     } else {
       // No need of the min expression.
       AffineExpr dim = b.getAffineDimExpr(0);
-      AffineMap ubMap = AffineMap::get(1, 0, dim + tileSizes[i]);
+      AffineMap ubMap =
+          AffineMap::get(1, 0, dim + tileSizes[i] * origLoops[i].getStep());
       newLoops[width + i].setUpperBound(newLoops[i].getInductionVar(), ubMap);
     }
   }

diff  --git a/mlir/test/Dialect/Affine/loop-tiling.mlir b/mlir/test/Dialect/Affine/loop-tiling.mlir
index 67ca6ba9b69cb..0b6542f08e534 100644
--- a/mlir/test/Dialect/Affine/loop-tiling.mlir
+++ b/mlir/test/Dialect/Affine/loop-tiling.mlir
@@ -174,6 +174,24 @@ func @tile_with_loop_upper_bounds_in_two_symbols(%arg0: memref<?xf32>, %limit: i
 
 // -----
 
+// CHECK-DAG:  #[[$ID:.*]] = affine_map<(d0) -> (d0)>
+// CHECK-DAG:  [[$UBMAP:#map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + 160, s0)>
+
+func @tile_loop_with_non_unit_step(%arg0 : memref<50xf32>, %arg1 : index) {
+  affine.for %i = 0 to %arg1 step 5 {
+    affine.load %arg0[%i] : memref<50xf32>
+  }
+  return
+}
+
+// CHECK-LABEL: func @tile_loop_with_non_unit_step(%arg{{.*}}: memref<50xf32>, %arg{{.*}}: index)
+// CHECK:     affine.for %[[I:.*]] = 0 to %[[N:.*]] step 160 {
+// CHECK-NEXT:       affine.for %[[II:.*]] = [[$ID:.*]](%[[I]]) to min
+// [[$UBMAP]](%[[I]])[%[[N]]] step 5 {
+// CHECK-NEXT:         affine.load %arg{{.*}}[%arg{{.*}}] : memref<50xf32>
+
+// -----
+
 func @tile_size_larger_than_trip_count_symbolic_bound(%M: index, %N :  index) {
   affine.for %i = affine_map<(d0) -> (d0)>(%M) to affine_map<(d0) -> (d0 + 2)>(%M) {
     affine.for %j = affine_map<(d0) -> (d0)>(%N) to affine_map<(d0) -> (d0 + 4)>(%N) {