[Mlir-commits] [mlir] Fix the condition for peeling the first iteration (PR #86350)

Fri Mar 22 15:14:40 PDT 2024

https://github.com/yzhang93 created https://github.com/llvm/llvm-project/pull/86350

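This PR fixes the condition used when peeling the first iteration of a loop. The iteration count is now computed with a ceiling division instead of a floor division, so that the first iteration gets peeled whenever the loop runs more than once. For example, with lb = 2, ub = 8 and step = 4 the loop runs two iterations, but floor division yields (8 - 2) / 4 = 1, so peeling was incorrectly skipped.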

>From aa19f072f4f7975f98d8997d7924432393eac558 Mon Sep 17 00:00:00 2001
From: yzhang93 <zhyuhang88 at gmail.com>
Date: Fri, 22 Mar 2024 15:03:38 -0700
Subject: [PATCH] Fix the condition for peeling the first iteration

---
 .../SCF/Transforms/LoopSpecialization.cpp     |  2 +-
 .../Dialect/SCF/for-loop-peeling-front.mlir   | 47 +++++++++++++++++--
 2 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index a5bff0a892c3df..a30e349d49136c 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -220,7 +220,7 @@ LogicalResult mlir::scf::peelForLoopFirstIteration(RewriterBase &b, ForOp forOp,
   auto stepInt = getConstantIntValue(forOp.getStep());
 
   // Peeling is not needed if there is one or less iteration.
-  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) / *stepInt <= 1)
+  if (lbInt && ubInt && stepInt && ceil(float(*ubInt - *lbInt) / *stepInt) <= 1)
     return failure();
 
   AffineExpr lbSymbol, stepSymbol;
diff --git a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
index 65141ff7623ff2..fe3b3e686a3e4a 100644
--- a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
@@ -13,11 +13,11 @@
 //      CHECK:     %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
 //      CHECK:     scf.yield %[[INIT]]
 //      CHECK:   }
-//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C4]] to %[[C17]]
-// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[FIRST]]) -> (i32) {
-//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C17]], %[[IV]])[%[[C4]]]
+//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[C4]] to %[[C17]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC2:.*]] = %[[FIRST]]) -> (i32) {
+//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C17]], %[[IV2]])[%[[C4]]]
 //      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[MIN2]] : index to i32
-//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST2]] : i32
+//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC2]], %[[CAST2]] : i32
 //      CHECK:     scf.yield %[[ADD]]
 //      CHECK:   }
 //      CHECK:   return %[[RESULT]]
@@ -110,6 +110,45 @@ func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
 
 // -----
 
+//  CHECK-DAG: #[[MAP:.*]] = affine_map<(d0, d1)[s0] -> (4, d0 - d1)>
+//      CHECK: func @two_iteration_example(
+//  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32
+//  CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
+//  CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
+//  CHECK-DAG:   %[[C8:.*]] = arith.constant 8 : index
+//  CHECK-DAG:   %[[C6:.*]] = arith.constant 6 : index
+//      CHECK:   %[[FIRST:.*]] = scf.for %[[IV:.*]] = %[[C2]] to %[[C6]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
+//      CHECK:     %[[MIN:.*]] = affine.min #[[MAP]](%[[C6]], %[[IV]])[%[[C4]]]
+//      CHECK:     %[[CAST:.*]] = arith.index_cast %[[MIN]] : index to i32
+//      CHECK:     %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
+//      CHECK:     scf.yield %[[INIT]]
+//      CHECK:   }
+//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[C6]] to %[[C8]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC2:.*]] = %[[FIRST]]) -> (i32) {
+//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C8]], %[[IV2]])[%[[C4]]]
+//      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[MIN2]] : index to i32
+//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC2]], %[[CAST2]] : i32
+//      CHECK:     scf.yield %[[ADD]]
+//      CHECK:   }
+//      CHECK:   return %[[RESULT]]
+#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+func.func @two_iteration_example() -> i32 {
+  %c0_i32 = arith.constant 0 : i32
+  %lb = arith.constant 2 : index
+  %step = arith.constant 4 : index
+  %ub = arith.constant 8 : index
+  %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 {
+    %s = affine.min #map(%ub, %iv)[%step]
+    %casted = arith.index_cast %s : index to i32
+    %0 = arith.addi %arg, %casted : i32
+    scf.yield %0 : i32
+  }
+  return %r : i32
+}
+
+// -----
+
 //  CHECK-DAG: #[[MAP:.*]] = affine_map<(d0, d1)[s0] -> (4, d0 - d1)>
 //      CHECK: func @no_peeling_front(
 //  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32