[Mlir-commits] [mlir] Fix the condition for peeling the first iteration (PR #86350)

Fri Mar 22 15:15:08 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-mlir-scf

@llvm/pr-subscribers-mlir

Author: Vivian (yzhang93)

<details>
<summary>Changes</summary>

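This PR fixes the condition used when peeling the first iteration of a loop. The iteration count is now computed with ceilDiv instead of floorDiv, so that the first iteration gets peeled whenever the loop runs more than once. For example, with lb = 2, ub = 8 and step = 4 the loop runs twice (iv = 2 and iv = 6), but floorDiv yields (8 - 2) / 4 = 1, so peeling was incorrectly skipped.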

---
Full diff: https://github.com/llvm/llvm-project/pull/86350.diff


2 Files Affected:

- (modified) mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp (+1-1) 
- (modified) mlir/test/Dialect/SCF/for-loop-peeling-front.mlir (+43-4) 


``````````diff

diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index a5bff0a892c3df..a30e349d49136c 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -220,7 +220,7 @@ LogicalResult mlir::scf::peelForLoopFirstIteration(RewriterBase &b, ForOp forOp,
   auto stepInt = getConstantIntValue(forOp.getStep());
 
   // Peeling is not needed if there is one or less iteration.
-  if (lbInt && ubInt && stepInt && (*ubInt - *lbInt) / *stepInt <= 1)
+  if (lbInt && ubInt && stepInt && ceil(float(*ubInt - *lbInt) / *stepInt) <= 1)
     return failure();
 
   AffineExpr lbSymbol, stepSymbol;
diff --git a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
index 65141ff7623ff2..fe3b3e686a3e4a 100644
--- a/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
+++ b/mlir/test/Dialect/SCF/for-loop-peeling-front.mlir
@@ -13,11 +13,11 @@
 //      CHECK:     %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
 //      CHECK:     scf.yield %[[INIT]]
 //      CHECK:   }
-//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C4]] to %[[C17]]
-// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[FIRST]]) -> (i32) {
-//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C17]], %[[IV]])[%[[C4]]]
+//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[C4]] to %[[C17]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC2:.*]] = %[[FIRST]]) -> (i32) {
+//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C17]], %[[IV2]])[%[[C4]]]
 //      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[MIN2]] : index to i32
-//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC]], %[[CAST2]] : i32
+//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC2]], %[[CAST2]] : i32
 //      CHECK:     scf.yield %[[ADD]]
 //      CHECK:   }
 //      CHECK:   return %[[RESULT]]
@@ -110,6 +110,45 @@ func.func @fully_dynamic_bounds(%lb : index, %ub: index, %step: index) -> i32 {
 
 // -----
 
+//  CHECK-DAG: #[[MAP:.*]] = affine_map<(d0, d1)[s0] -> (4, d0 - d1)>
+//      CHECK: func @two_iteration_example(
+//  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32
+//  CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
+//  CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
+//  CHECK-DAG:   %[[C8:.*]] = arith.constant 8 : index
+//  CHECK-DAG:   %[[C6:.*]] = arith.constant 6 : index
+//      CHECK:   %[[FIRST:.*]] = scf.for %[[IV:.*]] = %[[C2]] to %[[C6]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC:.*]] = %[[C0_I32]]) -> (i32) {
+//      CHECK:     %[[MIN:.*]] = affine.min #[[MAP]](%[[C6]], %[[IV]])[%[[C4]]]
+//      CHECK:     %[[CAST:.*]] = arith.index_cast %[[MIN]] : index to i32
+//      CHECK:     %[[INIT:.*]] = arith.addi %[[ACC]], %[[CAST]] : i32
+//      CHECK:     scf.yield %[[INIT]]
+//      CHECK:   }
+//      CHECK:   %[[RESULT:.*]] = scf.for %[[IV2:.*]] = %[[C6]] to %[[C8]]
+// CHECK-SAME:       step %[[C4]] iter_args(%[[ACC2:.*]] = %[[FIRST]]) -> (i32) {
+//      CHECK:     %[[MIN2:.*]] = affine.min #[[MAP]](%[[C8]], %[[IV2]])[%[[C4]]]
+//      CHECK:     %[[CAST2:.*]] = arith.index_cast %[[MIN2]] : index to i32
+//      CHECK:     %[[ADD:.*]] = arith.addi %[[ACC2]], %[[CAST2]] : i32
+//      CHECK:     scf.yield %[[ADD]]
+//      CHECK:   }
+//      CHECK:   return %[[RESULT]]
+#map = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)>
+func.func @two_iteration_example() -> i32 {
+  %c0_i32 = arith.constant 0 : i32
+  %lb = arith.constant 2 : index
+  %step = arith.constant 4 : index
+  %ub = arith.constant 8 : index
+  %r = scf.for %iv = %lb to %ub step %step iter_args(%arg = %c0_i32) -> i32 {
+    %s = affine.min #map(%ub, %iv)[%step]
+    %casted = arith.index_cast %s : index to i32
+    %0 = arith.addi %arg, %casted : i32
+    scf.yield %0 : i32
+  }
+  return %r : i32
+}
+
+// -----
+
 //  CHECK-DAG: #[[MAP:.*]] = affine_map<(d0, d1)[s0] -> (4, d0 - d1)>
 //      CHECK: func @no_peeling_front(
 //  CHECK-DAG:   %[[C0_I32:.*]] = arith.constant 0 : i32

``````````

</details>


https://github.com/llvm/llvm-project/pull/86350