[Mlir-commits] [mlir] cdc8d0f - [mlir][affine] Option to unroll cleanup loop if smaller trip count.

Kai Sasaki llvmlistbot at llvm.org
Thu Aug 18 17:53:40 PDT 2022


Author: lewuathe
Date: 2022-08-19T09:35:20+09:00
New Revision: cdc8d0fcd7082448285059ada0126e562b6272a7

URL: https://github.com/llvm/llvm-project/commit/cdc8d0fcd7082448285059ada0126e562b6272a7
DIFF: https://github.com/llvm/llvm-project/commit/cdc8d0fcd7082448285059ada0126e562b6272a7.diff

LOG: [mlir][affine] Option to unroll cleanup loop if smaller trip count.

Add an option (cleanUpUnroll) to unroll the cleanup loop even if the trip count is smaller than the unroll factor.

Differential Revision: https://reviews.llvm.org/D129171

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Affine/LoopUtils.h
    mlir/include/mlir/Dialect/Affine/Passes.td
    mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
    mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
    mlir/test/Dialect/Affine/unroll.mlir
    mlir/test/Dialect/SCF/loop-unroll.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
index 09b7717b3913..252933e8976e 100644
--- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -45,9 +45,12 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
 /// if the loop cannot be unrolled either due to restrictions or due to invalid
 /// unroll factors. Requires positive loop bounds and step. If specified,
 /// annotates the Ops in each unrolled iteration by applying `annotateFn`.
+/// When `cleanUpUnroll` is true, the loop is fully unrolled instead if its trip
+/// count is smaller than, or not a multiple of, the unroll factor.
 LogicalResult loopUnrollByFactor(
     AffineForOp forOp, uint64_t unrollFactor,
-    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
+    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
+    bool cleanUpUnroll = false);
 
 /// Unrolls this loop by the specified unroll factor or its trip count,
 /// whichever is lower.

diff  --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index c1c388b06518..d50c22569d56 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -212,6 +212,8 @@ def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
     Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
            /*default=*/"1",
            "Unroll all loops with trip count less than or equal to this">,
+    Option<"cleanUpUnroll", "cleanup-unroll", "bool", /*default=*/"false",
+           "Fully unroll the cleanup loop when possible.">,
   ];
 }
 

diff  --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index a3e978d04b38..8f7cae507a97 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -122,14 +122,16 @@ void LoopUnroll::runOnOperation() {
 LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
   // Use the function callback if one was provided.
   if (getUnrollFactor)
-    return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
+    return loopUnrollByFactor(forOp, getUnrollFactor(forOp),
+                              /*annotateFn=*/nullptr, cleanUpUnroll);
   // Unroll completely if full loop unroll was specified.
   if (unrollFull)
     return loopUnrollFull(forOp);
   // Otherwise, unroll by the given unroll factor.
   if (unrollUpToFactor)
     return loopUnrollUpToFactor(forOp, unrollFactor);
-  return loopUnrollByFactor(forOp, unrollFactor);
+  return loopUnrollByFactor(forOp, unrollFactor, /*annotateFn=*/nullptr,
+                            cleanUpUnroll);
 }
 
 std::unique_ptr<OperationPass<func::FuncOp>> mlir::createLoopUnrollPass(

diff  --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 33fee469fc69..6611ffe146b1 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1090,7 +1090,8 @@ static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
 /// is successfully unrolled.
 LogicalResult mlir::loopUnrollByFactor(
     AffineForOp forOp, uint64_t unrollFactor,
-    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
+    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
+    bool cleanUpUnroll) {
   assert(unrollFactor > 0 && "unroll factor should be positive");
 
   Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
@@ -1106,9 +1107,14 @@ LogicalResult mlir::loopUnrollByFactor(
     return success();
 
   // If the trip count is lower than the unroll factor, no unrolled body.
-  // TODO: option to specify cleanup loop unrolling.
-  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor)
+  if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
+    if (cleanUpUnroll) {
+      // Unroll the cleanup loop if cleanUpUnroll is specified.
+      return loopUnrollFull(forOp);
+    }
+
     return failure();
+  }
 
   // Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
   if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
@@ -1119,6 +1125,9 @@ LogicalResult mlir::loopUnrollByFactor(
     if (forOp.getLowerBoundMap().getNumResults() != 1 ||
         forOp.getUpperBoundMap().getNumResults() != 1)
       return failure();
+    if (cleanUpUnroll)
+      // Force unroll including cleanup loop
+      return loopUnrollFull(forOp);
     if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
       assert(false && "cleanup loop lower bound map for single result lower "
                       "and upper bound maps can always be determined");

diff  --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 022d9021af0f..2d324740feb4 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -2,6 +2,7 @@
 // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
 // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
 // RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
 
 // UNROLL-FULL-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
 // UNROLL-FULL-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
@@ -689,3 +690,60 @@ func.func @unroll_zero_trip_count_case() {
   }
   return
 }
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_larger_unroll_factor()
+func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
+  affine.for %i = 0 to 3 {
+    %x = "foo"(%i) : (index) -> i32
+  }
+  return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
+func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
+  affine.for %i = 0 to 7 {
+    %x = "foo"(%i) : (index) -> i32
+  }
+  return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V5:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V5]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V6:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V6]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_identical_unroll_factor()
+func.func @unroll_cleanup_loop_with_identical_unroll_factor() {
+  affine.for %i = 0 to 5 {
+    %x = "foo"(%i) : (index) -> i32
+  }
+  return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
\ No newline at end of file

diff  --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 7c029cb29df7..652e751d9104 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -4,6 +4,7 @@
 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
 // RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
 // RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
+// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
 
 func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
                           %arg3: memref<?xf32>) {
@@ -314,3 +315,28 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
 //  UNROLL-BY-3-NEXT:     scf.yield %[[EADD]], %[[EMUL]] : f32, f32
 //  UNROLL-BY-3-NEXT:   }
 //  UNROLL-BY-3-NEXT:   return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
+
+// Test that the loop is fully unrolled when `cleanup-unroll` is set and the
+// trip count is less than the unroll factor.
+func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
+  %0 = arith.constant 7.0 : f32
+  %lb = arith.constant 0 : index
+  %ub = arith.constant 3 : index
+  affine.for %i0 = %lb to %ub {
+    memref.store %0, %arg0[%i0] : memref<?xf32>
+  }
+  return
+}
+
+// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
+//  CLEANUP-UNROLL-BY-5-SAME:  %[[MEM:.*0]]: memref<?xf32>
+//
+//   CLEANUP-UNROLL-BY-5-DAG:  %[[C0:.*]] = arith.constant 0 : index
+//   CLEANUP-UNROLL-BY-5-DAG:  %[[C3:.*]] = arith.constant 3 : index
+//   CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
+//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
+//   CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+//   CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+//   CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
+//   CLEANUP-UNROLL-BY-5-NEXT: return
\ No newline at end of file


        


More information about the Mlir-commits mailing list