[Mlir-commits] [mlir] cdc8d0f - [mlir][affine] Option to unroll cleanup loop if smaller trip count.
Kai Sasaki
llvmlistbot at llvm.org
Thu Aug 18 17:53:40 PDT 2022
Author: lewuathe
Date: 2022-08-19T09:35:20+09:00
New Revision: cdc8d0fcd7082448285059ada0126e562b6272a7
URL: https://github.com/llvm/llvm-project/commit/cdc8d0fcd7082448285059ada0126e562b6272a7
DIFF: https://github.com/llvm/llvm-project/commit/cdc8d0fcd7082448285059ada0126e562b6272a7.diff
LOG: [mlir][affine] Option to unroll cleanup loop if smaller trip count.
Add an option (cleanUpUnroll) to unroll the cleanup loop even if the trip count is smaller than the unroll factor.
Differential Revision: https://reviews.llvm.org/D129171
Added:
Modified:
mlir/include/mlir/Dialect/Affine/LoopUtils.h
mlir/include/mlir/Dialect/Affine/Passes.td
mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
mlir/test/Dialect/Affine/unroll.mlir
mlir/test/Dialect/SCF/loop-unroll.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
index 09b7717b3913..252933e8976e 100644
--- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -45,9 +45,12 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
/// if the loop cannot be unrolled either due to restrictions or due to invalid
/// unroll factors. Requires positive loop bounds and step. If specified,
/// annotates the Ops in each unrolled iteration by applying `annotateFn`.
+/// When `cleanUpUnroll` is true, we can ensure the cleanup loop is unrolled
+/// regardless of the unroll factor.
LogicalResult loopUnrollByFactor(
AffineForOp forOp, uint64_t unrollFactor,
- function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
+ function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
+ bool cleanUpUnroll = false);
/// Unrolls this loop by the specified unroll factor or its trip count,
/// whichever is lower.
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index c1c388b06518..d50c22569d56 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -212,6 +212,8 @@ def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
Option<"unrollFullThreshold", "unroll-full-threshold", "unsigned",
/*default=*/"1",
"Unroll all loops with trip count less than or equal to this">,
+ Option<"cleanUpUnroll", "cleanup-unroll", "bool", /*default=*/"false",
+ "Fully unroll the cleanup loop when possible.">,
];
}
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index a3e978d04b38..8f7cae507a97 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -122,14 +122,16 @@ void LoopUnroll::runOnOperation() {
LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
// Use the function callback if one was provided.
if (getUnrollFactor)
- return loopUnrollByFactor(forOp, getUnrollFactor(forOp));
+ return loopUnrollByFactor(forOp, getUnrollFactor(forOp),
+ /*annotateFn=*/nullptr, cleanUpUnroll);
// Unroll completely if full loop unroll was specified.
if (unrollFull)
return loopUnrollFull(forOp);
// Otherwise, unroll by the given unroll factor.
if (unrollUpToFactor)
return loopUnrollUpToFactor(forOp, unrollFactor);
- return loopUnrollByFactor(forOp, unrollFactor);
+ return loopUnrollByFactor(forOp, unrollFactor, /*annotateFn=*/nullptr,
+ cleanUpUnroll);
}
std::unique_ptr<OperationPass<func::FuncOp>> mlir::createLoopUnrollPass(
diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index 33fee469fc69..6611ffe146b1 100644
--- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -1090,7 +1090,8 @@ static LogicalResult generateCleanupLoopForUnroll(AffineForOp forOp,
/// is successfully unrolled.
LogicalResult mlir::loopUnrollByFactor(
AffineForOp forOp, uint64_t unrollFactor,
- function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
+ function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
+ bool cleanUpUnroll) {
assert(unrollFactor > 0 && "unroll factor should be positive");
Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
@@ -1106,9 +1107,14 @@ LogicalResult mlir::loopUnrollByFactor(
return success();
// If the trip count is lower than the unroll factor, no unrolled body.
- // TODO: option to specify cleanup loop unrolling.
- if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor)
+ if (mayBeConstantTripCount && *mayBeConstantTripCount < unrollFactor) {
+ if (cleanUpUnroll) {
+ // Unroll the cleanup loop if cleanUpUnroll is specified.
+ return loopUnrollFull(forOp);
+ }
+
return failure();
+ }
// Generate the cleanup loop if trip count isn't a multiple of unrollFactor.
if (getLargestDivisorOfTripCount(forOp) % unrollFactor != 0) {
@@ -1119,6 +1125,9 @@ LogicalResult mlir::loopUnrollByFactor(
if (forOp.getLowerBoundMap().getNumResults() != 1 ||
forOp.getUpperBoundMap().getNumResults() != 1)
return failure();
+ if (cleanUpUnroll)
+ // Force unroll including cleanup loop
+ return loopUnrollFull(forOp);
if (failed(generateCleanupLoopForUnroll(forOp, unrollFactor)))
assert(false && "cleanup loop lower bound map for single result lower "
"and upper bound maps can always be determined");
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 022d9021af0f..2d324740feb4 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -2,6 +2,7 @@
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]+]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)>
@@ -689,3 +690,60 @@ func.func @unroll_zero_trip_count_case() {
}
return
}
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_larger_unroll_factor()
+func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
+ affine.for %i = 0 to 3 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
+func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
+ affine.for %i = 0 to 7 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V5:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V5]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V6:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V6]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
+
+// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_identical_unroll_factor()
+func.func @unroll_cleanup_loop_with_identical_unroll_factor() {
+ affine.for %i = 0 to 5 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ return
+// UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V3:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V3]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: %[[V4:.*]] = affine.apply {{.*}}
+// UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V4]]) : (index) -> i32
+// UNROLL-CLEANUP-LOOP-NEXT: return
+}
\ No newline at end of file
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 7c029cb29df7..652e751d9104 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -4,6 +4,7 @@
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
+// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3: memref<?xf32>) {
@@ -314,3 +315,28 @@ func.func @static_loop_unroll_by_3_rename_epilogue_arguments() -> (f32, f32) {
// UNROLL-BY-3-NEXT: scf.yield %[[EADD]], %[[EMUL]] : f32, f32
// UNROLL-BY-3-NEXT: }
// UNROLL-BY-3-NEXT: return %[[EFOR]]#0, %[[EFOR]]#1 : f32, f32
+
+// Test that the loop is fully unrolled when cleanup-unroll is enabled and the
+// trip count is less than the unroll factor.
+func.func @static_loop_unroll_by_5_with_cleanup(%arg0 : memref<?xf32>) {
+ %0 = arith.constant 7.0 : f32
+ %lb = arith.constant 0 : index
+ %ub = arith.constant 3 : index
+ affine.for %i0 = %lb to %ub {
+ memref.store %0, %arg0[%i0] : memref<?xf32>
+ }
+ return
+}
+
+// CLEANUP-UNROLL-BY-5-LABEL: func @static_loop_unroll_by_5_with_cleanup
+// CLEANUP-UNROLL-BY-5-SAME: %[[MEM:.*0]]: memref<?xf32>
+//
+// CLEANUP-UNROLL-BY-5-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CLEANUP-UNROLL-BY-5-DAG: %[[C3:.*]] = arith.constant 3 : index
+// CLEANUP-UNROLL-BY-5-NEXT: %[[V0:.*]] = affine.apply {{.*}}
+// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V0]]] : memref<?xf32>
+// CLEANUP-UNROLL-BY-5-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V1]]] : memref<?xf32>
+// CLEANUP-UNROLL-BY-5-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+// CLEANUP-UNROLL-BY-5-NEXT: memref.store %{{.*}}, %[[MEM]][%[[V2]]] : memref<?xf32>
+// CLEANUP-UNROLL-BY-5-NEXT: return
\ No newline at end of file
More information about the Mlir-commits
mailing list