[Mlir-commits] [mlir] [mlir][affine] make affine-loop-unroll a FunctionOpInterface pass. (PR #126475)
lonely eagle
llvmlistbot at llvm.org
Wed Feb 12 19:27:21 PST 2025
https://github.com/linuxlonelyeagle updated https://github.com/llvm/llvm-project/pull/126475
>From 824b6506a7082087feb0c28c7ab6e179ccb3ae8a Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Mon, 10 Feb 2025 15:05:36 +0800
Subject: [PATCH 1/3] make affine-loop-unroll a FunctionOpInterface pass.
---
mlir/include/mlir/Dialect/Affine/Passes.h | 3 +-
mlir/include/mlir/Dialect/Affine/Passes.td | 2 +-
.../Dialect/Affine/Transforms/LoopUnroll.cpp | 75 +++++++------
mlir/test/Dialect/Affine/unroll.mlir | 100 ++++++++++++++++++
4 files changed, 143 insertions(+), 37 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index bc29d04287ac4..37147b079e5d9 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -19,6 +19,7 @@
namespace mlir {
+class ModuleOp;
namespace func {
class FuncOp;
} // namespace func
@@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
+std::unique_ptr<OperationPass<mlir::ModuleOp>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index d7c7897c65730..d96b50c3e8104 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -199,7 +199,7 @@ def AffineLoopTiling : Pass<"affine-loop-tile", "func::FuncOp"> {
];
}
-def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
+def AffineLoopUnroll : Pass<"affine-loop-unroll", "ModuleOp"> {
let summary = "Unroll affine loops";
let constructor = "mlir::affine::createLoopUnrollPass()";
let options = [
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index 57df7ada91654..4dc9809574115 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -19,6 +19,7 @@
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -82,7 +83,7 @@ static bool isInnermostAffineForOp(AffineForOp op) {
}
/// Gathers loops that have no affine.for's nested within.
-static void gatherInnermostLoops(func::FuncOp f,
+static void gatherInnermostLoops(FunctionOpInterface f,
SmallVectorImpl<AffineForOp> &loops) {
f.walk([&](AffineForOp forOp) {
if (isInnermostAffineForOp(forOp))
@@ -91,40 +92,44 @@ static void gatherInnermostLoops(func::FuncOp f,
}
void LoopUnroll::runOnOperation() {
- func::FuncOp func = getOperation();
- if (func.isExternal())
- return;
-
- if (unrollFull && unrollFullThreshold.hasValue()) {
- // Store short loops as we walk.
+ mlir::ModuleOp module = getOperation();
+ SmallVector<FunctionOpInterface> funcOps;
+ module.walk([&](FunctionOpInterface func) { funcOps.push_back(func); });
+ for (auto func : funcOps) {
+ if (func.isExternal())
+ return;
+
+ if (unrollFull && unrollFullThreshold.hasValue()) {
+ // Store short loops as we walk.
+ SmallVector<AffineForOp, 4> loops;
+
+ // Gathers all loops with trip count <= minTripCount. Do a post order walk
+ // so that loops are gathered from innermost to outermost (or else
+ // unrolling an outer one may delete gathered inner ones).
+ getOperation().walk([&](AffineForOp forOp) {
+ std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+ if (tripCount && *tripCount <= unrollFullThreshold)
+ loops.push_back(forOp);
+ });
+ for (auto forOp : loops)
+ (void)loopUnrollFull(forOp);
+ return;
+ }
+
+ // If the call back is provided, we will recurse until no loops are found.
SmallVector<AffineForOp, 4> loops;
-
- // Gathers all loops with trip count <= minTripCount. Do a post order walk
- // so that loops are gathered from innermost to outermost (or else unrolling
- // an outer one may delete gathered inner ones).
- getOperation().walk([&](AffineForOp forOp) {
- std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (tripCount && *tripCount <= unrollFullThreshold)
- loops.push_back(forOp);
- });
- for (auto forOp : loops)
- (void)loopUnrollFull(forOp);
- return;
- }
-
- // If the call back is provided, we will recurse until no loops are found.
- SmallVector<AffineForOp, 4> loops;
- for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
- loops.clear();
- gatherInnermostLoops(func, loops);
- if (loops.empty())
- break;
- bool unrolled = false;
- for (auto forOp : loops)
- unrolled |= succeeded(runOnAffineForOp(forOp));
- if (!unrolled)
- // Break out if nothing was unrolled.
- break;
+ for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
+ loops.clear();
+ gatherInnermostLoops(func, loops);
+ if (loops.empty())
+ break;
+ bool unrolled = false;
+ for (auto forOp : loops)
+ unrolled |= succeeded(runOnAffineForOp(forOp));
+ if (!unrolled)
+ // Break out if nothing was unrolled.
+ break;
+ }
}
}
@@ -145,7 +150,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}
-std::unique_ptr<OperationPass<func::FuncOp>> mlir::affine::createLoopUnrollPass(
+std::unique_ptr<OperationPass<ModuleOp>> mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011d..43485ca56deeb 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+gpu.module @unroll_full {
+ // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
+ gpu.func @gpu_loop_nest_simplest() {
+ // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
+ affine.for %i = 0 to 100 step 2 {
+ // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
+ affine.for %j = 0 to 4 {
+ %x = arith.constant 1 : i32
+ }
+ } // UNROLL-FULL: }
+ gpu.return // UNROLL-FULL: return
+ }
+}
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
@@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
return // SHORT: return
} // SHORT }
+gpu.module @short {
+ // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
+ gpu.func @gpu_loop_nest_outer_unroll() {
+ // SHORT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ affine.for %i = 0 to 2 {
+ affine.for %j = 0 to 4 {
+ %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
+ (index) -> (index)
+ %y = "addi32"(%x, %x) : (index, index) -> index
+ }
+ }
+ gpu.return // SHORT: gpu.return
+ } // SHORT }
+}
+
// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold set to 2.
@@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
return
}
+gpu.module @unroll_by_4{
+ // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
+ gpu.func @gpu_unroll_unit_stride_no_cleanup() {
+ // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
+ affine.for %i = 0 to 100 {
+ // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
+ // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: }
+ affine.for %j = 0 to 8 {
+ %x = "addi32"(%j, %j) : (index, index) -> i32
+ %y = "addi32"(%x, %x) : (i32, i32) -> i32
+ }
+ // empty loop
+ // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
+ affine.for %k = 0 to 8 {
+ }
+ }
+ gpu.return
+ }
+}
+
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
// UNROLL-BY-1-NEXT: return
}
+gpu.module @unroll_by_1 {
+ // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
+ gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
+ affine.for %i = 0 to 1 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
+ // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
+ // UNROLL-BY-1-NEXT: gpu.return
+ }
+}
+
// Test unrolling with affine.for iter_args.
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
// UNROLL-CLEANUP-LOOP-NEXT: return
}
+gpu.module @unroll_cleanup_loop {
+ // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
+ gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
+ affine.for %i = 0 to 3 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
+ }
+}
+
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
>From 086146476f07dc285f8e5139faffb1c914f6c8db Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Tue, 11 Feb 2025 10:53:03 +0800
Subject: [PATCH 2/3] use InterfacePass to implement it.
---
mlir/include/mlir/Dialect/Affine/Passes.h | 3 +-
mlir/include/mlir/Dialect/Affine/Passes.td | 2 +-
.../Dialect/Affine/Transforms/LoopUnroll.cpp | 73 +++++++++----------
mlir/test/Dialect/Affine/unroll.mlir | 10 +--
mlir/test/Dialect/SCF/loop-unroll.mlir | 6 +-
5 files changed, 44 insertions(+), 50 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 37147b079e5d9..098cf386a3860 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -19,7 +19,6 @@
namespace mlir {
-class ModuleOp;
namespace func {
class FuncOp;
} // namespace func
@@ -94,7 +93,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<OperationPass<mlir::ModuleOp>> createLoopUnrollPass(
+std::unique_ptr<Pass> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index d96b50c3e8104..5325d3b0a1d69 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -199,7 +199,7 @@ def AffineLoopTiling : Pass<"affine-loop-tile", "func::FuncOp"> {
];
}
-def AffineLoopUnroll : Pass<"affine-loop-unroll", "ModuleOp"> {
+def AffineLoopUnroll : InterfacePass<"affine-loop-unroll", "FunctionOpInterface"> {
let summary = "Unroll affine loops";
let constructor = "mlir::affine::createLoopUnrollPass()";
let options = [
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index 4dc9809574115..da66af2d54295 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -19,7 +19,6 @@
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -92,44 +91,40 @@ static void gatherInnermostLoops(FunctionOpInterface f,
}
void LoopUnroll::runOnOperation() {
- mlir::ModuleOp module = getOperation();
- SmallVector<FunctionOpInterface> funcOps;
- module.walk([&](FunctionOpInterface func) { funcOps.push_back(func); });
- for (auto func : funcOps) {
- if (func.isExternal())
- return;
-
- if (unrollFull && unrollFullThreshold.hasValue()) {
- // Store short loops as we walk.
- SmallVector<AffineForOp, 4> loops;
-
- // Gathers all loops with trip count <= minTripCount. Do a post order walk
- // so that loops are gathered from innermost to outermost (or else
- // unrolling an outer one may delete gathered inner ones).
- getOperation().walk([&](AffineForOp forOp) {
- std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (tripCount && *tripCount <= unrollFullThreshold)
- loops.push_back(forOp);
- });
- for (auto forOp : loops)
- (void)loopUnrollFull(forOp);
- return;
- }
-
- // If the call back is provided, we will recurse until no loops are found.
+ FunctionOpInterface func = getOperation();
+ if (func.isExternal())
+ return;
+
+ if (unrollFull && unrollFullThreshold.hasValue()) {
+ // Store short loops as we walk.
SmallVector<AffineForOp, 4> loops;
- for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
- loops.clear();
- gatherInnermostLoops(func, loops);
- if (loops.empty())
- break;
- bool unrolled = false;
- for (auto forOp : loops)
- unrolled |= succeeded(runOnAffineForOp(forOp));
- if (!unrolled)
- // Break out if nothing was unrolled.
- break;
- }
+
+ // Gathers all loops with trip count <= minTripCount. Do a post order walk
+ // so that loops are gathered from innermost to outermost (or else
+ // unrolling an outer one may delete gathered inner ones).
+ getOperation().walk([&](AffineForOp forOp) {
+ std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+ if (tripCount && *tripCount <= unrollFullThreshold)
+ loops.push_back(forOp);
+ });
+ for (auto forOp : loops)
+ (void)loopUnrollFull(forOp);
+ return;
+ }
+
+ // If the call back is provided, we will recurse until no loops are found.
+ SmallVector<AffineForOp, 4> loops;
+ for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
+ loops.clear();
+ gatherInnermostLoops(func, loops);
+ if (loops.empty())
+ break;
+ bool unrolled = false;
+ for (auto forOp : loops)
+ unrolled |= succeeded(runOnAffineForOp(forOp));
+ if (!unrolled)
+ // Break out if nothing was unrolled.
+ break;
}
}
@@ -150,7 +145,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}
-std::unique_ptr<OperationPass<ModuleOp>> mlir::affine::createLoopUnrollPass(
+std::unique_ptr<Pass> mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 43485ca56deeb..3f7920dc1eeb3 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -1,8 +1,8 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full" | FileCheck %s --check-prefix UNROLL-FULL
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-full unroll-full-threshold=2" | FileCheck %s --check-prefix SHORT
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=4" | FileCheck %s --check-prefix UNROLL-BY-4
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=1" | FileCheck %s --check-prefix UNROLL-BY-1
-// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll="unroll-factor=5 cleanup-unroll=true" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix UNROLL-FULL
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2})))" | FileCheck %s --check-prefix SHORT
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=4})))" | FileCheck %s --check-prefix UNROLL-BY-4
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=1})))" | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true})))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
diff --git a/mlir/test/Dialect/SCF/loop-unroll.mlir b/mlir/test/Dialect/SCF/loop-unroll.mlir
index 0368505a1b70d..4c72d9e99d049 100644
--- a/mlir/test/Dialect/SCF/loop-unroll.mlir
+++ b/mlir/test/Dialect/SCF/loop-unroll.mlir
@@ -3,9 +3,9 @@
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=0' | FileCheck %s --check-prefix UNROLL-OUTER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 loop-depth=1' | FileCheck %s --check-prefix UNROLL-INNER-BY-2
// RUN: mlir-opt %s -test-loop-unrolling='unroll-factor=2 annotate=true' | FileCheck %s --check-prefix UNROLL-BY-2-ANNOTATE
-// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=6 unroll-up-to-factor=true' | FileCheck %s --check-prefix UNROLL-UP-TO
-// RUN: mlir-opt %s --affine-loop-unroll='unroll-factor=5 cleanup-unroll=true' | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
-// RUN: mlir-opt %s --affine-loop-unroll --split-input-file | FileCheck %s
+// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=6 unroll-up-to-factor=true}))" | FileCheck %s --check-prefix UNROLL-UP-TO
+// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}))" | FileCheck %s --check-prefix CLEANUP-UNROLL-BY-5
+// RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll))" --split-input-file | FileCheck %s
func.func @dynamic_loop_unroll(%arg0 : index, %arg1 : index, %arg2 : index,
%arg3: memref<?xf32>) {
>From 4383806504c84451f82eb9ee72ac0bd7aeaefd40 Mon Sep 17 00:00:00 2001
From: linuxlonelyeagle <2020382038 at qq.com>
Date: Thu, 13 Feb 2025 11:27:07 +0800
Subject: [PATCH 3/3] update c++ impl and update test.
---
mlir/include/mlir/Dialect/Affine/Passes.h | 3 +-
.../Dialect/Affine/Transforms/LoopUnroll.cpp | 3 +-
mlir/test/Dialect/Affine/unroll.mlir | 110 +++---------------
3 files changed, 18 insertions(+), 98 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index 098cf386a3860..ea5034b60d8bd 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -14,6 +14,7 @@
#ifndef MLIR_DIALECT_AFFINE_PASSES_H
#define MLIR_DIALECT_AFFINE_PASSES_H
+#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Pass/Pass.h"
#include <limits>
@@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<Pass> createLoopUnrollPass(
+std::unique_ptr<InterfacePass<FunctionOpInterface>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index da66af2d54295..7ff77968c61ad 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -145,7 +145,8 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}
-std::unique_ptr<Pass> mlir::affine::createLoopUnrollPass(
+std::unique_ptr<InterfacePass<FunctionOpInterface>>
+mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index 3f7920dc1eeb3..574e9f41494af 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -1,8 +1,9 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix UNROLL-FULL
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2})))" | FileCheck %s --check-prefix SHORT
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=4})))" | FileCheck %s --check-prefix UNROLL-BY-4
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=1})))" | FileCheck %s --check-prefix UNROLL-BY-1
-// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true})))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}))" | FileCheck %s --check-prefix UNROLL-FULL
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}))" | FileCheck %s --check-prefix SHORT
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}))" | FileCheck %s --check-prefix UNROLL-BY-4
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}))" | FileCheck %s --check-prefix UNROLL-BY-1
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
+// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix GPU-UNROLL-FULL
// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
@@ -241,19 +242,19 @@ func.func @loop_nest_unroll_full() {
} // UNROLL-FULL }
gpu.module @unroll_full {
- // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
+ // GPU-UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
gpu.func @gpu_loop_nest_simplest() {
- // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
+ // GPU-UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
affine.for %i = 0 to 100 step 2 {
- // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
- // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
- // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
- // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
+ // GPU-UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
+ // GPU-UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
+ // GPU-UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
+ // GPU-UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
affine.for %j = 0 to 4 {
%x = arith.constant 1 : i32
}
- } // UNROLL-FULL: }
- gpu.return // UNROLL-FULL: return
+ } // GPU-UNROLL-FULL: }
+ gpu.return // GPU-UNROLL-FULL: return
}
}
@@ -277,28 +278,6 @@ func.func @loop_nest_outer_unroll() {
return // SHORT: return
} // SHORT }
-gpu.module @short {
- // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
- gpu.func @gpu_loop_nest_outer_unroll() {
- // SHORT: affine.for %arg0 = 0 to 4 {
- // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
- // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
- // SHORT-NEXT: }
- // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
- // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
- // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
- // SHORT-NEXT: }
- affine.for %i = 0 to 2 {
- affine.for %j = 0 to 4 {
- %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
- (index) -> (index)
- %y = "addi32"(%x, %x) : (index, index) -> index
- }
- }
- gpu.return // SHORT: gpu.return
- } // SHORT }
-}
-
// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold set to 2.
@@ -384,37 +363,6 @@ func.func @unroll_unit_stride_no_cleanup() {
return
}
-gpu.module @unroll_by_4{
- // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
- gpu.func @gpu_unroll_unit_stride_no_cleanup() {
- // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
- affine.for %i = 0 to 100 {
- // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
- // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
- // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
- // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
- // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
- // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
- // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
- // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
- // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
- // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
- // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
- // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
- // UNROLL-BY-4-NEXT: }
- affine.for %j = 0 to 8 {
- %x = "addi32"(%j, %j) : (index, index) -> i32
- %y = "addi32"(%x, %x) : (i32, i32) -> i32
- }
- // empty loop
- // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
- affine.for %k = 0 to 8 {
- }
- }
- gpu.return
- }
-}
-
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -702,19 +650,6 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
// UNROLL-BY-1-NEXT: return
}
-gpu.module @unroll_by_1 {
- // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
- gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
- affine.for %i = 0 to 1 {
- %x = "foo"(%i) : (index) -> i32
- }
- gpu.return
- // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
- // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
- // UNROLL-BY-1-NEXT: gpu.return
- }
-}
-
// Test unrolling with affine.for iter_args.
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -789,23 +724,6 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
// UNROLL-CLEANUP-LOOP-NEXT: return
}
-gpu.module @unroll_cleanup_loop {
- // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
- gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
- affine.for %i = 0 to 3 {
- %x = "foo"(%i) : (index) -> i32
- }
- gpu.return
- // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
- // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
- // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
- // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
- }
-}
-
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
More information about the Mlir-commits
mailing list