[llvm] [Passes][LoopRotate] Move minsize handling fully into pass (PR #189956)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 06:21:04 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Nikita Popov (nikic)
<details>
<summary>Changes</summary>
Make the decision to disable loop header duplication in LoopRotate depend only on the function's minsize attribute, and drop the -Oz special-casing from the pass pipeline.
Rename the enable-loop-header-duplication option to enable-loop-header-duplication-at-minsize to clarify that it controls header duplication at minsize only (in other cases it is enabled by default, independently of this option).
---
Full diff: https://github.com/llvm/llvm-project/pull/189956.diff
4 Files Affected:
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+6-20)
- (modified) llvm/lib/Transforms/Scalar/LoopRotation.cpp (+15-5)
- (modified) llvm/test/Transforms/LoopRotate/oz-disable.ll (+29-10)
- (modified) llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll (+8-9)
``````````diff
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6f5b1c2e2fcc7..847f064c23060 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -219,15 +219,6 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));
-// Experimentally allow loop header duplication. This should allow for better
-// optimization at Oz, since loop-idiom recognition can then recognize things
-// like memcpy. If this ends up being useful for many targets, we should drop
-// this flag and make a code generation option that can be controlled
-// independent of the opt level and exposed through the frontend.
-static cl::opt<bool> EnableLoopHeaderDuplication(
- "enable-loop-header-duplication", cl::init(false), cl::Hidden,
- cl::desc("Enable loop header duplication at any optimization level"));
-
static cl::opt<bool>
EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading"),
@@ -688,10 +679,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/false));
- // Disable header duplication in loop rotation at -Oz.
- LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
- Level != OptimizationLevel::Oz,
- isLTOPreLink(Phase)));
+ LPM1.addPass(
+ LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
@@ -855,10 +844,8 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
if (EnablePostPGOLoopRotation) {
// Disable header duplication in loop rotation at -Oz.
MPM.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(
- LoopRotatePass(EnableLoopHeaderDuplication ||
- Level != OptimizationLevel::Oz),
- /*UseMemorySSA=*/false),
+ createFunctionToLoopPassAdaptor(LoopRotatePass(),
+ /*UseMemorySSA=*/false),
PTO.EagerlyInvalidateAnalyses));
}
}
@@ -1574,9 +1561,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
LoopPassManager LPM;
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz.
- LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
- Level != OptimizationLevel::Oz,
- LTOPreLink, /*CheckExitCount=*/true));
+ LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
+ /*CheckExitCount=*/true));
// Some loops may have become dead by now. Try to delete them.
// FIXME: see discussion in https://reviews.llvm.org/D112851,
// this may need to be revisited once we run GVN before loop deletion
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 50d44369a40d0..2f2ce9e2782a1 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -38,6 +38,15 @@ static cl::opt<bool> PrepareForLTOOption(
cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
"should be used for testing only."));
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being useful for many targets, we should drop
+// this flag and make a code generation option that can be controlled
+// independent of the opt level and exposed through the frontend.
+static cl::opt<bool> EnableLoopHeaderDuplicationAtMinSize(
+ "enable-loop-header-duplication-at-minsize", cl::init(false), cl::Hidden,
+ cl::desc("Enable loop header duplication even for minsize"));
+
LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO,
bool CheckExitCount)
: EnableHeaderDuplication(EnableHeaderDuplication),
@@ -68,11 +77,12 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
// Vectorization requires loop-rotation. Use default threshold for loops the
// user explicitly marked for vectorization, even when header duplication is
// disabled.
- int Threshold =
- (EnableHeaderDuplication && !L.getHeader()->getParent()->hasMinSize()) ||
- hasVectorizeTransformation(&L) == TM_ForcedByUser
- ? DefaultRotationThreshold
- : 0;
+ int Threshold = EnableHeaderDuplication &&
+ (!L.getHeader()->getParent()->hasMinSize() ||
+ EnableLoopHeaderDuplicationAtMinSize ||
+ hasVectorizeTransformation(&L) == TM_ForcedByUser)
+ ? DefaultRotationThreshold
+ : 0;
const DataLayout &DL = L.getHeader()->getDataLayout();
const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index c45603878ee65..b4b65b64bb15b 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -1,21 +1,40 @@
; REQUIRES: asserts
-; RUN: opt < %s -S -Os -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
-; RUN: opt < %s -S -Oz -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
-; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
-; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
+; RUN: opt < %s -S -O2 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s --check-prefixes=CHECK,DEFAULT
+; RUN: opt < %s -S -passes='default<O2>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s --check-prefixes=CHECK,DEFAULT
-;; Make sure -allow-loop-header-duplication overrides the default behavior at Oz
-; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+;; Make sure -enable-loop-header-duplication-at-minsize overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<O2>' -enable-loop-header-duplication-at-minsize -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefixes=CHECK,ALLOW
-; Loop should be rotated for -Os but not for -Oz.
-; OS: rotating Loop at depth 1
-; OZ-NOT: rotating Loop at depth 1
+; optsize loop should always be rotated.
+; CHECK: rotating Loop at depth 1
+; minsize loop should only be rotated under the option.
+; DEFAULT-NOT: rotating Loop at depth 1
+; ALLOW: rotating Loop at depth 1
@e = global i32 10
declare void @use(i32)
-define void @test() {
+define void @test_optsize() optsize {
+entry:
+ %end = load i32, ptr @e
+ br label %loop
+
+loop:
+ %n.phi = phi i32 [ %n, %loop.fin ], [ 0, %entry ]
+ %cond = icmp eq i32 %n.phi, %end
+ br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+ %n = add i32 %n.phi, 1
+ call void @use(i32 %n)
+ br label %loop
+
+exit:
+ ret void
+}
+
+define void @test_minsize() minsize {
entry:
%end = load i32, ptr @e
br label %loop
diff --git a/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll b/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
index cd2ed37b22db5..17e1dfba0cfaa 100644
--- a/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
+++ b/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
@@ -1,17 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
-;; Check that -enable-loop-header-duplication at Oz enables certain types of
-;; optimizations, for example replacing the loop body w/ a call to memset. If
-;; loop idiom recognition begins to recognize unrotated loops, this test will
-;; need to be updated.
+;; Check that -enable-loop-header-duplication-at-minsize at Oz enables certain
+;; types of optimizations, for example replacing the loop body w/ a call to
+;; memset. If loop idiom recognition begins to recognize unrotated loops, this
+;; test will need to be updated.
; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
-; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
-; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION
+; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication-at-minsize < %s | FileCheck %s --check-prefix=ROTATION
-define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
+define void @test(i8* noalias nonnull align 1 %start, i8* %end) minsize {
; NOROTATION-LABEL: define void @test(
-; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
+; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; NOROTATION-NEXT: entry:
; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]]
; NOROTATION: loop.header:
@@ -26,7 +25,7 @@ define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
; NOROTATION-NEXT: ret void
;
; ROTATION-LABEL: define void @test(
-; ROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
+; ROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; ROTATION-NEXT: entry:
; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
``````````
</details>
https://github.com/llvm/llvm-project/pull/189956
More information about the llvm-commits mailing list