[llvm] [Passes][LoopRotate] Move minsize handling fully into pass (PR #189956)

via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 06:21:04 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

<details>
<summary>Changes</summary>

Make the disabling of loop header duplication in LoopRotate depend only on the function's minsize attribute, and drop the Oz-specific handling from the pass pipelines.

Rename the `-enable-loop-header-duplication` option to `-enable-loop-header-duplication-at-minsize` to clarify that it controls header duplication for minsize functions only (in all other cases header duplication is enabled by default, independently of this option).

---
Full diff: https://github.com/llvm/llvm-project/pull/189956.diff


4 Files Affected:

- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+6-20) 
- (modified) llvm/lib/Transforms/Scalar/LoopRotation.cpp (+15-5) 
- (modified) llvm/test/Transforms/LoopRotate/oz-disable.ll (+29-10) 
- (modified) llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll (+8-9) 


``````````diff
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6f5b1c2e2fcc7..847f064c23060 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -219,15 +219,6 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
-// Experimentally allow loop header duplication. This should allow for better
-// optimization at Oz, since loop-idiom recognition can then recognize things
-// like memcpy. If this ends up being useful for many targets, we should drop
-// this flag and make a code generation option that can be controlled
-// independent of the opt level and exposed through the frontend.
-static cl::opt<bool> EnableLoopHeaderDuplication(
-    "enable-loop-header-duplication", cl::init(false), cl::Hidden,
-    cl::desc("Enable loop header duplication at any optimization level"));
-
 static cl::opt<bool>
     EnableDFAJumpThreading("enable-dfa-jump-thread",
                            cl::desc("Enable DFA jump threading"),
@@ -688,10 +679,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/false));
 
-  // Disable header duplication in loop rotation at -Oz.
-  LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
-                                  Level != OptimizationLevel::Oz,
-                              isLTOPreLink(Phase)));
+  LPM1.addPass(
+      LoopRotatePass(/*EnableHeaderDuplication=*/true, isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/true));
@@ -855,10 +844,8 @@ void PassBuilder::addPostPGOLoopRotation(ModulePassManager &MPM,
   if (EnablePostPGOLoopRotation) {
     // Disable header duplication in loop rotation at -Oz.
     MPM.addPass(createModuleToFunctionPassAdaptor(
-        createFunctionToLoopPassAdaptor(
-            LoopRotatePass(EnableLoopHeaderDuplication ||
-                           Level != OptimizationLevel::Oz),
-            /*UseMemorySSA=*/false),
+        createFunctionToLoopPassAdaptor(LoopRotatePass(),
+                                        /*UseMemorySSA=*/false),
         PTO.EagerlyInvalidateAnalyses));
   }
 }
@@ -1574,9 +1561,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   LoopPassManager LPM;
   // First rotate loops that may have been un-rotated by prior passes.
   // Disable header duplication at -Oz.
-  LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
-                                 Level != OptimizationLevel::Oz,
-                             LTOPreLink, /*CheckExitCount=*/true));
+  LPM.addPass(LoopRotatePass(/*EnableLoopHeaderDuplication=*/true, LTOPreLink,
+                             /*CheckExitCount=*/true));
   // Some loops may have become dead by now. Try to delete them.
   // FIXME: see discussion in https://reviews.llvm.org/D112851,
   //        this may need to be revisited once we run GVN before loop deletion
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 50d44369a40d0..2f2ce9e2782a1 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -38,6 +38,15 @@ static cl::opt<bool> PrepareForLTOOption(
     cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
              "should be used for testing only."));
 
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being useful for many targets, we should drop
+// this flag and make a code generation option that can be controlled
+// independent of the opt level and exposed through the frontend.
+static cl::opt<bool> EnableLoopHeaderDuplicationAtMinSize(
+    "enable-loop-header-duplication-at-minsize", cl::init(false), cl::Hidden,
+    cl::desc("Enable loop header duplication even for minsize"));
+
 LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO,
                                bool CheckExitCount)
     : EnableHeaderDuplication(EnableHeaderDuplication),
@@ -68,11 +77,12 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
   // Vectorization requires loop-rotation. Use default threshold for loops the
   // user explicitly marked for vectorization, even when header duplication is
   // disabled.
-  int Threshold =
-      (EnableHeaderDuplication && !L.getHeader()->getParent()->hasMinSize()) ||
-              hasVectorizeTransformation(&L) == TM_ForcedByUser
-          ? DefaultRotationThreshold
-          : 0;
+  int Threshold = EnableHeaderDuplication &&
+                          (!L.getHeader()->getParent()->hasMinSize() ||
+                           EnableLoopHeaderDuplicationAtMinSize ||
+                           hasVectorizeTransformation(&L) == TM_ForcedByUser)
+                      ? DefaultRotationThreshold
+                      : 0;
   const DataLayout &DL = L.getHeader()->getDataLayout();
   const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL);
 
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index c45603878ee65..b4b65b64bb15b 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -1,21 +1,40 @@
 ; REQUIRES: asserts
-; RUN: opt < %s -S -Os -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
-; RUN: opt < %s -S -Oz -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
-; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
-; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
+; RUN: opt < %s -S -O2 -debug -debug-only=loop-rotate 2>&1 | FileCheck %s --check-prefixes=CHECK,DEFAULT
+; RUN: opt < %s -S -passes='default<O2>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s --check-prefixes=CHECK,DEFAULT
 
-;; Make sure -allow-loop-header-duplication overrides the default behavior at Oz
-; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+;; Make sure -enable-loop-header-duplication-at-minsize overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<O2>' -enable-loop-header-duplication-at-minsize -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefixes=CHECK,ALLOW
 
-; Loop should be rotated for -Os but not for -Oz.
-; OS: rotating Loop at depth 1
-; OZ-NOT: rotating Loop at depth 1
+; optsize loop should always be rotated.
+; CHECK: rotating Loop at depth 1
+; minsize loop should only be rotated under the option.
+; DEFAULT-NOT: rotating Loop at depth 1
+; ALLOW: rotating Loop at depth 1
 
 @e = global i32 10
 
 declare void @use(i32)
 
-define void @test() {
+define void @test_optsize() optsize {
+entry:
+  %end = load i32, ptr @e
+  br label %loop
+
+loop:
+  %n.phi = phi i32 [ %n, %loop.fin ], [ 0, %entry ]
+  %cond = icmp eq i32 %n.phi, %end
+  br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+  %n = add i32 %n.phi, 1
+  call void @use(i32 %n)
+  br label %loop
+
+exit:
+  ret void
+}
+
+define void @test_minsize() minsize {
 entry:
   %end = load i32, ptr @e
   br label %loop
diff --git a/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll b/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
index cd2ed37b22db5..17e1dfba0cfaa 100644
--- a/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
+++ b/llvm/test/Transforms/PhaseOrdering/enable-loop-header-duplication-oz.ll
@@ -1,17 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
 
-;; Check that -enable-loop-header-duplication at Oz enables certain types of
-;; optimizations, for example replacing the loop body w/ a call to memset. If
-;; loop idiom recognition begins to recognize unrotated loops, this test will
-;; need to be updated.
+;; Check that -enable-loop-header-duplication-at-minsize at Oz enables certain
+;; types of optimizations, for example replacing the loop body w/ a call to
+;; memset. If loop idiom recognition begins to recognize unrotated loops, this
+;; test will need to be updated.
 
 ; RUN: opt -passes='default<Oz>' -S < %s  | FileCheck %s --check-prefix=NOROTATION
-; RUN: opt -passes='default<Oz>' -S  -enable-loop-header-duplication < %s  | FileCheck %s --check-prefix=ROTATION
-; RUN: opt -passes='default<O2>' -S  < %s  | FileCheck %s --check-prefix=ROTATION
+; RUN: opt -passes='default<Oz>' -S  -enable-loop-header-duplication-at-minsize < %s  | FileCheck %s --check-prefix=ROTATION
 
-define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
+define void @test(i8* noalias nonnull align 1 %start, i8* %end) minsize {
 ; NOROTATION-LABEL: define void @test(
-; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
+; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; NOROTATION-NEXT:  entry:
 ; NOROTATION-NEXT:    br label [[LOOP_HEADER:%.*]]
 ; NOROTATION:       loop.header:
@@ -26,7 +25,7 @@ define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
 ; NOROTATION-NEXT:    ret void
 ;
 ; ROTATION-LABEL: define void @test(
-; ROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
+; ROTATION-SAME: ptr noalias nonnull writeonly align 1 captures(address) [[START:%.*]], ptr readnone captures(address) [[END:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; ROTATION-NEXT:  entry:
 ; ROTATION-NEXT:    [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
 ; ROTATION-NEXT:    br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/189956


More information about the llvm-commits mailing list