[llvm] [llvm][loop-rotate] Allow forcing loop-rotation (PR #82828)

Paul Kirth via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 23 13:54:47 PST 2024


https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/82828

>From b34b5e679fbcceae5124f79f1399eafc31f1c533 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Fri, 23 Feb 2024 20:43:13 +0000
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/LoopRotation.cpp   | 14 +++++++++++++-
 llvm/test/Transforms/LoopRotate/oz-disable.ll |  3 +++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 7036759a4eed57..c6943695ec1549 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -39,8 +39,20 @@ static cl::opt<bool> PrepareForLTOOption(
     cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
              "should be used for testing only."));
 
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being profitable, we should drop this flag and
+// making a code gen option that can be controled independent of the opt level
+// and exposed through clang. See
+// https://github.com/llvm/llvm-project/issues/50308 for details.
+static cl::opt<bool>
+    ForceHeaderDuplication("force-loop-header-duplication", cl::init(false),
+                           cl::Hidden,
+                           cl::desc("Always enable loop header duplication"));
+
 LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
-    : EnableHeaderDuplication(EnableHeaderDuplication),
+    : EnableHeaderDuplication(EnableHeaderDuplication ||
+                              ForceHeaderDuplication),
       PrepareForLTO(PrepareForLTO) {}
 
 void LoopRotatePass::printPipeline(
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index 6a7847ac0ff215..b933200ed80586 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -4,6 +4,9 @@
 ; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
 ; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
 
+;; Make sure -force-loop-header-duplication overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<Oz>' -force-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+
 ; Loop should be rotated for -Os but not for -Oz.
 ; OS: rotating Loop at depth 1
 ; OZ-NOT: rotating Loop at depth 1

>From b65bd96b2508d4f6c3f2c97654a1f70ad36e1d9b Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Fri, 23 Feb 2024 21:54:33 +0000
Subject: [PATCH 2/2] Move flag to PassPipelineBuilder and rename Flag

Created using spr 1.3.4
---
 llvm/lib/Passes/PassBuilderPipelines.cpp      | 21 +++++++++++++++----
 llvm/lib/Transforms/Scalar/LoopRotation.cpp   | 14 +------------
 llvm/test/Transforms/LoopRotate/oz-disable.ll |  4 ++--
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 17b55b63ac03cf..a6fc335f3bd473 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
                                        cl::Hidden,
                                        cl::desc("Enable the LoopFlatten Pass"));
 
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being profitable, we should drop this flag and
+// making a code gen option that can be controlled independent of the opt level
+// and exposed through clang.
+static cl::opt<bool> AllowLoopHeaderDuplication(
+    "allow-loop-header-duplication", cl::init(false), cl::Hidden,
+    cl::desc("Allow loop header duplication at any optimization level"));
+
 static cl::opt<bool>
     EnableDFAJumpThreading("enable-dfa-jump-thread",
                            cl::desc("Enable DFA jump threading"),
@@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
                         /*AllowSpeculation=*/false));
 
   // Disable header duplication in loop rotation at -Oz.
-  LPM1.addPass(
-      LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
+  LPM1.addPass(LoopRotatePass(AllowLoopHeaderDuplication ||
+                                  (Level != OptimizationLevel::Oz),
+                              isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/true));
@@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
     // Disable header duplication in loop rotation at -Oz.
     MPM.addPass(createModuleToFunctionPassAdaptor(
         createFunctionToLoopPassAdaptor(
-            LoopRotatePass(Level != OptimizationLevel::Oz),
+            LoopRotatePass(AllowLoopHeaderDuplication ||
+                           Level != OptimizationLevel::Oz),
             /*UseMemorySSA=*/false,
             /*UseBlockFrequencyInfo=*/false),
         PTO.EagerlyInvalidateAnalyses));
@@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   LoopPassManager LPM;
   // First rotate loops that may have been un-rotated by prior passes.
   // Disable header duplication at -Oz.
-  LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
+  LPM.addPass(LoopRotatePass(AllowLoopHeaderDuplication ||
+                                 Level != OptimizationLevel::Oz,
+                             LTOPreLink));
   // Some loops may have become dead by now. Try to delete them.
   // FIXME: see discussion in https://reviews.llvm.org/D112851,
   //        this may need to be revisited once we run GVN before loop deletion
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index c6943695ec1549..7036759a4eed57 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -39,20 +39,8 @@ static cl::opt<bool> PrepareForLTOOption(
     cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
              "should be used for testing only."));
 
-// Experimentally allow loop header duplication. This should allow for better
-// optimization at Oz, since loop-idiom recognition can then recognize things
-// like memcpy. If this ends up being profitable, we should drop this flag and
-// making a code gen option that can be controled independent of the opt level
-// and exposed through clang. See
-// https://github.com/llvm/llvm-project/issues/50308 for details.
-static cl::opt<bool>
-    ForceHeaderDuplication("force-loop-header-duplication", cl::init(false),
-                           cl::Hidden,
-                           cl::desc("Always enable loop header duplication"));
-
 LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
-    : EnableHeaderDuplication(EnableHeaderDuplication ||
-                              ForceHeaderDuplication),
+    : EnableHeaderDuplication(EnableHeaderDuplication),
       PrepareForLTO(PrepareForLTO) {}
 
 void LoopRotatePass::printPipeline(
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index b933200ed80586..3a4802962fe3a9 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -4,8 +4,8 @@
 ; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
 ; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
 
-;; Make sure -force-loop-header-duplication overrides the default behavior at Oz
-; RUN: opt < %s -S -passes='default<Oz>' -force-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+;; Make sure -allow-loop-header-duplication overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<Oz>' -allow-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
 
 ; Loop should be rotated for -Os but not for -Oz.
 ; OS: rotating Loop at depth 1



More information about the llvm-commits mailing list