[llvm] [llvm][loop-rotate] Allow forcing loop-rotation (PR #82828)
Paul Kirth via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 23 13:54:47 PST 2024
https://github.com/ilovepi updated https://github.com/llvm/llvm-project/pull/82828
>From b34b5e679fbcceae5124f79f1399eafc31f1c533 Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Fri, 23 Feb 2024 20:43:13 +0000
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
---
llvm/lib/Transforms/Scalar/LoopRotation.cpp | 14 +++++++++++++-
llvm/test/Transforms/LoopRotate/oz-disable.ll | 3 +++
2 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 7036759a4eed57..c6943695ec1549 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -39,8 +39,20 @@ static cl::opt<bool> PrepareForLTOOption(
cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
"should be used for testing only."));
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being profitable, we should drop this flag and
+// making a code gen option that can be controled independent of the opt level
+// and exposed through clang. See
+// https://github.com/llvm/llvm-project/issues/50308 for details.
+static cl::opt<bool>
+ ForceHeaderDuplication("force-loop-header-duplication", cl::init(false),
+ cl::Hidden,
+ cl::desc("Always enable loop header duplication"));
+
LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
- : EnableHeaderDuplication(EnableHeaderDuplication),
+ : EnableHeaderDuplication(EnableHeaderDuplication ||
+ ForceHeaderDuplication),
PrepareForLTO(PrepareForLTO) {}
void LoopRotatePass::printPipeline(
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index 6a7847ac0ff215..b933200ed80586 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -4,6 +4,9 @@
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
+;; Make sure -force-loop-header-duplication overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<Oz>' -force-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+
; Loop should be rotated for -Os but not for -Oz.
; OS: rotating Loop at depth 1
; OZ-NOT: rotating Loop at depth 1
>From b65bd96b2508d4f6c3f2c97654a1f70ad36e1d9b Mon Sep 17 00:00:00 2001
From: Paul Kirth <paulkirth at google.com>
Date: Fri, 23 Feb 2024 21:54:33 +0000
Subject: [PATCH 2/2] Move flag to PassPipelineBuilder and rename Flag
Created using spr 1.3.4
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 21 +++++++++++++++----
llvm/lib/Transforms/Scalar/LoopRotation.cpp | 14 +------------
llvm/test/Transforms/LoopRotate/oz-disable.ll | 4 ++--
3 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 17b55b63ac03cf..a6fc335f3bd473 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));
+// Experimentally allow loop header duplication. This should allow for better
+// optimization at Oz, since loop-idiom recognition can then recognize things
+// like memcpy. If this ends up being profitable, we should drop this flag and
+// make a code gen option that can be controlled independent of the opt level
+// and exposed through clang.
+static cl::opt<bool> AllowLoopHeaderDuplication(
+ "allow-loop-header-duplication", cl::init(false), cl::Hidden,
+ cl::desc("Allow loop header duplication at any optimization level"));
+
static cl::opt<bool>
EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading"),
@@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
/*AllowSpeculation=*/false));
// Disable header duplication in loop rotation at -Oz.
- LPM1.addPass(
- LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
+ LPM1.addPass(LoopRotatePass(AllowLoopHeaderDuplication ||
+ (Level != OptimizationLevel::Oz),
+ isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
@@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// Disable header duplication in loop rotation at -Oz.
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz),
+ LoopRotatePass(AllowLoopHeaderDuplication ||
+ Level != OptimizationLevel::Oz),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false),
PTO.EagerlyInvalidateAnalyses));
@@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
LoopPassManager LPM;
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz.
- LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
+ LPM.addPass(LoopRotatePass(AllowLoopHeaderDuplication ||
+ Level != OptimizationLevel::Oz,
+ LTOPreLink));
// Some loops may have become dead by now. Try to delete them.
// FIXME: see discussion in https://reviews.llvm.org/D112851,
// this may need to be revisited once we run GVN before loop deletion
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index c6943695ec1549..7036759a4eed57 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -39,20 +39,8 @@ static cl::opt<bool> PrepareForLTOOption(
cl::desc("Run loop-rotation in the prepare-for-lto stage. This option "
"should be used for testing only."));
-// Experimentally allow loop header duplication. This should allow for better
-// optimization at Oz, since loop-idiom recognition can then recognize things
-// like memcpy. If this ends up being profitable, we should drop this flag and
-// making a code gen option that can be controled independent of the opt level
-// and exposed through clang. See
-// https://github.com/llvm/llvm-project/issues/50308 for details.
-static cl::opt<bool>
- ForceHeaderDuplication("force-loop-header-duplication", cl::init(false),
- cl::Hidden,
- cl::desc("Always enable loop header duplication"));
-
LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
- : EnableHeaderDuplication(EnableHeaderDuplication ||
- ForceHeaderDuplication),
+ : EnableHeaderDuplication(EnableHeaderDuplication),
PrepareForLTO(PrepareForLTO) {}
void LoopRotatePass::printPipeline(
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
index b933200ed80586..3a4802962fe3a9 100644
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -4,8 +4,8 @@
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
-;; Make sure -force-loop-header-duplication overrides the default behavior at Oz
-; RUN: opt < %s -S -passes='default<Oz>' -force-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
+;; Make sure -allow-loop-header-duplication overrides the default behavior at Oz
+; RUN: opt < %s -S -passes='default<Oz>' -allow-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
; Loop should be rotated for -Os but not for -Oz.
; OS: rotating Loop at depth 1
More information about the llvm-commits
mailing list