[llvm] Reland "[LTO] Run Argument Promotion before IPSCCP" (PR #111853)
Hari Limaye via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 5 06:30:07 PST 2024
https://github.com/hazzlim updated https://github.com/llvm/llvm-project/pull/111853
>From f9abf531d4b3405c117e24f91af99db7364373ca Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Thu, 17 Oct 2024 22:42:36 +0000
Subject: [PATCH 1/5] Precommit test for "[LTO] Run Argument Promotion before
IPSCCP"
---
.../PhaseOrdering/lto-argpromotion-ipsccp.ll | 68 +++++++++++++++++++
1 file changed, 68 insertions(+)
create mode 100644 llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
diff --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
new file mode 100644
index 00000000000000..1ed523dfa7a90f
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='lto<O3>' -S < %s | FileCheck %s
+
+; FIXME: We should be able to propagate the constants from @parent to @child.
+
+define void @parent(ptr %p) {
+; CHECK-LABEL: define void @parent(
+; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: tail call fastcc void @child(ptr [[P]], i32 1024, i32 5)
+; CHECK-NEXT: ret void
+;
+ %c = alloca i32
+ store i32 5, ptr %c
+ %n = alloca i32
+ store i32 1024, ptr %n
+ call void @child(ptr %p, ptr %n, ptr %c)
+ ret void
+}
+
+define internal void @child(ptr %p, ptr %n, ptr %c) noinline {
+; CHECK-LABEL: define internal fastcc void @child(
+; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[N_0_VAL:%.*]], i32 [[C_0_VAL:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND:.*]]
+; CHECK: [[FOR_COND]]:
+; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[N_0_VAL]]
+; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]]
+; CHECK: [[FOR_INC]]:
+; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], [[C_0_VAL]]
+; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
+; CHECK-NEXT: br label %[[FOR_COND]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %n.val = load i32, ptr %n
+ %cmp = icmp ne i32 %i.0, %n.val
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ br label %for.end
+
+for.body:
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
+ %0 = load i32, ptr %arrayidx, align 4
+ %c.val = load i32, ptr %c
+ %mul = mul i32 %0, %c.val
+ store i32 %mul, ptr %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
>From e94b2b9fe6efd2bedf659867a357193e61a2b216 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 24 Sep 2024 16:12:29 +0000
Subject: [PATCH 2/5] Reland "[LTO] Run Argument Promotion before IPSCCP"
(#111839)
Run ArgumentPromotion before IPSCCP in the LTO pipeline, to expose more
constants to be propagated. We also run PostOrderFunctionAttrs to
improve the information available to ArgumentPromotion's alias analysis,
and SROA to clean up allocas.
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 9 +++++++++
llvm/test/Other/new-pm-lto-defaults.ll | 9 ++++++---
.../PhaseOrdering/lto-argpromotion-ipsccp.ll | 10 +++++-----
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 17710eb94b6ded..0a9a40d2978833 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1828,6 +1828,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(PGOIndirectCallPromotion(
true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+ // Promoting by-reference arguments to by-value exposes more constants to
+ // IPSCCP.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)));
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG)));
+
// Propagate constants at call sites into the functions they call. This
// opens opportunities for globalopt (and inlining) by substituting function
// pointers passed as arguments to direct uses of functions.
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 5543472df685b0..2dd754ecef4d7b 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -41,14 +41,17 @@
; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion
; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
-; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
-; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
+; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass
+; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O23SZ-NEXT: Running pass: SROAPass
+; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
+; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O-NEXT: Running analysis: BasicAA
diff --git a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
index 1ed523dfa7a90f..72921acba5969f 100644
--- a/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lto-argpromotion-ipsccp.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes='lto<O3>' -S < %s | FileCheck %s
-; FIXME: We should be able to propagate the constants from @parent to @child.
+; We should be able to propagate the constants from @parent to @child.
define void @parent(ptr %p) {
; CHECK-LABEL: define void @parent(
; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: tail call fastcc void @child(ptr [[P]], i32 1024, i32 5)
+; CHECK-NEXT: tail call fastcc void @child(ptr [[P]])
; CHECK-NEXT: ret void
;
%c = alloca i32
@@ -19,18 +19,18 @@ define void @parent(ptr %p) {
define internal void @child(ptr %p, ptr %n, ptr %c) noinline {
; CHECK-LABEL: define internal fastcc void @child(
-; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[N_0_VAL:%.*]], i32 [[C_0_VAL:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ptr nocapture [[P:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[FOR_COND:.*]]
; CHECK: [[FOR_COND]]:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ]
-; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[N_0_VAL]]
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], 1024
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]]
; CHECK: [[FOR_INC]]:
; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], [[C_0_VAL]]
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], 5
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label %[[FOR_COND]]
>From b776bb36a4c7bf1afa4b98c04e49fe3b2395fa9c Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Fri, 18 Oct 2024 23:14:13 +0000
Subject: [PATCH 3/5] Move POFA earlier instead of running twice (For >O1)
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 13 ++++++++-----
llvm/test/Other/new-pm-lto-defaults.ll | 12 +++++-------
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 0a9a40d2978833..651a16e1f2ae6e 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1830,8 +1830,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Promoting by-reference arguments to by-value exposes more constants to
// IPSCCP.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
- PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true)));
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
MPM.addPass(
createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
MPM.addPass(
@@ -1849,9 +1849,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(CalledValuePropagationPass());
}
- // Now deduce any function attributes based in the current code.
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+ // For higher optimization levels this Pass has just run, so don't repeat it.
+ if (Level.getSpeedupLevel() == 1) {
+ // Now deduce any function attributes based on the current code.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+ }
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 2dd754ecef4d7b..3a49903be429c8 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -46,21 +46,19 @@
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
-; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass
-; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass
-; CHECK-O23SZ-NEXT: Running pass: SROAPass
-; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
-; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
-; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running analysis: AAManager
; CHECK-O-NEXT: Running analysis: BasicAA
-; CHECK-O1-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O: Running analysis: AssumptionAnalysis on foo
; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
; CHECK-O-NEXT: Running analysis: TypeBasedAA
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass
+; CHECK-O23SZ-NEXT: Running pass: SROAPass
+; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
+; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass
; CHECK-O-NEXT: Running pass: GlobalSplitPass
; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
>From 8466049b4e7018774de5f4c2b5668a733a5270e5 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 5 Nov 2024 11:13:39 +0000
Subject: [PATCH 4/5] Don't bother running POFA Pass at -O1
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ------
llvm/test/Other/new-pm-lto-defaults.ll | 27 ++++++++++++------------
2 files changed, 13 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 651a16e1f2ae6e..93175568cfb475 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1849,13 +1849,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(CalledValuePropagationPass());
}
- // For higher optimization levels this Pass has just run, so don't repeat it.
- if (Level.getSpeedupLevel() == 1) {
- // Now deduce any function attributes based on the current code.
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
- }
-
// Do RPO function attribute inference across the module to forward-propagate
// attributes where applicable.
// FIXME: Is this really an optimization rather than a canonicalization?
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
index 3a49903be429c8..36a4f4784f6b7c 100644
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -41,25 +41,24 @@
; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion
; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
-; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
-; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
-; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
-; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
-; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
-; CHECK-O-NEXT: Running analysis: AAManager
-; CHECK-O-NEXT: Running analysis: BasicAA
-; CHECK-O: Running analysis: AssumptionAnalysis on foo
-; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis
-; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis
-; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
-; CHECK-O-NEXT: Running analysis: TypeBasedAA
-; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
+; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
+; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
+; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass
+; CHECK-O23SZ-NEXT: Running analysis: AAManager
+; CHECK-O23SZ-NEXT: Running analysis: BasicAA
+; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O23SZ-NEXT: Running analysis: ScopedNoAliasAA
+; CHECK-O23SZ-NEXT: Running analysis: TypeBasedAA
+; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy
; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O23SZ-NEXT: Running pass: SROAPass
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass
+; CHECK-O1-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
; CHECK-O-NEXT: Running pass: GlobalSplitPass
; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
>From b2e6b6ef78037dd51a7c03abd6ddb67ab4cb7507 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 5 Nov 2024 14:27:46 +0000
Subject: [PATCH 5/5] Use a single CGSCCPassManager
---
llvm/lib/Passes/PassBuilderPipelines.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 93175568cfb475..4c61f9d1920939 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1830,10 +1830,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Promoting by-reference arguments to by-value exposes more constants to
// IPSCCP.
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+ CGSCCPassManager CGPM;
+ CGPM.addPass(PostOrderFunctionAttrsPass());
+ CGPM.addPass(ArgumentPromotionPass());
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
MPM.addPass(
createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG)));
More information about the llvm-commits mailing list