[llvm] [Pipelines] Perform mergefunc after constmerge (PR #92498)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 26 03:15:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: YAMAMOTO Takashi (yamt)
<details>
<summary>Changes</summary>
Constmerge can fold switch jump tables, possibly making functions identical again. It can help mergefunc.
On the other hand, the opposite seems unlikely.
Fixes https://github.com/llvm/llvm-project/issues/92201
---
Full diff: https://github.com/llvm/llvm-project/pull/92498.diff
4 Files Affected:
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+5-4)
- (modified) llvm/test/Other/new-pm-defaults.ll (+1-1)
- (added) llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll (+67)
- (added) llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll (+47)
``````````diff
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 926515c9508a97..4fd5ee1946bb77 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1527,10 +1527,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (EnableIROutliner)
MPM.addPass(IROutlinerPass());
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
// Now we need to do some global optimization transforms.
// FIXME: It would seem like these should come first in the optimization
// pipeline and maybe be the bottom of the canonicalization pipeline? Weird
@@ -1538,6 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(GlobalDCEPass());
MPM.addPass(ConstantMergePass());
+ // Merge functions if requested. It has a better chance to merge functions
+ // after ConstantMerge folded jump tables.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
if (PTO.CallGraphProfile && !LTOPreLink)
MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 489aed40c190b4..588337c15625e6 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -281,9 +281,9 @@
; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass
; CHECK-IR-OUTLINER-NEXT: Running analysis: IRSimilarityAnalysis
-; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
; CHECK-O-NEXT: Running pass: GlobalDCEPass
; CHECK-O-NEXT: Running pass: ConstantMergePass
+; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
; CHECK-DEFAULT-NEXT: Running pass: CGProfilePass
; CHECK-DEFAULT-NEXT: Running pass: RelLookupTableConverterPass
; CHECK-LTO-NOT: Running pass: RelLookupTableConverterPass
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
new file mode 100644
index 00000000000000..5d650d5f080bac
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
@@ -0,0 +1,67 @@
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+; Function Attrs: noinline nounwind optsize ssp uwtable
+define i32 @f(i32 noundef %x) #0 {
+; CHECK-LABEL: @f(
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4, !tbaa !5
+ %0 = load i32, ptr %x.addr, align 4, !tbaa !5
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 2, label %sw.bb
+ i32 4, label %sw.bb
+ i32 6, label %sw.bb
+ i32 7, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
+ store i32 1, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ store i32 0, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb
+ %1 = load i32, ptr %x.addr, align 4, !tbaa !5
+ ret i32 %1
+}
+
+; Function Attrs: noinline nounwind optsize ssp uwtable
+define i32 @g(i32 noundef %x) #0 {
+; CHECK-LABEL: @g(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+entry:
+ %x.addr = alloca i32, align 4
+ store i32 %x, ptr %x.addr, align 4, !tbaa !5
+ %0 = load i32, ptr %x.addr, align 4, !tbaa !5
+ switch i32 %0, label %sw.default [
+ i32 0, label %sw.bb
+ i32 2, label %sw.bb
+ i32 4, label %sw.bb
+ i32 6, label %sw.bb
+ i32 7, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry, %entry, %entry
+ store i32 1, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.default: ; preds = %entry
+ store i32 0, ptr %x.addr, align 4, !tbaa !5
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.default, %sw.bb
+ %1 = load i32, ptr %x.addr, align 4, !tbaa !5
+ ret i32 %1
+}
+
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !7, i64 0}
+!7 = !{!"omnipotent char", !8, i64 0}
+!8 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
new file mode 100644
index 00000000000000..bce8f08ceda5e0
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
@@ -0,0 +1,47 @@
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+@switch.table.f = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+@switch.table.g = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable
+define range(i32 0, 2) i32 @f(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @f(
+entry:
+ %0 = icmp ult i32 %x, 8
+ br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup: ; preds = %entry
+ %1 = zext nneg i32 %x to i64
+ %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.f, i64 0, i64 %1
+ %switch.load = load i32, ptr %switch.gep, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %switch.lookup
+ %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+ ret i32 %x.addr.0
+}
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable
+define range(i32 0, 2) i32 @g(i32 noundef %x) local_unnamed_addr #0 {
+; CHECK-LABEL: @g(
+; CHECK-NEXT: [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0:%.*]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+entry:
+ %0 = icmp ult i32 %x, 8
+ br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup: ; preds = %entry
+ %1 = zext nneg i32 %x to i64
+ %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 %1
+ %switch.load = load i32, ptr %switch.gep, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %entry, %switch.lookup
+ %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+ ret i32 %x.addr.0
+}
+
+attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind optsize ssp willreturn memory(none) uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/92498
More information about the llvm-commits
mailing list