[llvm] 5d79110 - [Pipelines] Perform mergefunc after constmerge (#92498)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 5 03:28:05 PDT 2024


Author: YAMAMOTO Takashi
Date: 2024-07-05T12:28:03+02:00
New Revision: 5d7911095931d4894e63fa3379a598384f2eb029

URL: https://github.com/llvm/llvm-project/commit/5d7911095931d4894e63fa3379a598384f2eb029
DIFF: https://github.com/llvm/llvm-project/commit/5d7911095931d4894e63fa3379a598384f2eb029.diff

LOG: [Pipelines] Perform mergefunc after constmerge (#92498)

Constmerge can fold switch jump tables, possibly making functions
identical again. It can help mergefunc.
On the other hand, the opposite seems unlikely.

Fixes https://github.com/llvm/llvm-project/issues/92201.

Added: 
    llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
    llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll

Modified: 
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/test/Other/new-pm-defaults.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 926515c9508a9..4fd5ee1946bb7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1527,10 +1527,6 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   if (EnableIROutliner)
     MPM.addPass(IROutlinerPass());
 
-  // Merge functions if requested.
-  if (PTO.MergeFunctions)
-    MPM.addPass(MergeFunctionsPass());
-
   // Now we need to do some global optimization transforms.
   // FIXME: It would seem like these should come first in the optimization
   // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
@@ -1538,6 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   MPM.addPass(GlobalDCEPass());
   MPM.addPass(ConstantMergePass());
 
+  // Merge functions if requested. It has a better chance to merge functions
+  // after ConstantMerge folded jump tables.
+  if (PTO.MergeFunctions)
+    MPM.addPass(MergeFunctionsPass());
+
   if (PTO.CallGraphProfile && !LTOPreLink)
     MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
                               LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));

diff  --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
index 489aed40c190b..588337c15625e 100644
--- a/llvm/test/Other/new-pm-defaults.ll
+++ b/llvm/test/Other/new-pm-defaults.ll
@@ -281,9 +281,9 @@
 ; CHECK-HOT-COLD-SPLIT-NEXT: Running pass: HotColdSplittingPass
 ; CHECK-IR-OUTLINER-NEXT: Running pass: IROutlinerPass
 ; CHECK-IR-OUTLINER-NEXT: Running analysis: IRSimilarityAnalysis
-; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
 ; CHECK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: ConstantMergePass
+; CHECK-MERGE-FUNCS-NEXT: Running pass: MergeFunctionsPass
 ; CHECK-DEFAULT-NEXT: Running pass: CGProfilePass
 ; CHECK-DEFAULT-NEXT: Running pass: RelLookupTableConverterPass
 ; CHECK-LTO-NOT: Running pass: RelLookupTableConverterPass

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
new file mode 100644
index 0000000000000..d794106ede3b2
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions2.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+define i32 @f(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 2) i32 @f(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 8
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[SW_EPILOG]]
+; CHECK:       [[SW_EPILOG]]:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[X_ADDR_0]]
+;
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  switch i32 %0, label %sw.default [
+  i32 0, label %sw.bb
+  i32 2, label %sw.bb
+  i32 4, label %sw.bb
+  i32 6, label %sw.bb
+  i32 7, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry, %entry, %entry, %entry
+  store i32 1, ptr %x.addr, align 4
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %entry
+  store i32 0, ptr %x.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.default, %sw.bb
+  %1 = load i32, ptr %x.addr, align 4
+  ret i32 %1
+}
+
+define i32 @g(i32 noundef %x) {
+; CHECK-LABEL: define range(i32 0, 2) i32 @g(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, ptr %x.addr, align 4
+  %0 = load i32, ptr %x.addr, align 4
+  switch i32 %0, label %sw.default [
+  i32 0, label %sw.bb
+  i32 2, label %sw.bb
+  i32 4, label %sw.bb
+  i32 6, label %sw.bb
+  i32 7, label %sw.bb
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry, %entry, %entry, %entry
+  store i32 1, ptr %x.addr, align 4
+  br label %sw.epilog
+
+sw.default:                                       ; preds = %entry
+  store i32 0, ptr %x.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.default, %sw.bb
+  %1 = load i32, ptr %x.addr, align 4
+  ret i32 %1
+}

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
new file mode 100644
index 0000000000000..7109c0ff4d55b
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/merge-functions3.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -enable-merge-functions -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx12.0.0"
+
+@switch.table.f = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+@switch.table.g = private unnamed_addr constant [8 x i32] [i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1], align 4
+
+define range(i32 0, 2) i32 @f(i32 noundef %x) local_unnamed_addr {
+; CHECK-LABEL: define range(i32 0, 2) i32 @f(
+; CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[X]], 8
+; CHECK-NEXT:    br i1 [[TMP0]], label %[[SWITCH_LOOKUP:.*]], label %[[SW_EPILOG:.*]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[X]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 [[TMP1]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[SW_EPILOG]]
+; CHECK:       [[SW_EPILOG]]:
+; CHECK-NEXT:    [[X_ADDR_0:%.*]] = phi i32 [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    ret i32 [[X_ADDR_0]]
+;
+entry:
+  %0 = icmp ult i32 %x, 8
+  br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup:                                    ; preds = %entry
+  %1 = zext nneg i32 %x to i64
+  %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.f, i64 0, i64 %1
+  %switch.load = load i32, ptr %switch.gep, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %switch.lookup
+  %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+  ret i32 %x.addr.0
+}
+
+define range(i32 0, 2) i32 @g(i32 noundef %x) local_unnamed_addr {
+; CHECK-LABEL: define range(i32 0, 2) i32 @g(
+; CHECK-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call range(i32 0, 2) i32 @f(i32 noundef [[TMP0]]) #[[ATTR0]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+entry:
+  %0 = icmp ult i32 %x, 8
+  br i1 %0, label %switch.lookup, label %sw.epilog
+
+switch.lookup:                                    ; preds = %entry
+  %1 = zext nneg i32 %x to i64
+  %switch.gep = getelementptr inbounds [8 x i32], ptr @switch.table.g, i64 0, i64 %1
+  %switch.load = load i32, ptr %switch.gep, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %switch.lookup
+  %x.addr.0 = phi i32 [ %switch.load, %switch.lookup ], [ 0, %entry ]
+  ret i32 %x.addr.0
+}


        


More information about the llvm-commits mailing list