[llvm] ada9da7 - [MachineOutliner] Add profile guided outlining (#154437)

via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 9 10:06:50 PDT 2025


Author: Ellis Hoag
Date: 2025-09-09T10:06:46-07:00
New Revision: ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8

URL: https://github.com/llvm/llvm-project/commit/ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8
DIFF: https://github.com/llvm/llvm-project/commit/ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8.diff

LOG: [MachineOutliner] Add profile guided outlining (#154437)

Added: 
    llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll

Modified: 
    llvm/docs/ReleaseNotes.md
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/Passes/CodeGenPassBuilder.h
    llvm/include/llvm/Target/CGPassBuilderOption.h
    llvm/lib/CodeGen/MachineOutliner.cpp
    llvm/lib/CodeGen/TargetPassConfig.cpp
    llvm/test/CodeGen/AArch64/machine-outliner-flags.ll

Removed: 
    


################################################################################
diff  --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index d5a26512a6dcf..16174553ba7f2 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -75,6 +75,10 @@ Changes to TableGen
 Changes to Interprocedural Optimizations
 ----------------------------------------
 
+* Added `-enable-machine-outliner={optimistic-pgo,conservative-pgo}` to read
+  profile data to guide the machine outliner
+  ([#154437](https://github.com/llvm/llvm-project/pull/154437)).
+
 Changes to Vectorizers
 ----------------------------------------
 

diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 593308150dc82..f17d550623efc 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -31,6 +31,7 @@ class ModulePass;
 class Pass;
 class TargetMachine;
 class raw_ostream;
+enum class RunOutliner;
 
 template <typename T> class IntrusiveRefCntPtr;
 namespace vfs {
@@ -520,7 +521,7 @@ LLVM_ABI ModulePass *createGlobalMergeFuncPass();
 
 /// This pass performs outlining on machine instructions directly before
 /// printing assembly.
-LLVM_ABI ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
+LLVM_ABI ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode);
 
 /// This pass expands the reduction intrinsics into sequences of shuffles.
 LLVM_ABI FunctionPass *createExpandReductionsPass();

diff  --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6a241f55245c7..6a235c017a993 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1095,11 +1095,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
   if (TM.Options.EnableMachineOutliner &&
       getOptLevel() != CodeGenOptLevel::None &&
       Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
-    bool RunOnAllFunctions =
-        (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
-    bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
-    if (AddOutliner)
-      addPass(MachineOutlinerPass(RunOnAllFunctions));
+    if (Opt.EnableMachineOutliner != RunOutliner::TargetDefault ||
+        TM.Options.SupportsDefaultOutlining)
+      addPass(MachineOutlinerPass(Opt.EnableMachineOutliner));
   }
 
   addPass(StackFrameLayoutAnalysisPass());

diff  --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 8d0a7e61970fa..54bee9de7a093 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -21,7 +21,13 @@
 
 namespace llvm {
 
-enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
+enum class RunOutliner {
+  TargetDefault,
+  AlwaysOutline,
+  OptimisticPGO,
+  ConservativePGO,
+  NeverOutline
+};
 enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
 
 class RegAllocTypeParser : public cl::parser<RegAllocType> {

diff  --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index c7f09d577c4fa..fdae3b470de0f 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,8 +59,10 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CGData/CodeGenDataReader.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -107,6 +109,16 @@ STATISTIC(StableHashAttempts,
 STATISTIC(StableHashDropped,
           "Count of unsuccessful hashing attempts for outlined functions");
 STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
+STATISTIC(NumPGOBlockedOutlined,
+          "Number of times outlining was blocked by PGO");
+STATISTIC(NumPGOAllowedCold,
+          "Number of times outlining was allowed from cold functions");
+STATISTIC(NumPGOConservativeBlockedOutlined,
+          "Number of times outlining was blocked conservatively when profile "
+          "counts were missing");
+STATISTIC(NumPGOOptimisticOutlined,
+          "Number of times outlining was allowed optimistically when profile "
+          "counts were missing");
 
 // Set to true if the user wants the outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr
@@ -438,11 +450,10 @@ struct MachineOutliner : public ModulePass {
   /// The current repeat number of machine outlining.
   unsigned OutlineRepeatedNum = 0;
 
-  /// Set to true if the outliner should run on all functions in the module
-  /// considered safe for outlining.
-  /// Set to true by default for compatibility with llc's -run-pass option.
-  /// Set when the pass is constructed in TargetPassConfig.
-  bool RunOnAllFunctions = true;
+  /// The mode that determines whether and how the outliner runs.
+  /// Set to always-outline by default for compatibility with llc's -run-pass
+  /// option.
+  RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
 
   /// This is a compact representation of hash sequences of outlined functions.
   /// It is used when OutlinerMode = CGDataMode::Write.
@@ -468,6 +479,11 @@ struct MachineOutliner : public ModulePass {
     AU.addRequired<TargetPassConfig>();
     AU.addPreserved<MachineModuleInfoWrapperPass>();
     AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+    if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
+        RunOutlinerMode == RunOutliner::ConservativePGO) {
+      AU.addRequired<BlockFrequencyInfoWrapperPass>();
+      AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    }
     AU.setPreservesAll();
     ModulePass::getAnalysisUsage(AU);
   }
@@ -578,9 +594,9 @@ struct MachineOutliner : public ModulePass {
 char MachineOutliner::ID = 0;
 
 namespace llvm {
-ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
+ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
   MachineOutliner *OL = new MachineOutliner();
-  OL->RunOnAllFunctions = RunOnAllFunctions;
+  OL->RunOutlinerMode = RunOutlinerMode;
   return OL;
 }
 
@@ -1198,10 +1214,49 @@ bool MachineOutliner::outline(
   return OutlinedSomething;
 }
 
+static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
+                              const ProfileSummaryInfo *PSI,
+                              const BlockFrequencyInfo *BFI,
+                              MachineBasicBlock &MBB) {
+  if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
+      RunOutlinerMode != RunOutliner::ConservativePGO)
+    return true; // Not a PGO mode, so no profile-based filtering applies.
+  auto *MF = MBB.getParent();
+  if (MF->getFunction().hasFnAttribute(Attribute::Cold)) {
+    ++NumPGOAllowedCold;
+    return true; // Functions carrying the cold attribute are always allowed.
+  }
+
+  auto *BB = MBB.getBasicBlock();
+  if (BB && PSI && BFI)
+    if (auto Count = BFI->getBlockProfileCount(BB))
+      return *Count <= PSI->getOrCompColdCountThreshold();
+
+  if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
+    auto *TII = MF->getSubtarget().getInstrInfo();
+    if (TII->shouldOutlineFromFunctionByDefault(*MF)) {
+      // Profile data is unavailable, but we optimistically allow outlining.
+      ++NumPGOOptimisticOutlined;
+      return true;
+    }
+    return false; // The target would not outline this function by default.
+  }
+  assert(RunOutlinerMode == RunOutliner::ConservativePGO);
+  // Profile data is unavailable, so we conservatively block outlining.
+  ++NumPGOConservativeBlockedOutlined;
+  return false;
+}
+
 void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
   // Build instruction mappings for each function in the module. Start by
   // iterating over each Function in M.
   LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
+  bool EnableProfileGuidedOutlining =
+      RunOutlinerMode == RunOutliner::OptimisticPGO ||
+      RunOutlinerMode == RunOutliner::ConservativePGO;
+  ProfileSummaryInfo *PSI = nullptr;
+  if (EnableProfileGuidedOutlining)
+    PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   for (Function &F : M) {
     LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
 
@@ -1222,7 +1277,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
     }
 
     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-    if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
+    BlockFrequencyInfo *BFI = nullptr;
+    if (EnableProfileGuidedOutlining && F.hasProfileData())
+      BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+    if (RunOutlinerMode == RunOutliner::TargetDefault &&
+        !TII->shouldOutlineFromFunctionByDefault(*MF)) {
       LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
                            "function by default\n");
       continue;
@@ -1262,6 +1321,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
         continue;
       }
 
+      if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
+        ++NumPGOBlockedOutlined;
+        continue;
+      }
+
       // MBB is suitable for outlining. Map it to a list of unsigneds.
       Mapper.convertToUnsignedVec(MBB, *TII);
     }
@@ -1434,10 +1498,22 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
   // the user how the outliner is running.
   LLVM_DEBUG({
     dbgs() << "Machine Outliner: Running on ";
-    if (RunOnAllFunctions)
+    switch (RunOutlinerMode) {
+    case RunOutliner::AlwaysOutline:
       dbgs() << "all functions";
-    else
+      break;
+    case RunOutliner::OptimisticPGO:
+      dbgs() << "optimistically cold functions";
+      break;
+    case RunOutliner::ConservativePGO:
+      dbgs() << "conservatively cold functions";
+      break;
+    case RunOutliner::TargetDefault:
       dbgs() << "target-default functions";
+      break;
+    case RunOutliner::NeverOutline:
+      llvm_unreachable("should not outline");
+    }
     dbgs() << "\n";
   });
 

diff  --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index f83973c30b48a..b6169e6c4dc34 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -134,12 +134,18 @@ static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
 static cl::opt<RunOutliner> EnableMachineOutliner(
     "enable-machine-outliner", cl::desc("Enable the machine outliner"),
     cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
-    cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
-                          "Run on all functions guaranteed to be beneficial"),
-               clEnumValN(RunOutliner::NeverOutline, "never",
-                          "Disable all outlining"),
-               // Sentinel value for unspecified option.
-               clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+    cl::values(
+        clEnumValN(RunOutliner::AlwaysOutline, "always",
+                   "Run on all functions guaranteed to be beneficial"),
+        clEnumValN(RunOutliner::OptimisticPGO, "optimistic-pgo",
+                   "Outline cold code only. If a code block does not have "
+                   "profile data, optimistically assume it is cold."),
+        clEnumValN(RunOutliner::ConservativePGO, "conservative-pgo",
+                   "Outline cold code only. If a code block does not have "
+                   "profile data, conservatively assume it is hot."),
+        clEnumValN(RunOutliner::NeverOutline, "never", "Disable all outlining"),
+        // Sentinel value for unspecified option.
+        clEnumValN(RunOutliner::AlwaysOutline, "", "")));
 static cl::opt<bool> EnableGlobalMergeFunc(
     "enable-global-merge-func", cl::Hidden,
     cl::desc("Enable global merge functions that are based on hash function"));
@@ -1224,12 +1230,9 @@ void TargetPassConfig::addMachinePasses() {
   if (TM->Options.EnableMachineOutliner &&
       getOptLevel() != CodeGenOptLevel::None &&
       EnableMachineOutliner != RunOutliner::NeverOutline) {
-    bool RunOnAllFunctions =
-        (EnableMachineOutliner == RunOutliner::AlwaysOutline);
-    bool AddOutliner =
-        RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
-    if (AddOutliner)
-      addPass(createMachineOutlinerPass(RunOnAllFunctions));
+    if (EnableMachineOutliner != RunOutliner::TargetDefault ||
+        TM->Options.SupportsDefaultOutlining)
+      addPass(createMachineOutlinerPass(EnableMachineOutliner));
   }
 
   if (GCEmptyBlocks)

diff  --git a/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll b/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
index c435093b794e3..0fbf2bc43d1ea 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
@@ -1,25 +1,15 @@
 ; REQUIRES: asserts
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -enable-machine-outliner=always \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=ALWAYS
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=always -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
 
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -enable-machine-outliner \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=ENABLE
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,TARGET-DEFAULT
 
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=NEVER
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=optimistic-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,OPTIMISTIC
 
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=NOT-ADDED
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=conservative-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,CONSERVATIVE
 
-; RUN: llc %s -O=0 -debug-pass=Structure -verify-machineinstrs \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=OPTNONE
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -O=0 -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
 
 ; Make sure that the outliner is added to the pass pipeline only when the
 ; appropriate flags/settings are set. Make sure it isn't added otherwise.
@@ -27,23 +17,21 @@
 ; Cases where it should be added:
 ;  * -enable-machine-outliner
 ;  * -enable-machine-outliner=always
-;  * -enable-machine-outliner is not passed (AArch64 supports
-;     target-default outlining)
+;  * -enable-machine-outliner=optimistic-pgo
+;  * -enable-machine-outliner=conservative-pgo
+;  * -enable-machine-outliner is not passed (AArch64 supports target-default outlining)
 ;
 ; Cases where it should not be added:
 ;  * -O0 or equivalent
 ;  * -enable-machine-outliner=never is passed
 
-; ALWAYS: Machine Outliner
+; CHECK: Machine Outliner
+; DISABLED-NOT: Machine Outliner
 ; ALWAYS: Machine Outliner: Running on all functions
-; ENABLE: Machine Outliner
-; ENABLE: Machine Outliner: Running on all functions
-; NEVER-NOT: Machine Outliner
-; NOT-ADDED: Machine Outliner
-; NOT-ADDED: Machine Outliner: Running on target-default functions
-; OPTNONE-NOT: Machine Outliner
+; OPTIMISTIC: Machine Outliner: Running on optimistically cold functions
+; CONSERVATIVE: Machine Outliner: Running on conservatively cold functions
+; TARGET-DEFAULT: Machine Outliner: Running on target-default functions
 
 define void @foo() {
   ret void;
 }
-

diff  --git a/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll b/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll
new file mode 100644
index 0000000000000..d0ea5e9ae1018
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll
@@ -0,0 +1,83 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+; RUN: opt %t/a.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata -S -o %t/a2.ll
+
+; RUN: llc < %t/a2.ll -enable-machine-outliner=conservative-pgo -mtriple=aarch64-linux-gnu -profile-summary-cold-count=0 | FileCheck %s --check-prefixes=CHECK,CONSERVATIVE
+; RUN: llc < %t/a2.ll -enable-machine-outliner=optimistic-pgo -mtriple=aarch64-linux-gnu -profile-summary-cold-count=0 | FileCheck %s --check-prefixes=CHECK,OPTIMISTIC
+
+;--- a.ll
+declare void @z(i32, i32, i32, i32)
+
+; CHECK-LABEL: always_outline:
+define void @always_outline() cold {
+entry:
+; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: cold:
+define void @cold() {
+entry:
+; CHECK: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: hot:
+define void @hot() minsize {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile_minsize:
+define void @no_profile_minsize() minsize {
+entry:
+; CONSERVATIVE-NOT: [[OUTLINED]]
+; OPTIMISTIC: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile_optsize:
+define void @no_profile_optsize() optsize {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+  tail call void @z(i32 1, i32 2, i32 3, i32 4)
+  ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK: [[OUTLINED]]:
+; CHECK-SAME: // @{{.*}} Tail Call
+; CHECK:      mov     w0, #1
+; CHECK-NEXT: mov     w1, #2
+; CHECK-NEXT: mov     w2, #3
+; CHECK-NEXT: mov     w3, #4
+; CHECK-NEXT: b       z
+
+;--- a.proftext
+:ir
+
+cold
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+0
+
+hot
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+100


        


More information about the llvm-commits mailing list