[llvm] ada9da7 - [MachineOutliner] Add profile guided outlining (#154437)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 9 10:06:50 PDT 2025
Author: Ellis Hoag
Date: 2025-09-09T10:06:46-07:00
New Revision: ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8
URL: https://github.com/llvm/llvm-project/commit/ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8
DIFF: https://github.com/llvm/llvm-project/commit/ada9da71646d16dc7f3b5bfb27446ff1d86aa8f8.diff
LOG: [MachineOutliner] Add profile guided outlining (#154437)
Added:
llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll
Modified:
llvm/docs/ReleaseNotes.md
llvm/include/llvm/CodeGen/Passes.h
llvm/include/llvm/Passes/CodeGenPassBuilder.h
llvm/include/llvm/Target/CGPassBuilderOption.h
llvm/lib/CodeGen/MachineOutliner.cpp
llvm/lib/CodeGen/TargetPassConfig.cpp
llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
Removed:
################################################################################
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index d5a26512a6dcf..16174553ba7f2 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -75,6 +75,10 @@ Changes to TableGen
Changes to Interprocedural Optimizations
----------------------------------------
+* Added `-enable-machine-outliner={optimistic-pgo,conservative-pgo}` to read
+ profile data to guide the machine outliner
+ ([#154437](https://github.com/llvm/llvm-project/pull/154437)).
+
Changes to Vectorizers
----------------------------------------
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 593308150dc82..f17d550623efc 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -31,6 +31,7 @@ class ModulePass;
class Pass;
class TargetMachine;
class raw_ostream;
+enum class RunOutliner;
template <typename T> class IntrusiveRefCntPtr;
namespace vfs {
@@ -520,7 +521,7 @@ LLVM_ABI ModulePass *createGlobalMergeFuncPass();
/// This pass performs outlining on machine instructions directly before
/// printing assembly.
-LLVM_ABI ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
+LLVM_ABI ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode);
/// This pass expands the reduction intrinsics into sequences of shuffles.
LLVM_ABI FunctionPass *createExpandReductionsPass();
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6a241f55245c7..6a235c017a993 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1095,11 +1095,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
if (TM.Options.EnableMachineOutliner &&
getOptLevel() != CodeGenOptLevel::None &&
Opt.EnableMachineOutliner != RunOutliner::NeverOutline) {
- bool RunOnAllFunctions =
- (Opt.EnableMachineOutliner == RunOutliner::AlwaysOutline);
- bool AddOutliner = RunOnAllFunctions || TM.Options.SupportsDefaultOutlining;
- if (AddOutliner)
- addPass(MachineOutlinerPass(RunOnAllFunctions));
+ if (Opt.EnableMachineOutliner != RunOutliner::TargetDefault ||
+ TM.Options.SupportsDefaultOutlining)
+ addPass(MachineOutlinerPass(Opt.EnableMachineOutliner));
}
addPass(StackFrameLayoutAnalysisPass());
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 8d0a7e61970fa..54bee9de7a093 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -21,7 +21,13 @@
namespace llvm {
-enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
+enum class RunOutliner {
+ TargetDefault,
+ AlwaysOutline,
+ OptimisticPGO,
+ ConservativePGO,
+ NeverOutline
+};
enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
class RegAllocTypeParser : public cl::parser<RegAllocType> {
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index c7f09d577c4fa..fdae3b470de0f 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -59,8 +59,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -107,6 +109,16 @@ STATISTIC(StableHashAttempts,
STATISTIC(StableHashDropped,
"Count of unsuccessful hashing attempts for outlined functions");
STATISTIC(NumRemovedLOHs, "Total number of Linker Optimization Hints removed");
+STATISTIC(NumPGOBlockedOutlined,
+ "Number of times outlining was blocked by PGO");
+STATISTIC(NumPGOAllowedCold,
+ "Number of times outlining was allowed from cold functions");
+STATISTIC(NumPGOConservativeBlockedOutlined,
+ "Number of times outlining was blocked conservatively when profile "
+ "counts were missing");
+STATISTIC(NumPGOOptimisticOutlined,
+ "Number of times outlining was allowed optimistically when profile "
+ "counts were missing");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -438,11 +450,10 @@ struct MachineOutliner : public ModulePass {
/// The current repeat number of machine outlining.
unsigned OutlineRepeatedNum = 0;
- /// Set to true if the outliner should run on all functions in the module
- /// considered safe for outlining.
- /// Set to true by default for compatibility with llc's -run-pass option.
- /// Set when the pass is constructed in TargetPassConfig.
- bool RunOnAllFunctions = true;
+ /// The mode that selects which functions the outliner runs on.
+ /// Set to always-outline by default for compatibility with llc's -run-pass
+ /// option.
+ RunOutliner RunOutlinerMode = RunOutliner::AlwaysOutline;
/// This is a compact representation of hash sequences of outlined functions.
/// It is used when OutlinerMode = CGDataMode::Write.
@@ -468,6 +479,11 @@ struct MachineOutliner : public ModulePass {
AU.addRequired<TargetPassConfig>();
AU.addPreserved<MachineModuleInfoWrapperPass>();
AU.addUsedIfAvailable<ImmutableModuleSummaryIndexWrapperPass>();
+ if (RunOutlinerMode == RunOutliner::OptimisticPGO ||
+ RunOutlinerMode == RunOutliner::ConservativePGO) {
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ }
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -578,9 +594,9 @@ struct MachineOutliner : public ModulePass {
char MachineOutliner::ID = 0;
namespace llvm {
-ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions) {
+ModulePass *createMachineOutlinerPass(RunOutliner RunOutlinerMode) {
MachineOutliner *OL = new MachineOutliner();
- OL->RunOnAllFunctions = RunOnAllFunctions;
+ OL->RunOutlinerMode = RunOutlinerMode;
return OL;
}
@@ -1198,10 +1214,49 @@ bool MachineOutliner::outline(
return OutlinedSomething;
}
+static bool allowPGOOutlining(RunOutliner RunOutlinerMode,
+ const ProfileSummaryInfo *PSI,
+ const BlockFrequencyInfo *BFI,
+ MachineBasicBlock &MBB) {
+ if (RunOutlinerMode != RunOutliner::OptimisticPGO &&
+ RunOutlinerMode != RunOutliner::ConservativePGO)
+ return true;
+ auto *MF = MBB.getParent();
+ if (MF->getFunction().hasFnAttribute(Attribute::Cold)) {
+ ++NumPGOAllowedCold;
+ return true;
+ }
+
+ auto *BB = MBB.getBasicBlock();
+ if (BB && PSI && BFI)
+ if (auto Count = BFI->getBlockProfileCount(BB))
+ return *Count <= PSI->getOrCompColdCountThreshold();
+
+ if (RunOutlinerMode == RunOutliner::OptimisticPGO) {
+ auto *TII = MF->getSubtarget().getInstrInfo();
+ if (TII->shouldOutlineFromFunctionByDefault(*MF)) {
+ // Profile data is unavailable, but we optimistically allow outlining
+ ++NumPGOOptimisticOutlined;
+ return true;
+ }
+ return false;
+ }
+ assert(RunOutlinerMode == RunOutliner::ConservativePGO);
+ // Profile data is unavailable, so we conservatively block outlining
+ ++NumPGOConservativeBlockedOutlined;
+ return false;
+}
+
void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
+ bool EnableProfileGuidedOutlining =
+ RunOutlinerMode == RunOutliner::OptimisticPGO ||
+ RunOutlinerMode == RunOutliner::ConservativePGO;
+ ProfileSummaryInfo *PSI = nullptr;
+ if (EnableProfileGuidedOutlining)
+ PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
for (Function &F : M) {
LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
@@ -1222,7 +1277,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
}
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
+ BlockFrequencyInfo *BFI = nullptr;
+ if (EnableProfileGuidedOutlining && F.hasProfileData())
+ BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
+ if (RunOutlinerMode == RunOutliner::TargetDefault &&
+ !TII->shouldOutlineFromFunctionByDefault(*MF)) {
LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
"function by default\n");
continue;
@@ -1262,6 +1321,11 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M) {
continue;
}
+ if (!allowPGOOutlining(RunOutlinerMode, PSI, BFI, MBB)) {
+ ++NumPGOBlockedOutlined;
+ continue;
+ }
+
// MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TII);
}
@@ -1434,10 +1498,22 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
// the user how the outliner is running.
LLVM_DEBUG({
dbgs() << "Machine Outliner: Running on ";
- if (RunOnAllFunctions)
+ switch (RunOutlinerMode) {
+ case RunOutliner::AlwaysOutline:
dbgs() << "all functions";
- else
+ break;
+ case RunOutliner::OptimisticPGO:
+ dbgs() << "optimistically cold functions";
+ break;
+ case RunOutliner::ConservativePGO:
+ dbgs() << "conservatively cold functions";
+ break;
+ case RunOutliner::TargetDefault:
dbgs() << "target-default functions";
+ break;
+ case RunOutliner::NeverOutline:
+ llvm_unreachable("should not outline");
+ }
dbgs() << "\n";
});
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index f83973c30b48a..b6169e6c4dc34 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -134,12 +134,18 @@ static cl::opt<cl::boolOrDefault> DebugifyCheckAndStripAll(
static cl::opt<RunOutliner> EnableMachineOutliner(
"enable-machine-outliner", cl::desc("Enable the machine outliner"),
cl::Hidden, cl::ValueOptional, cl::init(RunOutliner::TargetDefault),
- cl::values(clEnumValN(RunOutliner::AlwaysOutline, "always",
- "Run on all functions guaranteed to be beneficial"),
- clEnumValN(RunOutliner::NeverOutline, "never",
- "Disable all outlining"),
- // Sentinel value for unspecified option.
- clEnumValN(RunOutliner::AlwaysOutline, "", "")));
+ cl::values(
+ clEnumValN(RunOutliner::AlwaysOutline, "always",
+ "Run on all functions guaranteed to be beneficial"),
+ clEnumValN(RunOutliner::OptimisticPGO, "optimistic-pgo",
+ "Outline cold code only. If a code block does not have "
+ "profile data, optimistically assume it is cold."),
+ clEnumValN(RunOutliner::ConservativePGO, "conservative-pgo",
+ "Outline cold code only. If a code block does not have "
+ "profile data, conservatively assume it is hot."),
+ clEnumValN(RunOutliner::NeverOutline, "never", "Disable all outlining"),
+ // Sentinel value for unspecified option.
+ clEnumValN(RunOutliner::AlwaysOutline, "", "")));
static cl::opt<bool> EnableGlobalMergeFunc(
"enable-global-merge-func", cl::Hidden,
cl::desc("Enable global merge functions that are based on hash function"));
@@ -1224,12 +1230,9 @@ void TargetPassConfig::addMachinePasses() {
if (TM->Options.EnableMachineOutliner &&
getOptLevel() != CodeGenOptLevel::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
- bool RunOnAllFunctions =
- (EnableMachineOutliner == RunOutliner::AlwaysOutline);
- bool AddOutliner =
- RunOnAllFunctions || TM->Options.SupportsDefaultOutlining;
- if (AddOutliner)
- addPass(createMachineOutlinerPass(RunOnAllFunctions));
+ if (EnableMachineOutliner != RunOutliner::TargetDefault ||
+ TM->Options.SupportsDefaultOutlining)
+ addPass(createMachineOutlinerPass(EnableMachineOutliner));
}
if (GCEmptyBlocks)
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll b/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
index c435093b794e3..0fbf2bc43d1ea 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-flags.ll
@@ -1,25 +1,15 @@
; REQUIRES: asserts
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -enable-machine-outliner=always \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=ALWAYS
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=always -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,ALWAYS
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -enable-machine-outliner \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=ENABLE
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,TARGET-DEFAULT
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=NEVER
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=optimistic-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,OPTIMISTIC
-; RUN: llc %s -debug-pass=Structure -verify-machineinstrs \
-; RUN: --debug-only=machine-outliner -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=NOT-ADDED
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=conservative-pgo -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefixes=CHECK,CONSERVATIVE
-; RUN: llc %s -O=0 -debug-pass=Structure -verify-machineinstrs \
-; RUN: -mtriple arm64---- -o /dev/null 2>&1 \
-; RUN: | FileCheck %s -check-prefix=OPTNONE
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -enable-machine-outliner=never -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
+; RUN: llc %s -debug-pass=Structure -verify-machineinstrs --debug-only=machine-outliner -O=0 -mtriple arm64---- -o /dev/null 2>&1 | FileCheck %s -check-prefix=DISABLED
; Make sure that the outliner is added to the pass pipeline only when the
; appropriate flags/settings are set. Make sure it isn't added otherwise.
@@ -27,23 +17,21 @@
; Cases where it should be added:
; * -enable-machine-outliner
; * -enable-machine-outliner=always
-; * -enable-machine-outliner is not passed (AArch64 supports
-; target-default outlining)
+; * -enable-machine-outliner=optimistic-pgo
+; * -enable-machine-outliner=conservative-pgo
+; * -enable-machine-outliner is not passed (AArch64 supports target-default outlining)
;
; Cases where it should not be added:
; * -O0 or equivalent
; * -enable-machine-outliner=never is passed
-; ALWAYS: Machine Outliner
+; CHECK: Machine Outliner
+; DISABLED-NOT: Machine Outliner
; ALWAYS: Machine Outliner: Running on all functions
-; ENABLE: Machine Outliner
-; ENABLE: Machine Outliner: Running on all functions
-; NEVER-NOT: Machine Outliner
-; NOT-ADDED: Machine Outliner
-; NOT-ADDED: Machine Outliner: Running on target-default functions
-; OPTNONE-NOT: Machine Outliner
+; OPTIMISTIC: Machine Outliner: Running on optimistically cold functions
+; CONSERVATIVE: Machine Outliner: Running on conservatively cold functions
+; TARGET-DEFAULT: Machine Outliner: Running on target-default functions
define void @foo() {
ret void;
}
-
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll b/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll
new file mode 100644
index 0000000000000..d0ea5e9ae1018
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-pgo.ll
@@ -0,0 +1,83 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-profdata merge %t/a.proftext -o %t/a.profdata
+; RUN: opt %t/a.ll -passes=pgo-instr-use -pgo-test-profile-file=%t/a.profdata -S -o %t/a2.ll
+
+; RUN: llc < %t/a2.ll -enable-machine-outliner=conservative-pgo -mtriple=aarch64-linux-gnu -profile-summary-cold-count=0 | FileCheck %s --check-prefixes=CHECK,CONSERVATIVE
+; RUN: llc < %t/a2.ll -enable-machine-outliner=optimistic-pgo -mtriple=aarch64-linux-gnu -profile-summary-cold-count=0 | FileCheck %s --check-prefixes=CHECK,OPTIMISTIC
+
+;--- a.ll
+declare void @z(i32, i32, i32, i32)
+
+; CHECK-LABEL: always_outline:
+define void @always_outline() cold {
+entry:
+; CHECK: [[OUTLINED:OUTLINED_FUNCTION_[0-9]+]]
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: cold:
+define void @cold() {
+entry:
+; CHECK: [[OUTLINED]]
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: hot:
+define void @hot() minsize {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile_minsize:
+define void @no_profile_minsize() minsize {
+entry:
+; CONSERVATIVE-NOT: [[OUTLINED]]
+; OPTIMISTIC: [[OUTLINED]]
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK-LABEL: no_profile_optsize:
+define void @no_profile_optsize() optsize {
+entry:
+; CHECK-NOT: [[OUTLINED]]
+ tail call void @z(i32 1, i32 2, i32 3, i32 4)
+ ret void
+; CHECK: .cfi_endproc
+}
+
+; CHECK: [[OUTLINED]]:
+; CHECK-SAME: // @{{.*}} Tail Call
+; CHECK: mov w0, #1
+; CHECK-NEXT: mov w1, #2
+; CHECK-NEXT: mov w2, #3
+; CHECK-NEXT: mov w3, #4
+; CHECK-NEXT: b z
+
+;--- a.proftext
+:ir
+
+cold
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+0
+
+hot
+# Func Hash:
+742261418966908927
+# Num Counters:
+1
+# Counter Values:
+100
More information about the llvm-commits
mailing list