[llvm] [CodeGen][StaticDataPartitioning]Place local-linkage global variables in hot or unlikely prefixed sections based on profile information (PR #125756)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 13:59:25 PDT 2025
https://github.com/mingmingl-llvm updated https://github.com/llvm/llvm-project/pull/125756
>From 8eea1ea0109f07d590d189bc17d5f5411556d77a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 11:19:44 -0800
Subject: [PATCH 1/7] [CodeGen][StaticDataPartitioning]Place module-internal
global variables based on profile information
---
llvm/include/llvm/IR/Function.h | 6 -
llvm/include/llvm/IR/GlobalObject.h | 11 +
llvm/include/llvm/IR/MDBuilder.h | 4 +-
llvm/lib/CodeGen/StaticDataSplitter.cpp | 201 +++++++++++++-----
.../CodeGen/TargetLoweringObjectFileImpl.cpp | 6 +
llvm/lib/IR/Function.cpp | 16 --
llvm/lib/IR/Globals.cpp | 30 +++
llvm/lib/IR/MDBuilder.cpp | 6 +-
llvm/test/CodeGen/X86/data-section-prefix.ll | 27 +++
.../CodeGen/X86/global-variable-partition.ll | 159 ++++++++++++++
.../CodeGenPrepare/X86/section-samplepgo.ll | 4 +-
.../Transforms/CodeGenPrepare/X86/section.ll | 4 +-
.../Transforms/HotColdSplit/coldentrycount.ll | 4 +-
.../section-accurate-samplepgo.ll | 6 +-
14 files changed, 396 insertions(+), 88 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/data-section-prefix.ll
create mode 100644 llvm/test/CodeGen/X86/global-variable-partition.ll
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index fcd5396ccfdbc..29041688124bc 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -346,12 +346,6 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
/// sample PGO, to enable the same inlines as the profiled optimized binary.
DenseSet<GlobalValue::GUID> getImportGUIDs() const;
- /// Set the section prefix for this function.
- void setSectionPrefix(StringRef Prefix);
-
- /// Get the section prefix for this function.
- std::optional<StringRef> getSectionPrefix() const;
-
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
bool hasGC() const {
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 08edc13d81f88..bb50c39813e14 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -124,6 +124,17 @@ class GlobalObject : public GlobalValue {
/// appropriate default object file section.
void setSection(StringRef S);
+ /// Set the section prefix for this global object.
+ void setSectionPrefix(StringRef Prefix);
+
+ /// Update the section prefix, unless the existing prefix is the same as
+ /// `KeepPrefix`.
+ void updateSectionPrefix(StringRef Prefix,
+ std::optional<StringRef> KeepPrefix = std::nullopt);
+
+ /// Get the section prefix for this global object.
+ std::optional<StringRef> getSectionPrefix() const;
+
bool hasComdat() const { return getComdat() != nullptr; }
const Comdat *getComdat() const { return ObjComdat; }
Comdat *getComdat() { return ObjComdat; }
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index e02ec8f5a3d8b..ce4e1da656049 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -89,8 +89,8 @@ class MDBuilder {
MDNode *createFunctionEntryCount(uint64_t Count, bool Synthetic,
const DenseSet<GlobalValue::GUID> *Imports);
- /// Return metadata containing the section prefix for a function.
- MDNode *createFunctionSectionPrefix(StringRef Prefix);
+ /// Return metadata containing the section prefix for a global object.
+ MDNode *createGlobalObjectSectionPrefix(StringRef Prefix);
/// Return metadata containing the pseudo probe descriptor for a function.
MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName);
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e5bf0a5a3a255..f09e3b41e0723 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -9,13 +9,13 @@
// The pass uses branch profile data to assign hotness based section qualifiers
// for the following types of static data:
// - Jump tables
+// - Module-internal global variables
// - Constant pools (TODO)
-// - Other module-internal data (TODO)
//
// For the original RFC of this pass please see
// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
-#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MBFIWrapper.h"
@@ -27,9 +27,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -46,12 +49,27 @@ class StaticDataSplitter : public MachineFunctionPass {
const MachineBlockFrequencyInfo *MBFI = nullptr;
const ProfileSummaryInfo *PSI = nullptr;
- // Returns true iff any jump table is hot-cold categorized.
- bool splitJumpTables(MachineFunction &MF);
+ void updateStats(bool ProfileAvailable, const MachineJumpTableInfo *MJTI);
+ void updateJumpTableStats(bool ProfileAvailable,
+ const MachineJumpTableInfo &MJTI);
- // Same as above but works on functions with profile information.
- bool splitJumpTablesWithProfiles(const MachineFunction &MF,
- MachineJumpTableInfo &MJTI);
+ // Use profiles to partition static data.
+ bool partitionStaticDataWithProfiles(MachineFunction &MF);
+
+ // If the global value is a local linkage global variable, return it.
+ // Otherwise, return nullptr.
+ const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV);
+
+ // Returns true if the global variable is in one of {.rodata, .bss, .data,
+ // .data.rel.ro} sections
+ bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
+
+ // Iterate all global variables in the module and update the section prefix
+ // of the module-internal data.
+ void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+
+ // Accumulated data profile count across machine functions in the module.
+ DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
public:
static char ID;
@@ -77,13 +95,24 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- return splitJumpTables(MF);
+ const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
+ MF.getFunction().hasProfileData();
+ bool Changed = false;
+
+ if (ProfileAvailable)
+ Changed |= partitionStaticDataWithProfiles(MF);
+
+ updateGlobalVariableSectionPrefix(MF);
+ updateStats(ProfileAvailable, MF.getJumpTableInfo());
+ return Changed;
}
-bool StaticDataSplitter::splitJumpTablesWithProfiles(
- const MachineFunction &MF, MachineJumpTableInfo &MJTI) {
+bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
int NumChangedJumpTables = 0;
+ const TargetMachine &TM = MF.getTarget();
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+
// Jump table could be used by either terminating instructions or
// non-terminating ones, so we walk all instructions and use
// `MachineOperand::isJTI()` to identify jump table operands.
@@ -92,63 +121,131 @@ bool StaticDataSplitter::splitJumpTablesWithProfiles(
for (const auto &MBB : MF) {
for (const MachineInstr &I : MBB) {
for (const MachineOperand &Op : I.operands()) {
- if (!Op.isJTI())
- continue;
- const int JTI = Op.getIndex();
- // This is not a source block of jump table.
- if (JTI == -1)
+ std::optional<uint64_t> Count = std::nullopt;
+ if (!Op.isJTI() && !Op.isGlobal())
continue;
- auto Hotness = MachineFunctionDataHotness::Hot;
+ Count = MBFI->getBlockProfileCount(&MBB);
+
+ if (Op.isJTI()) {
+ assert(MJTI != nullptr && "Jump table info is not available.");
+ const int JTI = Op.getIndex();
+ // This is not a source block of jump table.
+ if (JTI == -1)
+ continue;
+
+ auto Hotness = MachineFunctionDataHotness::Hot;
+
+ // Hotness is based on source basic block hotness.
+ // TODO: PSI APIs are about instruction hotness. Introduce API for
+ // data access hotness.
+ if (Count && PSI->isColdCount(*Count))
+ Hotness = MachineFunctionDataHotness::Cold;
- // Hotness is based on source basic block hotness.
- // TODO: PSI APIs are about instruction hotness. Introduce API for data
- // access hotness.
- if (PSI->isColdBlock(&MBB, MBFI))
- Hotness = MachineFunctionDataHotness::Cold;
+ if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
+ ++NumChangedJumpTables;
+ } else if (Op.isGlobal()) {
+ // Find global variables with local linkage
+ const GlobalVariable *GV =
+ getLocalLinkageGlobalVariable(Op.getGlobal());
+ if (!GV || !inStaticDataSection(GV, TM))
+ continue;
- if (MJTI.updateJumpTableEntryHotness(JTI, Hotness))
- ++NumChangedJumpTables;
+ // Accumulate data profile count across machine function
+ // instructions.
+ // TODO: Analyze global variable's initializers.
+ if (Count) {
+ auto [It, Inserted] =
+ DataProfileCounts.try_emplace(GV, APInt(128, 0));
+ It->second += *Count;
+ }
+ }
}
}
}
return NumChangedJumpTables > 0;
}
-bool StaticDataSplitter::splitJumpTables(MachineFunction &MF) {
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
- if (!MJTI || MJTI->getJumpTables().empty())
- return false;
-
- const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
- MF.getFunction().hasProfileData();
- auto statOnExit = llvm::make_scope_exit([&] {
- if (!AreStatisticsEnabled())
- return;
+void StaticDataSplitter::updateJumpTableStats(
+ bool ProfileAvailable, const MachineJumpTableInfo &MJTI) {
+ if (!ProfileAvailable) {
+ NumUnknownJumpTables += MJTI.getJumpTables().size();
+ return;
+ }
- if (!ProfileAvailable) {
- NumUnknownJumpTables += MJTI->getJumpTables().size();
- return;
+ for (size_t JTI = 0; JTI < MJTI.getJumpTables().size(); JTI++) {
+ auto Hotness = MJTI.getJumpTables()[JTI].Hotness;
+ if (Hotness == MachineFunctionDataHotness::Hot) {
+ ++NumHotJumpTables;
+ } else {
+ assert(Hotness == MachineFunctionDataHotness::Cold &&
+ "A jump table is either hot or cold when profile information is "
+ "available.");
+ ++NumColdJumpTables;
}
+ }
+}
- for (size_t JTI = 0; JTI < MJTI->getJumpTables().size(); JTI++) {
- auto Hotness = MJTI->getJumpTables()[JTI].Hotness;
- if (Hotness == MachineFunctionDataHotness::Hot) {
- ++NumHotJumpTables;
- } else {
- assert(Hotness == MachineFunctionDataHotness::Cold &&
- "A jump table is either hot or cold when profile information is "
- "available.");
- ++NumColdJumpTables;
- }
- }
- });
+void StaticDataSplitter::updateStats(bool ProfileAvailable,
+ const MachineJumpTableInfo *MJTI) {
+ if (!AreStatisticsEnabled())
+ return;
- // Place jump tables according to block hotness if function has profile data.
- if (ProfileAvailable)
- return splitJumpTablesWithProfiles(MF, *MJTI);
+ if (MJTI)
+ updateJumpTableStats(ProfileAvailable, *MJTI);
+}
- return true;
+const GlobalVariable *
+StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) {
+ if (!GV || GV->isDeclarationForLinker())
+ return nullptr;
+
+ return GV->hasLocalLinkage() ? dyn_cast<GlobalVariable>(GV) : nullptr;
+}
+
+bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
+ const TargetMachine &TM) {
+ assert(GV && "Caller guaranteed");
+
+ // Skip LLVM reserved symbols.
+ if (GV->getName().starts_with("llvm."))
+ return false;
+
+ SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+ return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() ||
+ Kind.isBSS();
+}
+
+void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+ MachineFunction &MF) {
+ for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
+ if (GV.isDeclarationForLinker())
+ continue;
+ // DataProfileCounts accumulates data profile count across all machine
+ // function instructions, and it can't model the indirect accesses through
+ // other global variables' initializers.
+ // TODO: Analyze the users of module-internal global variables and see
+ // through the users' initializers. Do not place a global variable into
+ // unlikely section if any of its users are potentially hot.
+ auto Iter = DataProfileCounts.find(&GV);
+ if (Iter == DataProfileCounts.end())
+ continue;
+
+ // StaticDataSplitter is made a machine function pass rather than a module
+ // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function
+ // analysis pass and cannot be used for a legacy module pass.
+ // As a result, we use `DataProfileCounts` to accumulate data
+ // profile count across machine functions and update global variable section
+ // prefix once per machine function.
+ // FIXME: Make StaticDataSplitter a module pass under new pass manager
+ // framework, and set global variable section prefix once per module after
+ // analyzing all machine functions.
+ if (PSI->isColdCount(Iter->second.getZExtValue())) {
+ GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+ } else if (PSI->isHotCount(Iter->second.getZExtValue())) {
+ GV.updateSectionPrefix("hot");
+ }
+ }
}
char StaticDataSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3c2c7c8c9fed6..d20ab29cc1979 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -670,6 +670,7 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
}
bool HasPrefix = false;
+
if (const auto *F = dyn_cast<Function>(GO)) {
// Jump table hotness takes precedence over its enclosing function's hotness
// if it's known. The function's section prefix is used if jump table entry
@@ -687,6 +688,11 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
raw_svector_ostream(Name) << '.' << *Prefix;
HasPrefix = true;
}
+ } else if (const auto *GV = dyn_cast<GlobalVariable>(GO)) {
+ if (std::optional<StringRef> Prefix = GV->getSectionPrefix()) {
+ raw_svector_ostream(Name) << '.' << *Prefix;
+ HasPrefix = true;
+ }
}
if (UniqueSectionName) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index e6f0d64d071ba..5666f0a53866f 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1164,22 +1164,6 @@ DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
return R;
}
-void Function::setSectionPrefix(StringRef Prefix) {
- MDBuilder MDB(getContext());
- setMetadata(LLVMContext::MD_section_prefix,
- MDB.createFunctionSectionPrefix(Prefix));
-}
-
-std::optional<StringRef> Function::getSectionPrefix() const {
- if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
- assert(cast<MDString>(MD->getOperand(0))->getString() ==
- "function_section_prefix" &&
- "Metadata not match");
- return cast<MDString>(MD->getOperand(1))->getString();
- }
- return std::nullopt;
-}
-
bool Function::nullPointerIsDefined() const {
return hasFnAttribute(Attribute::NullPointerIsValid);
}
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index db5e1cb57b1ba..884089262e465 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
@@ -286,6 +287,35 @@ void GlobalObject::setSection(StringRef S) {
setGlobalObjectFlag(HasSectionHashEntryBit, !S.empty());
}
+void GlobalObject::setSectionPrefix(StringRef Prefix) {
+ MDBuilder MDB(getContext());
+ setMetadata(LLVMContext::MD_section_prefix,
+ MDB.createGlobalObjectSectionPrefix(Prefix));
+}
+
+void GlobalObject::updateSectionPrefix(StringRef Prefix,
+ std::optional<StringRef> KeepPrefix) {
+ auto SectionPrefix = getSectionPrefix();
+ if (SectionPrefix && (*SectionPrefix == Prefix ||
+ (KeepPrefix && *SectionPrefix == *KeepPrefix)))
+ return;
+
+ setSectionPrefix(Prefix);
+ return;
+}
+
+std::optional<StringRef> GlobalObject::getSectionPrefix() const {
+ if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
+ [[maybe_unused]] StringRef MDName =
+ cast<MDString>(MD->getOperand(0))->getString();
+ assert((MDName == "section_prefix" ||
+ (isa<Function>(this) && MDName == "function_section_prefix")) &&
+ "Metadata not match");
+ return cast<MDString>(MD->getOperand(1))->getString();
+ }
+ return std::nullopt;
+}
+
bool GlobalValue::isNobuiltinFnDef() const {
const Function *F = dyn_cast<Function>(this);
if (!F || F->empty())
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 26c8ab9fc36c8..b6aa8844a7eaf 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -87,9 +87,9 @@ MDNode *MDBuilder::createFunctionEntryCount(
return MDNode::get(Context, Ops);
}
-MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
- return MDNode::get(
- Context, {createString("function_section_prefix"), createString(Prefix)});
+MDNode *MDBuilder::createGlobalObjectSectionPrefix(StringRef Prefix) {
+ return MDNode::get(Context,
+ {createString("section_prefix"), createString(Prefix)});
}
MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
diff --git a/llvm/test/CodeGen/X86/data-section-prefix.ll b/llvm/test/CodeGen/X86/data-section-prefix.ll
new file mode 100644
index 0000000000000..4812fc70758fb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/data-section-prefix.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-linux-gnu -data-sections %s -o - | FileCheck %s --check-prefix=ELF
+; RUN: llc -mtriple x86_64-linux-gnu -unique-section-names=0 -data-sections %s -o - | FileCheck %s --check-prefix=ELF-NOUNIQ
+
+; RUN: llc -mtriple x86_64-windows-msvc -data-sections %s -o - | FileCheck %s --check-prefix=COFF-MSVC
+
+; ELF: .section .data.hot.foo,
+; ELF: .section .data.bar,
+; ELF: .section .bss.unlikely.baz,
+; ELF: .section .bss.quz,
+
+; ELF-NOUNIQ: .section .data.hot.,"aw",@progbits,unique,1
+; ELF-NOUNIQ: .section .data,"aw",@progbits,unique,2
+; ELF-NOUNIQ: .section .bss.unlikely.,"aw",@nobits,unique,3
+; ELF-NOUNIQ: .section .bss,"aw",@nobits,unique,4
+
+; COFF-MSVC: .section .data,"dw",one_only,foo
+; COFF-MSVC: .section .data,"dw",one_only,bar
+; COFF-MSVC: .section .bss,"bw",one_only,baz
+; COFF-MSVC: .section .bss,"bw",one_only,quz
+
+@foo = global i32 1, !section_prefix !0
+@bar = global i32 2
+@baz = global i32 0, !section_prefix !1
+@quz = global i32 0
+
+!0 = !{!"section_prefix", !"hot"}
+!1 = !{!"section_prefix", !"unlikely"}
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
new file mode 100644
index 0000000000000..d457d766999e2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -0,0 +1,159 @@
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -data-sections=true \
+; RUN: -unique-section-names=true -relocation-model=pic \
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -data-sections=true \
+; RUN: -unique-section-names=false -relocation-model=pic \
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA
+
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -data-sections=false \
+; RUN: -unique-section-names=false -relocation-model=pic \
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA
+
+; SYM: .section .rodata.str1.1.hot.
+; UNIQ: .section .rodata.str1.1.hot.,"aMS",@progbits,1
+; AGG: .section .rodata.str1.1.hot
+; DATA: .L.str
+; DATA: "hot\t"
+; DATA: .L.str.1
+; DATA: "%d\t%d\t%d\n"
+
+
+; SYM: .section .data.rel.ro.hot.hot_relro_array
+; SYM: .section .data.hot.hot_data,"aw",@progbits
+; SYM: .section .bss.hot.hot_bss,"aw",@nobits
+
+; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3
+; UNIQ: .section .data.hot.,"aw",@progbits,unique,4
+; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5
+
+; AGG: .section .data.rel.ro.hot.,"aw",@progbits
+; AGG: .section .data.hot.,"aw",@progbits
+; AGG: .section .bss.hot.,"aw",@nobits
+
+
+; SYM: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1
+; UNIQ: section .rodata.str1.1.unlikely.,"aMS",@progbits,1
+; AGG: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1
+; DATA: .L.str.2:
+; DATA: "cold%d\t%d\t%d\n"
+
+
+; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits
+; SYM: .section .data.unlikely.cold_data,"aw",@progbits
+; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
+; SYM: .section .bss.unlikely._ZL4bss2,"aw",@nobits
+; SYM: .section .data.unlikely._ZL5data3,"aw",@progbits
+
+; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6
+; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7
+; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8
+; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9
+; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10
+
+; AGG: .section .bss.unlikely.,"aw",@nobits
+; AGG: .section .data.unlikely.,"aw",@progbits
+; AGG: .section .data.rel.ro.unlikely.,"aw",@progbits
+; AGG: .section .bss.unlikely.,"aw",@nobits
+; AGG: .section .data.unlikely.,"aw",@progbits
+
+@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
+@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
+@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_data = internal global i32 5
+@hot_bss = internal global i32 0
+@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
+@cold_bss = internal global i32 0
+@cold_data = internal global i32 4
+@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
+@_ZL4bss2 = internal global i32 0
+@_ZL5data3 = internal global i32 3
+
+define void @hot_callee(i32 %0) !prof !51 {
+ %2 = call i32 (ptr, ...) @printf(ptr @.str)
+ %3 = srem i32 %0, 2
+ %4 = sext i32 %3 to i64
+ %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+ %6 = load ptr, ptr %5
+ %7 = load i32, ptr %6
+ %8 = load i32, ptr @hot_data
+ %9 = load i32, ptr @hot_bss
+ %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9)
+ ret void
+}
+
+define void @cold_callee(i32 %0) !prof !52 {
+ %2 = load i32, ptr @cold_bss
+ %3 = load i32, ptr @cold_data
+ %4 = srem i32 %0, 2
+ %5 = sext i32 %4 to i64
+ %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
+ %7 = load ptr, ptr %6
+ %8 = load i32, ptr %7
+ %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8)
+ ret void
+}
+
+define i32 @main(i32 %0, ptr %1) !prof !52 {
+ %3 = call i64 @time(ptr null)
+ %4 = trunc i64 %3 to i32
+ call void @srand(i32 %4)
+ br label %11
+
+5: ; preds = %11
+ %6 = call i32 @rand()
+ store i32 %6, ptr @cold_bss
+ store i32 %6, ptr @cold_data
+ store i32 %6, ptr @_ZL4bss2
+ store i32 %6, ptr @_ZL5data3
+ call void @cold_callee(i32 %6)
+ ret i32 0
+
+11: ; preds = %11, %2
+ %12 = phi i32 [ 0, %2 ], [ %19, %11 ]
+ %13 = call i32 @rand()
+ %14 = srem i32 %13, 2
+ %15 = sext i32 %14 to i64
+ %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15
+ %17 = load ptr, ptr %16
+ store i32 %13, ptr %17
+ store i32 %13, ptr @hot_data
+ %18 = add i32 %13, 1
+ store i32 %18, ptr @hot_bss
+ call void @hot_callee(i32 %12)
+ %19 = add i32 %12, 1
+ %20 = icmp eq i32 %19, 100000
+ br i1 %20, label %5, label %11, !prof !53
+}
+
+declare void @srand(i32)
+declare i64 @time(ptr)
+declare i32 @rand()
+declare i32 @printf(ptr, ...)
+
+!llvm.module.flags = !{!12}
+
+!12 = !{i32 1, !"ProfileSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !23}
+!14 = !{!"ProfileFormat", !"InstrProf"}
+!15 = !{!"TotalCount", i64 1460183}
+!16 = !{!"MaxCount", i64 849024}
+!17 = !{!"MaxInternalCount", i64 32769}
+!18 = !{!"MaxFunctionCount", i64 849024}
+!19 = !{!"NumCounts", i64 23627}
+!20 = !{!"NumFunctions", i64 3271}
+!23 = !{!"DetailedSummary", !24}
+!24 = !{!36, !40}
+!36 = !{i32 990000, i64 166, i32 73}
+!40 = !{i32 999999, i64 1, i32 1443}
+!51 = !{!"function_entry_count", i64 100000}
+!52 = !{!"function_entry_count", i64 1}
+!53 = !{!"branch_weights", i32 1, i32 99999}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
index 58af88d8cf365..48d02e5cebc69 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section-samplepgo.ll
@@ -34,8 +34,8 @@ define void @cold_func() !prof !16 {
ret void
}
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
index 6dad1122e4294..4baa0b5baa4be 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/section.ll
@@ -66,8 +66,8 @@ define void @cold_func3() !prof !16 {
ret void
}
-; CHECK: ![[HOT_ID]] = !{!"function_section_prefix", !"hot"}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[HOT_ID]] = !{!"section_prefix", !"hot"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
diff --git a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
index 6e5ef1aa25392..1e8825e651ec4 100644
--- a/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
+++ b/llvm/test/Transforms/HotColdSplit/coldentrycount.ll
@@ -27,9 +27,9 @@ declare void @sink() cold
; CHECK: define {{.*}} @fun.cold.1{{.*}} ![[PROF:[0-9]+]] {{.*}}section_prefix ![[UNLIKELY:[0-9]+]]
; CHECK: ![[HOTPROF]] = !{!"function_entry_count", i64 100}
-; CHECK: ![[LIKELY]] = !{!"function_section_prefix", !"hot"}
+; CHECK: ![[LIKELY]] = !{!"section_prefix", !"hot"}
; CHECK: ![[PROF]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[UNLIKELY]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[UNLIKELY]] = !{!"section_prefix", !"unlikely"}
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
diff --git a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
index ef2ddbc33cee4..af4b875818f6f 100644
--- a/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
+++ b/llvm/test/Transforms/SampleProfile/section-accurate-samplepgo.ll
@@ -36,11 +36,11 @@ attributes #1 = { "use-sample-profile" }
; CHECK: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
; CHECK: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; CHECK: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; CHECK: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
; UNKNOWN: ![[NOPROFILE_ID]] = !{!"function_entry_count", i64 -1}
-; UNKNOWN: ![[UNKNOWN_ID]] = !{!"function_section_prefix", !"unknown"}
+; UNKNOWN: ![[UNKNOWN_ID]] = !{!"section_prefix", !"unknown"}
; ACCURATE: ![[ZERO_ID]] = !{!"function_entry_count", i64 0}
-; ACCURATE: ![[COLD_ID]] = !{!"function_section_prefix", !"unlikely"}
+; ACCURATE: ![[COLD_ID]] = !{!"section_prefix", !"unlikely"}
!llvm.module.flags = !{!1}
!1 = !{i32 1, !"ProfileSummary", !2}
!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
>From 93d9881754b4713a6202011a2e1ffe520cf80367 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 4 Feb 2025 12:13:52 -0800
Subject: [PATCH 2/7] add comment for bss2 and data3
---
.../CodeGen/X86/global-variable-partition.ll | 22 ++++++++++++-------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index d457d766999e2..bb77f3362406b 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -50,8 +50,8 @@ target triple = "x86_64-unknown-linux-gnu"
; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits
; SYM: .section .data.unlikely.cold_data,"aw",@progbits
; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits
-; SYM: .section .bss.unlikely._ZL4bss2,"aw",@nobits
-; SYM: .section .data.unlikely._ZL5data3,"aw",@progbits
+; SYM: .section .bss.unlikely.bss2,"aw",@nobits
+; SYM: .section .data.unlikely.data3,"aw",@progbits
; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6
; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7
@@ -67,15 +67,21 @@ target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
-@hot_relro_array = internal constant [2 x ptr] [ptr @_ZL4bss2, ptr @_ZL5data3]
+@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@hot_data = internal global i32 5
@hot_bss = internal global i32 0
@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1
@cold_bss = internal global i32 0
@cold_data = internal global i32 4
-@cold_relro_array = internal constant [2 x ptr] [ptr @_ZL5data3, ptr @_ZL4bss2]
-@_ZL4bss2 = internal global i32 0
-@_ZL5data3 = internal global i32 3
+@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
+
+; COM: Currently static-data-splitter only analyzes access from code.
+; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; COM: and @cold_relro_array.
+; COM: A follow-up item is to analyze access from data and prune the unlikely
+; COM: list.
+@bss2 = internal global i32 0
+@data3 = internal global i32 3
define void @hot_callee(i32 %0) !prof !51 {
%2 = call i32 (ptr, ...) @printf(ptr @.str)
@@ -112,8 +118,8 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
%6 = call i32 @rand()
store i32 %6, ptr @cold_bss
store i32 %6, ptr @cold_data
- store i32 %6, ptr @_ZL4bss2
- store i32 %6, ptr @_ZL5data3
+ store i32 %6, ptr @bss2
+ store i32 %6, ptr @data3
call void @cold_callee(i32 %6)
ret i32 0
>From 8f21570c7d9dd528ad56ff2eefe85af8125ba99c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:20:12 -0800
Subject: [PATCH 3/7] apply code review suggestions
---
llvm/lib/CodeGen/StaticDataSplitter.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index e6e20db0724a5..0716f6dac27f1 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -123,11 +123,10 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
for (const auto &MBB : MF) {
for (const MachineInstr &I : MBB) {
for (const MachineOperand &Op : I.operands()) {
- std::optional<uint64_t> Count = std::nullopt;
if (!Op.isJTI() && !Op.isGlobal())
continue;
- Count = MBFI->getBlockProfileCount(&MBB);
+ std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
if (Op.isJTI()) {
assert(MJTI != nullptr && "Jump table info is not available.");
>From f07d34d0d02bf9a8c46364cf4e1e605d3b626b92 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 5 Feb 2025 14:48:39 -0800
Subject: [PATCH 4/7] record global variable section prefix updates as module
updates
---
llvm/include/llvm/IR/GlobalObject.h | 2 +-
llvm/lib/CodeGen/StaticDataSplitter.cpp | 13 +++++++++----
llvm/lib/IR/Globals.cpp | 6 +++---
3 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index bb50c39813e14..400ea6a1a7fca 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -129,7 +129,7 @@ class GlobalObject : public GlobalValue {
/// Update the section prefix, unless the existing prefix is the same as
/// `KeepPrefix`.
- void updateSectionPrefix(StringRef Prefix,
+ bool updateSectionPrefix(StringRef Prefix,
std::optional<StringRef> KeepPrefix = std::nullopt);
/// Get the section prefix for this global object.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index 0716f6dac27f1..bbe3f9ff8cbd3 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -59,7 +59,7 @@ class StaticDataSplitter : public MachineFunctionPass {
// Iterate all global variables in the module and update the section prefix
// of the module-internal data.
- void updateGlobalVariableSectionPrefix(MachineFunction &MF);
+ bool updateGlobalVariableSectionPrefix(MachineFunction &MF);
// Accummulated data profile count across machine functions in the module.
DenseMap<const GlobalVariable *, APInt> DataProfileCounts;
@@ -105,6 +105,8 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
bool Changed = partitionStaticDataWithProfiles(MF);
+ Changed |= updateGlobalVariableSectionPrefix(MF);
+
updateStatsWithProfiles(MF);
return Changed;
}
@@ -188,8 +190,9 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
Kind.isBSS();
}
-void StaticDataSplitter::updateGlobalVariableSectionPrefix(
+bool StaticDataSplitter::updateGlobalVariableSectionPrefix(
MachineFunction &MF) {
+ bool Changed = false;
for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
if (GV.isDeclarationForLinker())
continue;
@@ -213,11 +216,13 @@ void StaticDataSplitter::updateGlobalVariableSectionPrefix(
// framework, and set global variable section prefix once per module after
// analyzing all machine functions.
if (PSI->isColdCount(Iter->second.getZExtValue())) {
- GV.updateSectionPrefix("unlikely", std::make_optional(StringRef("hot")));
+ Changed |= GV.updateSectionPrefix("unlikely",
+ std::make_optional(StringRef("hot")));
} else if (PSI->isHotCount(Iter->second.getZExtValue())) {
- GV.updateSectionPrefix("hot");
+ Changed |= GV.updateSectionPrefix("hot");
}
}
+ return Changed;
}
void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 884089262e465..5baf854cd552b 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -293,15 +293,15 @@ void GlobalObject::setSectionPrefix(StringRef Prefix) {
MDB.createGlobalObjectSectionPrefix(Prefix));
}
-void GlobalObject::updateSectionPrefix(StringRef Prefix,
+bool GlobalObject::updateSectionPrefix(StringRef Prefix,
std::optional<StringRef> KeepPrefix) {
auto SectionPrefix = getSectionPrefix();
if (SectionPrefix && (*SectionPrefix == Prefix ||
(KeepPrefix && *SectionPrefix == *KeepPrefix)))
- return;
+ return false;
setSectionPrefix(Prefix);
- return;
+ return true;
}
std::optional<StringRef> GlobalObject::getSectionPrefix() const {
>From 4a2a881a66bc99ba28609c05c1a03e432d16694c Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 7 Feb 2025 17:46:52 -0800
Subject: [PATCH 5/7] remove blank line
---
llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index d20ab29cc1979..6cbc4b9776a1b 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -670,7 +670,6 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
}
bool HasPrefix = false;
-
if (const auto *F = dyn_cast<Function>(GO)) {
// Jump table hotness takes precedence over its enclosing function's hotness
// if it's known. The function's section prefix is used if jump table entry
>From 1f50494cd83b2c222191353050a955dd36beb610 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 24 Feb 2025 15:51:49 -0800
Subject: [PATCH 6/7] Implement module-wide analysis of global variable
hotness. * In StaticDataProfileInfo.h/cpp, add an immutable pass to keep
track of constants and their profile information across functions in a
module. * Add a module pass, StaticDataAnnotator, to set global variable's
section prefix based on module-wide hotness.
---
.../llvm/Analysis/StaticDataProfileInfo.h | 68 +++++++
llvm/include/llvm/CodeGen/Passes.h | 9 +-
llvm/include/llvm/InitializePasses.h | 2 +
.../llvm/Passes/MachinePassRegistry.def | 1 +
llvm/lib/Analysis/CMakeLists.txt | 1 +
llvm/lib/Analysis/StaticDataProfileInfo.cpp | 50 +++++
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/CodeGen.cpp | 1 +
llvm/lib/CodeGen/StaticDataAnnotator.cpp | 119 ++++++++++++
llvm/lib/CodeGen/StaticDataSplitter.cpp | 103 ++++-------
llvm/lib/CodeGen/TargetPassConfig.cpp | 7 +-
.../CodeGen/X86/global-variable-partition.ll | 173 +++++++++++-------
12 files changed, 392 insertions(+), 143 deletions(-)
create mode 100644 llvm/include/llvm/Analysis/StaticDataProfileInfo.h
create mode 100644 llvm/lib/Analysis/StaticDataProfileInfo.cpp
create mode 100644 llvm/lib/CodeGen/StaticDataAnnotator.cpp
diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
new file mode 100644
index 0000000000000..4220f7d820db9
--- /dev/null
+++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h
@@ -0,0 +1,68 @@
+#ifndef LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+#define LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// A class that holds the constants that represent static data and their
+/// profile information and provides methods to operate on them.
+///
+/// NOTE(review): this header names std::optional but does not include
+/// <optional> directly; it presumably arrives transitively -- confirm.
+class StaticDataProfileInfo {
+public:
+ /// Accumulated profile count of each constant that will be lowered to static
+ /// data sections. A constant that was only ever added without a count has no
+ /// entry here.
+ DenseMap<const Constant *, uint64_t> ConstantProfileCounts;
+
+ /// Keeps track of the constants that are seen at least once without profile
+ /// counts.
+ DenseSet<const Constant *> ConstantWithoutCounts;
+
+public:
+ StaticDataProfileInfo() = default;
+
+ /// If \p Count is not nullopt, add it to the profile count of the constant \p
+ /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if
+ /// the result exceeds it. Otherwise, mark the constant as having no profile
+ /// count.
+ void addConstantProfileCount(const Constant *C,
+ std::optional<uint64_t> Count);
+
+ /// If \p C has a count, return it. Otherwise, return std::nullopt.
+ std::optional<uint64_t> getConstantProfileCount(const Constant *C) const;
+
+ /// Return true if the constant \p C is seen at least once without profiles.
+ /// A constant can have both a recorded count and an unknown-count mark.
+ bool hasUnknownCount(const Constant *C) const {
+ return ConstantWithoutCounts.count(C);
+ }
+};
+
+/// This wraps the StaticDataProfileInfo object as an immutable pass, for a
+/// backend pass to operate on.
+class StaticDataProfileInfoWrapperPass : public ImmutablePass {
+public:
+ static char ID;
+ StaticDataProfileInfoWrapperPass();
+ /// Allocates a fresh, empty StaticDataProfileInfo.
+ bool doInitialization(Module &M) override;
+ /// Releases the StaticDataProfileInfo object.
+ bool doFinalization(Module &M) override;
+
+ /// Accessors dereference `Info` unconditionally, so they are only valid
+ /// between doInitialization() and doFinalization().
+ StaticDataProfileInfo &getStaticDataProfileInfo() { return *Info; }
+ const StaticDataProfileInfo &getStaticDataProfileInfo() const {
+ return *Info;
+ }
+
+ /// This pass provides StaticDataProfileInfo for reads/writes but does not
+ /// modify \p M or other analyses. All analyses are preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+private:
+ /// Owned profile info; null outside the initialization/finalization window.
+ std::unique_ptr<StaticDataProfileInfo> Info;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_STATICDATAPROFILEINFO_H
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index b5d2a7e6bf035..95e4de91b068c 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -71,10 +71,15 @@ namespace llvm {
/// using profile information.
MachineFunctionPass *createMachineFunctionSplitterPass();
- /// createStaticDataSplitterPass - This pass partitions a static data section
- /// into a hot and cold section using profile information.
+ /// createStaticDataSplitterPass - This is a machine-function pass that
+ /// categorizes static data hotness using profile information.
MachineFunctionPass *createStaticDataSplitterPass();
+ /// createStaticDataAnnotatorPass - This is a module pass that reads from
+ /// StaticDataProfileInfoWrapperPass and annotates the section prefix of
+ /// global variables.
+ ModulePass *createStaticDataAnnotatorPass();
+
/// MachineFunctionPrinter pass - This pass prints out the machine function to
/// the given stream as a debugging tool.
MachineFunctionPass *
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index b8df4d1ecab1d..96c240ab4965f 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -205,6 +205,8 @@ void initializeMachineLoopInfoWrapperPassPass(PassRegistry &);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry &);
void initializeMachineOutlinerPass(PassRegistry &);
+void initializeStaticDataProfileInfoWrapperPassPass(PassRegistry &);
+void initializeStaticDataAnnotatorPass(PassRegistry &);
void initializeMachinePipelinerPass(PassRegistry &);
void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
void initializeMachineRegionInfoPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index e6b4a4b0a56ae..c5080a324864d 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -207,6 +207,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
#define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME)
#endif
DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass)
+DUMMY_MACHINE_MODULE_PASS("static-data-annotator", StaticDataAnnotator)
DUMMY_MACHINE_MODULE_PASS("pseudo-probe-inserter", PseudoProbeInserterPass)
DUMMY_MACHINE_MODULE_PASS("mir-debugify", DebugifyMachineModule)
DUMMY_MACHINE_MODULE_PASS("mir-check-debugify", CheckDebugMachineModulePass)
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index a44f6c6a135ef..fb2d7a82f670b 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -126,6 +126,7 @@ add_llvm_component_library(LLVMAnalysis
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionDivision.cpp
ScalarEvolutionNormalization.cpp
+ StaticDataProfileInfo.cpp
StackLifetime.cpp
StackSafetyAnalysis.cpp
StructuralHash.cpp
diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
new file mode 100644
index 0000000000000..b124e101f8cdf
--- /dev/null
+++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp
@@ -0,0 +1,50 @@
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include <sys/types.h>
+
+using namespace llvm;
+/// Records one observation of the constant \p C. Without a count, \p C is only
+/// remembered as having been seen unprofiled; otherwise \p Count is folded into
+/// the running total kept for \p C.
+void StaticDataProfileInfo::addConstantProfileCount(
+    const Constant *C, std::optional<uint64_t> Count) {
+  if (!Count) {
+    ConstantWithoutCounts.insert(C);
+    return;
+  }
+  // operator[] value-initializes a missing entry, so a first observation
+  // starts from zero.
+  uint64_t &OriginalCount = ConstantProfileCounts[C];
+  // Assign rather than `+=`: SaturatingAdd already returns the new total, so
+  // adding its result onto the old total would count the old total twice and
+  // could wrap around, defeating the saturation.
+  OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount);
+  // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few
+  // large values for special use.
+  if (OriginalCount > getInstrMaxCountValue())
+    OriginalCount = getInstrMaxCountValue();
+}
+
+/// Looks up the accumulated count for \p C; yields std::nullopt when no count
+/// has been recorded for it.
+std::optional<uint64_t>
+StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const {
+  const auto Entry = ConstantProfileCounts.find(C);
+  if (Entry != ConstantProfileCounts.end())
+    return Entry->second;
+  return std::nullopt;
+}
+
+/// Gives the pass a fresh, empty StaticDataProfileInfo, replacing any previous
+/// one. Always returns false.
+bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) {
+  Info = std::make_unique<StaticDataProfileInfo>();
+  return false;
+}
+
+/// Destroys the StaticDataProfileInfo held by the pass, leaving `Info` null.
+/// Always returns false.
+bool StaticDataProfileInfoWrapperPass::doFinalization(Module &M) {
+  Info = nullptr;
+  return false;
+}
+
+INITIALIZE_PASS(StaticDataProfileInfoWrapperPass, "static-data-profile-info",
+ "Static Data Profile Info", false, true)
+
+/// Constructs the immutable pass and runs its initializer against the global
+/// pass registry.
+StaticDataProfileInfoWrapperPass::StaticDataProfileInfoWrapperPass()
+    : ImmutablePass(ID) {
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeStaticDataProfileInfoWrapperPassPass(Registry);
+}
+
+char StaticDataProfileInfoWrapperPass::ID = 0;
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 88f863d8204d0..0680e01223e2b 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -227,6 +227,7 @@ add_llvm_component_library(LLVMCodeGen
StackProtector.cpp
StackSlotColoring.cpp
StaticDataSplitter.cpp
+ StaticDataAnnotator.cpp
SwiftErrorValueTracking.cpp
SwitchLoweringUtils.cpp
TailDuplication.cpp
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 35df2a479a545..7d37e1ce5ce8b 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -132,6 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeStackProtectorPass(Registry);
initializeStackSlotColoringLegacyPass(Registry);
initializeStaticDataSplitterPass(Registry);
+ initializeStaticDataAnnotatorPass(Registry);
initializeStripDebugMachineModulePass(Registry);
initializeTailDuplicateLegacyPass(Registry);
initializeTargetPassConfigPass(Registry);
diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
new file mode 100644
index 0000000000000..04d918585f8af
--- /dev/null
+++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp
@@ -0,0 +1,119 @@
+//===- StaticDataAnnotator - Annotate static data's section prefix --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// To reason about data hotness at module granularity, this file implements a
+// module pass, StaticDataAnnotator, that works in coordination with the
+// StaticDataSplitter pass.
+//
+// The StaticDataSplitter pass is a machine function pass. It analyzes data
+// hotness based on code and adds counters in the StaticDataProfileInfo.
+// The StaticDataAnnotator pass is a module pass. It iterates global variables
+// in the module, looks up counters from StaticDataProfileInfo and sets the
+// section prefix based on profiles.
+//
+// The three-pass structure is implemented for practical reasons, to work around
+// the limitation that a module pass based on legacy pass manager cannot make
+// use of MachineBlockFrequencyInfo analysis. In the future, we can consider
+// porting the StaticDataSplitter pass to a module-pass using the new pass
+// manager framework. That way, analyses are lazily computed as opposed to
+// eagerly scheduled, and a module pass can use MachineBlockFrequencyInfo.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "static-data-annotator"
+
+using namespace llvm;
+
+/// Legacy-pass-manager module pass that sets section prefixes on global
+/// variables using the counts collected in StaticDataProfileInfo.
+///
+/// NOTE(review): the class is declared at file scope rather than inside an
+/// anonymous namespace; confirm that external linkage is intended.
+class StaticDataAnnotator : public ModulePass {
+public:
+ static char ID;
+
+ // Analysis results; both are (re)assigned at the top of runOnModule().
+ StaticDataProfileInfo *SDPI = nullptr;
+ const ProfileSummaryInfo *PSI = nullptr;
+
+ StaticDataAnnotator() : ModulePass(ID) {
+ initializeStaticDataAnnotatorPass(*PassRegistry::getPassRegistry());
+ }
+
+ // Requires the static-data profile info and the profile summary, and
+ // declares that all analyses are preserved.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<StaticDataProfileInfoWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.setPreservesAll();
+ ModulePass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return "Static Data Annotator"; }
+
+ // Returns true if the section prefix of any global variable was changed.
+ bool runOnModule(Module &M) override;
+};
+
+// Reports whether `GV` carries a section prefix equal to `Prefix`. A variable
+// with no section prefix at all never matches.
+static bool alreadyHasSectionPrefix(const GlobalVariable &GV,
+                                    StringRef Prefix) {
+  const std::optional<StringRef> Existing = GV.getSectionPrefix();
+  if (!Existing)
+    return false;
+  return *Existing == Prefix;
+}
+
+// Visits the global variables of `M` (skipping those for which
+// isDeclarationForLinker() holds) and, using the count recorded for each in
+// StaticDataProfileInfo, sets a "hot" or "unlikely" section prefix. Returns
+// true if any prefix was set.
+bool StaticDataAnnotator::runOnModule(Module &M) {
+  SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+              .getStaticDataProfileInfo();
+  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  // Hot/cold classification needs a profile summary.
+  if (!PSI->hasProfileSummary())
+    return false;
+
+  bool Changed = false;
+  for (auto &GV : M.globals()) {
+    if (GV.isDeclarationForLinker())
+      continue;
+
+    // Variables with no recorded count are left alone. The module may not be
+    // profiled or instrumented.
+    const auto Count = SDPI->getConstantProfileCount(&GV);
+    if (!Count)
+      continue;
+
+    StringRef NewPrefix;
+    if (PSI->isHotCount(*Count) && !alreadyHasSectionPrefix(GV, "hot")) {
+      // Hot counter and not yet marked hot.
+      NewPrefix = "hot";
+    } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(&GV) &&
+               !alreadyHasSectionPrefix(GV, "unlikely")) {
+      // Cold counter, not yet marked unlikely, and never seen without a
+      // profile count. A variable that was seen without a count doesn't have
+      // all of its uses profiled (for example a function that is not
+      // instrumented, or not sampled because it is a new code path), so it is
+      // not demoted.
+      NewPrefix = "unlikely";
+    } else {
+      continue;
+    }
+
+    GV.setSectionPrefix(NewPrefix);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+char StaticDataAnnotator::ID = 0;
+
+INITIALIZE_PASS(StaticDataAnnotator, DEBUG_TYPE, "Static Data Annotator", false,
+ false)
+
+// Factory for the StaticDataAnnotator module pass; returns a newly allocated
+// pass object.
+ModulePass *llvm::createStaticDataAnnotatorPass() {
+ return new StaticDataAnnotator();
+}
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index aaf898e9b59c6..c647c3075d79c 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -30,9 +31,7 @@
#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
-#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -49,6 +48,7 @@ class StaticDataSplitter : public MachineFunctionPass {
const MachineBranchProbabilityInfo *MBPI = nullptr;
const MachineBlockFrequencyInfo *MBFI = nullptr;
const ProfileSummaryInfo *PSI = nullptr;
+ StaticDataProfileInfo *SDPI = nullptr;
// If the global value is a local linkage global variable, return it.
// Otherwise, return nullptr.
@@ -58,19 +58,16 @@ class StaticDataSplitter : public MachineFunctionPass {
// .data.rel.ro} sections.
bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
- // Iterate all global variables in the module and update the section prefix
- // of the module-internal data.
- bool updateGlobalVariableSectionPrefix(MachineFunction &MF);
+ // Use profiles to partition static data.
+ bool partitionStaticDataWithProfiles(MachineFunction &MF);
- // Accummulated data profile count across machine functions in the module.
- DenseMap<const GlobalVariable *, uint64_t> DataProfileCounts;
- // Update LLVM statistics for a machine function without profiles.
- void updateStatsWithoutProfiles(const MachineFunction &MF);
// Update LLVM statistics for a machine function with profiles.
void updateStatsWithProfiles(const MachineFunction &MF);
- // Use profiles to partition static data.
- bool partitionStaticDataWithProfiles(MachineFunction &MF);
+ // Update LLVM statistics for a machine function without profiles.
+ void updateStatsWithoutProfiles(const MachineFunction &MF);
+
+ void annotateStaticDataWithoutProfiles(const MachineFunction &MF);
public:
static char ID;
@@ -86,6 +83,7 @@ class StaticDataSplitter : public MachineFunctionPass {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
+ AU.addRequired<StaticDataProfileInfoWrapperPass>();
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -96,18 +94,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
MBFI = &getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ SDPI = &getAnalysis<StaticDataProfileInfoWrapperPass>()
+ .getStaticDataProfileInfo();
+
const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI &&
MF.getFunction().hasProfileData();
if (!ProfileAvailable) {
+ annotateStaticDataWithoutProfiles(MF);
updateStatsWithoutProfiles(MF);
return false;
}
bool Changed = partitionStaticDataWithProfiles(MF);
- Changed |= updateGlobalVariableSectionPrefix(MF);
-
updateStatsWithProfiles(MF);
return Changed;
}
@@ -158,18 +158,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
if (!GV || GV->getName().starts_with("llvm.") ||
!inStaticDataSection(GV, TM))
continue;
-
- // Acccumulate data profile count across machine function
- // instructions.
- // TODO: Analyze global variable's initializers.
- if (Count) {
- uint64_t &GVCount = DataProfileCounts[GV];
- GVCount = llvm::SaturatingAdd(GVCount, *Count);
- // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few
- // large values for special use.
- if (GVCount > getInstrMaxCountValue())
- GVCount = getInstrMaxCountValue();
- }
+ SDPI->addConstantProfileCount(GV, Count);
}
}
}
@@ -194,51 +183,6 @@ bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV,
Kind.isBSS();
}
-bool StaticDataSplitter::updateGlobalVariableSectionPrefix(
- MachineFunction &MF) {
- bool Changed = false;
- for (GlobalVariable &GV : MF.getFunction().getParent()->globals()) {
- if (GV.isDeclarationForLinker())
- continue;
- // DataProfileCounts accumulates data profile count across all machine
- // function instructions, and it can't model the indirect accesses through
- // other global variables' initializers.
- // TODO: Analyze the users of module-internal global variables and see
- // through the users' initializers. Do not place a global variable into
- // unlikely section if any of its users are potentially hot.
- auto Iter = DataProfileCounts.find(&GV);
- if (Iter == DataProfileCounts.end())
- continue;
-
- const std::optional<StringRef> Prefix = GV.getSectionPrefix();
-
- // StaticDataSplitter is made a machine function pass rather than a module
- // pass because (Lazy)MachineBlockFrequencyInfo is a machine-function
- // analysis pass and cannot be used for a legacy module pass.
- // As a result, we use `DataProfileCounts` to accumulate data
- // profile count across machine functions and update global variable section
- // prefix once per machine function.
- // FIXME: Make StaticDataSplitter a module pass under new pass manager
- // framework, and set global variable section prefix once per module after
- // analyzing all machine functions.
- if (PSI->isColdCount(Iter->second)) {
- assert((!Prefix || *Prefix != "hot") &&
- "Count monotonically increased so a hot variable won't become "
- "cold again.");
- if (!Prefix || *Prefix != "unlikely") {
- GV.setSectionPrefix("unlikely");
- Changed |= true;
- }
- } else if (PSI->isHotCount(Iter->second)) {
- if (!Prefix || *Prefix != "hot") {
- GV.setSectionPrefix("hot");
- Changed |= true;
- }
- }
- }
- return Changed;
-}
-
void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
if (!AreStatisticsEnabled())
return;
@@ -257,6 +201,24 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
}
}
+// For a machine function without usable profile data: marks every eligible
+// local-linkage global variable referenced by an operand in `MF` as "seen
+// without a profile count" in StaticDataProfileInfo.
+void StaticDataSplitter::annotateStaticDataWithoutProfiles(
+    const MachineFunction &MF) {
+  const auto &TM = MF.getTarget();
+  for (const MachineBasicBlock &Block : MF) {
+    for (const MachineInstr &Instr : Block) {
+      for (const MachineOperand &Operand : Instr.operands()) {
+        if (!Operand.isGlobal())
+          continue;
+        const GlobalVariable *Var =
+            getLocalLinkageGlobalVariable(Operand.getGlobal());
+        // Eligible: a local-linkage variable whose name does not start with
+        // "llvm." and which lands in a static data section.
+        const bool Eligible = Var && !Var->getName().starts_with("llvm.") &&
+                              inStaticDataSection(Var, TM);
+        if (Eligible)
+          SDPI->addConstantProfileCount(Var, std::nullopt);
+      }
+    }
+  }
+}
+
void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {
if (!AreStatisticsEnabled())
return;
@@ -273,6 +235,7 @@ INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data",
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StaticDataProfileInfoWrapperPass)
INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false,
false)
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 5d9da9df9092a..7f89043c0b20c 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1257,8 +1257,13 @@ void TargetPassConfig::addMachinePasses() {
}
}
addPass(createMachineFunctionSplitterPass());
- if (SplitStaticData || TM->Options.EnableStaticDataPartitioning)
+ if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) {
+ // The static data splitter pass is a machine function pass, and the
+ // static data annotator pass is a module-wide pass. See the file comment
+ // in StaticDataAnnotator.cpp for the motivation.
addPass(createStaticDataSplitterPass());
+ addPass(createStaticDataAnnotatorPass());
+ }
}
// We run the BasicBlockSections pass if either we need BB sections or BB
// address map (or both).
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index bb77f3362406b..b216047a5ea66 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -1,23 +1,35 @@
-
+; The static-data-splitter processes data from @cold_func first,
+; @unprofiled_func secondly, and @hot_func after the two functions above.
+; Tests that data hotness is based on aggregated module-wide profile
+; information. This way linker-mergable data is emitted once per module.
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; The three RUN commands set `-relocation-model=pic` so `hot_relro_array` and
+; `cold_relro_array` are placed in the .data.rel.ro-prefixed section.
+
+; This RUN command sets `-data-sections=true -unique-section-names=true` so data
+; sections are uniquified by variable names.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -data-sections=true \
; RUN: -unique-section-names=true -relocation-model=pic \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA
+; This RUN command sets `-data-sections=true -unique-section-names=false` so
+; data sections are uniquified by numbers.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -data-sections=true \
; RUN: -unique-section-names=false -relocation-model=pic \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA
+; This RUN command sets `-data-sections=false -unique-section-names=false`.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \
; RUN: -partition-static-data-sections=true -data-sections=false \
; RUN: -unique-section-names=false -relocation-model=pic \
; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA
+; For @.str and @.str.1
; SYM: .section .rodata.str1.1.hot.
; UNIQ: .section .rodata.str1.1.hot.,"aMS", at progbits,1
; AGG: .section .rodata.str1.1.hot
@@ -26,45 +38,66 @@ target triple = "x86_64-unknown-linux-gnu"
; DATA: .L.str.1
; DATA: "%d\t%d\t%d\n"
+; For @hot_relro_array
+; SYM: .section .data.rel.ro.hot.hot_relro_array
+; UNIQ: .section .data.rel.ro.hot.,"aw", at progbits,unique,3
+; AGG: .section .data.rel.ro.hot.,"aw", at progbits
-; SYM: .section .data.rel.ro.hot.hot_relro_array
+; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}.
; SYM: .section .data.hot.hot_data,"aw", at progbits
-; SYM: .section .bss.hot.hot_bss,"aw", at nobits
-
-; UNIQ: .section .data.rel.ro.hot.,"aw", at progbits,unique,3
; UNIQ: .section .data.hot.,"aw", at progbits,unique,4
-; UNIQ: .section .bss.hot.,"aw", at nobits,unique,5
-
-; AGG: .section .data.rel.ro.hot.,"aw", at progbits
; AGG: .section .data.hot.,"aw", at progbits
-; AGG: .section .bss.hot.,"aw", at nobits
+; For @hot_bss, which is accessed by {unprofiled_func, hot_func}.
+; SYM: .section .bss.hot.hot_bss,"aw", at nobits
+; UNIQ: .section .bss.hot.,"aw", at nobits,unique,5
+; AGG: .section .bss.hot.,"aw", at nobits
+; For @.str.2
; SYM: .section .rodata.str1.1.unlikely.,"aMS", at progbits,1
; UNIQ: section .rodata.str1.1.unlikely.,"aMS", at progbits,1
; AGG: .section .rodata.str1.1.unlikely.,"aMS", at progbits,1
; DATA: .L.str.2:
; DATA: "cold%d\t%d\t%d\n"
-
+; For @cold_bss
; SYM: .section .bss.unlikely.cold_bss,"aw", at nobits
-; SYM: .section .data.unlikely.cold_data,"aw", at progbits
-; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw", at progbits
-; SYM: .section .bss.unlikely.bss2,"aw", at nobits
-; SYM: .section .data.unlikely.data3,"aw", at progbits
-
; UNIQ: .section .bss.unlikely.,"aw", at nobits,unique,6
-; UNIQ: .section .data.unlikely.,"aw", at progbits,unique,7
-; UNIQ: .section .data.rel.ro.unlikely.,"aw", at progbits,unique,8
-; UNIQ: .section .bss.unlikely.,"aw", at nobits,unique,9
-; UNIQ: .section .data.unlikely.,"aw", at progbits,unique,10
-
; AGG: .section .bss.unlikely.,"aw", at nobits
+
+; For @cold_data
+; SYM: .section .data.unlikely.cold_data,"aw", at progbits
+; UNIQ: .section .data.unlikely.,"aw", at progbits,unique,7
; AGG: .section .data.unlikely.,"aw", at progbits
+
+; For @cold_relro_array
+; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw", at progbits
+; UNIQ: .section .data.rel.ro.unlikely.,"aw", at progbits,unique,8
; AGG: .section .data.rel.ro.unlikely.,"aw", at progbits
+
+; Currently static-data-splitter only analyzes access from code.
+; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
+; and @cold_relro_array. A follow-up item is to analyze indirect access via data
+; and prune the unlikely list.
+; For @bss2
+; SYM: .section .bss.unlikely.bss2,"aw", at nobits
+; UNIQ: .section .bss.unlikely.,"aw", at nobits,unique,9
; AGG: .section .bss.unlikely.,"aw", at nobits
+
+; For @data3
+; SYM: .section .data.unlikely.data3,"aw", at progbits
+; UNIQ: .section .data.unlikely.,"aw", at progbits,unique,10
; AGG: .section .data.unlikely.,"aw", at progbits
+; For @data_with_unknown_hotness
+; SYM: .type .Ldata_with_unknown_hotness, at object # @data_with_unknown_hotness
+; SYM: .section .data..Ldata_with_unknown_hotness,"aw", at progbits
+; UNIQ: .section .data,"aw", at progbits,unique,11
+; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+; AGG: .data
+; DATA: .Ldata_with_unknown_hotness:
+
@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@@ -74,29 +107,11 @@ target triple = "x86_64-unknown-linux-gnu"
@cold_bss = internal global i32 0
@cold_data = internal global i32 4
@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2]
-
-; COM: Currently static-data-splitter only analyzes access from code.
-; COM: @bss2 and @data3 are indirectly accessed by code through @hot_relro_array
-; COM: and @cold_relro_array.
-; COM: A follow-up item is to analyze access from data and prune the unlikely
-; COM: list.
@bss2 = internal global i32 0
@data3 = internal global i32 3
+@data_with_unknown_hotness = private global i32 5
-define void @hot_callee(i32 %0) !prof !51 {
- %2 = call i32 (ptr, ...) @printf(ptr @.str)
- %3 = srem i32 %0, 2
- %4 = sext i32 %3 to i64
- %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
- %6 = load ptr, ptr %5
- %7 = load i32, ptr %6
- %8 = load i32, ptr @hot_data
- %9 = load i32, ptr @hot_bss
- %10 = call i32 (ptr, ...) @printf(ptr @.str.1, i32 %7, i32 %8, i32 %9)
- ret void
-}
-
-define void @cold_callee(i32 %0) !prof !52 {
+define void @cold_func(i32 %0) !prof !15 {
%2 = load i32, ptr @cold_bss
%3 = load i32, ptr @cold_data
%4 = srem i32 %0, 2
@@ -104,14 +119,34 @@ define void @cold_callee(i32 %0) !prof !52 {
%6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5
%7 = load ptr, ptr %6
%8 = load i32, ptr %7
- %9 = call i32 (ptr, ...) @printf(ptr @.str.2, i32 %2, i32 %3, i32 %8)
+ %9 = load i32, ptr @data_with_unknown_hotness
+ %11 = load i32, ptr @hot_data
+ %12 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11)
+ ret void
+}
+
+define i32 @unprofiled_func() {
+ %a = load i32, ptr @data_with_unknown_hotness
+ %b = load i32, ptr @hot_data
+ %c = load i32, ptr @hot_bss
+ %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c)
+ ret i32 %ret
+}
+
+define void @hot_func(i32 %0) !prof !14 {
+ %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str)
+ %3 = srem i32 %0, 2
+ %4 = sext i32 %3 to i64
+ %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4
+ %6 = load ptr, ptr %5
+ %7 = load i32, ptr %6
+ %8 = load i32, ptr @hot_data
+ %9 = load i32, ptr @hot_bss
+ %10 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9)
ret void
}
-define i32 @main(i32 %0, ptr %1) !prof !52 {
- %3 = call i64 @time(ptr null)
- %4 = trunc i64 %3 to i32
- call void @srand(i32 %4)
+define i32 @main(i32 %0, ptr %1) !prof !15 {
br label %11
5: ; preds = %11
@@ -120,7 +155,7 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
store i32 %6, ptr @cold_data
store i32 %6, ptr @bss2
store i32 %6, ptr @data3
- call void @cold_callee(i32 %6)
+ call void @cold_func(i32 %6)
ret i32 0
11: ; preds = %11, %2
@@ -134,32 +169,30 @@ define i32 @main(i32 %0, ptr %1) !prof !52 {
store i32 %13, ptr @hot_data
%18 = add i32 %13, 1
store i32 %18, ptr @hot_bss
- call void @hot_callee(i32 %12)
+ call void @hot_func(i32 %12)
%19 = add i32 %12, 1
%20 = icmp eq i32 %19, 100000
- br i1 %20, label %5, label %11, !prof !53
+ br i1 %20, label %5, label %11, !prof !16
}
-declare void @srand(i32)
-declare i64 @time(ptr)
declare i32 @rand()
-declare i32 @printf(ptr, ...)
-
-!llvm.module.flags = !{!12}
-
-!12 = !{i32 1, !"ProfileSummary", !13}
-!13 = !{!14, !15, !16, !17, !18, !19, !20, !23}
-!14 = !{!"ProfileFormat", !"InstrProf"}
-!15 = !{!"TotalCount", i64 1460183}
-!16 = !{!"MaxCount", i64 849024}
-!17 = !{!"MaxInternalCount", i64 32769}
-!18 = !{!"MaxFunctionCount", i64 849024}
-!19 = !{!"NumCounts", i64 23627}
-!20 = !{!"NumFunctions", i64 3271}
-!23 = !{!"DetailedSummary", !24}
-!24 = !{!36, !40}
-!36 = !{i32 990000, i64 166, i32 73}
-!40 = !{i32 999999, i64 1, i32 1443}
-!51 = !{!"function_entry_count", i64 100000}
-!52 = !{!"function_entry_count", i64 1}
-!53 = !{!"branch_weights", i32 1, i32 99999}
+declare i32 @func_taking_arbitrary_param(...)
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460183}
+!5 = !{!"MaxCount", i64 849024}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849024}
+!8 = !{!"NumCounts", i64 23627}
+!9 = !{!"NumFunctions", i64 3271}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13}
+!12 = !{i32 990000, i64 166, i32 73}
+!13 = !{i32 999999, i64 3, i32 1443}
+!14 = !{!"function_entry_count", i64 100000}
+!15 = !{!"function_entry_count", i64 1}
+!16 = !{!"branch_weights", i32 1, i32 99999}
>From 9302b2b708cdce89add071fa584c12ddb4f9d71d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 20 Mar 2025 13:57:37 -0700
Subject: [PATCH 7/7] port code de-duplication based on feedback in the follow
up patch (https://github.com/llvm/llvm-project/pull/129781)
---
llvm/lib/CodeGen/StaticDataSplitter.cpp | 53 +++++++++++++------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index c647c3075d79c..77778556ce44e 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -58,6 +58,11 @@ class StaticDataSplitter : public MachineFunctionPass {
// .data.rel.ro} sections.
bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM);
+ // Returns the constant if the operand refers to a global variable or constant
+ // that gets lowered to static data sections. Otherwise, return nullptr.
+ const Constant *getConstant(const MachineOperand &Op,
+ const TargetMachine &TM);
+
// Use profiles to partition static data.
bool partitionStaticDataWithProfiles(MachineFunction &MF);
@@ -84,6 +89,8 @@ class StaticDataSplitter : public MachineFunctionPass {
AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<StaticDataProfileInfoWrapperPass>();
+ // This pass does not modify the CFG.
+ AU.setPreservesCFG();
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -112,6 +119,20 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+const Constant *StaticDataSplitter::getConstant(const MachineOperand &Op,
+ const TargetMachine &TM) {
+ if (!Op.isGlobal())
+ return nullptr;
+
+ // Find global variables with local linkage.
+ const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
+ // Skip 'llvm.'-prefixed global variables conservatively because they are
+ // often handled specially, and skip those not in static data sections.
+ if (!GV || GV->getName().starts_with("llvm.") || !inStaticDataSection(GV, TM))
+ return nullptr;
+ return GV;
+}
+
bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
int NumChangedJumpTables = 0;
@@ -148,17 +169,8 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
++NumChangedJumpTables;
- } else {
- // Find global variables with local linkage.
- const GlobalVariable *GV =
- getLocalLinkageGlobalVariable(Op.getGlobal());
- // Skip 'special' global variables conservatively because they are
- // often handled specially, and skip those not in static data
- // sections.
- if (!GV || GV->getName().starts_with("llvm.") ||
- !inStaticDataSection(GV, TM))
- continue;
- SDPI->addConstantProfileCount(GV, Count);
+ } else if (const Constant *C = getConstant(Op, TM)) {
+ SDPI->addConstantProfileCount(C, Count);
}
}
}
@@ -203,20 +215,11 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
void StaticDataSplitter::annotateStaticDataWithoutProfiles(
const MachineFunction &MF) {
- for (const auto &MBB : MF) {
- for (const MachineInstr &I : MBB) {
- for (const MachineOperand &Op : I.operands()) {
- if (!Op.isGlobal())
- continue;
- const GlobalVariable *GV =
- getLocalLinkageGlobalVariable(Op.getGlobal());
- if (!GV || GV->getName().starts_with("llvm.") ||
- !inStaticDataSection(GV, MF.getTarget()))
- continue;
- SDPI->addConstantProfileCount(GV, std::nullopt);
- }
- }
- }
+ for (const auto &MBB : MF)
+ for (const MachineInstr &I : MBB)
+ for (const MachineOperand &Op : I.operands())
+ if (const Constant *C = getConstant(Op, MF.getTarget()))
+ SDPI->addConstantProfileCount(C, std::nullopt);
}
void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) {
More information about the llvm-commits
mailing list