[llvm] Insert symbols for prefetch targets read from basic blocks section profile. (PR #168439)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 13:47:56 PST 2025
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/168439
>From 49c5f229c8f4829551462bc1ad335c752d0fc643 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 01/24] feat(AsmPrinter): Add support for emitting prefetch
target symbols
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 50 ++++++++++++++++++-
.../BasicBlockSectionsProfileReader.cpp | 44 ++++++++++++++++
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++
5 files changed, 174 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index ee1f28377f7e4..5b230db30aec4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
+struct BBPosition {
+ UniqueBBID BBID;
+ unsigned BBOffset;
+};
+
+struct PrefetchHint {
+ BBPosition SitePosition;
+ StringRef TargetFunctionName;
+ BBPosition TargetPosition;
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
+ SmallVector<PrefetchHint> PrefetchHints;
+ DenseSet<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
- // Edge counts for each edge, stored as a nested map.
+ // Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
// Hash for each basic block. The Hashes are stored for every original block
// (not cloned blocks), hence the map key being unsigned instead of
@@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
+// Provides DenseMapInfo BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+ static inline BBPosition getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline BBPosition getTombstoneKey() {
+ return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const BBPosition &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..e6c6bc26ae9e6 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -229,6 +237,8 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -710,6 +720,14 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
+ const SmallVector<unsigned> &getPrefetchTargets() const {
+ return PrefetchTargets;
+ }
+
+ void setPrefetchTargets(const SmallVector<unsigned> &V) {
+ PrefetchTargets = V;
+ }
+
/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }
@@ -1275,6 +1293,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3aa245b7f3f1e..a204bba5789a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> InsertNoopsForPrefetch(
+ "insert-noops-for-prefetch",
+ cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+ cl::Hidden);
+
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
+ int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
+ unsigned NumCallsInBlock = 0;
for (auto &MI : MBB) {
+ if (NextPrefetchTargetIndex != -1 &&
+ NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
+
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+ Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().isWeakForLinker()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
+ errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ }
+ // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
+ while (NextPrefetchTargetIndex != -1) {
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+ Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().hasWeakLinkage()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ }
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
+
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c234c0f1b0b34..de146e172c174 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+ .PrefetchTargets;
+}
+
// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
@@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 't': { // Prefetch target specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ assert(Values.size() == 1);
+ SmallVector<StringRef, 2> PrefetchTargetStr;
+ Values[0].split(PrefetchTargetStr, '@');
+ assert(PrefetchTargetStr.size() == 2);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetBBOffset;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[1]);
+ FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
@@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+ if (CallInstSymbols.size() <= CallInstNumber) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ CallInstSymbols.resize(CallInstNumber + 1);
+ CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
+ "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+ Twine(CallInstNumber),
+ /*AlwaysEmit=*/true);
+ }
+ return CallInstSymbols[CallInstNumber];
+}
+
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
>From b25adef3703b8bb3813609f8282ebf8a53b6686d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 11 Nov 2025 21:30:47 +0000
Subject: [PATCH 02/24] feat: Add prefetch-profile.txt for testing
---
build-release/prefetch-profile.txt | 3 +++
1 file changed, 3 insertions(+)
create mode 100644 build-release/prefetch-profile.txt
diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
new file mode 100644
index 0000000000000..294f57a46920b
--- /dev/null
+++ b/build-release/prefetch-profile.txt
@@ -0,0 +1,3 @@
+v1
+f f
+t 0 at 1
>From bbfb7ba90a5192bb78549a5cd202368872dfd09a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:18 +0000
Subject: [PATCH 03/24] Everything else.
---
build-release/prefetch-profile.txt | 3 --
.../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++----------------
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12 +++++++-
.../BasicBlockSectionsProfileReader.cpp | 10 +++----
4 files changed, 20 insertions(+), 34 deletions(-)
delete mode 100644 build-release/prefetch-profile.txt
diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
deleted file mode 100644
index 294f57a46920b..0000000000000
--- a/build-release/prefetch-profile.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-v1
-f f
-t 0 at 1
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 5b230db30aec4..fbf9b89754cd7 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -44,7 +44,7 @@ struct BBClusterInfo {
struct BBPosition {
UniqueBBID BBID;
- unsigned BBOffset;
+ unsigned CallsiteIndex;
};
struct PrefetchHint {
@@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo {
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
SmallVector<PrefetchHint> PrefetchHints;
- DenseSet<BBPosition> PrefetchTargets;
+ SmallVector<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
-// Provides DenseMapInfo BBPosition.
-template <> struct DenseMapInfo<BBPosition> {
- static inline BBPosition getEmptyKey() {
- return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
- DenseMapInfo<unsigned>::getEmptyKey()};
- }
- static inline BBPosition getTombstoneKey() {
- return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
- DenseMapInfo<unsigned>::getTombstoneKey()};
- }
- static unsigned getHashValue(const BBPosition &Val) {
- std::pair<unsigned, unsigned> PairVal = std::make_pair(
- DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
- return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
- }
- static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
- return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
- DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
- }
-};
-
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a204bba5789a8..90445fedd5db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
if (EmitBBHash)
AU.addRequired<MachineBlockHashInfo>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- for (auto &MBB : *MF) {
+ DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
+ if (auto *BBSPRPass =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
+ FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
+}
+
+ for (auto &MBB : *MF) {
+
+ SmallVector<unsigned> BBPrefetchTargets;
+ = FunctionPrefetchTargets.lookup(MBB.g);
int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index de146e172c174..c4784a6039c09 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
}
-DenseSet<BBPosition>
+SmallVector<BBPosition>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
- unsigned long long TargetBBOffset;
- if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+ unsigned long long TargetCallsiteIndex;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
- FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+ FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
@@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
return BBSPR.getPrefetchHintsForFunction(FuncName);
}
-DenseSet<BBPosition>
+SmallVector<BBPosition>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return BBSPR.getPrefetchTargetsForFunction(FuncName);
>From 3e6b04f94548b6a61219eae6b32ba5a46ac1461b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:27 +0000
Subject: [PATCH 04/24] Add test.
---
llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
new file mode 100644
index 0000000000000..979db7942ff2c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -0,0 +1,42 @@
+;; Check that a prefetch target directive ('t') in the basic block sections
+;; profile causes a prefetch target symbol to be emitted for the function.
+;;
+;; Specify the bb sections profile:
+; RUN: echo 'v1' > %t
+; RUN: echo 'f _Z3foob' >> %t
+; RUN: echo 't 0@0' >> %t
+;;
+; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck
+
+define i32 @_Z3foob(i1 zeroext %0) nounwind {
+ %2 = alloca i32, align 4
+ %3 = alloca i8, align 1
+ %4 = zext i1 %0 to i8
+ store i8 %4, ptr %3, align 1
+ %5 = load i8, ptr %3, align 1
+ %6 = trunc i8 %5 to i1
+ %7 = zext i1 %6 to i32
+ %8 = icmp sgt i32 %7, 0
+ br i1 %8, label %9, label %11
+
+9: ; preds = %1
+ %10 = call i32 @_Z3barv()
+ store i32 %10, ptr %2, align 4
+ br label %13
+
+11: ; preds = %1
+ %12 = call i32 @_Z3bazv()
+ store i32 %12, ptr %2, align 4
+ br label %13
+
+13: ; preds = %11, %9
+ %14 = load i32, ptr %2, align 4
+ ret i32 %14
+}
+
+declare i32 @_Z3barv() #1
+declare i32 @_Z3bazv() #1
+
+
+; CHECK: _Z3foob
+; CHECK: llvm_prefetch_target
>From 996736018c889a680e2ff2d262d4496f398c9d1a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:17 +0000
Subject: [PATCH 05/24] Fix everything
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 4 +-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +----
llvm/include/llvm/CodeGen/Passes.h | 2 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 73 ++++----------
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 96 +++++++++++++++++++
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 ---
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 +
9 files changed, 127 insertions(+), 88 deletions(-)
create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index fbf9b89754cd7..1fd904d64ab9d 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index e6c6bc26ae9e6..4be008bbf4bf1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
-struct PrefetchTarget {
- StringRef TargetFunction;
- UniqueBBID TargetBBID;
- unsigned TargetBBOffset;
-};
-
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -219,8 +213,6 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
- SmallVector<unsigned> PrefetchTargets;
-
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -237,7 +229,7 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+ SmallVector<unsigned> PrefetchTargetIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -720,12 +712,12 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<unsigned> &getPrefetchTargets() const {
- return PrefetchTargets;
+ const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
+ return PrefetchTargetIndexes;
}
- void setPrefetchTargets(const SmallVector<unsigned> &V) {
- PrefetchTargets = V;
+ void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
+ PrefetchTargetIndexes = V;
}
/// Returns the section ID of this basic block.
@@ -1293,12 +1285,6 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
- MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
- const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
- return CallInstSymbols;
- }
-
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..f148d050a5772 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
+LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
+
/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 10a4d8525a9e8..35d5ab14dc226 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
LLVM_ABI void
initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 90445fedd5db3..933fe6f7d177f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -179,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> InsertNoopsForPrefetch(
- "insert-noops-for-prefetch",
- cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
- cl::Hidden);
-
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
if (EmitBBHash)
AU.addRequired<MachineBlockHashInfo>();
- AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
- if (auto *BBSPRPass =
- getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
- FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
-}
for (auto &MBB : *MF) {
-
- SmallVector<unsigned> BBPrefetchTargets;
- = FunctionPrefetchTargets.lookup(MBB.g);
- int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- unsigned NumCallsInBlock = 0;
- for (auto &MI : MBB) {
- if (NextPrefetchTargetIndex != -1 &&
- NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+ auto PrefetchTargetIt = PrefetchTargets.begin();
+ unsigned NumCalls = 0;
+ auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
+ if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+ return;
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
Twine("_") +
- utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
- if (MF->getFunction().isWeakForLinker()) {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
- errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
- } else {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
- errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
- }
- // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
- // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ utostr(*PrefetchTargetIt));
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++NextPrefetchTargetIndex;
- if (NextPrefetchTargetIndex >=
- static_cast<int>(MBB.getPrefetchTargets().size()))
- NextPrefetchTargetIndex = -1;
- }
+ ++PrefetchTargetIt;
+ };
+
+ for (auto &MI : MBB) {
+ EmitPrefetchTargetSymbolIfNeeded();
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() {
break;
}
- if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
+ if (MI.isCall()) {
+ if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+ ++NumCalls;
+ }
if (TM.Options.EmitCallGraphSection && MI.isCall())
handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
@@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
- while (NextPrefetchTargetIndex != -1) {
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
- Twine("_") +
- utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
- if (MF->getFunction().hasWeakLinkage()) {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
- } else {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
- }
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
- OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++NextPrefetchTargetIndex;
- if (NextPrefetchTargetIndex >=
- static_cast<int>(MBB.getPrefetchTargets().size()))
- NextPrefetchTargetIndex = -1;
- }
-
+ EmitPrefetchTargetSymbolIfNeeded();
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1cf0b4964760b..fcf28247179ca 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
IndirectBrExpandPass.cpp
InitUndef.cpp
InlineSpiller.cpp
+ InsertCodePrefetch.cpp
InterferenceCache.cpp
InterleavedAccessPass.cpp
InterleavedLoadCombinePass.cpp
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
new file mode 100644
index 0000000000000..7cb52302ac7db
--- /dev/null
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -0,0 +1,96 @@
+//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+namespace {
+class InsertCodePrefetch : public MachineFunctionPass {
+public:
+ static char ID;
+
+ InsertCodePrefetch() : MachineFunctionPass(ID) {
+ initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Identify basic blocks that need separate sections and prepare to emit them
+ /// accordingly.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char InsertCodePrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ InsertCodePrefetch, DEBUG_TYPE,
+ "Reads prefetch", true,
+ false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+ InsertCodePrefetch, DEBUG_TYPE,
+ "Reads prefetch", true,
+ false)
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+ SmallVector<BBPosition> PrefetchTargets =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchTargetsForFunction(MF.getName());
+ DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+ for (const auto &Target: PrefetchTargets)
+ PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+ for (auto &MBB: MF) {
+ auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
+ if (R == PrefetchTargetsByBBID.end()) continue;
+ MBB.setPrefetchTargetIndexes(R->second);
+ }
+
+ return false;
+}
+
+void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createInsertCodePrefetchPass() {
+ return new InsertCodePrefetch();
+}
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
- if (CallInstSymbols.size() <= CallInstNumber) {
- const MachineFunction *MF = getParent();
- MCContext &Ctx = MF->getContext();
- CallInstSymbols.resize(CallInstNumber + 1);
- CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
- "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
- Twine(CallInstNumber),
- /*AlwaysEmit=*/true);
- }
- return CallInstSymbols[CallInstNumber];
-}
-
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index ceae0d29eea90..5334c5596d018 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1291,6 +1291,7 @@ void TargetPassConfig::addMachinePasses() {
addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
TM->getBBSectionsFuncListBuf()));
addPass(llvm::createBasicBlockPathCloningPass());
+ addPass(llvm::createInsertCodePrefetchPass());
}
addPass(llvm::createBasicBlockSectionsPass());
}
>From a08b65a6c93963ad84ae98820973fae245637ea2 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:36 +0000
Subject: [PATCH 06/24] clang-format.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 6 +++--
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 25 +++++++++++--------
.../BasicBlockSectionsProfileReader.cpp | 3 ++-
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 23 ++++++++---------
4 files changed, 31 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 1fd904d64ab9d..2b8ee578cd917 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition>
+ getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition>
+ getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 933fe6f7d177f..4368cd4d256c9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- for (auto &MBB : *MF) {
+ for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() {
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+ if (PrefetchTargetIt == PrefetchTargets.end() ||
+ NumCalls < *PrefetchTargetIt)
return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
- Twine("_") +
- utostr(*PrefetchTargetIt));
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
- OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++PrefetchTargetIt;
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+ utostr(MBB.getBBID()->BaseID) + Twine("_") +
+ utostr(*PrefetchTargetIt));
+ OutStreamer->emitSymbolAttribute(
+ PrefetchTargetSymbol,
+ MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++PrefetchTargetIt;
};
for (auto &MI : MBB) {
@@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
@@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() {
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
- OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+ OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
++NumCalls;
}
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c4784a6039c09..9b54dd6803cf6 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
- FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+ FI->second.PrefetchTargets.push_back(
+ BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 7cb52302ac7db..91cb6e599215d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass {
initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
}
- StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+ StringRef getPassName() const override {
+ return "X86 Cide Prefetch Inserter Pass";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(
- InsertCodePrefetch, DEBUG_TYPE,
- "Reads prefetch", true,
- false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+ false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
- InsertCodePrefetch, DEBUG_TYPE,
- "Reads prefetch", true,
- false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+ false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
@@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
- for (const auto &Target: PrefetchTargets)
+ for (const auto &Target : PrefetchTargets)
PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
- for (auto &MBB: MF) {
+ for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
- if (R == PrefetchTargetsByBBID.end()) continue;
+ if (R == PrefetchTargetsByBBID.end())
+ continue;
MBB.setPrefetchTargetIndexes(R->second);
}
>From d988a3c374d6f3212ccb4081a9321279d54ad92b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:46:13 +0000
Subject: [PATCH 07/24] Fix the prefetch test.
---
llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
index 979db7942ff2c..3eb91dfdabd27 100644
--- a/llvm/test/CodeGen/X86/prefetch-symbols.ll
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -1,12 +1,14 @@
-;; Check that specifying the function in the basic block sections profile
-;; without any other directives is a noop.
+;; Check prefetch directives in basic block section profiles.
;;
;; Specify the bb sections profile:
; RUN: echo 'v1' > %t
; RUN: echo 'f _Z3foob' >> %t
; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 1 at 0' >> %t
+; RUN: echo 't 1 at 1' >> %t
+; RUN: echo 't 2 at 1' >> %t
;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
define i32 @_Z3foob(i1 zeroext %0) nounwind {
%2 = alloca i32, align 4
@@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
%7 = zext i1 %6 to i32
%8 = icmp sgt i32 %7, 0
br i1 %8, label %9, label %11
+; CHECK: _Z3foob:
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0:
9: ; preds = %1
%10 = call i32 @_Z3barv()
store i32 %10, ptr %2, align 4
br label %13
+; CHECK: .globl __llvm_prefetch_target__Z3foob_1_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0:
+; CHECK-NEXT: callq _Z3barv at PLT
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_1_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1:
11: ; preds = %1
%12 = call i32 @_Z3bazv()
store i32 %12, ptr %2, align 4
br label %13
+; CHECK: callq _Z3bazv at PLT
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
13: ; preds = %11, %9
%14 = load i32, ptr %2, align 4
@@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
declare i32 @_Z3barv() #1
declare i32 @_Z3bazv() #1
-
-
-; CHECK: _Z3foob
-; CHECK: llvm_prefetch_target
>From 40084459b2edf0ba4d46f2c958856c33bca19d54 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:48:53 +0000
Subject: [PATCH 08/24] Rename the test.
---
...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%)
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
similarity index 100%
rename from llvm/test/CodeGen/X86/prefetch-symbols.ll
rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
>From e3b501f198f0640b7c43d73c81df74185532098e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 06:59:43 +0000
Subject: [PATCH 09/24] Remove unrelated changes.
---
.../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 12 ------------
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------
2 files changed, 24 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 2b8ee578cd917..801588509d340 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -47,12 +47,6 @@ struct BBPosition {
unsigned CallsiteIndex;
};
-struct PrefetchHint {
- BBPosition SitePosition;
- StringRef TargetFunctionName;
- BBPosition TargetPosition;
-};
-
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
- SmallVector<PrefetchHint> PrefetchHints;
SmallVector<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
@@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
- SmallVector<PrefetchHint>
- getPrefetchHintsForFunction(StringRef FuncName) const;
-
SmallVector<BBPosition>
getPrefetchTargetsForFunction(StringRef FuncName) const;
@@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
- SmallVector<PrefetchHint>
- getPrefetchHintsForFunction(StringRef FuncName) const;
SmallVector<BBPosition>
getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9b54dd6803cf6..5b12c85f7eeef 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
- StringRef FuncName) const {
- return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
-}
-
SmallVector<BBPosition>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
@@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
- StringRef FuncName) const {
- return BBSPR.getPrefetchHintsForFunction(FuncName);
-}
-
SmallVector<BBPosition>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
>From 715f1b8504521ac749324db8d70c7a98d8ddb2b3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:09 +0000
Subject: [PATCH 10/24] Add some comments.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 --
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 26 +++++++++----------
3 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 801588509d340..784bf8dd8f2a9 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,9 +42,14 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
-struct BBPosition {
+// Assuming a block is split into subblocks across its callsites, this struct
+// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
+// the beginning of the block if `SubblockIndex` is zero) to the call number
+// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
+// calls in the basic block).
+struct SubblockID {
UniqueBBID BBID;
- unsigned CallsiteIndex;
+ unsigned SubblockIndex;
};
// This represents the raw input profile for one function.
@@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
- SmallVector<BBPosition> PrefetchTargets;
+ // Code prefetch targets, each specified by the ID of the subblock whose
+ // beginning must be targeted for prefetching.
+ SmallVector<SubblockID> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
- SmallVector<BBPosition>
+ // Returns the prefetch targets (identified by their containing subblocks) for
+ // function `FuncName`.
+ SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
@@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
- SmallVector<BBPosition>
+ SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4368cd4d256c9..2d6f6687fe456 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,7 +119,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 91cb6e599215d..df3d63098390b 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -1,4 +1,4 @@
-//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//===-- InsertCodePrefetch.cpp ---=========--------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,9 +7,14 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// Prefetch insertion pass implementation.
+/// Code Prefetch Insertion Pass.
//===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
+/// This pass inserts code prefetch instructions according to the prefetch
+/// directives in the basic block section profile. The target of a prefetch can
+/// be the beginning of any dynamic basic block, that is the beginning of a
+/// machine basic block, or immediately after a callsite. A global symbol will
+/// be emitted at the position of the target so it can be addressed from the
+/// prefetch instruction.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
@@ -20,15 +25,11 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
+#define DEBUG_TYPE "insert-code-prefetch"
namespace {
class InsertCodePrefetch : public MachineFunctionPass {
@@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass {
}
StringRef getPassName() const override {
- return "X86 Cide Prefetch Inserter Pass";
+ return "Code Prefetch Inserter Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- /// Identify basic blocks that need separate sections and prepare to emit them
- /// accordingly.
+ // Sets prefetch targets based on the bb section profile.
bool runOnMachineFunction(MachineFunction &MF) override;
};
@@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
>From a1e1e00d73a6fc8c7038a10cf0577823a74a66d6 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:22 +0000
Subject: [PATCH 11/24] clang-format.
---
.../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 8 ++++----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 784bf8dd8f2a9..88f3e8b620bce 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -43,10 +43,10 @@ struct BBClusterInfo {
};
// Assuming a block is split into subblocks across its callsites, this struct
-// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
-// the beginning of the block if `SubblockIndex` is zero) to the call number
-// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
-// calls in the basic block).
+// uniquely identifies the subblock in block `BBID` which starts from right
+// after call number `SubblockIndex` (or the beginning of the block if
+// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
+// the block if there are are `SubblockIndex` calls in the basic block).
struct SubblockID {
UniqueBBID BBID;
unsigned SubblockIndex;
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index df3d63098390b..29afb46a317a8 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
- false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+ true, false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
- false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+ true, false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
>From 717e6fee27b902a14f11d1acb373188a8e17445e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:16:54 +0000
Subject: [PATCH 12/24] Add comments and rename functions.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 2 +-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 ++++++++++-----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13 +++++++++----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 11 ++++++-----
4 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 88f3e8b620bce..c2bc7559b9fb4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -46,7 +46,7 @@ struct BBClusterInfo {
// uniquely identifies the subblock in block `BBID` which starts from right
// after call number `SubblockIndex` (or the beginning of the block if
// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
-// the block if there are are `SubblockIndex` calls in the basic block).
+// the block if `SubblockIndex` is the last call in the block).
struct SubblockID {
UniqueBBID BBID;
unsigned SubblockIndex;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 4be008bbf4bf1..20427954d22e4 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,7 +229,12 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- SmallVector<unsigned> PrefetchTargetIndexes;
+ /// Contains the indices of the subblocks in this block that are targets of
+ /// code prefetching. The subblock indexed `i` spans the region from after
+ /// the `i`th call (or the beginning of the block if `i==0`) to before the
+ /// `i+1`th call (or the end of the block). The prefetch target is always the
+ /// beginning of the subblock.
+ SmallVector<unsigned> PrefetchTargetSubblockIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -712,12 +717,12 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
- return PrefetchTargetIndexes;
+ const SmallVector<unsigned> &getPrefetchTargetSubblockIndexes() const {
+ return PrefetchTargetSubblockIndexes;
}
- void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
- PrefetchTargetIndexes = V;
+ void setPrefetchTargetSubblockIndexes(const SmallVector<unsigned> &V) {
+ PrefetchTargetSubblockIndexes = V;
}
/// Returns the section ID of this basic block.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2d6f6687fe456..72cf557d51e03 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+ SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
+ // Helper to emit a symbol for the prefetch target and proceed to the next
+ // one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end() ||
- NumCalls < *PrefetchTargetIt)
- return;
+ if (PrefetchTargetIt == PrefetchTargets.end()) return;
+ if (NumCalls < *PrefetchTargetIt) return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
utostr(*PrefetchTargetIt));
+ // If the function is weak-linkage it may be replaced by a strong version,
+ // in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
PrefetchTargetSymbol,
MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
@@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
+ // If the block ends with a call, we may need to emit a prefetch target
+ // at the end.
EmitPrefetchTargetSymbolIfNeeded();
// We must emit temporary symbol for the end of this basic block, if either
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 29afb46a317a8..e241ccbbee263 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -12,9 +12,9 @@
/// This pass inserts code prefetch instructions according to the prefetch
/// directives in the basic block section profile. The target of a prefetch can
/// be the beginning of any dynamic basic block, that is the beginning of a
-/// machine basic block, or immediately after a callsite. A global symbol will
-/// be emitted at the position of the target so it can be addressed from the
-/// prefetch instruction.
+/// machine basic block, or immediately after a callsite. A global symbol is
+/// emitted at the position of the target so it can be addressed from the
+/// prefetch instruction from any module.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
@@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
"BB Sections list not enabled!");
if (hasInstrProfHashMismatch(MF))
return false;
+ // Set each block's prefetch targets so AsmPrinter can emit a special symbol
+ // there.
SmallVector<BBPosition> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
@@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
continue;
- MBB.setPrefetchTargetIndexes(R->second);
+ MBB.setPrefetchTargetSubblockIndexes(R->second);
}
-
return false;
}
>From 3605b0dddd7d66df9cd5b31d9d535f61fc8729fe Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:17:04 +0000
Subject: [PATCH 13/24] clang-format.
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++-----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++++---
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 20427954d22e4..a13fcb2bb841d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,11 +229,11 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- /// Contains the subblock indices in this block that are targets of code prefetching.
- /// The subblock indexed `i` specifies that region after the `i`th call (or the
- /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
- /// end of the block). The prefetch target is always the beginning of the
- /// subblock.
+ /// Contains the subblock indices in this block that are targets of code
+ /// prefetching. The subblock indexed `i` specifies that region after the
+ /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th
+ /// callsite (or the end of the block). The prefetch target is always the
+ /// beginning of the subblock.
SmallVector<unsigned> PrefetchTargetSubblockIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 72cf557d51e03..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
+ SmallVector<unsigned> PrefetchTargets =
+ MBB.getPrefetchTargetSubblockIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end()) return;
- if (NumCalls < *PrefetchTargetIt) return;
+ if (PrefetchTargetIt == PrefetchTargets.end())
+ return;
+ if (NumCalls < *PrefetchTargetIt)
+ return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
>From 6408bd7070d47c10c92bae014e088e358f43ce99 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:34:26 +0000
Subject: [PATCH 14/24] Add optimization remarks for when prefetch targets
cannot be mapped.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++++++
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 4 ++--
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fb250c4b5308a..97234f3859ca7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() {
// If the block ends with a call, we may need to emit a prefetch target
// at the end.
EmitPrefetchTargetSymbolIfNeeded();
+ if (PrefetchTargetIt != PrefetchTargets.end()) {
+ MachineOptimizationRemarkMissed R(
+ "insert-code-prefetch", "MissingPrefetchTarget",
+ MF->getFunction().getSubprogram(), &MBB);
+ R << "failed to map "
+ << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
+ << " prefetch targets";
+ }
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5b12c85f7eeef..9319854f53289 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
-SmallVector<BBPosition>
+SmallVector<SubblockID>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
FI->second.PrefetchTargets.push_back(
- BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+ SubblockID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
@@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
-SmallVector<BBPosition>
+SmallVector<SubblockID>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return BBSPR.getPrefetchTargetsForFunction(FuncName);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index e241ccbbee263..57037fd818479 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -70,12 +70,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
return false;
// Set each block's prefetch targets so AsmPrinter can emit a special symbol
// there.
- SmallVector<BBPosition> PrefetchTargets =
+ SmallVector<SubblockID> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
for (const auto &Target : PrefetchTargets)
- PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+ PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex);
for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
>From a06cb9d59cdc6292e2e3e4a3c6955b771f9f690a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 22:01:34 +0000
Subject: [PATCH 15/24] Expand test to weak symbols.
---
.../X86/basic-block-sections-code-prefetch.ll | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 3eb91dfdabd27..35e25952aa2f8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,6 +7,9 @@
; RUN: echo 't 1 at 0' >> %t
; RUN: echo 't 1 at 1' >> %t
; RUN: echo 't 2 at 1' >> %t
+; RUN: echo 'f _Z3barv' >> %t
+; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 21 at 1' >> %t
;;
; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
@@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
ret i32 %14
}
-declare i32 @_Z3barv() #1
+define weak i32 @_Z3barv() nounwind {
+ %1 = call i32 @_Z3bazv()
+ ret i32 %1
+; CHECK: _Z3barv:
+; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+}
+
declare i32 @_Z3bazv() #1
>From ceefc56d610004bebd5515a7bd88e6a6117aee48 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 21:56:48 +0000
Subject: [PATCH 16/24] Change prefetch directive format to use , instead of @
---
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 7 ++++---
.../X86/basic-block-sections-code-prefetch.ll | 12 ++++++------
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9319854f53289..3a37982387f59 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -320,10 +320,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
// past-the-end element.
if (FI == ProgramPathAndClusterInfo.end())
continue;
- assert(Values.size() == 1);
SmallVector<StringRef, 2> PrefetchTargetStr;
- Values[0].split(PrefetchTargetStr, '@');
- assert(PrefetchTargetStr.size() == 2);
+ Values[0].split(PrefetchTargetStr, ',');
+ if (PrefetchTargetStr.size() != 2)
+ return createProfileParseError(
+ Twine("Prefetch target target expected: ") + Value);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 35e25952aa2f8..280bfef1c79b6 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -3,13 +3,13 @@
;; Specify the bb sections profile:
; RUN: echo 'v1' > %t
; RUN: echo 'f _Z3foob' >> %t
-; RUN: echo 't 0 at 0' >> %t
-; RUN: echo 't 1 at 0' >> %t
-; RUN: echo 't 1 at 1' >> %t
-; RUN: echo 't 2 at 1' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 1,0' >> %t
+; RUN: echo 't 1,1' >> %t
+; RUN: echo 't 2,1' >> %t
; RUN: echo 'f _Z3barv' >> %t
-; RUN: echo 't 0 at 0' >> %t
-; RUN: echo 't 21 at 1' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 21,1' >> %t
;;
; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
>From 639efd746fb00a06712ec7cb5afea30b233c1254 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 22:05:27 +0000
Subject: [PATCH 17/24] Fix the error.
---
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 3a37982387f59..05b6c1c3917e5 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -324,7 +324,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
Values[0].split(PrefetchTargetStr, ',');
if (PrefetchTargetStr.size() != 2)
return createProfileParseError(
- Twine("Prefetch target target expected: ") + Value);
+ Twine("Prefetch target target expected: ") + Values[0]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
>From cc4e3333f3d27683817a12ce4b3987263fb82bb7 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 06:42:17 +0000
Subject: [PATCH 18/24] Remove optimization remarks.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 --------
1 file changed, 8 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 97234f3859ca7..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,14 +2168,6 @@ void AsmPrinter::emitFunctionBody() {
// If the block ends with a call, we may need to emit a prefetch target
// at the end.
EmitPrefetchTargetSymbolIfNeeded();
- if (PrefetchTargetIt != PrefetchTargets.end()) {
- MachineOptimizationRemarkMissed R(
- "insert-code-prefetch", "MissingPrefetchTarget",
- MF->getFunction().getSubprogram(), &MBB);
- R << "failed to map "
- << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
- << " prefetch targets";
- }
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
>From 6d8bdb19d705e04cae2169a08500ab52a5bfe6cf Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 20:38:00 +0000
Subject: [PATCH 19/24] Refine and polish.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 25 +++++------
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 18 ++++----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 40 ++++++++---------
.../BasicBlockSectionsProfileReader.cpp | 45 +++++++++++++++----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++--
.../X86/basic-block-sections-code-prefetch.ll | 5 +++
6 files changed, 84 insertions(+), 57 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index c2bc7559b9fb4..20e1b7ab68bbe 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,14 +42,11 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
-// Assuming a block is split into subblocks across its callsites, this struct
-// uniquely identifies the subblock in block `BBID` which starts from right
-// after call number `SubblockIndex` (or the beginning of the block if
-// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
-// the block if `SubblockIndex` is the last call in the block).
-struct SubblockID {
+// The prefetch symbol is emitted immediately after the call of the given index
+// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1).
+struct CallsiteID {
UniqueBBID BBID;
- unsigned SubblockIndex;
+ int CallsiteIndex;
};
// This represents the raw input profile for one function.
@@ -60,9 +57,9 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
- // Code prefetch targets, specified by the subblock ID of which beginning must
- // be targetted for prefetching.
- SmallVector<SubblockID> PrefetchTargets;
+ // Code prefetch targets, each specified by the ID of the callsite immediately
+ // after which the prefetch target symbol must be emitted.
+ SmallVector<CallsiteID> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -99,9 +96,9 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
- // Returns the prefetch targets (identified by their containing subblocks) for
- // function `FuncName`.
- SmallVector<SubblockID>
+ // Returns the prefetch targets (identified by their containing callsite IDs)
+ // for function `FuncName`.
+ SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
@@ -213,7 +210,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
- SmallVector<SubblockID>
+ SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index a13fcb2bb841d..6f48e36b55660 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,12 +229,10 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- /// Contains the subblock indices in this block that are targets of code
- /// prefetching. The subblock indexed `i` specifies that region after the
- /// `i`th call (or the beginning of the block if `i==0`) to before the`i+1`th
- /// callsite (or the end of the block). The prefetch target is always the
- /// beginning of the subblock.
- SmallVector<unsigned> PrefetchTargetSubblockIndexes;
+ /// Contains the callsite indices in this block that are targets of code
+ /// prefetching. The index `i` specifies the `i`th call, with `-1`
+ /// representing the beginning of the block.
+ SmallVector<int> PrefetchTargetCallsiteIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -717,12 +715,12 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<unsigned> &getPrefetchTargetSubblockIndexes() const {
- return PrefetchTargetSubblockIndexes;
+ const SmallVector<int> &getPrefetchTargetCallsiteIndexes() const {
+ return PrefetchTargetCallsiteIndexes;
}
- void setPrefetchTargetSubblockIndexes(const SmallVector<unsigned> &V) {
- PrefetchTargetSubblockIndexes = V;
+ void setPrefetchTargetCallsiteIndexes(const SmallVector<int> &V) {
+ PrefetchTargetCallsiteIndexes = V;
}
/// Returns the section ID of this basic block.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fb250c4b5308a..1fb1bd51f6d31 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,32 +1986,31 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets =
- MBB.getPrefetchTargetSubblockIndexes();
+ SmallVector<int> PrefetchTargets =
+ MBB.getPrefetchTargetCallsiteIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
- unsigned NumCalls = 0;
+ int CurrentCallsiteIndex = -1;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end())
- return;
- if (NumCalls < *PrefetchTargetIt)
- return;
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
- utostr(MBB.getBBID()->BaseID) + Twine("_") +
- utostr(*PrefetchTargetIt));
- // If the function is weak-linkage it may be replaced by a strong version,
- // in which case the prefetch targets should also be replaced.
- OutStreamer->emitSymbolAttribute(
- PrefetchTargetSymbol,
- MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
- OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++PrefetchTargetIt;
+ if (PrefetchTargetIt != PrefetchTargets.end() &&
+ *PrefetchTargetIt == CurrentCallsiteIndex) {
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+ utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast<unsigned>(*PrefetchTargetIt + 1)));
+ // If the function is weak-linkage it may be replaced by a strong
+ // version, in which case the prefetch targets should also be replaced.
+ OutStreamer->emitSymbolAttribute(
+ PrefetchTargetSymbol,
+ MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++PrefetchTargetIt;
+ }
};
for (auto &MI : MBB) {
EmitPrefetchTargetSymbolIfNeeded();
+
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2152,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() {
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
- ++NumCalls;
+ CurrentCallsiteIndex++;
}
if (TM.Options.EmitCallGraphSection && MI.isCall())
@@ -2165,8 +2164,7 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
- // If the block ends with a call, we may need to emit a prefetch target
- // at the end.
+ // Emit the last prefetch target in case the last instruction was a call.
EmitPrefetchTargetSymbolIfNeeded();
// We must emit temporary symbol for the end of this basic block, if either
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 05b6c1c3917e5..0f440d6a53612 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
-SmallVector<SubblockID>
+SmallVector<CallsiteID>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -155,6 +155,35 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
// +-->: 5 :
// ....
// ****************************************************************************
+// This profile can also specify prefetch targets (starting with 't') which
+// instruct the compiler to emit a prefetch symbol for the given target.
+// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
+// bbid specifies the target basic block and subblock_index is a zero-based
+// index. Subblock 0 refers to the region at the beginning of the block up to
+// the first callsite. Subblock `i > 0` refers to the region immediately after
+// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
+// The prefetch target is always emitted at the beginning of the subblock.
+// This is the beginning of the basic block for `i = 0` and immediately after
+// the `i`-th call for every `i > 0`.
+//
+// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B).
+// This block is conceptually split into subblocks, with the prefetch target
+// symbol emitted at the beginning of each subblock.
+//
+// +----------------------------------+
+// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A)
+// | Instruction 1 |
+// | Instruction 2 |
+// | call_A (Callsite 0) |
+// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
+// | | before call_B)
+// | Instruction 3 |
+// | call_B (Callsite 1) |
+// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
+// | | up to the end of the block)
+// | Instruction 4 |
+// +----------------------------------+
+//
Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto FI = ProgramPathAndClusterInfo.end();
@@ -315,7 +344,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
- case 't': { // Prefetch target specifier.
+ case 't': { // Callsite target specifier.
// Skip the profile when we the profile iterator (FI) refers to the
// past-the-end element.
if (FI == ProgramPathAndClusterInfo.end())
@@ -324,16 +353,16 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
Values[0].split(PrefetchTargetStr, ',');
if (PrefetchTargetStr.size() != 2)
return createProfileParseError(
- Twine("Prefetch target target expected: ") + Values[0]);
+ Twine("Callsite target expected: ") + Values[0]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
- unsigned long long TargetCallsiteIndex;
- if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
- return createProfileParseError(Twine("unsigned integer expected: '") +
+ long long CallsiteIndex;
+ if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex))
+ return createProfileParseError(Twine("signed integer expected: '") +
PrefetchTargetStr[1]);
FI->second.PrefetchTargets.push_back(
- SubblockID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+ CallsiteID{*TargetBBID, static_cast<int>(CallsiteIndex - 1)});
continue;
}
default:
@@ -542,7 +571,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
-SmallVector<SubblockID>
+SmallVector<CallsiteID>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return BBSPR.getPrefetchTargetsForFunction(FuncName);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 57037fd818479..d4c25c22417b5 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -70,17 +70,17 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
return false;
// Set each block's prefetch targets so AsmPrinter can emit a special symbol
// there.
- SmallVector<SubblockID> PrefetchTargets =
+ SmallVector<CallsiteID> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
- DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+ DenseMap<UniqueBBID, SmallVector<int>> PrefetchTargetsByBBID;
for (const auto &Target : PrefetchTargets)
- PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex);
+ PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
continue;
- MBB.setPrefetchTargetSubblockIndexes(R->second);
+ MBB.setPrefetchTargetCallsiteIndexes(R->second);
}
return false;
}
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 280bfef1c79b6..8e23a30e273b5 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,6 +7,7 @@
; RUN: echo 't 1,0' >> %t
; RUN: echo 't 1,1' >> %t
; RUN: echo 't 2,1' >> %t
+; RUN: echo 't 4,0' >> %t
; RUN: echo 'f _Z3barv' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 21,1' >> %t
@@ -48,6 +49,10 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
13: ; preds = %11, %9
%14 = load i32, ptr %2, align 4
ret i32 %14
+; CHECK: .LBB0_3:
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_4_0:
+
}
define weak i32 @_Z3barv() nounwind {
>From d93a5ecfd7cf0603f6f1c30a6b37d487251f5c88 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 20:38:13 +0000
Subject: [PATCH 20/24] clang-format.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++----
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 11 ++++++-----
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1fb1bd51f6d31..f87896c03536a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,18 +1986,18 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<int> PrefetchTargets =
- MBB.getPrefetchTargetCallsiteIndexes();
+ SmallVector<int> PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
int CurrentCallsiteIndex = -1;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
if (PrefetchTargetIt != PrefetchTargets.end() &&
- *PrefetchTargetIt == CurrentCallsiteIndex) {
+ *PrefetchTargetIt == CurrentCallsiteIndex) {
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
- utostr(MBB.getBBID()->BaseID) + Twine("_") + utostr(static_cast<unsigned>(*PrefetchTargetIt + 1)));
+ utostr(MBB.getBBID()->BaseID) + Twine("_") +
+ utostr(static_cast<unsigned>(*PrefetchTargetIt + 1)));
// If the function is weak-linkage it may be replaced by a strong
// version, in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 0f440d6a53612..708080d7bbf0a 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -166,9 +166,10 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
// This is the beginning of the basic block for `i = 0` and immediately after
// the `i`-th call for every `i > 0`.
//
-// Example: A basic block in function "foo" with BBID 10 and two call instructions (call_A, call_B).
-// This block is conceptually split into subblocks, with the prefetch target
-// symbol emitted at the beginning of each subblock.
+// Example: A basic block in function "foo" with BBID 10 and two call
+// instructions (call_A, call_B). This block is conceptually split into
+// subblocks, with the prefetch target symbol emitted at the beginning of each
+// subblock.
//
// +----------------------------------+
// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A)
@@ -352,8 +353,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
SmallVector<StringRef, 2> PrefetchTargetStr;
Values[0].split(PrefetchTargetStr, ',');
if (PrefetchTargetStr.size() != 2)
- return createProfileParseError(
- Twine("Callsite target expected: ") + Values[0]);
+ return createProfileParseError(Twine("Callsite target expected: ") +
+ Values[0]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
>From 7cb4f6be1f9a0dce4592e6db859fb84773bb5e06 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 21:00:00 +0000
Subject: [PATCH 21/24] Change to using unsigned values for CallsiteIndex
---
.../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 7 ++++---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 11 ++++++-----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++----
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 7 ++++++-
5 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 20e1b7ab68bbe..161a810298d69 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,11 +42,12 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
-// The prefetch symbol is emitted immediately after the call of the given index
-// in block `BBID` (or at the beginning of the block if CallsiteIndex is -1).
+// The prefetch symbol is emitted immediately after the call of the given index,
+// in block `BBID` (First call has an index of 1). Zero callsite index means the
+// start of the block.
struct CallsiteID {
UniqueBBID BBID;
- int CallsiteIndex;
+ unsigned CallsiteIndex;
};
// This represents the raw input profile for one function.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 6f48e36b55660..48248bd0461bc 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -230,9 +230,10 @@ class MachineBasicBlock
mutable MCSymbol *CachedMCSymbol = nullptr;
/// Contains the callsite indices in this block that are targets of code
- /// prefetching. The index `i` specifies the `i`th call, with `-1`
- /// representing the beginning of the block.
- SmallVector<int> PrefetchTargetCallsiteIndexes;
+ /// prefetching. The index `i` specifies the `i`th call, with zero
+ /// representing the beginning of the block and `1` representing the first call.
+ /// Must be in ascending order and without duplicates.
+ SmallVector<unsigned> PrefetchTargetCallsiteIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -715,11 +716,11 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<int> &getPrefetchTargetCallsiteIndexes() const {
+ const SmallVector<unsigned> &getPrefetchTargetCallsiteIndexes() const {
return PrefetchTargetCallsiteIndexes;
}
- void setPrefetchTargetCallsiteIndexes(const SmallVector<int> &V) {
+ void setPrefetchTargetCallsiteIndexes(const SmallVector<unsigned> &V) {
PrefetchTargetCallsiteIndexes = V;
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index f87896c03536a..20e3c63b163c0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,14 +1986,14 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<int> PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes();
+ SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
- int CurrentCallsiteIndex = -1;
+ unsigned LastCallsiteIndex = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
if (PrefetchTargetIt != PrefetchTargets.end() &&
- *PrefetchTargetIt == CurrentCallsiteIndex) {
+ *PrefetchTargetIt == LastCallsiteIndex) {
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
@@ -2151,7 +2151,7 @@ void AsmPrinter::emitFunctionBody() {
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
- CurrentCallsiteIndex++;
+ LastCallsiteIndex++;
}
if (TM.Options.EmitCallGraphSection && MI.isCall())
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 708080d7bbf0a..8762f982f72ea 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -358,12 +358,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
- long long CallsiteIndex;
- if (getAsSignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex))
+ unsigned long long CallsiteIndex;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, CallsiteIndex))
return createProfileParseError(Twine("signed integer expected: '") +
PrefetchTargetStr[1]);
FI->second.PrefetchTargets.push_back(
- CallsiteID{*TargetBBID, static_cast<int>(CallsiteIndex - 1)});
+ CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
continue;
}
default:
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index d4c25c22417b5..5c3055f4ca1ea 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -73,9 +73,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
SmallVector<CallsiteID> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
- DenseMap<UniqueBBID, SmallVector<int>> PrefetchTargetsByBBID;
+ DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
for (const auto &Target : PrefetchTargets)
PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+ // Sort and uniquify the callsite indices for every block.
+ for (auto &[K, V]: PrefetchTargetsByBBID) {
+ llvm::sort(V);
+ V.erase(llvm::unique(V), V.end());
+ }
for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
>From 9fdf7d0b90dcba196bfd3e5de62d1b901a797939 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 21:00:12 +0000
Subject: [PATCH 22/24] clang-format.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++-
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 20e3c63b163c0..d9b2450cb8a6f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,7 +1986,8 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetCallsiteIndexes();
+ SmallVector<unsigned> PrefetchTargets =
+ MBB.getPrefetchTargetCallsiteIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned LastCallsiteIndex = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 5c3055f4ca1ea..44864cbc99c52 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -77,7 +77,7 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
for (const auto &Target : PrefetchTargets)
PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
// Sort and uniquify the callsite indices for every block.
- for (auto &[K, V]: PrefetchTargetsByBBID) {
+ for (auto &[K, V] : PrefetchTargetsByBBID) {
llvm::sort(V);
V.erase(llvm::unique(V), V.end());
}
>From 0c17e45c8973ee66126f326ed85b7788319727a0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 21:04:07 +0000
Subject: [PATCH 23/24] Fix AsmPrinter.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d9b2450cb8a6f..a48d333b538ec 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1998,7 +1998,7 @@ void AsmPrinter::emitFunctionBody() {
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
- utostr(static_cast<unsigned>(*PrefetchTargetIt + 1)));
+ utostr(static_cast<unsigned>(*PrefetchTargetIt)));
// If the function is weak-linkage it may be replaced by a strong
// version, in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
>From 500b53670976838a277ac617013e8342ae98c65b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 21:47:28 +0000
Subject: [PATCH 24/24] use -O0
---
.../CodeGen/X86/basic-block-sections-code-prefetch.ll | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 8e23a30e273b5..e5778b4b77fc2 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,12 +7,12 @@
; RUN: echo 't 1,0' >> %t
; RUN: echo 't 1,1' >> %t
; RUN: echo 't 2,1' >> %t
-; RUN: echo 't 4,0' >> %t
+; RUN: echo 't 3,0' >> %t
; RUN: echo 'f _Z3barv' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 21,1' >> %t
;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O0 | FileCheck %s
define i32 @_Z3foob(i1 zeroext %0) nounwind {
%2 = alloca i32, align 4
@@ -50,9 +50,8 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
%14 = load i32, ptr %2, align 4
ret i32 %14
; CHECK: .LBB0_3:
-; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_4_0
-; CHECK-NEXT: __llvm_prefetch_target__Z3foob_4_0:
-
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_3_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_3_0:
}
define weak i32 @_Z3barv() nounwind {
More information about the llvm-commits
mailing list