[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 18 22:40:29 PST 2025
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324
>From c4b73ba7594e81e2c251d88da89fecf4cc7f153f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 01/23] feat(AsmPrinter): Add support for emitting prefetch
target symbols
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 50 ++++++++++++++++++-
.../BasicBlockSectionsProfileReader.cpp | 44 ++++++++++++++++
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++
5 files changed, 174 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index ee1f28377f7e4..5b230db30aec4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
+struct BBPosition {
+ UniqueBBID BBID;
+ unsigned BBOffset;
+};
+
+struct PrefetchHint {
+ BBPosition SitePosition;
+ StringRef TargetFunctionName;
+ BBPosition TargetPosition;
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
+ SmallVector<PrefetchHint> PrefetchHints;
+ DenseSet<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
- // Edge counts for each edge, stored as a nested map.
+ // Edge counts for each edge.
DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
// Hash for each basic block. The Hashes are stored for every original block
// (not cloned blocks), hence the map key being unsigned instead of
@@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
+// Provides DenseMapInfo BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+ static inline BBPosition getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline BBPosition getTombstoneKey() {
+ return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const BBPosition &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..e6c6bc26ae9e6 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -229,6 +237,8 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
+ mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -710,6 +720,14 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
+ const SmallVector<unsigned> &getPrefetchTargets() const {
+ return PrefetchTargets;
+ }
+
+ void setPrefetchTargets(const SmallVector<unsigned> &V) {
+ PrefetchTargets = V;
+ }
+
/// Returns the section ID of this basic block.
MBBSectionID getSectionID() const { return SectionID; }
@@ -1275,6 +1293,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3aa245b7f3f1e..a204bba5789a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> InsertNoopsForPrefetch(
+ "insert-noops-for-prefetch",
+ cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+ cl::Hidden);
+
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
+ int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
+ unsigned NumCallsInBlock = 0;
for (auto &MI : MBB) {
+ if (NextPrefetchTargetIndex != -1 &&
+ NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
+
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+ Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().isWeakForLinker()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
+ errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ }
+ // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
+ while (NextPrefetchTargetIndex != -1) {
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+ Twine("_") +
+ utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+ if (MF->getFunction().hasWeakLinkage()) {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
+ } else {
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+ }
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++NextPrefetchTargetIndex;
+ if (NextPrefetchTargetIndex >=
+ static_cast<int>(MBB.getPrefetchTargets().size()))
+ NextPrefetchTargetIndex = -1;
+ }
+
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c234c0f1b0b34..de146e172c174 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+ .PrefetchTargets;
+}
+
// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
@@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 't': { // Prefetch target specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ assert(Values.size() == 1);
+ SmallVector<StringRef, 2> PrefetchTargetStr;
+ Values[0].split(PrefetchTargetStr, '@');
+ assert(PrefetchTargetStr.size() == 2);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetBBOffset;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[1]);
+ FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
@@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+ if (CallInstSymbols.size() <= CallInstNumber) {
+ const MachineFunction *MF = getParent();
+ MCContext &Ctx = MF->getContext();
+ CallInstSymbols.resize(CallInstNumber + 1);
+ CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
+ "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+ Twine(CallInstNumber),
+ /*AlwaysEmit=*/true);
+ }
+ return CallInstSymbols[CallInstNumber];
+}
+
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
>From 790f77925bcee9aa03ef0ddbf466132d59e5b933 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 11 Nov 2025 21:30:47 +0000
Subject: [PATCH 02/23] feat: Add prefetch-profile.txt for testing
---
build-release/prefetch-profile.txt | 3 +++
1 file changed, 3 insertions(+)
create mode 100644 build-release/prefetch-profile.txt
diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
new file mode 100644
index 0000000000000..294f57a46920b
--- /dev/null
+++ b/build-release/prefetch-profile.txt
@@ -0,0 +1,3 @@
+v1
+f f
+t 0 at 1
>From 8ac920dfd4d32a685154242f791e23cfafafcf5a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:18 +0000
Subject: [PATCH 03/23] Everything else.
---
build-release/prefetch-profile.txt | 3 --
.../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++----------------
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 12 +++++++-
.../BasicBlockSectionsProfileReader.cpp | 10 +++----
4 files changed, 20 insertions(+), 34 deletions(-)
delete mode 100644 build-release/prefetch-profile.txt
diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
deleted file mode 100644
index 294f57a46920b..0000000000000
--- a/build-release/prefetch-profile.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-v1
-f f
-t 0 at 1
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 5b230db30aec4..fbf9b89754cd7 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -44,7 +44,7 @@ struct BBClusterInfo {
struct BBPosition {
UniqueBBID BBID;
- unsigned BBOffset;
+ unsigned CallsiteIndex;
};
struct PrefetchHint {
@@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo {
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
SmallVector<PrefetchHint> PrefetchHints;
- DenseSet<BBPosition> PrefetchTargets;
+ SmallVector<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
-// Provides DenseMapInfo BBPosition.
-template <> struct DenseMapInfo<BBPosition> {
- static inline BBPosition getEmptyKey() {
- return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
- DenseMapInfo<unsigned>::getEmptyKey()};
- }
- static inline BBPosition getTombstoneKey() {
- return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
- DenseMapInfo<unsigned>::getTombstoneKey()};
- }
- static unsigned getHashValue(const BBPosition &Val) {
- std::pair<unsigned, unsigned> PairVal = std::make_pair(
- DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
- return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
- }
- static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
- return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
- DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
- }
-};
-
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a204bba5789a8..90445fedd5db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
if (EmitBBHash)
AU.addRequired<MachineBlockHashInfo>();
+ AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- for (auto &MBB : *MF) {
+ DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
+ if (auto *BBSPRPass =
+ getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
+ FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
+}
+
+ for (auto &MBB : *MF) {
+
+ SmallVector<unsigned> BBPrefetchTargets;
+ = FunctionPrefetchTargets.lookup(MBB.g);
int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index de146e172c174..c4784a6039c09 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
}
-DenseSet<BBPosition>
+SmallVector<BBPosition>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
- unsigned long long TargetBBOffset;
- if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+ unsigned long long TargetCallsiteIndex;
+ if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
- FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+ FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
@@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
return BBSPR.getPrefetchHintsForFunction(FuncName);
}
-DenseSet<BBPosition>
+SmallVector<BBPosition>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return BBSPR.getPrefetchTargetsForFunction(FuncName);
>From d0ab0b5ab472f7de0da5648391d15239c73b2888 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:27 +0000
Subject: [PATCH 04/23] Add test.
---
llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++
1 file changed, 42 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
new file mode 100644
index 0000000000000..979db7942ff2c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -0,0 +1,42 @@
+;; Check that a prefetch target directive ('t') in the basic block sections
+;; profile causes a prefetch target symbol to be emitted for the function.
+;;
+;; Specify the bb sections profile:
+; RUN: echo 'v1' > %t
+; RUN: echo 'f _Z3foob' >> %t
+; RUN: echo 't 0@0' >> %t
+;;
+; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck
+
+define i32 @_Z3foob(i1 zeroext %0) nounwind {
+ %2 = alloca i32, align 4
+ %3 = alloca i8, align 1
+ %4 = zext i1 %0 to i8
+ store i8 %4, ptr %3, align 1
+ %5 = load i8, ptr %3, align 1
+ %6 = trunc i8 %5 to i1
+ %7 = zext i1 %6 to i32
+ %8 = icmp sgt i32 %7, 0
+ br i1 %8, label %9, label %11
+
+9: ; preds = %1
+ %10 = call i32 @_Z3barv()
+ store i32 %10, ptr %2, align 4
+ br label %13
+
+11: ; preds = %1
+ %12 = call i32 @_Z3bazv()
+ store i32 %12, ptr %2, align 4
+ br label %13
+
+13: ; preds = %11, %9
+ %14 = load i32, ptr %2, align 4
+ ret i32 %14
+}
+
+declare i32 @_Z3barv() #1
+declare i32 @_Z3bazv() #1
+
+
+; CHECK: _Z3foob
+; CHECK: llvm_prefetch_target
>From 988fab7212cbeadfb021917f35c3da6b97f2903e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:17 +0000
Subject: [PATCH 05/23] Fix everything
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 4 +-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +----
llvm/include/llvm/CodeGen/Passes.h | 2 +
llvm/include/llvm/InitializePasses.h | 1 +
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 73 ++++----------
llvm/lib/CodeGen/CMakeLists.txt | 1 +
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 96 +++++++++++++++++++
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 ---
llvm/lib/CodeGen/TargetPassConfig.cpp | 1 +
9 files changed, 127 insertions(+), 88 deletions(-)
create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index fbf9b89754cd7..1fd904d64ab9d 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index e6c6bc26ae9e6..4be008bbf4bf1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
-struct PrefetchTarget {
- StringRef TargetFunction;
- UniqueBBID TargetBBID;
- unsigned TargetBBOffset;
-};
-
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -219,8 +213,6 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
- SmallVector<unsigned> PrefetchTargets;
-
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -237,7 +229,7 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+ SmallVector<unsigned> PrefetchTargetIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -720,12 +712,12 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<unsigned> &getPrefetchTargets() const {
- return PrefetchTargets;
+ const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
+ return PrefetchTargetIndexes;
}
- void setPrefetchTargets(const SmallVector<unsigned> &V) {
- PrefetchTargets = V;
+ void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
+ PrefetchTargetIndexes = V;
}
/// Returns the section ID of this basic block.
@@ -1293,12 +1285,6 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
- MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
- const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
- return CallInstSymbols;
- }
-
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..f148d050a5772 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
+LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
+
/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 10a4d8525a9e8..35d5ab14dc226 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
LLVM_ABI void
initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 90445fedd5db3..933fe6f7d177f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -179,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> InsertNoopsForPrefetch(
- "insert-noops-for-prefetch",
- cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
- cl::Hidden);
-
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
if (EmitBBHash)
AU.addRequired<MachineBlockHashInfo>();
- AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
- if (auto *BBSPRPass =
- getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
- FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
-}
for (auto &MBB : *MF) {
-
- SmallVector<unsigned> BBPrefetchTargets;
- = FunctionPrefetchTargets.lookup(MBB.g);
- int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- unsigned NumCallsInBlock = 0;
- for (auto &MI : MBB) {
- if (NextPrefetchTargetIndex != -1 &&
- NumCallsInBlock >= MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+ SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+ auto PrefetchTargetIt = PrefetchTargets.begin();
+ unsigned NumCalls = 0;
+ auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
+ if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+ return;
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
Twine("_") +
- utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
- if (MF->getFunction().isWeakForLinker()) {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
- errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
- } else {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
- errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
- }
- // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
- // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+ utostr(*PrefetchTargetIt));
+ OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++NextPrefetchTargetIndex;
- if (NextPrefetchTargetIndex >=
- static_cast<int>(MBB.getPrefetchTargets().size()))
- NextPrefetchTargetIndex = -1;
- }
+ ++PrefetchTargetIt;
+ };
+
+ for (auto &MI : MBB) {
+ EmitPrefetchTargetSymbolIfNeeded();
// Print the assembly for the instruction.
if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
!MI.isDebugInstr()) {
@@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() {
break;
}
- if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
+ if (MI.isCall()) {
+ if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+ ++NumCalls;
+ }
if (TM.Options.EmitCallGraphSection && MI.isCall())
handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
@@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
- while (NextPrefetchTargetIndex != -1) {
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
- Twine("_") +
- utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
- if (MF->getFunction().hasWeakLinkage()) {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
- } else {
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
- }
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
- OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++NextPrefetchTargetIndex;
- if (NextPrefetchTargetIndex >=
- static_cast<int>(MBB.getPrefetchTargets().size()))
- NextPrefetchTargetIndex = -1;
- }
-
+ EmitPrefetchTargetSymbolIfNeeded();
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1cf0b4964760b..fcf28247179ca 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
IndirectBrExpandPass.cpp
InitUndef.cpp
InlineSpiller.cpp
+ InsertCodePrefetch.cpp
InterferenceCache.cpp
InterleavedAccessPass.cpp
InterleavedLoadCombinePass.cpp
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
new file mode 100644
index 0000000000000..7cb52302ac7db
--- /dev/null
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -0,0 +1,96 @@
+//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+namespace {
+class InsertCodePrefetch : public MachineFunctionPass {
+public:
+ static char ID;
+
+ InsertCodePrefetch() : MachineFunctionPass(ID) {
+ initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Identify basic blocks that need separate sections and prepare to emit them
+ /// accordingly.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
+
+char InsertCodePrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ InsertCodePrefetch, DEBUG_TYPE,
+ "Reads prefetch", true,
+ false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+ InsertCodePrefetch, DEBUG_TYPE,
+ "Reads prefetch", true,
+ false)
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+ SmallVector<BBPosition> PrefetchTargets =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchTargetsForFunction(MF.getName());
+ DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+ for (const auto &Target: PrefetchTargets)
+ PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+ for (auto &MBB: MF) {
+ auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
+ if (R == PrefetchTargetsByBBID.end()) continue;
+ MBB.setPrefetchTargetIndexes(R->second);
+ }
+
+ return false;
+}
+
+void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createInsertCodePrefetchPass() {
+ return new InsertCodePrefetch();
+}
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
- if (CallInstSymbols.size() <= CallInstNumber) {
- const MachineFunction *MF = getParent();
- MCContext &Ctx = MF->getContext();
- CallInstSymbols.resize(CallInstNumber + 1);
- CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
- "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
- Twine(CallInstNumber),
- /*AlwaysEmit=*/true);
- }
- return CallInstSymbols[CallInstNumber];
-}
-
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 10b723887b21f..0434a4f1cd94d 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1293,6 +1293,7 @@ void TargetPassConfig::addMachinePasses() {
addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
TM->getBBSectionsFuncListBuf()));
addPass(llvm::createBasicBlockPathCloningPass());
+ addPass(llvm::createInsertCodePrefetchPass());
}
addPass(llvm::createBasicBlockSectionsPass());
}
>From eef799bb89dd857065442c5b90b498183a5da59a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:36 +0000
Subject: [PATCH 06/23] clang-format.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 6 +++--
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 25 +++++++++++--------
.../BasicBlockSectionsProfileReader.cpp | 3 ++-
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 23 ++++++++---------
4 files changed, 31 insertions(+), 26 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 1fd904d64ab9d..2b8ee578cd917 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition>
+ getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<BBPosition>
+ getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 933fe6f7d177f..4368cd4d256c9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
- for (auto &MBB : *MF) {
+ for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() {
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+ if (PrefetchTargetIt == PrefetchTargets.end() ||
+ NumCalls < *PrefetchTargetIt)
return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
- Twine("_") +
- utostr(*PrefetchTargetIt));
- OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
- OutStreamer->emitLabel(PrefetchTargetSymbol);
- ++PrefetchTargetIt;
+ Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+ utostr(MBB.getBBID()->BaseID) + Twine("_") +
+ utostr(*PrefetchTargetIt));
+ OutStreamer->emitSymbolAttribute(
+ PrefetchTargetSymbol,
+ MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+ OutStreamer->emitLabel(PrefetchTargetSymbol);
+ ++PrefetchTargetIt;
};
for (auto &MI : MBB) {
@@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
@@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() {
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
- OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+ OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
++NumCalls;
}
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c4784a6039c09..9b54dd6803cf6 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
- FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+ FI->second.PrefetchTargets.push_back(
+ BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 7cb52302ac7db..91cb6e599215d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass {
initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
}
- StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+ StringRef getPassName() const override {
+ return "X86 Cide Prefetch Inserter Pass";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(
- InsertCodePrefetch, DEBUG_TYPE,
- "Reads prefetch", true,
- false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+ false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
- InsertCodePrefetch, DEBUG_TYPE,
- "Reads prefetch", true,
- false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+ false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
@@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
- for (const auto &Target: PrefetchTargets)
+ for (const auto &Target : PrefetchTargets)
PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
- for (auto &MBB: MF) {
+ for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
- if (R == PrefetchTargetsByBBID.end()) continue;
+ if (R == PrefetchTargetsByBBID.end())
+ continue;
MBB.setPrefetchTargetIndexes(R->second);
}
>From 1d847c8971bec64238503b0ce39e6545c45ee986 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:46:13 +0000
Subject: [PATCH 07/23] Fix the prefetch test.
---
llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
index 979db7942ff2c..3eb91dfdabd27 100644
--- a/llvm/test/CodeGen/X86/prefetch-symbols.ll
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -1,12 +1,14 @@
-;; Check that specifying the function in the basic block sections profile
-;; without any other directives is a noop.
+;; Check prefetch directives in basic block section profiles.
;;
;; Specify the bb sections profile:
; RUN: echo 'v1' > %t
; RUN: echo 'f _Z3foob' >> %t
; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 1 at 0' >> %t
+; RUN: echo 't 1 at 1' >> %t
+; RUN: echo 't 2 at 1' >> %t
;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
define i32 @_Z3foob(i1 zeroext %0) nounwind {
%2 = alloca i32, align 4
@@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
%7 = zext i1 %6 to i32
%8 = icmp sgt i32 %7, 0
br i1 %8, label %9, label %11
+; CHECK: _Z3foob:
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0:
9: ; preds = %1
%10 = call i32 @_Z3barv()
store i32 %10, ptr %2, align 4
br label %13
+; CHECK: .globl __llvm_prefetch_target__Z3foob_1_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0:
+; CHECK-NEXT: callq _Z3barv at PLT
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_1_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1:
11: ; preds = %1
%12 = call i32 @_Z3bazv()
store i32 %12, ptr %2, align 4
br label %13
+; CHECK: callq _Z3bazv at PLT
+; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
13: ; preds = %11, %9
%14 = load i32, ptr %2, align 4
@@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
declare i32 @_Z3barv() #1
declare i32 @_Z3bazv() #1
-
-
-; CHECK: _Z3foob
-; CHECK: llvm_prefetch_target
>From 36c8dc037342862d5445ce6ea3990f48d631c573 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:48:53 +0000
Subject: [PATCH 08/23] Rename the test.
---
...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%)
diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
similarity index 100%
rename from llvm/test/CodeGen/X86/prefetch-symbols.ll
rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
>From f598b97ee05405e4c8f4c1d2f0fded5c05efd09e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 06:59:43 +0000
Subject: [PATCH 09/23] Remove unrelated changes.
---
.../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 12 ------------
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------
2 files changed, 24 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 2b8ee578cd917..801588509d340 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -47,12 +47,6 @@ struct BBPosition {
unsigned CallsiteIndex;
};
-struct PrefetchHint {
- BBPosition SitePosition;
- StringRef TargetFunctionName;
- BBPosition TargetPosition;
-};
-
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
- SmallVector<PrefetchHint> PrefetchHints;
SmallVector<BBPosition> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
@@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
- SmallVector<PrefetchHint>
- getPrefetchHintsForFunction(StringRef FuncName) const;
-
SmallVector<BBPosition>
getPrefetchTargetsForFunction(StringRef FuncName) const;
@@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
- SmallVector<PrefetchHint>
- getPrefetchHintsForFunction(StringRef FuncName) const;
SmallVector<BBPosition>
getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9b54dd6803cf6..5b12c85f7eeef 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
- StringRef FuncName) const {
- return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
-}
-
SmallVector<BBPosition>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
@@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
- StringRef FuncName) const {
- return BBSPR.getPrefetchHintsForFunction(FuncName);
-}
-
SmallVector<BBPosition>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
>From c7ca7c55e5a5881006318e759defd049e31d6be2 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:09 +0000
Subject: [PATCH 10/23] Add some comments.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 --
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 26 +++++++++----------
3 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 801588509d340..784bf8dd8f2a9 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,9 +42,14 @@ struct BBClusterInfo {
unsigned PositionInCluster;
};
-struct BBPosition {
+// Assuming a block is split into subblocks across its callsites, this struct
+// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
+// the beginning of the block if `SubblockIndex` is zero) to the call number
+// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
+// calls in the basic block).
+struct SubblockID {
UniqueBBID BBID;
- unsigned CallsiteIndex;
+ unsigned SubblockIndex;
};
// This represents the raw input profile for one function.
@@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo {
// the edge a -> b (a is not cloned). The index of the path in this vector
// determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
SmallVector<SmallVector<unsigned>> ClonePaths;
- SmallVector<BBPosition> PrefetchTargets;
+ // Code prefetch targets, each specified by the ID of the subblock whose
+ // beginning must be targeted for prefetching.
+ SmallVector<SubblockID> PrefetchTargets;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &SinkBBID) const;
- SmallVector<BBPosition>
+ // Returns the prefetch targets (identified by their containing subblocks) for
+ // function `FuncName`.
+ SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
private:
@@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
- SmallVector<BBPosition>
+ SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
// Initializes the FunctionNameToDIFilename map for the current module and
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4368cd4d256c9..2d6f6687fe456 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,7 +119,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-
for (auto &MBB : *MF) {
// Print a label for the basic block.
emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 91cb6e599215d..df3d63098390b 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -1,4 +1,4 @@
-//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//===-- InsertCodePrefetch.cpp ---=========--------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,9 +7,14 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// Prefetch insertion pass implementation.
+/// Code Prefetch Insertion Pass.
//===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
+/// This pass inserts code prefetch instructions according to the prefetch
+/// directives in the basic block section profile. The target of a prefetch can
+/// be the beginning of any dynamic basic block, that is the beginning of a
+/// machine basic block, or immediately after a callsite. A global symbol will
+/// be emitted at the position of the target so it can be addressed from the
+/// prefetch instruction.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
@@ -20,15 +25,11 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
+#define DEBUG_TYPE "insert-code-prefetch"
namespace {
class InsertCodePrefetch : public MachineFunctionPass {
@@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass {
}
StringRef getPassName() const override {
- return "X86 Cide Prefetch Inserter Pass";
+ return "Code Prefetch Inserter Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
- /// Identify basic blocks that need separate sections and prepare to emit them
- /// accordingly.
+ // Sets prefetch targets based on the bb section profile.
bool runOnMachineFunction(MachineFunction &MF) override;
};
@@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
>From 5f76c60b18112ae00b7755b32ab64965e1844f13 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:22 +0000
Subject: [PATCH 11/23] clang-format.
---
.../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 8 ++++----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++----
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 784bf8dd8f2a9..88f3e8b620bce 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -43,10 +43,10 @@ struct BBClusterInfo {
};
// Assuming a block is split into subblocks across its callsites, this struct
-// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
-// the beginning of the block if `SubblockIndex` is zero) to the call number
-// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
-// calls in the basic block).
+// uniquely identifies the subblock in block `BBID` which starts from right
+// after call number `SubblockIndex` (or the beginning of the block if
+// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
+// the block if there are are `SubblockIndex` calls in the basic block).
struct SubblockID {
UniqueBBID BBID;
unsigned SubblockIndex;
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index df3d63098390b..29afb46a317a8 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
//===----------------------------------------------------------------------===//
char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
- false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+ true, false)
INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
- false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+ true, false)
bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
>From 2ae595282ff040bbdc7e8a17beee59734f0a5f89 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:16:54 +0000
Subject: [PATCH 12/23] Add comments and rename functions.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 2 +-
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 15 ++++++++++-----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13 +++++++++----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 11 ++++++-----
4 files changed, 26 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 88f3e8b620bce..c2bc7559b9fb4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -46,7 +46,7 @@ struct BBClusterInfo {
// uniquely identifies the subblock in block `BBID` which starts from right
// after call number `SubblockIndex` (or the beginning of the block if
// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
-// the block if there are are `SubblockIndex` calls in the basic block).
+// the block if `SubblockIndex` is the last call in the block).
struct SubblockID {
UniqueBBID BBID;
unsigned SubblockIndex;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 4be008bbf4bf1..20427954d22e4 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,7 +229,12 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- SmallVector<unsigned> PrefetchTargetIndexes;
+ /// Contains the subblock indices in this block that are targets of code prefetching.
+ /// The subblock indexed `i` specifies the region after the `i`th call (or the
+ /// beginning of the block if `i==0`) to before the `i+1`th callsite (or the
+ /// end of the block). The prefetch target is always the beginning of the
+ /// subblock.
+ SmallVector<unsigned> PrefetchTargetSubblockIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -712,12 +717,12 @@ class MachineBasicBlock
std::optional<UniqueBBID> getBBID() const { return BBID; }
- const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
- return PrefetchTargetIndexes;
+ const SmallVector<unsigned> &getPrefetchTargetSubblockIndexes() const {
+ return PrefetchTargetSubblockIndexes;
}
- void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
- PrefetchTargetIndexes = V;
+ void setPrefetchTargetSubblockIndexes(const SmallVector<unsigned> &V) {
+ PrefetchTargetSubblockIndexes = V;
}
/// Returns the section ID of this basic block.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2d6f6687fe456..72cf557d51e03 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+ SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
+ // Helper to emit a symbol for the prefetch target and proceed to the next
+ // one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end() ||
- NumCalls < *PrefetchTargetIt)
- return;
+ if (PrefetchTargetIt == PrefetchTargets.end()) return;
+ if (NumCalls < *PrefetchTargetIt) return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
utostr(*PrefetchTargetIt));
+ // If the function is weak-linkage it may be replaced by a strong version,
+ // in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
PrefetchTargetSymbol,
MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
@@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() {
for (auto &Handler : Handlers)
Handler->endInstruction();
}
+ // If the block ends with a call, we may need to emit a prefetch target
+ // at the end.
EmitPrefetchTargetSymbolIfNeeded();
// We must emit temporary symbol for the end of this basic block, if either
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 29afb46a317a8..e241ccbbee263 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -12,9 +12,9 @@
/// This pass inserts code prefetch instructions according to the prefetch
/// directives in the basic block section profile. The target of a prefetch can
/// be the beginning of any dynamic basic block, that is the beginning of a
-/// machine basic block, or immediately after a callsite. A global symbol will
-/// be emitted at the position of the target so it can be addressed from the
-/// prefetch instruction.
+/// machine basic block, or immediately after a callsite. A global symbol is
+/// emitted at the position of the target so it can be addressed from the
+/// prefetch instruction from any module.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
@@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
"BB Sections list not enabled!");
if (hasInstrProfHashMismatch(MF))
return false;
+ // Set each block's prefetch targets so AsmPrinter can emit a special symbol
+ // there.
SmallVector<BBPosition> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
@@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
continue;
- MBB.setPrefetchTargetIndexes(R->second);
+ MBB.setPrefetchTargetSubblockIndexes(R->second);
}
-
return false;
}
>From 606c5666e67c6da2d23775884ebb5bb0b0d0c7e0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:17:04 +0000
Subject: [PATCH 13/23] clang-format.
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++-----
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++++---
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 20427954d22e4..a13fcb2bb841d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,11 +229,11 @@ class MachineBasicBlock
/// is only computed once and is cached.
mutable MCSymbol *CachedMCSymbol = nullptr;
- /// Contains the subblock indices in this block that are targets of code prefetching.
- /// The subblock indexed `i` specifies that region after the `i`th call (or the
- /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
- /// end of the block). The prefetch target is always the beginning of the
- /// subblock.
+ /// Contains the subblock indices in this block that are targets of code
+ /// prefetching. The subblock indexed `i` specifies the region after the
+ /// `i`th call (or the beginning of the block if `i==0`) to before the `i+1`th
+ /// callsite (or the end of the block). The prefetch target is always the
+ /// beginning of the subblock.
SmallVector<unsigned> PrefetchTargetSubblockIndexes;
/// Cached MCSymbol for this block (used if IsEHContTarget).
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 72cf557d51e03..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() {
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
- SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
+ SmallVector<unsigned> PrefetchTargets =
+ MBB.getPrefetchTargetSubblockIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
unsigned NumCalls = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
- if (PrefetchTargetIt == PrefetchTargets.end()) return;
- if (NumCalls < *PrefetchTargetIt) return;
+ if (PrefetchTargetIt == PrefetchTargets.end())
+ return;
+ if (NumCalls < *PrefetchTargetIt)
+ return;
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
utostr(MBB.getBBID()->BaseID) + Twine("_") +
>From 52b0309f15b1dc01de80f8763e859a79a39d0f7d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:34:26 +0000
Subject: [PATCH 14/23] Add optimization remarks for when prefetch targets
cannot be mapped.
---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 ++++++++
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 4 ++--
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fb250c4b5308a..97234f3859ca7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() {
// If the block ends with a call, we may need to emit a prefetch target
// at the end.
EmitPrefetchTargetSymbolIfNeeded();
+ if (PrefetchTargetIt != PrefetchTargets.end()) {
+ MachineOptimizationRemarkMissed R(
+ "insert-code-prefetch", "MissingPrefetchTarget",
+ MF->getFunction().getSubprogram(), &MBB);
+ R << "failed to map "
+ << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
+ << " prefetch targets";
+ }
// We must emit temporary symbol for the end of this basic block, if either
// we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5b12c85f7eeef..9319854f53289 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
return EdgeIt->second;
}
-SmallVector<BBPosition>
+SmallVector<SubblockID>
BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[1]);
FI->second.PrefetchTargets.push_back(
- BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+ SubblockID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
continue;
}
default:
@@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
}
-SmallVector<BBPosition>
+SmallVector<SubblockID>
BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
StringRef FuncName) const {
return BBSPR.getPrefetchTargetsForFunction(FuncName);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index e241ccbbee263..57037fd818479 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -70,12 +70,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
return false;
// Set each block's prefetch targets so AsmPrinter can emit a special symbol
// there.
- SmallVector<BBPosition> PrefetchTargets =
+ SmallVector<SubblockID> PrefetchTargets =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchTargetsForFunction(MF.getName());
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
for (const auto &Target : PrefetchTargets)
- PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+ PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex);
for (auto &MBB : MF) {
auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
if (R == PrefetchTargetsByBBID.end())
>From 8744c4d3ea5c3fdd03cc2cb2ebd93c8b8fb10a75 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 22:01:34 +0000
Subject: [PATCH 15/23] Expand test to weak symbols.
---
.../X86/basic-block-sections-code-prefetch.ll | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 3eb91dfdabd27..35e25952aa2f8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,6 +7,9 @@
; RUN: echo 't 1 at 0' >> %t
; RUN: echo 't 1 at 1' >> %t
; RUN: echo 't 2 at 1' >> %t
+; RUN: echo 'f _Z3barv' >> %t
+; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 21 at 1' >> %t
;;
; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
@@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
ret i32 %14
}
-declare i32 @_Z3barv() #1
+define weak i32 @_Z3barv() nounwind {
+ %1 = call i32 @_Z3bazv()
+ ret i32 %1
+; CHECK: _Z3barv:
+; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+}
+
declare i32 @_Z3bazv() #1
>From cea8aa4d4fb81ad18ad46ee71583471d18dc3d2d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 16/23] feat(AsmPrinter): Add support for emitting prefetch
target symbols
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 35 +++++++++++++++++++
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 14 ++++++++
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 9 ++++-
.../BasicBlockSectionsProfileReader.cpp | 14 ++++++++
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 +++++++
5 files changed, 84 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index c2bc7559b9fb4..44bcfb074b5b8 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -52,6 +52,12 @@ struct SubblockID {
unsigned SubblockIndex;
};
+struct PrefetchHint {
+ SubblockID SitePosition; // Subblock at which the prefetch is inserted.
+ StringRef TargetFunctionName; // Function that contains the target.
+ SubblockID TargetPosition; // Subblock in that function to prefetch.
+};
+
// This represents the raw input profile for one function.
struct FunctionPathAndClusterInfo {
// BB Cluster information specified by `UniqueBBID`s.
@@ -63,6 +69,7 @@ struct FunctionPathAndClusterInfo {
// Code prefetch targets, specified by the subblock ID of which beginning must
// be targetted for prefetching.
SmallVector<SubblockID> PrefetchTargets;
+ SmallVector<PrefetchHint> PrefetchHints;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -73,6 +80,27 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
+// Provides DenseMapInfo for SubblockID (for use as a DenseSet/DenseMap key).
+template <> struct DenseMapInfo<SubblockID> {
+ static inline SubblockID getEmptyKey() {
+ return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+ DenseMapInfo<unsigned>::getEmptyKey()};
+ }
+ static inline SubblockID getTombstoneKey() {
+ return SubblockID{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+ DenseMapInfo<unsigned>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const SubblockID &Val) {
+ std::pair<unsigned, unsigned> PairVal = std::make_pair(
+ DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.SubblockIndex);
+ return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+ }
+ static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) {
+ return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+ LHS.SubblockIndex == RHS.SubblockIndex;
+ }
+};
+
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -104,6 +132,9 @@ class BasicBlockSectionsProfileReader {
SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -212,6 +243,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
+ DenseSet<SubblockID> getPrefetchTargetsForFunction(StringRef FuncName) const;
SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index a13fcb2bb841d..63bc6b0f25284 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
+struct PrefetchTarget {
+ StringRef TargetFunction;
+ UniqueBBID TargetBBID;
+ unsigned TargetBBOffset;
+};
+
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
@@ -1290,6 +1298,12 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
+ MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+ const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+ return CallInstSymbols;
+ }
+
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 97234f3859ca7..9a3532f623715 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> InsertNoopsForPrefetch(
+ "insert-noops-for-prefetch",
+ cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+ cl::Hidden);
+
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -1982,6 +1988,7 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
+ int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -2125,7 +2132,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9319854f53289..7dc83a290eeb2 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -100,6 +100,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
.PrefetchTargets;
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+
+
// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
@@ -547,6 +555,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
return BBSPR.getPrefetchTargetsForFunction(FuncName);
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+ if (CallInstSymbols.size() <= CallInstNumber)
+ CallInstSymbols.resize(CallInstNumber + 1); // New slots start out null.
+ if (!CallInstSymbols[CallInstNumber]) { // Also fills slots resize() skipped.
+ const MachineFunction *MF = getParent();
+ CallInstSymbols[CallInstNumber] = MF->getContext().createBlockSymbol(
+ "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+ Twine(CallInstNumber),
+ /*AlwaysEmit=*/true);
+ }
+ return CallInstSymbols[CallInstNumber];
+}
+
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
>From 5f1fbf346ff321e38dedf353427930c9f7cfaea3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 17/23] feat(AsmPrinter): Add support for emitting prefetch
target symbols
---
.../BasicBlockSectionsProfileReader.cpp | 33 +++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 7dc83a290eeb2..0e8971c8fb7fb 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -323,6 +323,39 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
}
continue;
}
+ case 'i': { // Prefetch hint specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramPathAndClusterInfo.end())
+ continue;
+ assert(Values.size() == 2);
+ SmallVector<StringRef, 2> PrefetchSiteStr;
+ Values[0].split(PrefetchSiteStr, '@');
+ assert(PrefetchSiteStr.size() == 2);
+ auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+ if (!SiteBBID)
+ return SiteBBID.takeError();
+ unsigned long long SiteBBOffset;
+ if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchSiteStr[1]);
+
+ SmallVector<StringRef, 3> PrefetchTargetStr;
+ Values[1].split(PrefetchTargetStr, '@');
+ assert(PrefetchTargetStr.size() == 3);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetBBOffset;
+ if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[2]);
+ FI->second.PrefetchHints.push_back(
+ PrefetchHint{{*SiteBBID, static_cast<unsigned>(SiteBBOffset)},
+ PrefetchTargetStr[0],
+ {*TargetBBID, static_cast<unsigned>(TargetBBOffset)}});
+ continue;
+ }
case 't': { // Prefetch target specifier.
// Skip the profile when we the profile iterator (FI) refers to the
// past-the-end element.
>From be3fe4d387706d0dd1f6622c9bb81149a3913908 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sun, 9 Nov 2025 05:45:18 +0000
Subject: [PATCH 18/23] feat(CodeGen): Add PrefetchInsertion pass
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 2 +-
llvm/include/llvm/CodeGen/MachineInstr.h | 3 +-
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
llvm/lib/CodeGen/BasicBlockSections.cpp | 3 +-
llvm/lib/CodeGen/CodeGenPrepare.cpp | 1 +
llvm/lib/Target/X86/CMakeLists.txt | 1 +
llvm/lib/Target/X86/PrefetchInsertion.cpp | 209 ++++++++++++++++++
llvm/lib/Target/X86/X86.h | 2 +
llvm/lib/Target/X86/X86TargetMachine.cpp | 3 +
9 files changed, 222 insertions(+), 4 deletions(-)
create mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 63bc6b0f25284..5bc62d61e5f0d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1300,7 +1300,7 @@ class MachineBasicBlock
MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
- const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+ const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
return CallInstSymbols;
}
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..1aba9ca962b16 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -123,8 +123,9 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
- InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
+ InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
+ Prefetch = 1 << 23, // Instruction is a prefetch.
};
private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9a3532f623715..7d533fd2bdef0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
#include "WasmException.h"
#include "WinCFGuard.h"
#include "WinException.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 52e2909bec072..755abdbceaf4a 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+ // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+ // nullptr;
BasicBlockSections() : MachineFunctionPass(ID) {
initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b6dd174f9be80..9606f0c920ef9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index f9bd233cf8ecf..b503b54f338d3 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -58,6 +58,7 @@ set(sources
X86IndirectThunks.cpp
X86InterleavedAccess.cpp
X86InsertPrefetch.cpp
+ PrefetchInsertion.cpp
X86InstCombineIntrinsic.cpp
X86InstrFMA3Info.cpp
X86InstrFoldTables.cpp
diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp
new file mode 100644
index 0000000000000..720a38cb9b011
--- /dev/null
+++ b/llvm/lib/Target/X86/PrefetchInsertion.cpp
@@ -0,0 +1,209 @@
+//===-- PrefetchInsertion.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include <map>
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+static cl::opt<bool> UseCodePrefetchInstruction(
+ "use-code-prefetch-instruction",
+ cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true),
+ cl::Hidden);
+static cl::opt<bool> PrefetchNextAddress(
+ "prefetch-next-address",
+ cl::desc(
+ "Whether to prefetch the next address instead of the target address."),
+ cl::init(false), cl::Hidden);
+
+namespace {} // end anonymous namespace
+
+namespace llvm {
+class PrefetchInsertion : public MachineFunctionPass {
+public:
+ static char ID;
+
+ BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+
+ PrefetchInsertion() : MachineFunctionPass(ID) {
+ initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Record each block's prefetch targets and insert prefetch instructions at
+ /// the sites named by the profile's prefetch hints.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+
+char PrefetchInsertion::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ PrefetchInsertion, "prefetch-insertion",
+ "Applies path clonings for the -basic-block-sections=list option", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+ PrefetchInsertion, "prefetch-insertion",
+ "Applies path clonings for the -basic-block-sections=list option", false,
+ false)
+
+bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+ // errs() << "Running on " << MF.getName() << "\n";
+ Function &F = MF.getFunction();
+ auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
+ DenseSet<BBPosition> PrefetchTargets =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchTargetsForFunction(MF.getName());
+ // errs() << "Targets: Function: " << F.getName() << " "
+ // << PrefetchTargets.size() << "\n";
+ DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+ for (const auto &P : PrefetchTargets)
+ PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
+ for (auto &[BBID, V] : PrefetchTargetsByBBID)
+ llvm::sort(V);
+ for (auto &BB : MF)
+ BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
+
+ for (const BBPosition &P : PrefetchTargets) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += F.getName();
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(P.BBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(P.BBOffset);
+ F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+ }
+
+ SmallVector<PrefetchHint> PrefetchHints =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchHintsForFunction(MF.getName());
+ // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size()
+ // << "\n";
+ for (const PrefetchHint &H : PrefetchHints) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += H.TargetFunctionName;
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
+ F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+ }
+
+ DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
+ PrefetchHintsByBBID;
+ for (const auto &H : PrefetchHints) {
+ PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
+ PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
+ H.TargetPosition.BBOffset});
+ }
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ for (auto &BB : MF) {
+ auto It = PrefetchHintsByBBID.find(*BB.getBBID());
+ if (It == PrefetchHintsByBBID.end())
+ continue;
+ auto BBPrefetchHintIt = It->second.begin();
+ unsigned NumInsts = 0;
+ auto E = BB.getFirstTerminator();
+ unsigned NumCallsites = 0;
+ for (auto I = BB.instr_begin();;) {
+ auto Current = I;
+ if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
+ for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
+ SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+ PrefetchTargetName += PrefetchTarget.TargetFunction;
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
+ PrefetchTargetName += "_";
+ PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
+ auto *GV =
+ MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
+ // errs() << "Inserting prefetch for " << GV->getName() << " at "
+ // << MF.getName() << " " << BB.getName() << " " << NumInsts
+ // << "\n";
+ MachineInstr *PFetch = MF.CreateMachineInstr(
+ UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
+ : TII->get(X86::PREFETCHT1),
+ Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
+ true);
+ PFetch->setFlag(MachineInstr::Prefetch);
+ MachineInstrBuilder MIB(MF, PFetch);
+ if (!PrefetchNextAddress) {
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
+ /*base_alignment=*/llvm::Align(1)));
+ }
+ MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+ if (PrefetchNextAddress)
+ MIB.addImm(0);
+ else
+ MIB.addGlobalAddress(GV);
+ MIB.addReg(X86::NoRegister);
+ BB.insert(Current, PFetch);
+ }
+ ++BBPrefetchHintIt;
+ if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
+ break;
+ }
+ if (Current != E) {
+ // Count the non-meta instructions and callsites seen so far.
+ if (!Current->isPosition() && !Current->isImplicitDef() &&
+ !Current->isKill() && !Current->isDebugInstr()) {
+ ++NumInsts;
+ }
+ if (Current->isCall())
+ ++NumCallsites;
+ ++I;
+ }
+ }
+ }
+ return true;
+}
+
+void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createPrefetchInsertionPass() {
+ return new PrefetchInsertion();
+}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 2b83d575ace91..e9d6cfa5ee4d2 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -161,6 +161,8 @@ FunctionPass *createX86DiscriminateMemOpsPass();
/// This pass applies profiling information to insert cache prefetches.
FunctionPass *createX86InsertPrefetchPass();
+FunctionPass *createPrefetchInsertionPass();
+
/// This pass insert wait instruction after X87 instructions which could raise
/// fp exceptions when strict-fp enabled.
FunctionPass *createX86InsertX87waitPass();
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index d4ad98af9b30c..a379e17f7f3a4 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -626,6 +626,9 @@ void X86PassConfig::addPreEmitPass2() {
// after all real instructions have been added to the epilog.
if (TT.isOSWindows() && TT.isX86_64())
addPass(createX86WinEHUnwindV2Pass());
+
+ if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
+ addPass(createPrefetchInsertionPass());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
>From 82b6033548d068fdc3ba1e28ba8da01d92fec400 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 15 Nov 2025 20:09:15 +0000
Subject: [PATCH 19/23] Implement inserting prefetches into the specified
positions.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 33 +--
.../include/llvm/CodeGen/InsertCodePrefetch.h | 23 ++
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 -
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 9 +
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 13 +-
.../BasicBlockSectionsProfileReader.cpp | 12 +-
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 50 ++++-
llvm/lib/CodeGen/MachineBasicBlock.cpp | 13 --
llvm/lib/Target/X86/CMakeLists.txt | 1 -
llvm/lib/Target/X86/PrefetchInsertion.cpp | 209 ------------------
llvm/lib/Target/X86/X86InstrInfo.cpp | 19 ++
llvm/lib/Target/X86/X86InstrInfo.h | 5 +
llvm/lib/Target/X86/X86TargetMachine.cpp | 3 -
13 files changed, 123 insertions(+), 273 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h
delete mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 44bcfb074b5b8..061f14030b206 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -52,10 +52,12 @@ struct SubblockID {
unsigned SubblockIndex;
};
+// This represents a prefetch hint to be injected at site `SiteID`, targeting
+// `TargetID` in function `TargetFunction`.
struct PrefetchHint {
- SubblockID SitePosition;
- StringRef TargetFunctionName;
- osition TargetPosition;
+ SubblockID SiteID;
+ StringRef TargetFunction;
+ SubblockID TargetID;
};
// This represents the raw input profile for one function.
@@ -80,27 +82,6 @@ struct FunctionPathAndClusterInfo {
DenseMap<unsigned, uint64_t> BBHashes;
};
-// Provides DenseMapInfo SubblockID.
-template <> struct DenseMapInfo<SubblockID> {
- static inline SubblockID getEmptyKey() {
- return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
- DenseMapInfo<unsigned>::getEmptyKey()};
- }
- static inline SubblockID getTombstoneKey() {
- return SubblockID{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
- DenseMapInfo<unsigned>::getTombstoneKey()};
- }
- static unsigned getHashValue(const SubblockID &Val) {
- std::pair<unsigned, unsigned> PairVal = std::make_pair(
- DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
- return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
- }
- static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) {
- return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
- DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
- }
-};
-
class BasicBlockSectionsProfileReader {
public:
friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -132,6 +113,7 @@ class BasicBlockSectionsProfileReader {
SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
+ // Returns the prefetch hints to be injected in function `FuncName`.
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
@@ -243,11 +225,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
const UniqueBBID &DestBBID) const;
+
SmallVector<PrefetchHint>
getPrefetchHintsForFunction(StringRef FuncName) const;
- DenseSet<SubblockID> getPrefetchTargetsForFunction(StringRef FuncName) const;
-
SmallVector<SubblockID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
new file mode 100644
index 0000000000000..b212a025f1d9f
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -0,0 +1,23 @@
+//===- InsertCodePrefetch.h - Utilities for code prefetch insertion -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
+#define LLVM_CODEGEN_INSERTCODEPREFETCH_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/UniqueBBID.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 5bc62d61e5f0d..24445a2fe8ece 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1298,12 +1298,6 @@ class MachineBasicBlock
/// Return the MCSymbol for this basic block.
LLVM_ABI MCSymbol *getSymbol() const;
- MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
- const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
- return CallInstSymbols;
- }
-
/// Return the Windows EH Continuation Symbol for this basic block.
LLVM_ABI MCSymbol *getEHContSymbol() const;
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 43f28ed79f9dd..78a2c6e937af6 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2370,6 +2370,15 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
llvm_unreachable("unknown number of operands necessary");
}
+ /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
+ /// targeting `GV`.
+ virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
+ return false;
+ }
+
+
private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7d533fd2bdef0..502d209e45fa7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -82,6 +82,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -1988,7 +1989,6 @@ void AsmPrinter::emitFunctionBody() {
FunctionCallGraphInfo FuncCGInfo;
const auto &CallSitesInfoMap = MF->getCallSitesInfo();
for (auto &MBB : *MF) {
- int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
// Print a label for the basic block.
emitBasicBlockStart(MBB);
DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1996,18 +1996,15 @@ void AsmPrinter::emitFunctionBody() {
SmallVector<unsigned> PrefetchTargets =
MBB.getPrefetchTargetSubblockIndexes();
auto PrefetchTargetIt = PrefetchTargets.begin();
- unsigned NumCalls = 0;
+ unsigned NumCallsInBB = 0;
// Helper to emit a symbol for the prefetch target and proceed to the next
// one.
auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
if (PrefetchTargetIt == PrefetchTargets.end())
return;
- if (NumCalls < *PrefetchTargetIt)
+ if (NumCallsInBB < *PrefetchTargetIt)
return;
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
- utostr(MBB.getBBID()->BaseID) + Twine("_") +
- utostr(*PrefetchTargetIt));
+ MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
// If the function is weak-linkage it may be replaced by a strong version,
// in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
@@ -2159,7 +2156,7 @@ void AsmPrinter::emitFunctionBody() {
if (MI.isCall()) {
if (MF->getTarget().Options.BBAddrMap)
OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
- ++NumCalls;
+ ++NumCallsInBB;
}
if (TM.Options.EmitCallGraphSection && MI.isCall())
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 0e8971c8fb7fb..f55aaab7586b5 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -335,8 +335,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
if (!SiteBBID)
return SiteBBID.takeError();
- unsigned long long SiteBBOffset;
- if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset))
+ unsigned long long SiteSubblockIndex;
+ if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteSubblockIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchSiteStr[1]);
@@ -346,14 +346,14 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
if (!TargetBBID)
return TargetBBID.takeError();
- unsigned long long TargetBBOffset;
- if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset))
+ unsigned long long TargetSubblockIndex;
+ if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[2]);
FI->second.PrefetchHints.push_back(
- PrefetchHint{{*SiteBBID, static_cast<unsigned>(SiteBBOffset)},
+ PrefetchHint{SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
PrefetchTargetStr[0],
- {*TargetBBID, static_cast<unsigned>(TargetBBOffset)}});
+ SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
continue;
}
case 't': { // Prefetch target specifier.
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 57037fd818479..60cd004875891 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -17,6 +17,8 @@
/// prefetch instruction from any module.
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/InsertCodePrefetch.h"
+
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -26,11 +28,24 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "insert-code-prefetch"
+namespace llvm {
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) {
+ SmallString<128> R("__llvm_prefetch_target_");
+ R += FunctionName;
+ R += "_";
+ R += utostr(BBID.BaseID);
+ R += "_";
+ R += utostr(SubblockIndex);
+ return R;
+}
+} // namespace llvm
+
namespace {
class InsertCodePrefetch : public MachineFunctionPass {
public:
@@ -82,7 +97,40 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
continue;
MBB.setPrefetchTargetSubblockIndexes(R->second);
}
- return false;
+ SmallVector<PrefetchHint> PrefetchHints =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchHintsForFunction(MF.getName());
+ DenseMap<UniqueBBID, SmallVector<PrefetchHint>>
+ PrefetchHintsBySiteBBID;
+ for (const auto &H : PrefetchHints)
+ PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
+ for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) {
+ llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+ return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
+ });
+ }
+ auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ for (auto &BB : MF) {
+ auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
+ if (It == PrefetchHintsBySiteBBID.end())
+ continue;
+ const auto &PrefetchHints = It->second;
+ unsigned NumCallsInBB = 0;
+ auto InstrIt = BB.begin();
+ for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) {
+ auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
+ while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+ auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy);
+ TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
+ ++HintIt;
+ }
+ if (InstrIt == BB.end()) break;
+ if (InstrIt->isCall()) ++NumCallsInBB;
+ InstrIt = NextInstrIt;
+ }
+ }
+ return true;
}
void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
return CachedMCSymbol;
}
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
- if (CallInstSymbols.size() <= CallInstNumber) {
- const MachineFunction *MF = getParent();
- MCContext &Ctx = MF->getContext();
- CallInstSymbols.resize(CallInstNumber + 1);
- CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
- "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
- Twine(CallInstNumber),
- /*AlwaysEmit=*/true);
- }
- return CallInstSymbols[CallInstNumber];
-}
-
MCSymbol *MachineBasicBlock::getEHContSymbol() const {
if (!CachedEHContMCSymbol) {
const MachineFunction *MF = getParent();
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index b503b54f338d3..f9bd233cf8ecf 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -58,7 +58,6 @@ set(sources
X86IndirectThunks.cpp
X86InterleavedAccess.cpp
X86InsertPrefetch.cpp
- PrefetchInsertion.cpp
X86InstCombineIntrinsic.cpp
X86InstrFMA3Info.cpp
X86InstrFoldTables.cpp
diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp
deleted file mode 100644
index 720a38cb9b011..0000000000000
--- a/llvm/lib/Target/X86/PrefetchInsertion.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-//===-- PrefetchInsertion.cpp ---=========-----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Prefetch insertion pass implementation.
-//===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/BasicBlockSectionUtils.h"
-#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
-#include <map>
-
-using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
-
-static cl::opt<bool> UseCodePrefetchInstruction(
- "use-code-prefetch-instruction",
- cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true),
- cl::Hidden);
-static cl::opt<bool> PrefetchNextAddress(
- "prefetch-next-address",
- cl::desc(
- "Whether to prefetch the next address instead of the target address."),
- cl::init(false), cl::Hidden);
-
-namespace {} // end anonymous namespace
-
-namespace llvm {
-class PrefetchInsertion : public MachineFunctionPass {
-public:
- static char ID;
-
- BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
-
- PrefetchInsertion() : MachineFunctionPass(ID) {
- initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
- }
-
- StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// Identify basic blocks that need separate sections and prepare to emit them
- /// accordingly.
- bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // namespace llvm
-
-char PrefetchInsertion::ID = 0;
-INITIALIZE_PASS_BEGIN(
- PrefetchInsertion, "prefetch-insertion",
- "Applies path clonings for the -basic-block-sections=list option", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
- PrefetchInsertion, "prefetch-insertion",
- "Applies path clonings for the -basic-block-sections=list option", false,
- false)
-
-bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
- assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
- "BB Sections list not enabled!");
- if (hasInstrProfHashMismatch(MF))
- return false;
- // errs() << "Running on " << MF.getName() << "\n";
- Function &F = MF.getFunction();
- auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
- DenseSet<BBPosition> PrefetchTargets =
- getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
- .getPrefetchTargetsForFunction(MF.getName());
- // errs() << "Targets: Function: " << F.getName() << " "
- // << PrefetchTargets.size() << "\n";
- DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
- for (const auto &P : PrefetchTargets)
- PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
- for (auto &[BBID, V] : PrefetchTargetsByBBID)
- llvm::sort(V);
- for (auto &BB : MF)
- BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
-
- for (const BBPosition &P : PrefetchTargets) {
- SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
- PrefetchTargetName += F.getName();
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(P.BBID.BaseID);
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(P.BBOffset);
- F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
- }
-
- SmallVector<PrefetchHint> PrefetchHints =
- getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
- .getPrefetchHintsForFunction(MF.getName());
- // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size()
- // << "\n";
- for (const PrefetchHint &H : PrefetchHints) {
- SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
- PrefetchTargetName += H.TargetFunctionName;
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
- F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
- }
-
- DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
- PrefetchHintsByBBID;
- for (const auto &H : PrefetchHints) {
- PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
- PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
- H.TargetPosition.BBOffset});
- }
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- for (auto &BB : MF) {
- auto It = PrefetchHintsByBBID.find(*BB.getBBID());
- if (It == PrefetchHintsByBBID.end())
- continue;
- auto BBPrefetchHintIt = It->second.begin();
- unsigned NumInsts = 0;
- auto E = BB.getFirstTerminator();
- unsigned NumCallsites = 0;
- for (auto I = BB.instr_begin();;) {
- auto Current = I;
- if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
- for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
- SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
- PrefetchTargetName += PrefetchTarget.TargetFunction;
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
- PrefetchTargetName += "_";
- PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
- auto *GV =
- MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
- // errs() << "Inserting prefetch for " << GV->getName() << " at "
- // << MF.getName() << " " << BB.getName() << " " << NumInsts
- // << "\n";
- MachineInstr *PFetch = MF.CreateMachineInstr(
- UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
- : TII->get(X86::PREFETCHT1),
- Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
- true);
- PFetch->setFlag(MachineInstr::Prefetch);
- MachineInstrBuilder MIB(MF, PFetch);
- if (!PrefetchNextAddress) {
- MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
- /*base_alignment=*/llvm::Align(1)));
- }
- MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
- if (PrefetchNextAddress)
- MIB.addImm(0);
- else
- MIB.addGlobalAddress(GV);
- MIB.addReg(X86::NoRegister);
- BB.insert(Current, PFetch);
- }
- ++BBPrefetchHintIt;
- if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
- break;
- }
- if (Current != E) {
- // Print the assembly for the instruction.
- if (!Current->isPosition() && !Current->isImplicitDef() &&
- !Current->isKill() && !Current->isDebugInstr()) {
- ++NumInsts;
- }
- if (Current->isCall())
- ++NumCallsites;
- ++I;
- }
- }
- }
- return true;
-}
-
-void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-FunctionPass *llvm::createPrefetchInsertionPass() {
- return new PrefetchInsertion();
-}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..b49ef06478f9b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10978,5 +10978,24 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
M.getFullAddress(Ops);
}
+bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1),
+ InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(),
+ true);
+ MachineInstrBuilder MIB(MF, PrefetchInstr);
+ MIB.addMemOperand(MF.getMachineMemOperand(
+ MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
+ /*base_alignment=*/llvm::Align(1)));
+ MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+ MIB.addGlobalAddress(GV);
+ MIB.addReg(X86::NoRegister);
+ MBB.insert(InsertBefore, PrefetchInstr);
+ return true;
+}
+
+
#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index a547fcd421411..fb77ac96ceaad 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,6 +767,11 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// \returns the index of operand that is commuted with \p Idx1. If the method
/// fails to commute the operands, it will return \p Idx1.
unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
+
+
+ bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index a379e17f7f3a4..d4ad98af9b30c 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -626,9 +626,6 @@ void X86PassConfig::addPreEmitPass2() {
// after all real instructions have been added to the epilog.
if (TT.isOSWindows() && TT.isX86_64())
addPass(createX86WinEHUnwindV2Pass());
-
- if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
- addPass(createPrefetchInsertionPass());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
>From 3cf143c54b2aed4dd76447df5c0a32f51b078949 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 15 Nov 2025 20:09:27 +0000
Subject: [PATCH 20/23] clang-format.
---
.../include/llvm/CodeGen/InsertCodePrefetch.h | 6 ++-
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 1 -
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6 ++-
.../BasicBlockSectionsProfileReader.cpp | 8 ++--
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 40 +++++++++++--------
llvm/lib/Target/X86/X86InstrInfo.cpp | 33 +++++++--------
llvm/lib/Target/X86/X86InstrInfo.h | 5 +--
7 files changed, 55 insertions(+), 44 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
index b212a025f1d9f..99241248862d3 100644
--- a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -11,12 +11,14 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/UniqueBBID.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/UniqueBBID.h"
namespace llvm {
-SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex);
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+ const UniqueBBID &BBID,
+ unsigned SubblockIndex);
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 78a2c6e937af6..ca5a8308ae957 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2378,7 +2378,6 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
return false;
}
-
private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 502d209e45fa7..9be5d5885e257 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -39,6 +39,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockHashInfo.h"
@@ -82,7 +83,6 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
-#include "llvm/CodeGen/InsertCodePrefetch.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -2004,7 +2004,9 @@ void AsmPrinter::emitFunctionBody() {
return;
if (NumCallsInBB < *PrefetchTargetIt)
return;
- MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
+ MCSymbol *PrefetchTargetSymbol =
+ OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(
+ MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
// If the function is weak-linkage it may be replaced by a strong version,
// in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index f55aaab7586b5..09781a05d917d 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -350,10 +350,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex))
return createProfileParseError(Twine("unsigned integer expected: '") +
PrefetchTargetStr[2]);
- FI->second.PrefetchHints.push_back(
- PrefetchHint{SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
- PrefetchTargetStr[0],
- SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
+ FI->second.PrefetchHints.push_back(PrefetchHint{
+ SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
+ PrefetchTargetStr[0],
+ SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
continue;
}
case 't': { // Prefetch target specifier.
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 60cd004875891..b7eba788a9796 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -35,14 +35,16 @@ using namespace llvm;
#define DEBUG_TYPE "insert-code-prefetch"
namespace llvm {
-SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) {
- SmallString<128> R("__llvm_prefetch_target_");
- R += FunctionName;
- R += "_";
- R += utostr(BBID.BaseID);
- R += "_";
- R += utostr(SubblockIndex);
- return R;
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+ const UniqueBBID &BBID,
+ unsigned SubblockIndex) {
+ SmallString<128> R("__llvm_prefetch_target_");
+ R += FunctionName;
+ R += "_";
+ R += utostr(BBID.BaseID);
+ R += "_";
+ R += utostr(SubblockIndex);
+ return R;
}
} // namespace llvm
@@ -100,16 +102,16 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
SmallVector<PrefetchHint> PrefetchHints =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
.getPrefetchHintsForFunction(MF.getName());
- DenseMap<UniqueBBID, SmallVector<PrefetchHint>>
- PrefetchHintsBySiteBBID;
+ DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
for (const auto &H : PrefetchHints)
PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
- for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) {
+ for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) {
llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
});
}
- auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+ auto PtrTy =
+ PointerType::getUnqual(MF.getFunction().getParent()->getContext());
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
for (auto &BB : MF) {
auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
@@ -118,15 +120,21 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
const auto &PrefetchHints = It->second;
unsigned NumCallsInBB = 0;
auto InstrIt = BB.begin();
- for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) {
+ for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
- auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy);
+ auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+ getPrefetchTargetSymbolName(HintIt->TargetFunction,
+ HintIt->TargetID.BBID,
+ HintIt->TargetID.SubblockIndex),
+ PtrTy);
TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
++HintIt;
}
- if (InstrIt == BB.end()) break;
- if (InstrIt->isCall()) ++NumCallsInBB;
+ if (InstrIt == BB.end())
+ break;
+ if (InstrIt->isCall())
+ ++NumCallsInBB;
InstrIt = NextInstrIt;
}
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b49ef06478f9b..6556e16241557 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10978,24 +10978,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
M.getFullAddress(Ops);
}
-bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- const GlobalValue *GV) const {
+bool X86InstrInfo::insertCodePrefetchInstr(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
MachineFunction &MF = *MBB.getParent();
- MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1),
- InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(),
- true);
- MachineInstrBuilder MIB(MF, PrefetchInstr);
- MIB.addMemOperand(MF.getMachineMemOperand(
- MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
- /*base_alignment=*/llvm::Align(1)));
- MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
- MIB.addGlobalAddress(GV);
- MIB.addReg(X86::NoRegister);
- MBB.insert(InsertBefore, PrefetchInstr);
- return true;
+ MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
+ get(X86::PREFETCHIT1),
+ InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore)
+ : InsertBefore->getDebugLoc(),
+ true);
+ MachineInstrBuilder MIB(MF, PrefetchInstr);
+ MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV),
+ MachineMemOperand::MOLoad, /*s=*/8,
+ /*base_alignment=*/llvm::Align(1)));
+ MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+ MIB.addGlobalAddress(GV);
+ MIB.addReg(X86::NoRegister);
+ MBB.insert(InsertBefore, PrefetchInstr);
+ return true;
}
-
#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index fb77ac96ceaad..2fe67c56e1bcd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -768,10 +768,9 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// fails to commute the operands, it will return \p Idx1.
unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
-
bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- const GlobalValue *GV) const override;
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const override;
};
} // namespace llvm
>From ffb10225c39e1aa425d33b13e643160bc2d843a9 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 20:37:25 +0000
Subject: [PATCH 21/23] Expand the test case to prefetch hints.
---
.../CodeGen/BasicBlockSectionsProfileReader.cpp | 13 ++++++++++---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 ++++----
.../X86/basic-block-sections-code-prefetch.ll | 16 ++++++++++++++--
3 files changed, 28 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 09781a05d917d..8252d362271a0 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -328,9 +328,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
// past-the-end element.
if (FI == ProgramPathAndClusterInfo.end())
continue;
- assert(Values.size() == 2);
+ if (Values.size() != 2)
+ return createProfileParseError(Twine("Prefetch hint expected: "+ S));
SmallVector<StringRef, 2> PrefetchSiteStr;
Values[0].split(PrefetchSiteStr, '@');
+ if (PrefetchSiteStr.size() != 2)
+ return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]);
assert(PrefetchSiteStr.size() == 2);
auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
if (!SiteBBID)
@@ -342,7 +345,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
SmallVector<StringRef, 3> PrefetchTargetStr;
Values[1].split(PrefetchTargetStr, '@');
- assert(PrefetchTargetStr.size() == 3);
+ if (PrefetchTargetStr.size() != 3)
+ return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
if (!TargetBBID)
return TargetBBID.takeError();
@@ -361,10 +365,13 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
// past-the-end element.
if (FI == ProgramPathAndClusterInfo.end())
continue;
- assert(Values.size() == 1);
+ if (Values.size() != 1)
+ return createProfileParseError(Twine("Prefetch target expected: ")+ S);
SmallVector<StringRef, 2> PrefetchTargetStr;
Values[0].split(PrefetchTargetStr, '@');
assert(PrefetchTargetStr.size() == 2);
+ if (PrefetchTargetStr.size() != 2)
+ return createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index b7eba788a9796..2197d60bf598e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -105,8 +105,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
for (const auto &H : PrefetchHints)
PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
- for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) {
- llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+ for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) {
+ llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) {
return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
});
}
@@ -122,13 +122,13 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto InstrIt = BB.begin();
for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
- while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+ while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
getPrefetchTargetSymbolName(HintIt->TargetFunction,
HintIt->TargetID.BBID,
HintIt->TargetID.SubblockIndex),
PtrTy);
- TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
+ TII->insertCodePrefetchInstr(BB, InstrIt, GV);
++HintIt;
}
if (InstrIt == BB.end())
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 35e25952aa2f8..562da50f7c4ca 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,11 +7,14 @@
; RUN: echo 't 1@0' >> %t
; RUN: echo 't 1@1' >> %t
; RUN: echo 't 2@1' >> %t
+; RUN: echo 'i 3@0 _Z3barv@0@0' >> %t
+; RUN: echo 'i 2@1 _Z3foob@1@0' >> %t
; RUN: echo 'f _Z3barv' >> %t
; RUN: echo 't 0@0' >> %t
+; RUN: echo 'i 0@1 _Z3foob@0@0' >> %t
; RUN: echo 't 21@1' >> %t
;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t | FileCheck %s
define i32 @_Z3foob(i1 zeroext %0) nounwind {
%2 = alloca i32, align 4
@@ -44,18 +47,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
; CHECK: callq _Z3bazv@PLT
; CHECK-NEXT: .globl __llvm_prefetch_target__Z3foob_2_1
; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3foob_1_0(%rip)
13: ; preds = %11, %9
%14 = load i32, ptr %2, align 4
ret i32 %14
+; CHECK: .LBB0_3:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3barv_0_0(%rip)
+; CHECK: retq
+
}
define weak i32 @_Z3barv() nounwind {
%1 = call i32 @_Z3bazv()
- ret i32 %1
+ br label %2
; CHECK: _Z3barv:
; CHECK-NEXT: .weak __llvm_prefetch_target__Z3barv_0_0
; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+; CHECK: callq _Z3bazv@PLT
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target__Z3foob_0_0(%rip)
+2:
+ ret i32 %1
}
declare i32 @_Z3bazv() #1
>From 87f856a01d2ed36cf342ad90fe9ac62b16c52e84 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 20:37:36 +0000
Subject: [PATCH 22/23] clang-format.
---
.../lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 13 ++++++++-----
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 3 ++-
2 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 8252d362271a0..9ac0ba34bba01 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -329,11 +329,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
if (FI == ProgramPathAndClusterInfo.end())
continue;
if (Values.size() != 2)
- return createProfileParseError(Twine("Prefetch hint expected: "+ S));
+ return createProfileParseError(Twine("Prefetch hint expected: " + S));
SmallVector<StringRef, 2> PrefetchSiteStr;
Values[0].split(PrefetchSiteStr, '@');
if (PrefetchSiteStr.size() != 2)
- return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]);
+ return createProfileParseError(Twine("Prefetch site expected: ") +
+ Values[0]);
assert(PrefetchSiteStr.size() == 2);
auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
if (!SiteBBID)
@@ -346,7 +347,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
SmallVector<StringRef, 3> PrefetchTargetStr;
Values[1].split(PrefetchTargetStr, '@');
if (PrefetchTargetStr.size() != 3)
- return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]);
+ return createProfileParseError(
+ Twine("Prefetch target target expected: ") + Values[1]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
if (!TargetBBID)
return TargetBBID.takeError();
@@ -366,12 +368,13 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
if (FI == ProgramPathAndClusterInfo.end())
continue;
if (Values.size() != 1)
- return createProfileParseError(Twine("Prefetch target expected: ")+ S);
+ return createProfileParseError(Twine("Prefetch target expected: ") + S);
SmallVector<StringRef, 2> PrefetchTargetStr;
Values[0].split(PrefetchTargetStr, '@');
assert(PrefetchTargetStr.size() == 2);
if (PrefetchTargetStr.size() != 2)
- return createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]);
+ return createProfileParseError(Twine("Prefetch target expected: ") +
+ Values[0]);
auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
if (!TargetBBID)
return TargetBBID.takeError();
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 2197d60bf598e..904097e25b7dc 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -122,7 +122,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto InstrIt = BB.begin();
for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
- while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+ while (HintIt != PrefetchHints.end() &&
+ NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
getPrefetchTargetSymbolName(HintIt->TargetFunction,
HintIt->TargetID.BBID,
>From 8d6b7efabe985e91ee4128cd310e4b4a69ef539b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 06:39:40 +0000
Subject: [PATCH 23/23] Remove some unwanted changes.
---
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ------
llvm/include/llvm/CodeGen/MachineInstr.h | 3 +--
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 +-------
3 files changed, 2 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 24445a2fe8ece..837b63636528f 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
}
};
-struct PrefetchTarget {
- StringRef TargetFunction;
- UniqueBBID TargetBBID;
- unsigned TargetBBOffset;
-};
-
template <> struct ilist_traits<MachineInstr> {
private:
friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 1aba9ca962b16..4fcb7f36e0238 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -123,9 +123,8 @@ class MachineInstr
NoUSWrap = 1 << 20, // Instruction supports geps
// no unsigned signed wrap.
SameSign = 1 << 21, // Both operands have the same sign.
- InBounds = 1 << 22, // Pointer arithmetic remains inbounds.
+ InBounds = 1 << 22 // Pointer arithmetic remains inbounds.
// Implies NoUSWrap.
- Prefetch = 1 << 23, // Instruction is a prefetch.
};
private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9be5d5885e257..8156dc29d6e12 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -120,7 +120,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
@@ -180,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
cl::desc("Emit a section containing jump table addresses and sizes"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> InsertNoopsForPrefetch(
- "insert-noops-for-prefetch",
- cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
- cl::Hidden);
-
// This isn't turned on by default, since several of the scheduling models are
// not completely accurate, and we don't want to be misleading.
static cl::opt<bool> PrintLatency(
@@ -2131,7 +2125,7 @@ void AsmPrinter::emitFunctionBody() {
break;
}
default:
- emitInstruction(&MI);
+ emitInstruction(&MI);
auto CountInstruction = [&](const MachineInstr &MI) {
// Skip Meta instructions inside bundles.
More information about the llvm-commits
mailing list