[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)
Rahman Lavaee via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 12:47:44 PDT 2026
https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324
>From 468b6b835936f5b056364b1020d38af4252387e8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 2 Mar 2026 22:02:10 +0000
Subject: [PATCH 01/13] [CodeGen] Centralize prefetch target storage in
MachineFunction
- Move prefetch target callsite indexes from MachineBasicBlock to a
centralized DenseMap in MachineFunction, renamed to PrefetchTargets.
- Change PrefetchTargets type to DenseMap<UniqueBBID, SmallVector<unsigned>>
to support both mapped and dangling targets.
- Refactor AsmPrinter to use member functions emitPrefetchTargetSymbol
and emitDanglingPrefetchTargets for consistent emission.
- Update MIR serialization to use the new PrefetchTargets map and
rename the YAML field to 'prefetch-targets'.
- Update the MIR format for bb_id to 'bb_id <base-id>, <clone-id>, <callsite-index>'
for improved readability and to avoid ambiguity with floating point.
- Update related tests and add MIR parsing error test cases.
---
llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index 023919ee003c9..a79faa068b09c 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,8 +239,10 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
StringRef Src, SMDiagnostic &Error);
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
- StringRef Src, SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
+ CallsiteID &Target, StringRef Src,
+ SMDiagnostic &Error);
+
bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
SMDiagnostic &Error);
>From 857077936af08b81bf29ee56cf9fb0ad63b6b2d5 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 3 Mar 2026 01:22:12 +0000
Subject: [PATCH 02/13] clang-format.
---
llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index a79faa068b09c..011e0fa5d4cdb 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,9 +239,8 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
StringRef Src, SMDiagnostic &Error);
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
- CallsiteID &Target, StringRef Src,
- SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
+ StringRef Src, SMDiagnostic &Error);
bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
SMDiagnostic &Error);
>From e3bec371a2da1fc397b796b7699225561ea3a85f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 03/13] feat(AsmPrinter): Add support for inserting code
prefetch instructions.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 30 +++++++
.../include/llvm/CodeGen/InsertCodePrefetch.h | 25 ++++++
llvm/include/llvm/CodeGen/MachineBasicBlock.h | 2 +
llvm/include/llvm/CodeGen/Passes.h | 2 +
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 8 ++
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1 +
llvm/lib/CodeGen/BasicBlockSections.cpp | 3 +-
.../BasicBlockSectionsProfileReader.cpp | 87 +++++++++++++++++--
llvm/lib/CodeGen/CodeGenPrepare.cpp | 1 +
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 68 ++++++++++++++-
llvm/lib/Target/X86/X86InstrInfo.cpp | 20 +++++
llvm/lib/Target/X86/X86InstrInfo.h | 4 +
.../X86/basic-block-sections-code-prefetch.ll | 7 +-
13 files changed, 246 insertions(+), 12 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 830f99cd661bc..f852fc7216e97 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,10 +71,30 @@ struct CFGProfile {
}
};
+<<<<<<< HEAD
+=======
+// The prefetch symbol is emitted immediately after the call of the given index,
+// in block `BBID` (First call has an index of 1). Zero callsite index means the
+// start of the block.
+struct CallsiteID {
+ UniqueBBID BBID;
+ unsigned CallsiteIndex;
+};
+
+// This represents a prefetch hint to be injected at site `SiteID`, targetting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+ CallsiteID SiteID;
+ StringRef TargetFunction;
+ CallsiteID TargetID;
+};
+
+>>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
// This struct represents the raw optimization profile for a function,
// including CFG data (block and edge counts) and layout directives (clustering
// and cloning paths).
struct FunctionOptimizationProfile {
+// This represents the raw input profile for one function.
// BB Cluster information specified by `UniqueBBID`s.
SmallVector<BBClusterInfo> ClusterInfo;
// Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
@@ -86,6 +106,9 @@ struct FunctionOptimizationProfile {
// Code prefetch targets, specified by the callsite ID. The target is the code
// immediately following this callsite.
SmallVector<CallsiteID> PrefetchTargets;
+ // Code prefetch hints, specified by the injection site ID, the target
+ // function and the target site ID.
+ SmallVector<PrefetchHint> PrefetchHints;
// Node counts for each basic block.
DenseMap<UniqueBBID, uint64_t> NodeCounts;
// Edge counts for each edge.
@@ -134,6 +157,10 @@ class BasicBlockSectionsProfileReader {
SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
+ // Returns the prefetch hints to be injected in function `FuncName`.
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
private:
StringRef getAliasName(StringRef FuncName) const {
auto R = FuncAliasMap.find(FuncName);
@@ -240,6 +267,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
SmallVector<CallsiteID>
getPrefetchTargetsForFunction(StringRef FuncName) const;
+ SmallVector<PrefetchHint>
+ getPrefetchHintsForFunction(StringRef FuncName) const;
+
// Initializes the FunctionNameToDIFilename map for the current module and
// then reads the profile for the matching functions.
bool doInitialization(Module &M) override;
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
new file mode 100644
index 0000000000000..99241248862d3
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -0,0 +1,25 @@
+//===- InsertCodePrefetch.h - Utilities for code prefetch insertion -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
+#define LLVM_CODEGEN_INSERTCODEPREFETCH_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/UniqueBBID.h"
+
+namespace llvm {
+
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+ const UniqueBBID &BBID,
+ unsigned SubblockIndex);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 029e3695b2cda..739c4b7dee07b 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -215,6 +215,8 @@ class MachineBasicBlock
/// basic block sections and basic block labels.
std::optional<UniqueBBID> BBID;
+ SmallVector<unsigned> PrefetchTargets;
+
/// With basic block sections, this stores the Section ID of the basic block.
MBBSectionID SectionID{0};
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 6f7c3bd177cb0..a6fd7e7524638 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -73,6 +73,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
/// and inference when using propeller.
LLVM_ABI MachineFunctionPass *createBasicBlockMatchingAndInferencePass();
+/// createInsertCodePrefetchPass - This pass enables inserting code prefetch
+/// hints based on the basic block section profile.
LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ed968d7bd4593..d5b7bd5f928d2 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2370,6 +2370,14 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
llvm_unreachable("unknown number of operands necessary");
}
+ /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
+ /// targeting `GV`.
+ virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
+ return false;
+ }
+
private:
mutable std::unique_ptr<MIRFormatter> Formatter;
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c77d2425d4ff4..204347d0ad395 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -39,6 +39,7 @@
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockHashInfo.h"
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 583e755fc1e15..7a532b44c8c10 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,7 +108,8 @@ class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+ // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+ // nullptr;
BasicBlockSections() : MachineFunctionPass(ID) {}
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 693a9bdf74e60..45942deacc05c 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -101,6 +101,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
: SmallVector<CallsiteID>();
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ auto R = ProgramOptimizationProfile.find(getAliasName(FuncName));
+ return R != ProgramOptimizationProfile.end() ? R->second.PrefetchHints
+ : SmallVector<PrefetchHint>();
+}
+
// Reads the version 1 basic block sections profile. Profile for each function
// is encoded as follows:
// m <module_name>
@@ -157,35 +165,53 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
// ....
// ****************************************************************************
// This profile can also specify prefetch targets (starting with 't') which
-// instruct the compiler to emit a prefetch symbol for the given target.
+// instruct the compiler to emit a prefetch symbol for the given target and
+// prefetch hints (starting with 'i') which instruct the compiler to insert a
+// prefetch hint instruction at the given site for the given target.
+//
// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
// bbid specifies the target basic block and subblock_index is a zero-based
-// index. Subblock 0 refers to the region at the beginning of the block up to
-// the first callsite. Subblock `i > 0` refers to the region immediately after
+// index. Callsite 0 refers to the region at the beginning of the block up to
+// the first callsite. Callsite `i > 0` refers to the region immediately after
// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
// The prefetch target is always emitted at the beginning of the subblock.
// This is the beginning of the basic block for `i = 0` and immediately after
// the `i`-th call for every `i > 0`.
//
+// A prefetch hint is specified by a pair "site target", where site is
+// specified as a pair "<bbid>,<callsite_index>" similar to prefetch
+// targets, and target is specified as a triple
+// "<function_name>,<bbid>,<callsite_index>".
+//
// Example: A basic block in function "foo" with BBID 10 and two call
// instructions (call_A, call_B). This block is conceptually split into
-// subblocks, with the prefetch target symbol emitted at the beginning of each
-// subblock.
+// subblocks, with the prefetch target symbol emitted at the beginning of
+// each subblock.
//
// +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
// | Instruction 1 |
// | Instruction 2 |
// | call_A (Callsite 0) |
-// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
// | | before call_B)
// | Instruction 3 |
// | call_B (Callsite 1) |
-// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
// | | before call_C)
// | Instruction 4 |
// +----------------------------------+
//
+// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results
+// in a hint inserted after the first call in block #120 of bar:
+// B
+// +----------------------------------------------------+
+// | Instruction 1 |
+// | call_C (Callsite 1) |
+// | code_prefetch __llvm_prefetch_target_foo_10_2 |
+// | Instruction 2 |
+// +----------------------------------------------------+
+//
Error BasicBlockSectionsProfileReader::ReadV1Profile() {
auto FI = ProgramOptimizationProfile.end();
@@ -367,6 +393,45 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
continue;
}
+
+ case 'i': { // Prefetch hint specifier.
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
+ if (FI == ProgramOptimizationProfile.end())
+ continue;
+ if (Values.size() != 2)
+ return createProfileParseError(Twine("Prefetch hint expected: " + S));
+ SmallVector<StringRef, 2> PrefetchSiteStr;
+ Values[0].split(PrefetchSiteStr, ',');
+ if (PrefetchSiteStr.size() != 2)
+ return createProfileParseError(Twine("Prefetch site expected: ") +
+ Values[0]);
+ auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+ if (!SiteBBID)
+ return SiteBBID.takeError();
+ unsigned long long SiteCallsiteIndex;
+ if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteCallsiteIndex))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchSiteStr[1]);
+
+ SmallVector<StringRef, 3> PrefetchTargetStr;
+ Values[1].split(PrefetchTargetStr, ',');
+ if (PrefetchTargetStr.size() != 3)
+ return createProfileParseError(
+ Twine("Prefetch target target expected: ") + Values[1]);
+ auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+ if (!TargetBBID)
+ return TargetBBID.takeError();
+ unsigned long long TargetCallsiteIndex;
+ if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetCallsiteIndex))
+ return createProfileParseError(Twine("unsigned integer expected: '") +
+ PrefetchTargetStr[2]);
+ FI->second.PrefetchHints.push_back(PrefetchHint{
+ CallsiteID{*SiteBBID, static_cast<unsigned>(SiteCallsiteIndex)},
+ PrefetchTargetStr[0],
+ CallsiteID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)}});
+ continue;
+ }
default:
return createProfileParseError(Twine("invalid specifier: '") +
Twine(Specifier) + "'");
@@ -585,6 +650,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
return BBSPR.getPrefetchTargetsForFunction(FuncName);
}
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+ StringRef FuncName) const {
+ return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
BasicBlockSectionsProfileReader &
BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
return BBSPR;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 537b91bc39e0e..50904cc30bc98 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/FloatingPointPredicateUtils.h"
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index cd220347257a8..87e2c776af19e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -14,9 +14,13 @@
/// be the beginning of any dynamic basic block, that is the beginning of a
/// machine basic block, or immediately after a callsite. A global symbol is
/// emitted at the position of the target so it can be addressed from the
-/// prefetch instruction from any module.
+/// prefetch instruction from any module. In order to insert prefetch hints,
+/// `TargetInstrInfo::insertCodePrefetchInstr` must be implemented by the
+/// target.
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/InsertCodePrefetch.h"
+
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -27,11 +31,26 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "insert-code-prefetch"
+namespace llvm {
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+ const UniqueBBID &BBID,
+ unsigned CallsiteIndex) {
+ SmallString<128> R("__llvm_prefetch_target_");
+ R += FunctionName;
+ R += "_";
+ R += utostr(BBID.BaseID);
+ R += "_";
+ R += utostr(CallsiteIndex);
+ return R;
+}
+} // namespace llvm
+
namespace {
class InsertCodePrefetch : public MachineFunctionPass {
public:
@@ -81,7 +100,52 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
V.erase(llvm::unique(V), V.end());
}
MF.setPrefetchTargets(PrefetchTargetsByBBID);
- return false;
+
+ SmallVector<PrefetchHint> PrefetchHints =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+ .getPrefetchHintsForFunction(MF.getName());
+ DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
+ for (const auto &H : PrefetchHints)
+ PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
+ // Sort prefetch hints by their callsite index so we can insert them by one
+ // pass over the block's instructions.
+ for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) {
+ llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+ return H1.SiteID.CallsiteIndex < H2.SiteID.CallsiteIndex;
+ });
+ }
+ auto PtrTy =
+ PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ for (auto &BB : MF) {
+ auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
+ if (It == PrefetchHintsBySiteBBID.end())
+ continue;
+ const auto &PrefetchHints = It->second;
+ unsigned NumCallsInBB = 0;
+ auto InstrIt = BB.begin();
+ for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+ auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
+ // Insert all the prefetch hints which must be placed after this call (or
+ // at the beginning of the block if `NumCallsInBB` is zero.
+ while (HintIt != PrefetchHints.end() &&
+ NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
+ auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+ getPrefetchTargetSymbolName(HintIt->TargetFunction,
+ HintIt->TargetID.BBID,
+ HintIt->TargetID.CallsiteIndex),
+ PtrTy);
+ TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+ ++HintIt;
+ }
+ if (InstrIt == BB.end())
+ break;
+ if (InstrIt->isCall())
+ ++NumCallsInBB;
+ InstrIt = NextInstrIt;
+ }
+ }
+ return true;
}
void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2479a8dccfb00..edcc074f2db87 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,5 +10785,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
M.getFullAddress(Ops);
}
+bool X86InstrInfo::insertCodePrefetchInstr(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
+ get(X86::PREFETCHIT1),
+ InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore)
+ : InsertBefore->getDebugLoc(),
+ true);
+ MachineInstrBuilder MIB(MF, PrefetchInstr);
+ MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV),
+ MachineMemOperand::MOLoad, /*s=*/8,
+ /*base_alignment=*/llvm::Align(1)));
+ MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+ MIB.addGlobalAddress(GV);
+ MIB.addReg(X86::NoRegister);
+ MBB.insert(InsertBefore, PrefetchInstr);
+ return true;
+}
+
#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index de8ccb44578a3..e0638d75ae295 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -766,6 +766,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// \returns the index of operand that is commuted with \p Idx1. If the method
/// fails to commute the operands, it will return \p Idx1.
unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
+
+ bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const override;
};
} // namespace llvm
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index bef121aecf483..d8559c317c32e 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -9,9 +9,12 @@
; RUN: echo 't 2,1' >> %t
; RUN: echo 't 3,0' >> %t
; RUN: echo 't 3,1' >> %t
+; RUN: echo 'i 3,0 bar,0,0' >> %t
+; RUN: echo 'i 2,1 foo,1,0' >> %t
; RUN: echo 'f bar' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 21,1' >> %t
+; RUN: echo 'i 0,1 foo,0,0' >> %t
; RUN: echo 'f qux' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 0,1' >> %t
@@ -39,6 +42,7 @@ cond.false: ; preds = %1
; CHECK: callq baz at PLT
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_2_1
; CHECK-NEXT: __llvm_prefetch_target_foo_2_1:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_foo_1_0(%rip)
end: ; preds = %11, %9
ret void
@@ -47,6 +51,7 @@ end: ; preds = %11, %9
; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
; CHECK: .globl __llvm_prefetch_target_foo_3_1
; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_bar_0_0(%rip)
}
define weak i32 @bar() nounwind {
@@ -58,6 +63,7 @@ define weak i32 @bar() nounwind {
; CHECK-NEXT: .weak __llvm_prefetch_target_bar_0_0
; CHECK-NEXT: __llvm_prefetch_target_bar_0_0:
; CHECK: callq baz at PLT
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_foo_0_0(%rip)
}
define internal i32 @qux() nounwind {
@@ -72,4 +78,3 @@ define internal i32 @qux() nounwind {
}
declare i32 @baz()
-declare i32 @dummy()
>From 79f337c8cf2b6d294c1d6a424e01f3519b3b8574 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:08:05 +0000
Subject: [PATCH 04/13] Update tests.
---
...asic-block-sections-code-prefetch-call-terminates-block.ll | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
index bf98076770b5b..b3c8069d96f03 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
@@ -1,9 +1,10 @@
-;; Check prefetch directives properly handle a block terminating with a call.
+;; Check that prefetch directives properly handle prefetch targets and instructions after a call which terminates a block.
;;
;; Specify the bb sections profile:
; RUN: echo 'v1' > %t
; RUN: echo 'f foo' >> %t
; RUN: echo 't 0,1' >> %t
+; RUN: echo 'i 0,1 other,30,30' >> %t
;;
; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O1 | FileCheck %s
@@ -15,6 +16,7 @@ entry:
; CHECK: callq explode at PLT
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_0_1
; CHECK-NEXT: __llvm_prefetch_target_foo_0_1:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_other_30_30(%rip)
continue:
ret i32 0
>From c312e726ea59745728b6d91d0bc6676019ba94de Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:13:04 +0000
Subject: [PATCH 05/13] clang-format.
---
llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index f852fc7216e97..18c4e94bb1b5b 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -94,7 +94,7 @@ struct PrefetchHint {
// including CFG data (block and edge counts) and layout directives (clustering
// and cloning paths).
struct FunctionOptimizationProfile {
-// This represents the raw input profile for one function.
+ // This represents the raw input profile for one function.
// BB Cluster information specified by `UniqueBBID`s.
SmallVector<BBClusterInfo> ClusterInfo;
// Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
>From e9cb9bf7d688a2ee0ca3b6a1c9097fa08a3a7ec0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:28:28 +0000
Subject: [PATCH 06/13] Remove unwanted changes.
---
llvm/lib/CodeGen/BasicBlockSections.cpp | 7 +++----
llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 10 +++++-----
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 7a532b44c8c10..1f82ba7e0821e 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,10 +108,9 @@ class BasicBlockSections : public MachineFunctionPass {
public:
static char ID;
- // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
- // nullptr;
-
- BasicBlockSections() : MachineFunctionPass(ID) {}
+ BasicBlockSections() : MachineFunctionPass(ID) {
+ initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
+ }
StringRef getPassName() const override {
return "Basic Block Sections Analysis";
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 45942deacc05c..0da1adc08f0a0 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -171,8 +171,8 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
//
// A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
// bbid specifies the target basic block and subblock_index is a zero-based
-// index. Callsite 0 refers to the region at the beginning of the block up to
-// the first callsite. Callsite `i > 0` refers to the region immediately after
+// index. Subblock 0 refers to the region at the beginning of the block up to
+// the first callsite. Subblock `i > 0` refers to the region immediately after
// the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
// The prefetch target is always emitted at the beginning of the subblock.
// This is the beginning of the basic block for `i = 0` and immediately after
@@ -189,15 +189,15 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
// each subblock.
//
// +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <--- Subblock 0 (before call_A)
// | Instruction 1 |
// | Instruction 2 |
// | call_A (Callsite 0) |
-// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
// | | before call_B)
// | Instruction 3 |
// | call_B (Callsite 1) |
-// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
// | | before call_C)
// | Instruction 4 |
// +----------------------------------+
>From 8ac63b928bc29e2a87cbcbf4af5d99ece73c709f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 22:50:41 +0000
Subject: [PATCH 07/13] Additional changes.
---
.../CodeGen/BasicBlockSectionsProfileReader.h | 19 -------------------
llvm/include/llvm/Support/UniqueBBID.h | 8 ++++++++
.../X86/basic-block-sections-code-prefetch.ll | 2 +-
3 files changed, 9 insertions(+), 20 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 18c4e94bb1b5b..c4b2fd59793bd 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,25 +71,6 @@ struct CFGProfile {
}
};
-<<<<<<< HEAD
-=======
-// The prefetch symbol is emitted immediately after the call of the given index,
-// in block `BBID` (First call has an index of 1). Zero callsite index means the
-// start of the block.
-struct CallsiteID {
- UniqueBBID BBID;
- unsigned CallsiteIndex;
-};
-
-// This represents a prefetch hint to be injected at site `SiteID`, targetting
-// `TargetID` in function `TargetFunction`.
-struct PrefetchHint {
- CallsiteID SiteID;
- StringRef TargetFunction;
- CallsiteID TargetID;
-};
-
->>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
// This struct represents the raw optimization profile for a function,
// including CFG data (block and edge counts) and layout directives (clustering
// and cloning paths).
diff --git a/llvm/include/llvm/Support/UniqueBBID.h b/llvm/include/llvm/Support/UniqueBBID.h
index 23a9467534318..bee4cf9158f45 100644
--- a/llvm/include/llvm/Support/UniqueBBID.h
+++ b/llvm/include/llvm/Support/UniqueBBID.h
@@ -33,6 +33,14 @@ struct CallsiteID {
unsigned CallsiteIndex;
};
+// This represents a prefetch hint to be injected at site `SiteID`, targetting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+ CallsiteID SiteID;
+ StringRef TargetFunction;
+ CallsiteID TargetID;
+};
+
// Provides DenseMapInfo for UniqueBBID.
template <> struct DenseMapInfo<UniqueBBID> {
static inline UniqueBBID getEmptyKey() {
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index d8559c317c32e..b530846f02a4a 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -49,9 +49,9 @@ end: ; preds = %11, %9
; CHECK: .LBB0_3:
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_3_0
; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_bar_0_0(%rip)
; CHECK: .globl __llvm_prefetch_target_foo_3_1
; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
-; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_bar_0_0(%rip)
}
define weak i32 @bar() nounwind {
>From 4390ae1edd160d2620ffdbe1c18a1710ba72932d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:44 +0000
Subject: [PATCH 08/13] Split setPrefetchTargets and insertPrefetchHints into
separate functions.
---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 36 +++++++++++++++----------
1 file changed, 22 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 87e2c776af19e..880c4bec8451d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -81,16 +81,10 @@ INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
true, false)
-bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
- assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
- "BB Sections list not enabled!");
- if (hasInstrProfHashMismatch(MF))
- return false;
+static void setPrefetchTargets(MachineFunction &MF,
+ const SmallVector<CallsiteID> &PrefetchTargets) {
// Set each block's prefetch targets so AsmPrinter can emit a special symbol
// there.
- SmallVector<CallsiteID> PrefetchTargets =
- getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
- .getPrefetchTargetsForFunction(MF.getName());
DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
for (const auto &Target : PrefetchTargets)
PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
@@ -100,10 +94,11 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
V.erase(llvm::unique(V), V.end());
}
MF.setPrefetchTargets(PrefetchTargetsByBBID);
+}
- SmallVector<PrefetchHint> PrefetchHints =
- getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
- .getPrefetchHintsForFunction(MF.getName());
+static void
+insertPrefetchHints(MachineFunction &MF,
+ const SmallVector<PrefetchHint> &PrefetchHints) {
DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
for (const auto &H : PrefetchHints)
PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -121,14 +116,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
if (It == PrefetchHintsBySiteBBID.end())
continue;
- const auto &PrefetchHints = It->second;
+ const auto &BBHints = It->second;
unsigned NumCallsInBB = 0;
auto InstrIt = BB.begin();
- for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+ for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
// Insert all the prefetch hints which must be placed after this call (or
// at the beginning of the block if `NumCallsInBB` is zero.
- while (HintIt != PrefetchHints.end() &&
+ while (HintIt != BBHints.end() &&
NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
getPrefetchTargetSymbolName(HintIt->TargetFunction,
@@ -145,6 +140,19 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
InstrIt = NextInstrIt;
}
}
+}
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+ assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+ "BB Sections list not enabled!");
+ if (hasInstrProfHashMismatch(MF))
+ return false;
+
+ auto &ProfileReader =
+ getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
+ setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+ insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+
return true;
}
>From 235c35163b6de12c5a359a7fdb6cceae8258b059 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:57 +0000
Subject: [PATCH 09/13] clang-format.
---
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 880c4bec8451d..6f6d6efdc49c3 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -150,8 +150,10 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
auto &ProfileReader =
getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
- setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
- insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+ setPrefetchTargets(MF,
+ ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+ insertPrefetchHints(MF,
+ ProfileReader.getPrefetchHintsForFunction(MF.getName()));
return true;
}
>From eb4169916bde2572f611d24c24060b3ea159d0c3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:07 +0000
Subject: [PATCH 10/13] Implement fallback support.
---
llvm/include/llvm/CodeGen/AsmPrinter.h | 2 +-
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++--
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 17 +++++++------
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 24 +++++++++++++++----
llvm/lib/Target/X86/X86InstrInfo.cpp | 4 ++--
llvm/lib/Target/X86/X86InstrInfo.h | 2 +-
.../X86/basic-block-sections-code-prefetch.ll | 4 ++++
7 files changed, 40 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index b160de2f060d6..daffdc5f41bd5 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -491,7 +491,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
/// Helper to emit a symbol for the prefetch target associated with the given
/// BBID and callsite index. The symbol is emitted as a label and its linkage
/// is set based on the function's linkage.
- void emitPrefetchTargetSymbol(unsigned BaseID, unsigned CallsiteIndex);
+ void emitPrefetchTargetSymbol(const UniqueBBID &BBID, unsigned CallsiteIndex);
/// Emit prefetch targets that were not mapped to any basic block. These
/// targets are emitted at the beginning of the function body.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d5b7bd5f928d2..c2d879a177a00 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,10 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
/// targetting `GV`.
- virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const GlobalValue *GV) const {
- return false;
+ llvm_unreachable("target did not implement");
}
private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 204347d0ad395..10c17ead558de 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2020,13 +2020,12 @@ void AsmPrinter::handleCallsiteForCallgraph(
/// Helper to emit a symbol for the prefetch target associated with the given
/// BBID and callsite index.
-void AsmPrinter::emitPrefetchTargetSymbol(unsigned BaseID,
+void AsmPrinter::emitPrefetchTargetSymbol(const UniqueBBID &BBID,
unsigned CallsiteIndex) {
SmallString<128> FunctionName;
getNameWithPrefix(FunctionName, &MF->getFunction());
MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
- "__llvm_prefetch_target_" + FunctionName + "_" + Twine(BaseID) + "_" +
- Twine(CallsiteIndex));
+ getPrefetchTargetSymbolName(FunctionName, BBID, CallsiteIndex));
// If the function is weak-linkage it may be replaced by a strong
// version, in which case the prefetch targets should also be replaced.
OutStreamer->emitSymbolAttribute(
@@ -2050,7 +2049,7 @@ void AsmPrinter::emitDanglingPrefetchTargets() {
if (MFBBIDs.contains(BBID))
continue;
for (unsigned CallsiteIndex : CallsiteIndexes)
- emitPrefetchTargetSymbol(BBID.BaseID, CallsiteIndex);
+ emitPrefetchTargetSymbol(BBID, CallsiteIndex);
}
}
@@ -2125,7 +2124,7 @@ void AsmPrinter::emitFunctionBody() {
for (auto &MI : MBB) {
if (PrefetchTargetIt != PrefetchTargetEnd &&
*PrefetchTargetIt == LastCallsiteIndex) {
- emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+ emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
++PrefetchTargetIt;
}
@@ -2276,8 +2275,12 @@ void AsmPrinter::emitFunctionBody() {
handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
// If there is a post-instruction symbol, emit a label for it here.
- if (MCSymbol *S = MI.getPostInstrSymbol())
+ if (MCSymbol *S = MI.getPostInstrSymbol()) {
+ if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+ if (ESym->isWeakref())
+ OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
OutStreamer->emitLabel(S);
+ }
for (auto &Handler : Handlers)
Handler->endInstruction();
@@ -2285,7 +2288,7 @@ void AsmPrinter::emitFunctionBody() {
// Emit the remaining prefetch targets for this block. This includes
// nonexisting callsite indexes.
while (PrefetchTargetIt != PrefetchTargetEnd) {
- emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+ emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
++PrefetchTargetIt;
}
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 6f6d6efdc49c3..9bc4980424474 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCSymbolELF.h"
using namespace llvm;
#define DEBUG_TYPE "insert-code-prefetch"
@@ -99,6 +100,8 @@ static void setPrefetchTargets(MachineFunction &MF,
static void
insertPrefetchHints(MachineFunction &MF,
const SmallVector<PrefetchHint> &PrefetchHints) {
+ bool IsELF = MF.getTarget().getTargetTriple().isOSBinFormatELF();
+ const Module *M = MF.getFunction().getParent();
DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
for (const auto &H : PrefetchHints)
PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -120,17 +123,30 @@ insertPrefetchHints(MachineFunction &MF,
unsigned NumCallsInBB = 0;
auto InstrIt = BB.begin();
for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
+ bool TargetFunctionDefined = false;
+ if (Function *TargetFunction = M->getFunction(HintIt->TargetFunction))
+ TargetFunctionDefined = !TargetFunction->isDeclaration();
auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
// Insert all the prefetch hints which must be placed after this call (or
// at the beginning of the block if `NumCallsInBB` is zero.
while (HintIt != BBHints.end() &&
NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
- auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
- getPrefetchTargetSymbolName(HintIt->TargetFunction,
+ auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
HintIt->TargetID.BBID,
- HintIt->TargetID.CallsiteIndex),
+ HintIt->TargetID.CallsiteIndex);
+ auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
PtrTy);
- TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+ MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+ if (!TargetFunctionDefined && IsELF) {
+ // If the target function is not defined in this module, we guard
+ // against undefined prefetch target symbol by emitting a fallback
+ // symbol with weak linkage right after the prefetch instruction. If
+ // there is no strong symbol, the fallback will be used and we
+ // prefetch the next address.
+ MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+ WeakFallbackSym->setIsWeakref();
+ PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
+ }
++HintIt;
}
if (InstrIt == BB.end())
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index edcc074f2db87..0ccd234b97f51 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,7 +10785,7 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
M.getFullAddress(Ops);
}
-bool X86InstrInfo::insertCodePrefetchInstr(
+MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
const GlobalValue *GV) const {
MachineFunction &MF = *MBB.getParent();
@@ -10802,7 +10802,7 @@ bool X86InstrInfo::insertCodePrefetchInstr(
MIB.addGlobalAddress(GV);
MIB.addReg(X86::NoRegister);
MBB.insert(InsertBefore, PrefetchInstr);
- return true;
+ return PrefetchInstr;
}
#define GET_INSTRINFO_HELPERS
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e0638d75ae295..b416e43250eb5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,7 +767,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// fails to commute the operands, it will return \p Idx1.
unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
- bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
const GlobalValue *GV) const override;
};
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index b530846f02a4a..50b5fbf50dec7 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,6 +11,7 @@
; RUN: echo 't 3,1' >> %t
; RUN: echo 'i 3,0 bar,0,0' >> %t
; RUN: echo 'i 2,1 foo,1,0' >> %t
+; RUN: echo 'i 1,1 undef,5,5' >> %t
; RUN: echo 'f bar' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 21,1' >> %t
@@ -33,8 +34,11 @@ cond.true: ; preds = %1
; CHECK: .globl __llvm_prefetch_target_foo_1_0
; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
; CHECK-NEXT: callq bar@PLT
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_undef_5_5(%rip)
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_1_1
; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT: .weak __llvm_prefetch_target_undef_5_5
+; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
cond.false: ; preds = %1
call i32 @baz()
>From 642f25c7057fbe4a736d7c9700110a7d93c7fba3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:21 +0000
Subject: [PATCH 11/13] clang-format.
---
llvm/include/llvm/CodeGen/TargetInstrInfo.h | 7 ++++---
llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +-
llvm/lib/CodeGen/InsertCodePrefetch.cpp | 16 +++++++++-------
llvm/lib/Target/X86/X86InstrInfo.cpp | 7 ++++---
llvm/lib/Target/X86/X86InstrInfo.h | 7 ++++---
5 files changed, 22 insertions(+), 17 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index c2d879a177a00..0d2c931ac5e55 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,9 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
/// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
/// targetting `GV`.
- virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- const GlobalValue *GV) const {
+ virtual MachineInstr *
+ insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
llvm_unreachable("target did not implement");
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 10c17ead558de..8173f5ff7d79a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2276,7 +2276,7 @@ void AsmPrinter::emitFunctionBody() {
// If there is a post-instruction symbol, emit a label for it here.
if (MCSymbol *S = MI.getPostInstrSymbol()) {
- if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+ if (MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S))
if (ESym->isWeakref())
OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
OutStreamer->emitLabel(S);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 9bc4980424474..496a672c734cb 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -131,19 +131,21 @@ insertPrefetchHints(MachineFunction &MF,
// at the beginning of the block if `NumCallsInBB` is zero.
while (HintIt != BBHints.end() &&
NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
- auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
- HintIt->TargetID.BBID,
- HintIt->TargetID.CallsiteIndex);
- auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
- PtrTy);
- MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+ auto TargetSymbolName = getPrefetchTargetSymbolName(
+ HintIt->TargetFunction, HintIt->TargetID.BBID,
+ HintIt->TargetID.CallsiteIndex);
+ auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+ TargetSymbolName, PtrTy);
+ MachineInstr *PrefetchInstr =
+ TII->insertCodePrefetchInstr(BB, InstrIt, GV);
if (!TargetFunctionDefined && IsELF) {
// If the target function is not defined in this module, we guard
// against undefined prefetch target symbol by emitting a fallback
// symbol with weak linkage right after the prefetch instruction. If
// there is no strong symbol, the fallback will be used and we
// prefetch the next address.
- MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+ MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(
+ MF.getContext().getOrCreateSymbol(TargetSymbolName));
WeakFallbackSym->setIsWeakref();
PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 0ccd234b97f51..f41d65db086cc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,9 +10785,10 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
M.getFullAddress(Ops);
}
-MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
- const GlobalValue *GV) const {
+MachineInstr *
+X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const {
MachineFunction &MF = *MBB.getParent();
MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
get(X86::PREFETCHIT1),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b416e43250eb5..9b77ff0c62237 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,9 +767,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
/// fails to commute the operands, it will return \p Idx1.
unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
- MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore,
- const GlobalValue *GV) const override;
+ MachineInstr *
+ insertCodePrefetchInstr(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore,
+ const GlobalValue *GV) const override;
};
} // namespace llvm
>From 7f8c3acd4ce33dd064ba64c7794d216e60bc0ee0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:58:35 +0000
Subject: [PATCH 12/13] fix the prefetch test.
---
llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 50b5fbf50dec7..991e3d37fc366 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -34,9 +34,9 @@ cond.true: ; preds = %1
; CHECK: .globl __llvm_prefetch_target_foo_1_0
; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
; CHECK-NEXT: callq bar@PLT
-; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_undef_5_5(%rip)
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_1_1
; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_undef_5_5(%rip)
; CHECK-NEXT: .weak __llvm_prefetch_target_undef_5_5
; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
>From effda96d041e43dfeea8d8eeba5cb09408381959 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 7 Mar 2026 01:15:30 +0000
Subject: [PATCH 13/13] Change undef to extfunc.
---
.../CodeGen/X86/basic-block-sections-code-prefetch.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 991e3d37fc366..acf6c588fafe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,7 +11,7 @@
; RUN: echo 't 3,1' >> %t
; RUN: echo 'i 3,0 bar,0,0' >> %t
; RUN: echo 'i 2,1 foo,1,0' >> %t
-; RUN: echo 'i 1,1 undef,5,5' >> %t
+; RUN: echo 'i 1,1 extfunc,5,5' >> %t
; RUN: echo 'f bar' >> %t
; RUN: echo 't 0,0' >> %t
; RUN: echo 't 21,1' >> %t
@@ -36,9 +36,9 @@ cond.true: ; preds = %1
; CHECK-NEXT: callq bar@PLT
; CHECK-NEXT: .globl __llvm_prefetch_target_foo_1_1
; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
-; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_undef_5_5(%rip)
-; CHECK-NEXT: .weak __llvm_prefetch_target_undef_5_5
-; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
+; CHECK-NEXT: prefetchit1 __llvm_prefetch_target_extfunc_5_5(%rip)
+; CHECK-NEXT: .weak __llvm_prefetch_target_extfunc_5_5
+; CHECK-NEXT: __llvm_prefetch_target_extfunc_5_5:
cond.false: ; preds = %1
call i32 @baz()
More information about the llvm-commits
mailing list