[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 22:40:29 PST 2025


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324

>From c4b73ba7594e81e2c251d88da89fecf4cc7f153f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 01/23] feat(AsmPrinter): Add support for emitting prefetch
 target symbols

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 50 ++++++++++++++++++-
 .../BasicBlockSectionsProfileReader.cpp       | 44 ++++++++++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 13 +++++
 5 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index ee1f28377f7e4..5b230db30aec4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
+struct BBPosition {
+  UniqueBBID BBID;
+  unsigned BBOffset;
+};
+
+struct PrefetchHint {
+  BBPosition SitePosition;
+  StringRef TargetFunctionName;
+  BBPosition TargetPosition;
+};
+
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
+  SmallVector<PrefetchHint> PrefetchHints;
+  DenseSet<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
-  // Edge counts for each edge, stored as a nested map.
+  // Edge counts for each edge.
   DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
   // Hash for each basic block. The Hashes are stored for every original block
   // (not cloned blocks), hence the map key being unsigned instead of
@@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
+// Provides DenseMapInfo BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+  static inline BBPosition getEmptyKey() {
+    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+            DenseMapInfo<unsigned>::getEmptyKey()};
+  }
+  static inline BBPosition getTombstoneKey() {
+    return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+                      DenseMapInfo<unsigned>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const BBPosition &Val) {
+    std::pair<unsigned, unsigned> PairVal = std::make_pair(
+        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+  }
+  static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+  }
+};
+
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..e6c6bc26ae9e6 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
+struct PrefetchTarget {
+  StringRef TargetFunction;
+  UniqueBBID TargetBBID;
+  unsigned TargetBBOffset;
+};
+
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -229,6 +237,8 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
+  mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
 
@@ -710,6 +720,14 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
+  const SmallVector<unsigned> &getPrefetchTargets() const {
+    return PrefetchTargets;
+  }
+
+  void setPrefetchTargets(const SmallVector<unsigned> &V) {
+    PrefetchTargets = V;
+  }
+
   /// Returns the section ID of this basic block.
   MBBSectionID getSectionID() const { return SectionID; }
 
@@ -1275,6 +1293,12 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
+  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+    return CallInstSymbols;
+  }
+
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3aa245b7f3f1e..a204bba5789a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> InsertNoopsForPrefetch(
+    "insert-noops-for-prefetch",
+    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+    cl::Hidden);
+
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
+    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
+    unsigned NumCallsInBlock = 0;
     for (auto &MI : MBB) {
+      if (NextPrefetchTargetIndex != -1 &&
+          NumCallsInBlock >=  MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
+
+        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+            Twine("_") +
+            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+        if (MF->getFunction().isWeakForLinker()) {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
+          errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        } else {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+          errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        }
+        // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+       // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        OutStreamer->emitLabel(PrefetchTargetSymbol);
+        ++NextPrefetchTargetIndex;
+        if (NextPrefetchTargetIndex >=
+            static_cast<int>(MBB.getPrefetchTargets().size()))
+          NextPrefetchTargetIndex = -1;
+      }
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugInstr()) {
@@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-        emitInstruction(&MI);
+         emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
+   while (NextPrefetchTargetIndex != -1) {
+        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+            Twine("_") +
+            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+        if (MF->getFunction().hasWeakLinkage()) {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
+        } else {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+        }
+        OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+        OutStreamer->emitLabel(PrefetchTargetSymbol);
+        ++NextPrefetchTargetIndex;
+        if (NextPrefetchTargetIndex >=
+            static_cast<int>(MBB.getPrefetchTargets().size()))
+          NextPrefetchTargetIndex = -1;
+      }
+
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c234c0f1b0b34..de146e172c174 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+      .PrefetchTargets;
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       }
       continue;
     }
+    case 't': { // Prefetch target specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
+      assert(Values.size() == 1);
+      SmallVector<StringRef, 2> PrefetchTargetStr;
+      Values[0].split(PrefetchTargetStr, '@');
+      assert(PrefetchTargetStr.size() == 2);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long TargetBBOffset;
+      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[1]);
+      FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
@@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+  if (CallInstSymbols.size() <= CallInstNumber) {
+    const MachineFunction *MF = getParent();
+    MCContext &Ctx = MF->getContext();
+    CallInstSymbols.resize(CallInstNumber + 1);
+    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
+        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+            Twine(CallInstNumber),
+        /*AlwaysEmit=*/true);
+  }
+  return CallInstSymbols[CallInstNumber];
+}
+
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();

>From 790f77925bcee9aa03ef0ddbf466132d59e5b933 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 11 Nov 2025 21:30:47 +0000
Subject: [PATCH 02/23] feat: Add prefetch-profile.txt for testing

---
 build-release/prefetch-profile.txt | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 build-release/prefetch-profile.txt

diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
new file mode 100644
index 0000000000000..294f57a46920b
--- /dev/null
+++ b/build-release/prefetch-profile.txt
@@ -0,0 +1,3 @@
+v1
+f f
+t 0 at 1

>From 8ac920dfd4d32a685154242f791e23cfafafcf5a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:18 +0000
Subject: [PATCH 03/23] Everything else.

---
 build-release/prefetch-profile.txt            |  3 --
 .../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++----------------
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 12 +++++++-
 .../BasicBlockSectionsProfileReader.cpp       | 10 +++----
 4 files changed, 20 insertions(+), 34 deletions(-)
 delete mode 100644 build-release/prefetch-profile.txt

diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
deleted file mode 100644
index 294f57a46920b..0000000000000
--- a/build-release/prefetch-profile.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-v1
-f f
-t 0 at 1
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 5b230db30aec4..fbf9b89754cd7 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -44,7 +44,7 @@ struct BBClusterInfo {
 
 struct BBPosition {
   UniqueBBID BBID;
-  unsigned BBOffset;
+  unsigned CallsiteIndex;
 };
 
 struct PrefetchHint {
@@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo {
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
   SmallVector<PrefetchHint> PrefetchHints;
-  DenseSet<BBPosition> PrefetchTargets;
+  SmallVector<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
-// Provides DenseMapInfo BBPosition.
-template <> struct DenseMapInfo<BBPosition> {
-  static inline BBPosition getEmptyKey() {
-    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
-            DenseMapInfo<unsigned>::getEmptyKey()};
-  }
-  static inline BBPosition getTombstoneKey() {
-    return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
-                      DenseMapInfo<unsigned>::getTombstoneKey()};
-  }
-  static unsigned getHashValue(const BBPosition &Val) {
-    std::pair<unsigned, unsigned> PairVal = std::make_pair(
-        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
-    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
-  }
-  static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
-    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
-           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
-  }
-};
-
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a204bba5789a8..90445fedd5db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
   if (EmitBBHash)
     AU.addRequired<MachineBlockHashInfo>();
+  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-  for (auto &MBB : *MF) {
+  DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
+  if (auto *BBSPRPass =
+          getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
+    FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
+}
+
+   for (auto &MBB : *MF) {
+
+    SmallVector<unsigned> BBPrefetchTargets;
+    = FunctionPrefetchTargets.lookup(MBB.g);
     int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index de146e172c174..c4784a6039c09 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
 }
 
-DenseSet<BBPosition>
+SmallVector<BBPosition>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();
-      unsigned long long TargetBBOffset;
-      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+      unsigned long long TargetCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
-      FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+      FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
@@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
   return BBSPR.getPrefetchHintsForFunction(FuncName);
 }
 
-DenseSet<BBPosition>
+SmallVector<BBPosition>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return BBSPR.getPrefetchTargetsForFunction(FuncName);

>From d0ab0b5ab472f7de0da5648391d15239c73b2888 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:27 +0000
Subject: [PATCH 04/23] Add test.

---
 llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
new file mode 100644
index 0000000000000..979db7942ff2c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -0,0 +1,42 @@
+;; Check that a prefetch target directive ('t') in the basic block sections
+;; profile causes a prefetch target symbol to be emitted.
+;;
+;; Specify the bb sections profile:
+; RUN: echo 'v1' > %t
+; RUN: echo 'f _Z3foob' >> %t
+; RUN: echo 't 0@0' >> %t
+;;
+; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t  | FileCheck
+
+define i32 @_Z3foob(i1 zeroext %0) nounwind {
+  %2 = alloca i32, align 4
+  %3 = alloca i8, align 1
+  %4 = zext i1 %0 to i8
+  store i8 %4, ptr %3, align 1
+  %5 = load i8, ptr %3, align 1
+  %6 = trunc i8 %5 to i1
+  %7 = zext i1 %6 to i32
+  %8 = icmp sgt i32 %7, 0
+  br i1 %8, label %9, label %11
+
+9:                                                ; preds = %1
+  %10 = call i32 @_Z3barv()
+  store i32 %10, ptr %2, align 4
+  br label %13
+
+11:                                               ; preds = %1
+  %12 = call i32 @_Z3bazv()
+  store i32 %12, ptr %2, align 4
+  br label %13
+
+13:                                               ; preds = %11, %9
+  %14 = load i32, ptr %2, align 4
+  ret i32 %14
+}
+
+declare i32 @_Z3barv() #1
+declare i32 @_Z3bazv() #1
+
+
+; CHECK: _Z3foob
+; CHECK: llvm_prefetch_target

>From 988fab7212cbeadfb021917f35c3da6b97f2903e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:17 +0000
Subject: [PATCH 05/23] Fix everything

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |  4 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +----
 llvm/include/llvm/CodeGen/Passes.h            |  2 +
 llvm/include/llvm/InitializePasses.h          |  1 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 73 ++++----------
 llvm/lib/CodeGen/CMakeLists.txt               |  1 +
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 96 +++++++++++++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 13 ---
 llvm/lib/CodeGen/TargetPassConfig.cpp         |  1 +
 9 files changed, 127 insertions(+), 88 deletions(-)
 create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index fbf9b89754cd7..1fd904d64ab9d 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index e6c6bc26ae9e6..4be008bbf4bf1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
-struct PrefetchTarget {
-  StringRef TargetFunction;
-  UniqueBBID TargetBBID;
-  unsigned TargetBBOffset;
-};
-
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -219,8 +213,6 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
-  SmallVector<unsigned> PrefetchTargets;
-
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -237,7 +229,7 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+  SmallVector<unsigned> PrefetchTargetIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -720,12 +712,12 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
-  const SmallVector<unsigned> &getPrefetchTargets() const {
-    return PrefetchTargets;
+  const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
+    return PrefetchTargetIndexes;
   }
 
-  void setPrefetchTargets(const SmallVector<unsigned> &V) {
-    PrefetchTargets = V;
+  void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
+    PrefetchTargetIndexes = V;
   }
 
   /// Returns the section ID of this basic block.
@@ -1293,12 +1285,6 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
-  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
-  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
-    return CallInstSymbols;
-  }
-
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..f148d050a5772 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
 
 LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 
+LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
+
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
 LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 10a4d8525a9e8..35d5ab14dc226 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
 LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
 LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
 LLVM_ABI void
 initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 90445fedd5db3..933fe6f7d177f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -179,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> InsertNoopsForPrefetch(
-    "insert-noops-for-prefetch",
-    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
-    cl::Hidden);
-
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
   if (EmitBBHash)
     AU.addRequired<MachineBlockHashInfo>();
-  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-  DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
-  if (auto *BBSPRPass =
-          getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
-    FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
-}
 
    for (auto &MBB : *MF) {
-
-    SmallVector<unsigned> BBPrefetchTargets;
-    = FunctionPrefetchTargets.lookup(MBB.g);
-    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
-    unsigned NumCallsInBlock = 0;
-    for (auto &MI : MBB) {
-      if (NextPrefetchTargetIndex != -1 &&
-          NumCallsInBlock >=  MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
 
-        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+    auto PrefetchTargetIt = PrefetchTargets.begin();
+    unsigned NumCalls = 0;
+    auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
+      if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+        return;
+      MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
             Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
             Twine("_") +
-            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
-        if (MF->getFunction().isWeakForLinker()) {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
-          errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
-        } else {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
-          errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
-        }
-        // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
-       // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+            utostr(*PrefetchTargetIt));
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
         OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++NextPrefetchTargetIndex;
-        if (NextPrefetchTargetIndex >=
-            static_cast<int>(MBB.getPrefetchTargets().size()))
-          NextPrefetchTargetIndex = -1;
-      }
+        ++PrefetchTargetIt;
+    };
+
+    for (auto &MI : MBB) {
+      EmitPrefetchTargetSymbolIfNeeded();
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugInstr()) {
@@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
 
-      if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
+      if (MI.isCall()) {
+        if (MF->getTarget().Options.BBAddrMap)
         OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+        ++NumCalls;
+      }
 
       if (TM.Options.EmitCallGraphSection && MI.isCall())
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
@@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
-   while (NextPrefetchTargetIndex != -1) {
-        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
-            Twine("_") +
-            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
-        if (MF->getFunction().hasWeakLinkage()) {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
-        } else {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
-        }
-        OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
-        OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++NextPrefetchTargetIndex;
-        if (NextPrefetchTargetIndex >=
-            static_cast<int>(MBB.getPrefetchTargets().size()))
-          NextPrefetchTargetIndex = -1;
-      }
-
+    EmitPrefetchTargetSymbolIfNeeded();
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1cf0b4964760b..fcf28247179ca 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
   IndirectBrExpandPass.cpp
   InitUndef.cpp
   InlineSpiller.cpp
+  InsertCodePrefetch.cpp
   InterferenceCache.cpp
   InterleavedAccessPass.cpp
   InterleavedLoadCombinePass.cpp
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
new file mode 100644
index 0000000000000..7cb52302ac7db
--- /dev/null
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -0,0 +1,96 @@
+//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+namespace {
+class InsertCodePrefetch : public MachineFunctionPass {
+public:
+  static char ID;
+
+  InsertCodePrefetch() : MachineFunctionPass(ID) {
+    initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Identify basic blocks that need separate sections and prepare to emit them
+  /// accordingly.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
+
+char InsertCodePrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(
+    InsertCodePrefetch, DEBUG_TYPE,
+    "Reads prefetch", true,
+    false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+    InsertCodePrefetch, DEBUG_TYPE,
+    "Reads prefetch", true,
+    false)
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+  SmallVector<BBPosition> PrefetchTargets =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchTargetsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+  for (const auto &Target: PrefetchTargets)
+    PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+  for (auto &MBB: MF) {
+    auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
+    if (R == PrefetchTargetsByBBID.end()) continue;
+    MBB.setPrefetchTargetIndexes(R->second);
+  }
+
+  return false;
+}
+
+void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createInsertCodePrefetchPass() {
+  return new InsertCodePrefetch();
+}
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
-  if (CallInstSymbols.size() <= CallInstNumber) {
-    const MachineFunction *MF = getParent();
-    MCContext &Ctx = MF->getContext();
-    CallInstSymbols.resize(CallInstNumber + 1);
-    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
-        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
-            Twine(CallInstNumber),
-        /*AlwaysEmit=*/true);
-  }
-  return CallInstSymbols[CallInstNumber];
-}
-
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 10b723887b21f..0434a4f1cd94d 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1293,6 +1293,7 @@ void TargetPassConfig::addMachinePasses() {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
       addPass(llvm::createBasicBlockPathCloningPass());
+      addPass(llvm::createInsertCodePrefetchPass());
     }
     addPass(llvm::createBasicBlockSectionsPass());
   }

>From eef799bb89dd857065442c5b90b498183a5da59a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:36 +0000
Subject: [PATCH 06/23] clang-format.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |  6 +++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 25 +++++++++++--------
 .../BasicBlockSectionsProfileReader.cpp       |  3 ++-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 23 ++++++++---------
 4 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 1fd904d64ab9d..2b8ee578cd917 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 933fe6f7d177f..4368cd4d256c9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
 
-   for (auto &MBB : *MF) {
+  for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() {
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+      if (PrefetchTargetIt == PrefetchTargets.end() ||
+          NumCalls < *PrefetchTargetIt)
         return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
-            Twine("_") +
-            utostr(*PrefetchTargetIt));
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
-        OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++PrefetchTargetIt;
+          Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+          utostr(MBB.getBBID()->BaseID) + Twine("_") +
+          utostr(*PrefetchTargetIt));
+      OutStreamer->emitSymbolAttribute(
+          PrefetchTargetSymbol,
+          MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+      OutStreamer->emitLabel(PrefetchTargetSymbol);
+      ++PrefetchTargetIt;
     };
 
     for (auto &MI : MBB) {
@@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-         emitInstruction(&MI);
+        emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.
@@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() {
 
       if (MI.isCall()) {
         if (MF->getTarget().Options.BBAddrMap)
-        OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+          OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
         ++NumCalls;
       }
 
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c4784a6039c09..9b54dd6803cf6 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
-      FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+      FI->second.PrefetchTargets.push_back(
+          BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 7cb52302ac7db..91cb6e599215d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass {
     initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
   }
 
-  StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+  StringRef getPassName() const override {
+    return "X86 Cide Prefetch Inserter Pass";
+  }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(
-    InsertCodePrefetch, DEBUG_TYPE,
-    "Reads prefetch", true,
-    false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+                      false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
-    InsertCodePrefetch, DEBUG_TYPE,
-    "Reads prefetch", true,
-    false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+                    false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
@@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
-  for (const auto &Target: PrefetchTargets)
+  for (const auto &Target : PrefetchTargets)
     PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
-  for (auto &MBB: MF) {
+  for (auto &MBB : MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
-    if (R == PrefetchTargetsByBBID.end()) continue;
+    if (R == PrefetchTargetsByBBID.end())
+      continue;
     MBB.setPrefetchTargetIndexes(R->second);
   }
 

>From 1d847c8971bec64238503b0ce39e6545c45ee986 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:46:13 +0000
Subject: [PATCH 07/23] Fix the prefetch test.

---
 llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
index 979db7942ff2c..3eb91dfdabd27 100644
--- a/llvm/test/CodeGen/X86/prefetch-symbols.ll
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -1,12 +1,14 @@
-;; Check that specifying the function in the basic block sections profile
-;; without any other directives is a noop.
+;; Check prefetch directives in basic block section profiles.
 ;;
 ;; Specify the bb sections profile:
 ; RUN: echo 'v1' > %t
 ; RUN: echo 'f _Z3foob' >> %t
 ; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 1 at 0' >> %t
+; RUN: echo 't 1 at 1' >> %t
+; RUN: echo 't 2 at 1' >> %t
 ;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t  | FileCheck
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 
 define i32 @_Z3foob(i1 zeroext %0) nounwind {
   %2 = alloca i32, align 4
@@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
   %7 = zext i1 %6 to i32
   %8 = icmp sgt i32 %7, 0
   br i1 %8, label %9, label %11
+; CHECK:      _Z3foob:
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0:
 
 9:                                                ; preds = %1
   %10 = call i32 @_Z3barv()
   store i32 %10, ptr %2, align 4
   br label %13
+; CHECK:        .globl __llvm_prefetch_target__Z3foob_1_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0:
+; CHECK-NEXT:   callq _Z3barv at PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_1_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1:
 
 11:                                               ; preds = %1
   %12 = call i32 @_Z3bazv()
   store i32 %12, ptr %2, align 4
   br label %13
+; CHECK:        callq _Z3bazv at PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_2_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
 
 13:                                               ; preds = %11, %9
   %14 = load i32, ptr %2, align 4
@@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
 
 declare i32 @_Z3barv() #1
 declare i32 @_Z3bazv() #1
-
-
-; CHECK: _Z3foob
-; CHECK: llvm_prefetch_target

>From 36c8dc037342862d5445ce6ea3990f48d631c573 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:48:53 +0000
Subject: [PATCH 08/23] Rename the test.

---
 ...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%)

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
similarity index 100%
rename from llvm/test/CodeGen/X86/prefetch-symbols.ll
rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll

>From f598b97ee05405e4c8f4c1d2f0fded5c05efd09e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 06:59:43 +0000
Subject: [PATCH 09/23] Remove unrelated changes.

---
 .../llvm/CodeGen/BasicBlockSectionsProfileReader.h   | 12 ------------
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------
 2 files changed, 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 2b8ee578cd917..801588509d340 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -47,12 +47,6 @@ struct BBPosition {
   unsigned CallsiteIndex;
 };
 
-struct PrefetchHint {
-  BBPosition SitePosition;
-  StringRef TargetFunctionName;
-  BBPosition TargetPosition;
-};
-
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
-  SmallVector<PrefetchHint> PrefetchHints;
   SmallVector<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
@@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
-  SmallVector<PrefetchHint>
-  getPrefetchHintsForFunction(StringRef FuncName) const;
-
   SmallVector<BBPosition>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
@@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
-  SmallVector<PrefetchHint>
-  getPrefetchHintsForFunction(StringRef FuncName) const;
 
   SmallVector<BBPosition>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9b54dd6803cf6..5b12c85f7eeef 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
-    StringRef FuncName) const {
-  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
-}
-
 SmallVector<BBPosition>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
@@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
-    StringRef FuncName) const {
-  return BBSPR.getPrefetchHintsForFunction(FuncName);
-}
-
 SmallVector<BBPosition>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {

>From c7ca7c55e5a5881006318e759defd049e31d6be2 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:09 +0000
Subject: [PATCH 10/23] Add some comments.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  2 --
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 26 +++++++++----------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 801588509d340..784bf8dd8f2a9 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,9 +42,14 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
-struct BBPosition {
+// Assuming a block is split into subblocks across its callsites, this struct
+// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
+// the beginning of the block if `SubblockIndex` is zero) to the call number
+// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
+// calls in the basic block).
+struct SubblockID {
   UniqueBBID BBID;
-  unsigned CallsiteIndex;
+  unsigned SubblockIndex;
 };
 
 // This represents the raw input profile for one function.
@@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
-  SmallVector<BBPosition> PrefetchTargets;
+  // Code prefetch targets, each specified by the ID of the subblock whose
+  // beginning must be targeted for prefetching.
+  SmallVector<SubblockID> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
-  SmallVector<BBPosition>
+  // Returns the prefetch targets (identified by their containing subblocks) for
+  // function `FuncName`.
+  SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
@@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
 
-  SmallVector<BBPosition>
+  SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4368cd4d256c9..2d6f6687fe456 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,7 +119,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 91cb6e599215d..df3d63098390b 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -1,4 +1,4 @@
-//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//===-- InsertCodePrefetch.cpp ---=========--------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,9 +7,14 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// Prefetch insertion pass implementation.
+/// Code Prefetch Insertion Pass.
 //===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
+/// This pass inserts code prefetch instructions according to the prefetch
+/// directives in the basic block section profile. The target of a prefetch can
+/// be the beginning of any dynamic basic block, that is the beginning of a
+/// machine basic block, or immediately after a callsite. A global symbol will
+/// be emitted at the position of the target so it can be addressed from the
+/// prefetch instruction.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
@@ -20,15 +25,11 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
+#define DEBUG_TYPE "insert-code-prefetch"
 
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {
@@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass {
   }
 
   StringRef getPassName() const override {
-    return "X86 Cide Prefetch Inserter Pass";
+    return "Code Prefetch Inserter Pass";
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  /// Identify basic blocks that need separate sections and prepare to emit them
-  /// accordingly.
+  // Sets prefetch targets based on the bb section profile.
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
@@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
                       false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
                     false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {

>From 5f76c60b18112ae00b7755b32ab64965e1844f13 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:22 +0000
Subject: [PATCH 11/23] clang-format.

---
 .../llvm/CodeGen/BasicBlockSectionsProfileReader.h        | 8 ++++----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp                   | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 784bf8dd8f2a9..88f3e8b620bce 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -43,10 +43,10 @@ struct BBClusterInfo {
 };
 
 // Assuming a block is split into subblocks across its callsites, this struct
-// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
-// the beginning of the block if `SubblockIndex` is zero) to the call number
-// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
-// calls in the basic block).
+// uniquely identifies the subblock in block `BBID` which starts from right
+// after call number `SubblockIndex` (or the beginning of the block if
+// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
+// the block if there are are `SubblockIndex` calls in the basic block).
 struct SubblockID {
   UniqueBBID BBID;
   unsigned SubblockIndex;
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index df3d63098390b..29afb46a317a8 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
-                      false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                      true, false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
-                    false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                    true, false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&

>From 2ae595282ff040bbdc7e8a17beee59734f0a5f89 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:16:54 +0000
Subject: [PATCH 12/23] Add comments and rename functions.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h     |  2 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h     | 15 ++++++++++-----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp        | 13 +++++++++----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp           | 11 ++++++-----
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 88f3e8b620bce..c2bc7559b9fb4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -46,7 +46,7 @@ struct BBClusterInfo {
 // uniquely identifies the subblock in block `BBID` which starts from right
 // after call number `SubblockIndex` (or the beginning of the block if
 // `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
-// the block if there are are `SubblockIndex` calls in the basic block).
+// the block if `SubblockIndex` is the last call in the block).
 struct SubblockID {
   UniqueBBID BBID;
   unsigned SubblockIndex;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 4be008bbf4bf1..20427954d22e4 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,7 +229,12 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  SmallVector<unsigned> PrefetchTargetIndexes;
+  /// Contains the subblock indices in this block that are targets of code prefetching.
+  /// The subblock indexed `i` specifies that region after the `i`th call (or the
+  /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
+  /// end of the block). The prefetch target is always the beginning of the
+  /// subblock.
+  SmallVector<unsigned> PrefetchTargetSubblockIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -712,12 +717,12 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
-  const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
-    return PrefetchTargetIndexes;
+  const SmallVector<unsigned> &getPrefetchTargetSubblockIndexes() const {
+    return PrefetchTargetSubblockIndexes;
   }
 
-  void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
-    PrefetchTargetIndexes = V;
+  void setPrefetchTargetSubblockIndexes(const SmallVector<unsigned> &V) {
+    PrefetchTargetSubblockIndexes = V;
   }
 
   /// Returns the section ID of this basic block.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2d6f6687fe456..72cf557d51e03 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() {
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
 
-    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
+    // Helper to emit a symbol for the prefetch target and proceed to the next
+    // one.
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end() ||
-          NumCalls < *PrefetchTargetIt)
-        return;
+      if (PrefetchTargetIt == PrefetchTargets.end()) return;
+      if (NumCalls < *PrefetchTargetIt) return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
           Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
           utostr(MBB.getBBID()->BaseID) + Twine("_") +
           utostr(*PrefetchTargetIt));
+      // If the function is weak-linkage it may be replaced by a strong version,
+      // in which case the prefetch targets should also be replaced.
       OutStreamer->emitSymbolAttribute(
           PrefetchTargetSymbol,
           MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
@@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
+    // If the block ends with a call, we may need to emit a prefetch target
+    // at the end.
     EmitPrefetchTargetSymbolIfNeeded();
 
     // We must emit temporary symbol for the end of this basic block, if either
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 29afb46a317a8..e241ccbbee263 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -12,9 +12,9 @@
 /// This pass inserts code prefetch instructions according to the prefetch
 /// directives in the basic block section profile. The target of a prefetch can
 /// be the beginning of any dynamic basic block, that is the beginning of a
-/// machine basic block, or immediately after a callsite. A global symbol will
-/// be emitted at the position of the target so it can be addressed from the
-/// prefetch instruction.
+/// machine basic block, or immediately after a callsite. A global symbol is
+/// emitted at the position of the target so it can be addressed from the
+/// prefetch instruction from any module.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
@@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
          "BB Sections list not enabled!");
   if (hasInstrProfHashMismatch(MF))
     return false;
+  // Set each block's prefetch targets so AsmPrinter can emit a special symbol
+  // there.
   SmallVector<BBPosition> PrefetchTargets =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
@@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
     if (R == PrefetchTargetsByBBID.end())
       continue;
-    MBB.setPrefetchTargetIndexes(R->second);
+    MBB.setPrefetchTargetSubblockIndexes(R->second);
   }
-
   return false;
 }
 

>From 606c5666e67c6da2d23775884ebb5bb0b0d0c7e0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:17:04 +0000
Subject: [PATCH 13/23] clang-format.

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++-----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  9 ++++++---
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 20427954d22e4..a13fcb2bb841d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,11 +229,11 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  /// Contains the subblock indices in this block that are targets of code prefetching.
-  /// The subblock indexed `i` specifies that region after the `i`th call (or the
-  /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
-  /// end of the block). The prefetch target is always the beginning of the
-  /// subblock.
+  /// Contains the subblock indices in this block that are targets of code
+  /// prefetching. The subblock indexed `i` specifies the region after the
+  /// `i`th call (or the beginning of the block if `i==0`) to before the
+  /// `i+1`th callsite (or the end of the block). The prefetch target is always
+  /// the beginning of the subblock.
   SmallVector<unsigned> PrefetchTargetSubblockIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 72cf557d51e03..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() {
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
 
-    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
+    SmallVector<unsigned> PrefetchTargets =
+        MBB.getPrefetchTargetSubblockIndexes();
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
     // Helper to emit a symbol for the prefetch target and proceed to the next
     // one.
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end()) return;
-      if (NumCalls < *PrefetchTargetIt) return;
+      if (PrefetchTargetIt == PrefetchTargets.end())
+        return;
+      if (NumCalls < *PrefetchTargetIt)
+        return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
           Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
           utostr(MBB.getBBID()->BaseID) + Twine("_") +

>From 52b0309f15b1dc01de80f8763e859a79a39d0f7d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:34:26 +0000
Subject: [PATCH 14/23] Add optimization remarks for when prefetch targets
 cannot be mapped.

---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp           | 8 ++++++++
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp              | 4 ++--
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fb250c4b5308a..97234f3859ca7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() {
     // If the block ends with a call, we may need to emit a prefetch target
     // at the end.
     EmitPrefetchTargetSymbolIfNeeded();
+    if (PrefetchTargetIt != PrefetchTargets.end()) {
+      MachineOptimizationRemarkMissed R(
+          "insert-code-prefetch", "MissingPrefetchTarget",
+          MF->getFunction().getSubprogram(), &MBB);
+      R << "failed to map "
+        << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
+        << " prefetch targets";
+    }
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5b12c85f7eeef..9319854f53289 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
-SmallVector<BBPosition>
+SmallVector<SubblockID>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
       FI->second.PrefetchTargets.push_back(
-          BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+          SubblockID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
@@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
-SmallVector<BBPosition>
+SmallVector<SubblockID>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return BBSPR.getPrefetchTargetsForFunction(FuncName);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index e241ccbbee263..57037fd818479 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -70,12 +70,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     return false;
   // Set each block's prefetch targets so AsmPrinter can emit a special symbol
   // there.
-  SmallVector<BBPosition> PrefetchTargets =
+  SmallVector<SubblockID> PrefetchTargets =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
   for (const auto &Target : PrefetchTargets)
-    PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+    PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex);
   for (auto &MBB : MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
     if (R == PrefetchTargetsByBBID.end())

>From 8744c4d3ea5c3fdd03cc2cb2ebd93c8b8fb10a75 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 22:01:34 +0000
Subject: [PATCH 15/23] Expand test to weak symbols.

---
 .../X86/basic-block-sections-code-prefetch.ll        | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 3eb91dfdabd27..35e25952aa2f8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,6 +7,9 @@
 ; RUN: echo 't 1 at 0' >> %t
 ; RUN: echo 't 1 at 1' >> %t
 ; RUN: echo 't 2 at 1' >> %t
+; RUN: echo 'f _Z3barv' >> %t
+; RUN: echo 't 0 at 0' >> %t
+; RUN: echo 't 21 at 1' >> %t
 ;;
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 
@@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
   ret i32 %14
 }
 
-declare i32 @_Z3barv() #1
+define weak i32 @_Z3barv() nounwind {
+  %1 = call i32 @_Z3bazv()
+  ret i32 %1
+; CHECK:      _Z3barv:
+; CHECK-NEXT:   .weak __llvm_prefetch_target__Z3barv_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+}
+
 declare i32 @_Z3bazv() #1

>From cea8aa4d4fb81ad18ad46ee71583471d18dc3d2d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 16/23] feat(AsmPrinter): Add support for emitting prefetch
 target symbols

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 35 +++++++++++++++++++
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 14 ++++++++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  9 ++++-
 .../BasicBlockSectionsProfileReader.cpp       | 14 ++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 13 +++++++
 5 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index c2bc7559b9fb4..44bcfb074b5b8 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -52,6 +52,12 @@ struct SubblockID {
   unsigned SubblockIndex;
 };
 
+struct PrefetchHint {
+  SubblockID SitePosition;      // Position of the prefetch site.
+  StringRef TargetFunctionName; // Name of the function holding the target.
+  SubblockID TargetPosition;    // Position of the target in that function.
+};
+
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -63,6 +69,7 @@ struct FunctionPathAndClusterInfo {
   // Code prefetch targets, specified by the subblock ID of which beginning must
   // be targetted for prefetching.
   SmallVector<SubblockID> PrefetchTargets;
+  SmallVector<PrefetchHint> PrefetchHints;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -73,6 +80,27 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
+// Provides DenseMapInfo for SubblockID (e.g. for DenseSet<SubblockID>).
+template <> struct DenseMapInfo<SubblockID> {
+  static inline SubblockID getEmptyKey() {
+    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+            DenseMapInfo<unsigned>::getEmptyKey()};
+  }
+  static inline SubblockID getTombstoneKey() {
+    return SubblockID{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+                      DenseMapInfo<unsigned>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const SubblockID &Val) {
+    std::pair<unsigned, unsigned> PairVal = std::make_pair(
+        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.SubblockIndex);
+    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+  }
+  static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) {
+    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+           LHS.SubblockIndex == RHS.SubblockIndex;
+  }
+};
+
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -104,6 +132,9 @@ class BasicBlockSectionsProfileReader {
   SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -212,6 +243,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<SubblockID> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index a13fcb2bb841d..63bc6b0f25284 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
+struct PrefetchTarget {
+  StringRef TargetFunction;
+  UniqueBBID TargetBBID;
+  unsigned TargetBBOffset;
+};
+
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -1290,6 +1298,12 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
+  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+    return CallInstSymbols;
+  }
+
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 97234f3859ca7..9a3532f623715 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> InsertNoopsForPrefetch(
+    "insert-noops-for-prefetch",
+    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+    cl::Hidden);
+
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -1982,6 +1988,7 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
+    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -2125,7 +2132,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-        emitInstruction(&MI);
+         emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9319854f53289..7dc83a290eeb2 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -100,6 +100,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
       .PrefetchTargets;
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -547,6 +555,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
   return BBSPR.getPrefetchTargetsForFunction(FuncName);
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+  // Create lazily; slots skipped by an earlier resize are still null.
+  if (CallInstSymbols.size() <= CallInstNumber)
+    CallInstSymbols.resize(CallInstNumber + 1);
+  MCSymbol *&Sym = CallInstSymbols[CallInstNumber];
+  if (!Sym)
+    Sym = getParent()->getContext().createBlockSymbol(
+        "BB" + Twine(getParent()->getFunctionNumber()) + "_" +
+            Twine(getNumber()) + "_" + Twine(CallInstNumber),
+        /*AlwaysEmit=*/true);
+  return Sym;
+}
+
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();

>From 5f1fbf346ff321e38dedf353427930c9f7cfaea3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 17/23] feat(AsmPrinter): Add support for emitting prefetch
 target symbols

---
 .../BasicBlockSectionsProfileReader.cpp       | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 7dc83a290eeb2..0e8971c8fb7fb 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -323,6 +323,39 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       }
       continue;
     }
+    case 'i': { // Prefetch hint specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
+      assert(Values.size() == 2);
+      SmallVector<StringRef, 2> PrefetchSiteStr;
+      Values[0].split(PrefetchSiteStr, '@');
+      assert(PrefetchSiteStr.size() == 2);
+      auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+      if (!SiteBBID)
+        return SiteBBID.takeError();
+      unsigned long long SiteBBOffset;
+      if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchSiteStr[1]);
+
+      SmallVector<StringRef, 3> PrefetchTargetStr;
+      Values[1].split(PrefetchTargetStr, '@');
+      assert(PrefetchTargetStr.size() == 3);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long TargetBBOffset;
+      if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[2]);
+      FI->second.PrefetchHints.push_back(
+          PrefetchHint{{*SiteBBID, static_cast<unsigned>(SiteBBOffset)},
+                       PrefetchTargetStr[0],
+                       {*TargetBBID, static_cast<unsigned>(TargetBBOffset)}});
+      continue;
+    }
     case 't': { // Prefetch target specifier.
       // Skip the profile when we the profile iterator (FI) refers to the
       // past-the-end element.

>From be3fe4d387706d0dd1f6622c9bb81149a3913908 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sun, 9 Nov 2025 05:45:18 +0000
Subject: [PATCH 18/23] feat(CodeGen): Add PrefetchInsertion pass

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |   2 +-
 llvm/include/llvm/CodeGen/MachineInstr.h      |   3 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |   2 +-
 llvm/lib/CodeGen/BasicBlockSections.cpp       |   3 +-
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |   1 +
 llvm/lib/Target/X86/CMakeLists.txt            |   1 +
 llvm/lib/Target/X86/PrefetchInsertion.cpp     | 209 ++++++++++++++++++
 llvm/lib/Target/X86/X86.h                     |   2 +
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   3 +
 9 files changed, 222 insertions(+), 4 deletions(-)
 create mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 63bc6b0f25284..5bc62d61e5f0d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1300,7 +1300,7 @@ class MachineBasicBlock
 
   MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
 
-  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+  const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
     return CallInstSymbols;
   }
 
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 4fcb7f36e0238..1aba9ca962b16 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -123,8 +123,9 @@ class MachineInstr
     NoUSWrap = 1 << 20,      // Instruction supports geps
                              // no unsigned signed wrap.
     SameSign = 1 << 21,      // Both operands have the same sign.
-    InBounds = 1 << 22       // Pointer arithmetic remains inbounds.
+    InBounds = 1 << 22,      // Pointer arithmetic remains inbounds.
                              // Implies NoUSWrap.
+    Prefetch = 1 << 23,      // Instruction is a prefetch.
   };
 
 private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9a3532f623715..7d533fd2bdef0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 52e2909bec072..755abdbceaf4a 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -106,7 +106,8 @@ class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
 
-  BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+  // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+  // nullptr;
 
   BasicBlockSections() : MachineFunctionPass(ID) {
     initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b6dd174f9be80..9606f0c920ef9 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/FloatingPointPredicateUtils.h"
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index f9bd233cf8ecf..b503b54f338d3 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -58,6 +58,7 @@ set(sources
   X86IndirectThunks.cpp
   X86InterleavedAccess.cpp
   X86InsertPrefetch.cpp
+  PrefetchInsertion.cpp
   X86InstCombineIntrinsic.cpp
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp
new file mode 100644
index 0000000000000..720a38cb9b011
--- /dev/null
+++ b/llvm/lib/Target/X86/PrefetchInsertion.cpp
@@ -0,0 +1,209 @@
+//===-- PrefetchInsertion.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+#include <map>
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+static cl::opt<bool> UseCodePrefetchInstruction(
+    "use-code-prefetch-instruction",
+    cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true),
+    cl::Hidden);
+static cl::opt<bool> PrefetchNextAddress(
+    "prefetch-next-address",
+    cl::desc(
+        "Whether to prefetch the next address instead of the target address."),
+    cl::init(false), cl::Hidden);
+
+namespace {} // end anonymous namespace
+
+namespace llvm {
+class PrefetchInsertion : public MachineFunctionPass {
+public:
+  static char ID;
+
+  BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+
+  PrefetchInsertion() : MachineFunctionPass(ID) {
+    initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Identify basic blocks that need separate sections and prepare to emit them
+  /// accordingly.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+
+char PrefetchInsertion::ID = 0;
+INITIALIZE_PASS_BEGIN(
+    PrefetchInsertion, "prefetch-insertion",
+    "Applies path clonings for the -basic-block-sections=list option", false,
+    false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+    PrefetchInsertion, "prefetch-insertion",
+    "Applies path clonings for the -basic-block-sections=list option", false,
+    false)
+
+bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+  // errs() << "Running on " << MF.getName() << "\n";
+  Function &F = MF.getFunction();
+  auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
+  DenseSet<BBPosition> PrefetchTargets =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchTargetsForFunction(MF.getName());
+  // errs() << "Targets: Function: " << F.getName() << " "
+  //        << PrefetchTargets.size() << "\n";
+  DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+  for (const auto &P : PrefetchTargets)
+    PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
+  for (auto &[BBID, V] : PrefetchTargetsByBBID)
+    llvm::sort(V);
+  for (auto &BB : MF)
+    BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
+
+  for (const BBPosition &P : PrefetchTargets) {
+    SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+    PrefetchTargetName += F.getName();
+    PrefetchTargetName += "_";
+    PrefetchTargetName += utostr(P.BBID.BaseID);
+    PrefetchTargetName += "_";
+    PrefetchTargetName += utostr(P.BBOffset);
+    F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+  }
+
+  SmallVector<PrefetchHint> PrefetchHints =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchHintsForFunction(MF.getName());
+  // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size()
+  //        << "\n";
+  for (const PrefetchHint &H : PrefetchHints) {
+    SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+    PrefetchTargetName += H.TargetFunctionName;
+    PrefetchTargetName += "_";
+    PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
+    PrefetchTargetName += "_";
+    PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
+    F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
+  }
+
+  DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
+      PrefetchHintsByBBID;
+  for (const auto &H : PrefetchHints) {
+    PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
+        PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
+                       H.TargetPosition.BBOffset});
+  }
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  for (auto &BB : MF) {
+    auto It = PrefetchHintsByBBID.find(*BB.getBBID());
+    if (It == PrefetchHintsByBBID.end())
+      continue;
+    auto BBPrefetchHintIt = It->second.begin();
+    unsigned NumInsts = 0;
+    auto E = BB.getFirstTerminator();
+    unsigned NumCallsites = 0;
+    for (auto I = BB.instr_begin();;) {
+      auto Current = I;
+      if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
+        for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
+          SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
+          PrefetchTargetName += PrefetchTarget.TargetFunction;
+          PrefetchTargetName += "_";
+          PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
+          PrefetchTargetName += "_";
+          PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
+          auto *GV =
+              MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
+          // errs() << "Inserting prefetch for " << GV->getName() << " at "
+          //        << MF.getName() << " " << BB.getName() << " " << NumInsts
+          //        << "\n";
+          MachineInstr *PFetch = MF.CreateMachineInstr(
+              UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
+                                         : TII->get(X86::PREFETCHT1),
+              Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
+              true);
+          PFetch->setFlag(MachineInstr::Prefetch);
+          MachineInstrBuilder MIB(MF, PFetch);
+          if (!PrefetchNextAddress) {
+            MIB.addMemOperand(MF.getMachineMemOperand(
+                MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
+                /*base_alignment=*/llvm::Align(1)));
+          }
+          MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+          if (PrefetchNextAddress)
+            MIB.addImm(0);
+          else
+            MIB.addGlobalAddress(GV);
+          MIB.addReg(X86::NoRegister);
+          BB.insert(Current, PFetch);
+        }
+        ++BBPrefetchHintIt;
+        if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
+          break;
+      }
+      if (Current != E) {
+        // Print the assembly for the instruction.
+        if (!Current->isPosition() && !Current->isImplicitDef() &&
+            !Current->isKill() && !Current->isDebugInstr()) {
+          ++NumInsts;
+        }
+        if (Current->isCall())
+          ++NumCallsites;
+        ++I;
+      }
+    }
+  }
+  return true;
+}
+
+void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createPrefetchInsertionPass() {
+  return new PrefetchInsertion();
+}
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 2b83d575ace91..e9d6cfa5ee4d2 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -161,6 +161,8 @@ FunctionPass *createX86DiscriminateMemOpsPass();
 /// This pass applies profiling information to insert cache prefetches.
 FunctionPass *createX86InsertPrefetchPass();
 
+FunctionPass *createPrefetchInsertionPass();
+
 /// This pass insert wait instruction after X87 instructions which could raise
 /// fp exceptions when strict-fp enabled.
 FunctionPass *createX86InsertX87waitPass();
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index d4ad98af9b30c..a379e17f7f3a4 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -626,6 +626,9 @@ void X86PassConfig::addPreEmitPass2() {
   // after all real instructions have been added to the epilog.
   if (TT.isOSWindows() && TT.isX86_64())
     addPass(createX86WinEHUnwindV2Pass());
+
+  if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
+    addPass(createPrefetchInsertionPass());
 }
 
 bool X86PassConfig::addPostFastRegAllocRewrite() {

>From 82b6033548d068fdc3ba1e28ba8da01d92fec400 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 15 Nov 2025 20:09:15 +0000
Subject: [PATCH 19/23] Implement inserting prefetches into the specified
 positions.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |  33 +--
 .../include/llvm/CodeGen/InsertCodePrefetch.h |  23 ++
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |   6 -
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |   9 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  13 +-
 .../BasicBlockSectionsProfileReader.cpp       |  12 +-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       |  50 ++++-
 llvm/lib/CodeGen/MachineBasicBlock.cpp        |  13 --
 llvm/lib/Target/X86/CMakeLists.txt            |   1 -
 llvm/lib/Target/X86/PrefetchInsertion.cpp     | 209 ------------------
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  19 ++
 llvm/lib/Target/X86/X86InstrInfo.h            |   5 +
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   3 -
 13 files changed, 123 insertions(+), 273 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h
 delete mode 100644 llvm/lib/Target/X86/PrefetchInsertion.cpp

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 44bcfb074b5b8..061f14030b206 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -52,10 +52,12 @@ struct SubblockID {
   unsigned SubblockIndex;
 };
 
+// This represents a prefetch hint to be injected at site `SiteID`, targeting
+// `TargetID` in function `TargetFunction`.
 struct PrefetchHint {
-  SubblockID SitePosition;
-  StringRef TargetFunctionName;
-  osition TargetPosition;
+  SubblockID SiteID;
+  StringRef TargetFunction;
+  SubblockID TargetID;
 };
 
 // This represents the raw input profile for one function.
@@ -80,27 +82,6 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
-// Provides DenseMapInfo SubblockID.
-template <> struct DenseMapInfo<SubblockID> {
-  static inline SubblockID getEmptyKey() {
-    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
-            DenseMapInfo<unsigned>::getEmptyKey()};
-  }
-  static inline SubblockID getTombstoneKey() {
-    return SubblockID{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
-                      DenseMapInfo<unsigned>::getTombstoneKey()};
-  }
-  static unsigned getHashValue(const SubblockID &Val) {
-    std::pair<unsigned, unsigned> PairVal = std::make_pair(
-        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
-    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
-  }
-  static bool isEqual(const SubblockID &LHS, const SubblockID &RHS) {
-    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
-           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
-  }
-};
-
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -132,6 +113,7 @@ class BasicBlockSectionsProfileReader {
   SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  // Returns the prefetch hints to be injected in function `FuncName`.
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
@@ -243,11 +225,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
+
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseSet<SubblockID> getPrefetchTargetsForFunction(StringRef FuncName) const;
-
   SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
new file mode 100644
index 0000000000000..b212a025f1d9f
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -0,0 +1,23 @@
+//===- InsertCodePrefetch.h - Code prefetch insertion utilities -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
+#define LLVM_CODEGEN_INSERTCODEPREFETCH_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/UniqueBBID.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 5bc62d61e5f0d..24445a2fe8ece 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1298,12 +1298,6 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
-  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
-  const SmallVector<MCSymbol *, 4> &getCallInstSymbols() const {
-    return CallInstSymbols;
-  }
-
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 43f28ed79f9dd..78a2c6e937af6 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2370,6 +2370,15 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
     llvm_unreachable("unknown number of operands necessary");
   }
 
+  /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
+  /// targeting `GV`. Returns true if an instruction was inserted.
+  virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator InsertBefore,
+                                       const GlobalValue *GV) const {
+    return false;
+  }
+
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7d533fd2bdef0..502d209e45fa7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -82,6 +82,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMRemarkStreamer.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -1988,7 +1989,6 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
-    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1996,18 +1996,15 @@ void AsmPrinter::emitFunctionBody() {
     SmallVector<unsigned> PrefetchTargets =
         MBB.getPrefetchTargetSubblockIndexes();
     auto PrefetchTargetIt = PrefetchTargets.begin();
-    unsigned NumCalls = 0;
+    unsigned NumCallsInBB = 0;
     // Helper to emit a symbol for the prefetch target and proceed to the next
     // one.
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
       if (PrefetchTargetIt == PrefetchTargets.end())
         return;
-      if (NumCalls < *PrefetchTargetIt)
+      if (NumCallsInBB < *PrefetchTargetIt)
         return;
-      MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-          Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
-          utostr(MBB.getBBID()->BaseID) + Twine("_") +
-          utostr(*PrefetchTargetIt));
+      MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
       // If the function is weak-linkage it may be replaced by a strong version,
       // in which case the prefetch targets should also be replaced.
       OutStreamer->emitSymbolAttribute(
@@ -2159,7 +2156,7 @@ void AsmPrinter::emitFunctionBody() {
       if (MI.isCall()) {
         if (MF->getTarget().Options.BBAddrMap)
           OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
-        ++NumCalls;
+        ++NumCallsInBB;
       }
 
       if (TM.Options.EmitCallGraphSection && MI.isCall())
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 0e8971c8fb7fb..f55aaab7586b5 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -335,8 +335,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
       if (!SiteBBID)
         return SiteBBID.takeError();
-      unsigned long long SiteBBOffset;
-      if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteBBOffset))
+      unsigned long long SiteSubblockIndex;
+      if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteSubblockIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchSiteStr[1]);
 
@@ -346,14 +346,14 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
       if (!TargetBBID)
         return TargetBBID.takeError();
-      unsigned long long TargetBBOffset;
-      if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetBBOffset))
+      unsigned long long TargetSubblockIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[2]);
       FI->second.PrefetchHints.push_back(
-          PrefetchHint{{*SiteBBID, static_cast<unsigned>(SiteBBOffset)},
+          PrefetchHint{SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
                        PrefetchTargetStr[0],
-                       {*TargetBBID, static_cast<unsigned>(TargetBBOffset)}});
+                       SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
       continue;
     }
     case 't': { // Prefetch target specifier.
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 57037fd818479..60cd004875891 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -17,6 +17,8 @@
 /// prefetch instruction from any module.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/InsertCodePrefetch.h"
+
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -26,11 +28,24 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
 
+namespace llvm {
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) {
+          SmallString<128> R("__llvm_prefetch_target_");
+          R += FunctionName;
+          R += "_";
+          R += utostr(BBID.BaseID);
+          R += "_";
+          R += utostr(SubblockIndex);
+          return R;
+}
+} // namespace llvm
+
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {
 public:
@@ -82,7 +97,40 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
       continue;
     MBB.setPrefetchTargetSubblockIndexes(R->second);
   }
-  return false;
+  SmallVector<PrefetchHint> PrefetchHints =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchHintsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<PrefetchHint>>
+      PrefetchHintsBySiteBBID;
+  for (const auto &H : PrefetchHints)
+    PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
+  for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) {
+    llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+      return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
+    });
+  }
+  auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  for (auto &BB : MF) {
+    auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
+    if (It == PrefetchHintsBySiteBBID.end())
+      continue;
+    const auto &PrefetchHints = It->second;
+    unsigned NumCallsInBB = 0;
+    auto InstrIt = BB.begin();
+    for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) {
+      auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
+      while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy);
+        TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
+        ++HintIt;
+      }
+      if (InstrIt == BB.end()) break;
+      if (InstrIt->isCall()) ++NumCallsInBB;
+      InstrIt = NextInstrIt;
+    }
+  }
+  return true;
 }
 
 void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
-  if (CallInstSymbols.size() <= CallInstNumber) {
-    const MachineFunction *MF = getParent();
-    MCContext &Ctx = MF->getContext();
-    CallInstSymbols.resize(CallInstNumber + 1);
-    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
-        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
-            Twine(CallInstNumber),
-        /*AlwaysEmit=*/true);
-  }
-  return CallInstSymbols[CallInstNumber];
-}
-
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index b503b54f338d3..f9bd233cf8ecf 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -58,7 +58,6 @@ set(sources
   X86IndirectThunks.cpp
   X86InterleavedAccess.cpp
   X86InsertPrefetch.cpp
-  PrefetchInsertion.cpp
   X86InstCombineIntrinsic.cpp
   X86InstrFMA3Info.cpp
   X86InstrFoldTables.cpp
diff --git a/llvm/lib/Target/X86/PrefetchInsertion.cpp b/llvm/lib/Target/X86/PrefetchInsertion.cpp
deleted file mode 100644
index 720a38cb9b011..0000000000000
--- a/llvm/lib/Target/X86/PrefetchInsertion.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-//===-- PrefetchInsertion.cpp ---=========-----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Prefetch insertion pass implementation.
-//===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/X86MCTargetDesc.h"
-#include "X86.h"
-#include "X86InstrBuilder.h"
-#include "X86InstrInfo.h"
-#include "X86MachineFunctionInfo.h"
-#include "X86Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/BasicBlockSectionUtils.h"
-#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
-#include <map>
-
-using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
-
-static cl::opt<bool> UseCodePrefetchInstruction(
-    "use-code-prefetch-instruction",
-    cl::desc("Whether to use the new prefetchit1 instruction."), cl::init(true),
-    cl::Hidden);
-static cl::opt<bool> PrefetchNextAddress(
-    "prefetch-next-address",
-    cl::desc(
-        "Whether to prefetch the next address instead of the target address."),
-    cl::init(false), cl::Hidden);
-
-namespace {} // end anonymous namespace
-
-namespace llvm {
-class PrefetchInsertion : public MachineFunctionPass {
-public:
-  static char ID;
-
-  BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
-
-  PrefetchInsertion() : MachineFunctionPass(ID) {
-    initializePrefetchInsertionPass(*PassRegistry::getPassRegistry());
-  }
-
-  StringRef getPassName() const override { return "Prefetch Insertion Pass"; }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-  /// Identify basic blocks that need separate sections and prepare to emit them
-  /// accordingly.
-  bool runOnMachineFunction(MachineFunction &MF) override;
-};
-
-} // namespace llvm
-
-char PrefetchInsertion::ID = 0;
-INITIALIZE_PASS_BEGIN(
-    PrefetchInsertion, "prefetch-insertion",
-    "Applies path clonings for the -basic-block-sections=list option", false,
-    false)
-INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
-    PrefetchInsertion, "prefetch-insertion",
-    "Applies path clonings for the -basic-block-sections=list option", false,
-    false)
-
-bool PrefetchInsertion::runOnMachineFunction(MachineFunction &MF) {
-  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
-         "BB Sections list not enabled!");
-  if (hasInstrProfHashMismatch(MF))
-    return false;
-  // errs() << "Running on " << MF.getName() << "\n";
-  Function &F = MF.getFunction();
-  auto PtrTy = PointerType::getUnqual(F.getParent()->getContext());
-  DenseSet<BBPosition> PrefetchTargets =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchTargetsForFunction(MF.getName());
-  // errs() << "Targets: Function: " << F.getName() << " "
-  //        << PrefetchTargets.size() << "\n";
-  DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
-  for (const auto &P : PrefetchTargets)
-    PrefetchTargetsByBBID[P.BBID].push_back(P.BBOffset);
-  for (auto &[BBID, V] : PrefetchTargetsByBBID)
-    llvm::sort(V);
-  for (auto &BB : MF)
-    BB.setPrefetchTargets(PrefetchTargetsByBBID[*BB.getBBID()]);
-
-  for (const BBPosition &P : PrefetchTargets) {
-    SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
-    PrefetchTargetName += F.getName();
-    PrefetchTargetName += "_";
-    PrefetchTargetName += utostr(P.BBID.BaseID);
-    PrefetchTargetName += "_";
-    PrefetchTargetName += utostr(P.BBOffset);
-    F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
-  }
-
-  SmallVector<PrefetchHint> PrefetchHints =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchHintsForFunction(MF.getName());
-  // errs() << "Hints: Function: " << F.getName() << " " << PrefetchHints.size()
-  //        << "\n";
-  for (const PrefetchHint &H : PrefetchHints) {
-    SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
-    PrefetchTargetName += H.TargetFunctionName;
-    PrefetchTargetName += "_";
-    PrefetchTargetName += utostr(H.TargetPosition.BBID.BaseID);
-    PrefetchTargetName += "_";
-    PrefetchTargetName += utostr(H.TargetPosition.BBOffset);
-    F.getParent()->getOrInsertGlobal(PrefetchTargetName, PtrTy);
-  }
-
-  DenseMap<UniqueBBID, std::map<unsigned, SmallVector<PrefetchTarget>>>
-      PrefetchHintsByBBID;
-  for (const auto &H : PrefetchHints) {
-    PrefetchHintsByBBID[H.SitePosition.BBID][H.SitePosition.BBOffset].push_back(
-        PrefetchTarget{H.TargetFunctionName, H.TargetPosition.BBID,
-                       H.TargetPosition.BBOffset});
-  }
-  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
-  for (auto &BB : MF) {
-    auto It = PrefetchHintsByBBID.find(*BB.getBBID());
-    if (It == PrefetchHintsByBBID.end())
-      continue;
-    auto BBPrefetchHintIt = It->second.begin();
-    unsigned NumInsts = 0;
-    auto E = BB.getFirstTerminator();
-    unsigned NumCallsites = 0;
-    for (auto I = BB.instr_begin();;) {
-      auto Current = I;
-      if (NumCallsites >= BBPrefetchHintIt->first || Current == E) {
-        for (const auto &PrefetchTarget : BBPrefetchHintIt->second) {
-          SmallString<128> PrefetchTargetName("__llvm_prefetch_target_");
-          PrefetchTargetName += PrefetchTarget.TargetFunction;
-          PrefetchTargetName += "_";
-          PrefetchTargetName += utostr(PrefetchTarget.TargetBBID.BaseID);
-          PrefetchTargetName += "_";
-          PrefetchTargetName += utostr(PrefetchTarget.TargetBBOffset);
-          auto *GV =
-              MF.getFunction().getParent()->getNamedValue(PrefetchTargetName);
-          // errs() << "Inserting prefetch for " << GV->getName() << " at "
-          //        << MF.getName() << " " << BB.getName() << " " << NumInsts
-          //        << "\n";
-          MachineInstr *PFetch = MF.CreateMachineInstr(
-              UseCodePrefetchInstruction ? TII->get(X86::PREFETCHIT1)
-                                         : TII->get(X86::PREFETCHT1),
-              Current != BB.instr_end() ? Current->getDebugLoc() : DebugLoc(),
-              true);
-          PFetch->setFlag(MachineInstr::Prefetch);
-          MachineInstrBuilder MIB(MF, PFetch);
-          if (!PrefetchNextAddress) {
-            MIB.addMemOperand(MF.getMachineMemOperand(
-                MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
-                /*base_alignment=*/llvm::Align(1)));
-          }
-          MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
-          if (PrefetchNextAddress)
-            MIB.addImm(0);
-          else
-            MIB.addGlobalAddress(GV);
-          MIB.addReg(X86::NoRegister);
-          BB.insert(Current, PFetch);
-        }
-        ++BBPrefetchHintIt;
-        if (BBPrefetchHintIt == PrefetchHintsByBBID[*BB.getBBID()].end())
-          break;
-      }
-      if (Current != E) {
-        // Print the assembly for the instruction.
-        if (!Current->isPosition() && !Current->isImplicitDef() &&
-            !Current->isKill() && !Current->isDebugInstr()) {
-          ++NumInsts;
-        }
-        if (Current->isCall())
-          ++NumCallsites;
-        ++I;
-      }
-    }
-  }
-  return true;
-}
-
-void PrefetchInsertion::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-FunctionPass *llvm::createPrefetchInsertionPass() {
-  return new PrefetchInsertion();
-}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index cb0208a4a5f32..b49ef06478f9b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10978,5 +10978,24 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
+bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator InsertBefore,
+                                      const GlobalValue *GV) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1),
+              InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(),
+              true);
+          MachineInstrBuilder MIB(MF, PrefetchInstr);
+            MIB.addMemOperand(MF.getMachineMemOperand(
+                MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
+                /*base_alignment=*/llvm::Align(1)));
+          MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+          MIB.addGlobalAddress(GV);
+          MIB.addReg(X86::NoRegister);
+          MBB.insert(InsertBefore, PrefetchInstr);
+          return true;
+}
+
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index a547fcd421411..fb77ac96ceaad 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,6 +767,11 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// \returns the index of operand that is commuted with \p Idx1. If the method
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
+
+
+  bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator InsertBefore,
+                                      const GlobalValue *GV) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index a379e17f7f3a4..d4ad98af9b30c 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -626,9 +626,6 @@ void X86PassConfig::addPreEmitPass2() {
   // after all real instructions have been added to the epilog.
   if (TT.isOSWindows() && TT.isX86_64())
     addPass(createX86WinEHUnwindV2Pass());
-
-  if (TM->getBBSectionsType() == llvm::BasicBlockSection::List)
-    addPass(createPrefetchInsertionPass());
 }
 
 bool X86PassConfig::addPostFastRegAllocRewrite() {

>From 3cf143c54b2aed4dd76447df5c0a32f51b078949 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 15 Nov 2025 20:09:27 +0000
Subject: [PATCH 20/23] clang-format.

---
 .../include/llvm/CodeGen/InsertCodePrefetch.h |  6 ++-
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |  1 -
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  6 ++-
 .../BasicBlockSectionsProfileReader.cpp       |  8 ++--
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 40 +++++++++++--------
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 33 +++++++--------
 llvm/lib/Target/X86/X86InstrInfo.h            |  5 +--
 7 files changed, 55 insertions(+), 44 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
index b212a025f1d9f..99241248862d3 100644
--- a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -11,12 +11,14 @@
 
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Support/UniqueBBID.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/UniqueBBID.h"
 
 namespace llvm {
 
-SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex);
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned SubblockIndex);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 78a2c6e937af6..ca5a8308ae957 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2378,7 +2378,6 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
     return false;
   }
 
-
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 502d209e45fa7..9be5d5885e257 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -39,6 +39,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockHashInfo.h"
@@ -82,7 +83,6 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/LLVMRemarkStreamer.h"
-#include "llvm/CodeGen/InsertCodePrefetch.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
@@ -2004,7 +2004,9 @@ void AsmPrinter::emitFunctionBody() {
         return;
       if (NumCallsInBB < *PrefetchTargetIt)
         return;
-      MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
+      MCSymbol *PrefetchTargetSymbol =
+          OutContext.getOrCreateSymbol(getPrefetchTargetSymbolName(
+              MF->getName(), *MBB.getBBID(), *PrefetchTargetIt));
       // If the function is weak-linkage it may be replaced by a strong version,
       // in which case the prefetch targets should also be replaced.
       OutStreamer->emitSymbolAttribute(
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index f55aaab7586b5..09781a05d917d 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -350,10 +350,10 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetSubblockIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[2]);
-      FI->second.PrefetchHints.push_back(
-          PrefetchHint{SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
-                       PrefetchTargetStr[0],
-                       SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
+      FI->second.PrefetchHints.push_back(PrefetchHint{
+          SubblockID{*SiteBBID, static_cast<unsigned>(SiteSubblockIndex)},
+          PrefetchTargetStr[0],
+          SubblockID{*TargetBBID, static_cast<unsigned>(TargetSubblockIndex)}});
       continue;
     }
     case 't': { // Prefetch target specifier.
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 60cd004875891..b7eba788a9796 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -35,14 +35,16 @@ using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
 
 namespace llvm {
-SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName, const UniqueBBID &BBID, unsigned SubblockIndex) {
-          SmallString<128> R("__llvm_prefetch_target_");
-          R += FunctionName;
-          R += "_";
-          R += utostr(BBID.BaseID);
-          R += "_";
-          R += utostr(SubblockIndex);
-          return R;
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned SubblockIndex) {
+  SmallString<128> R("__llvm_prefetch_target_");
+  R += FunctionName;
+  R += "_";
+  R += utostr(BBID.BaseID);
+  R += "_";
+  R += utostr(SubblockIndex);
+  return R;
 }
 } // namespace llvm
 
@@ -100,16 +102,16 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   SmallVector<PrefetchHint> PrefetchHints =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchHintsForFunction(MF.getName());
-  DenseMap<UniqueBBID, SmallVector<PrefetchHint>>
-      PrefetchHintsBySiteBBID;
+  DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
-  for (auto &[SiteBBID, H]: PrefetchHintsBySiteBBID) {
+  for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) {
     llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
       return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
     });
   }
-  auto PtrTy = PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+  auto PtrTy =
+      PointerType::getUnqual(MF.getFunction().getParent()->getContext());
   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   for (auto &BB : MF) {
     auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
@@ -118,15 +120,21 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     const auto &PrefetchHints = It->second;
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
-    for(auto HintIt = PrefetchHints.begin() ; HintIt != PrefetchHints.end();) {
+    for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
-        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(getPrefetchTargetSymbolName(HintIt->TargetFunction, HintIt->TargetID.BBID, HintIt->TargetID.SubblockIndex), PtrTy);
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+            getPrefetchTargetSymbolName(HintIt->TargetFunction,
+                                        HintIt->TargetID.BBID,
+                                        HintIt->TargetID.SubblockIndex),
+            PtrTy);
         TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
         ++HintIt;
       }
-      if (InstrIt == BB.end()) break;
-      if (InstrIt->isCall()) ++NumCallsInBB;
+      if (InstrIt == BB.end())
+        break;
+      if (InstrIt->isCall())
+        ++NumCallsInBB;
       InstrIt = NextInstrIt;
     }
   }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index b49ef06478f9b..6556e16241557 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10978,24 +10978,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
-bool X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator InsertBefore,
-                                      const GlobalValue *GV) const {
+bool X86InstrInfo::insertCodePrefetchInstr(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+    const GlobalValue *GV) const {
   MachineFunction &MF = *MBB.getParent();
-  MachineInstr *PrefetchInstr = MF.CreateMachineInstr(get(X86::PREFETCHIT1),
-              InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore) : InsertBefore->getDebugLoc(),
-              true);
-          MachineInstrBuilder MIB(MF, PrefetchInstr);
-            MIB.addMemOperand(MF.getMachineMemOperand(
-                MachinePointerInfo(GV), MachineMemOperand::MOLoad, /*s=*/8,
-                /*base_alignment=*/llvm::Align(1)));
-          MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
-          MIB.addGlobalAddress(GV);
-          MIB.addReg(X86::NoRegister);
-          MBB.insert(InsertBefore, PrefetchInstr);
-          return true;
+  MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
+      get(X86::PREFETCHIT1),
+      InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore)
+                                      : InsertBefore->getDebugLoc(),
+      true);
+  MachineInstrBuilder MIB(MF, PrefetchInstr);
+  MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV),
+                                            MachineMemOperand::MOLoad, /*s=*/8,
+                                            /*base_alignment=*/llvm::Align(1)));
+  MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+  MIB.addGlobalAddress(GV);
+  MIB.addReg(X86::NoRegister);
+  MBB.insert(InsertBefore, PrefetchInstr);
+  return true;
 }
 
-
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index fb77ac96ceaad..2fe67c56e1bcd 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -768,10 +768,9 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
 
-
   bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                                      MachineBasicBlock::iterator InsertBefore,
-                                      const GlobalValue *GV) const override;
+                               MachineBasicBlock::iterator InsertBefore,
+                               const GlobalValue *GV) const override;
 };
 } // namespace llvm
 

>From ffb10225c39e1aa425d33b13e643160bc2d843a9 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 20:37:25 +0000
Subject: [PATCH 21/23] Expand the test case to prefetch hints.

---
 .../CodeGen/BasicBlockSectionsProfileReader.cpp  | 13 ++++++++++---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp          |  8 ++++----
 .../X86/basic-block-sections-code-prefetch.ll    | 16 ++++++++++++++--
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 09781a05d917d..8252d362271a0 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -328,9 +328,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       // past-the-end element.
       if (FI == ProgramPathAndClusterInfo.end())
         continue;
-      assert(Values.size() == 2);
+      if (Values.size() != 2)
+        return createProfileParseError(Twine("Prefetch hint expected: "+ S));
       SmallVector<StringRef, 2> PrefetchSiteStr;
       Values[0].split(PrefetchSiteStr, '@');
+      if (PrefetchSiteStr.size() != 2)
+        return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]);
       assert(PrefetchSiteStr.size() == 2);
       auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
       if (!SiteBBID)
@@ -342,7 +345,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
 
       SmallVector<StringRef, 3> PrefetchTargetStr;
       Values[1].split(PrefetchTargetStr, '@');
-      assert(PrefetchTargetStr.size() == 3);
+      if (PrefetchTargetStr.size() != 3)
+        return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
       if (!TargetBBID)
         return TargetBBID.takeError();
@@ -361,10 +365,13 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       // past-the-end element.
       if (FI == ProgramPathAndClusterInfo.end())
         continue;
-      assert(Values.size() == 1);
+      if (Values.size() != 1)
+        return createProfileParseError(Twine("Prefetch target expected: ")+ S);
       SmallVector<StringRef, 2> PrefetchTargetStr;
       Values[0].split(PrefetchTargetStr, '@');
       assert(PrefetchTargetStr.size() == 2);
+      if (PrefetchTargetStr.size() != 2)
+        return createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index b7eba788a9796..2197d60bf598e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -105,8 +105,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
-  for (auto &[SiteBBID, H] : PrefetchHintsBySiteBBID) {
-    llvm::sort(H, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+  for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) {
+    llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) {
       return H1.SiteID.SubblockIndex < H2.SiteID.SubblockIndex;
     });
   }
@@ -122,13 +122,13 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto InstrIt = BB.begin();
     for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
-      while (NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+      while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
         auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
             getPrefetchTargetSymbolName(HintIt->TargetFunction,
                                         HintIt->TargetID.BBID,
                                         HintIt->TargetID.SubblockIndex),
             PtrTy);
-        TII->insertCodePrefetchInstr(BB, NextInstrIt, GV);
+        TII->insertCodePrefetchInstr(BB, InstrIt, GV);
         ++HintIt;
       }
       if (InstrIt == BB.end())
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 35e25952aa2f8..562da50f7c4ca 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,11 +7,14 @@
 ; RUN: echo 't 1@0' >> %t
 ; RUN: echo 't 1@1' >> %t
 ; RUN: echo 't 2@1' >> %t
+; RUN: echo 'i 3@0 _Z3barv@0@0' >> %t
+; RUN: echo 'i 2@1 _Z3foob@1@0' >> %t
 ; RUN: echo 'f _Z3barv' >> %t
 ; RUN: echo 't 0@0' >> %t
+; RUN: echo 'i 0@1 _Z3foob@0@0' >> %t
 ; RUN: echo 't 21@1' >> %t
 ;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
+; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 
 define i32 @_Z3foob(i1 zeroext %0) nounwind {
   %2 = alloca i32, align 4
@@ -44,18 +47,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
 ; CHECK:        callq _Z3bazv@PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_2_1
 ; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target__Z3foob_1_0(%rip)
 
 13:                                               ; preds = %11, %9
   %14 = load i32, ptr %2, align 4
   ret i32 %14
+; CHECK:      .LBB0_3:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target__Z3barv_0_0(%rip)
+; CHECK:        retq
+
 }
 
 define weak i32 @_Z3barv() nounwind {
   %1 = call i32 @_Z3bazv()
-  ret i32 %1
+  br label %2
 ; CHECK:      _Z3barv:
 ; CHECK-NEXT:   .weak __llvm_prefetch_target__Z3barv_0_0
 ; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+; CHECK:        callq _Z3bazv@PLT
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target__Z3foob_0_0(%rip)
+2:
+  ret i32 %1
 }
 
 declare i32 @_Z3bazv() #1

>From 87f856a01d2ed36cf342ad90fe9ac62b16c52e84 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 20:37:36 +0000
Subject: [PATCH 22/23] clang-format.

---
 .../lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 13 ++++++++-----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp             |  3 ++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 8252d362271a0..9ac0ba34bba01 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -329,11 +329,12 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (FI == ProgramPathAndClusterInfo.end())
         continue;
       if (Values.size() != 2)
-        return createProfileParseError(Twine("Prefetch hint expected: "+ S));
+        return createProfileParseError(Twine("Prefetch hint expected: " + S));
       SmallVector<StringRef, 2> PrefetchSiteStr;
       Values[0].split(PrefetchSiteStr, '@');
       if (PrefetchSiteStr.size() != 2)
-        return createProfileParseError(Twine("Prefetch site expected: ") + Values[0]);
+        return createProfileParseError(Twine("Prefetch site expected: ") +
+                                       Values[0]);
       assert(PrefetchSiteStr.size() == 2);
       auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
       if (!SiteBBID)
@@ -346,7 +347,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       SmallVector<StringRef, 3> PrefetchTargetStr;
       Values[1].split(PrefetchTargetStr, '@');
       if (PrefetchTargetStr.size() != 3)
-        return createProfileParseError(Twine("Prefetch target target expected: ") + Values[1]);
+        return createProfileParseError(
+            Twine("Prefetch target target expected: ") + Values[1]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
       if (!TargetBBID)
         return TargetBBID.takeError();
@@ -366,12 +368,13 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (FI == ProgramPathAndClusterInfo.end())
         continue;
       if (Values.size() != 1)
-        return createProfileParseError(Twine("Prefetch target expected: ")+ S);
+        return createProfileParseError(Twine("Prefetch target expected: ") + S);
       SmallVector<StringRef, 2> PrefetchTargetStr;
       Values[0].split(PrefetchTargetStr, '@');
       assert(PrefetchTargetStr.size() == 2);
       if (PrefetchTargetStr.size() != 2)
-        return createProfileParseError(Twine("Prefetch target expected: ")+ Values[0]);
+        return createProfileParseError(Twine("Prefetch target expected: ") +
+                                       Values[0]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 2197d60bf598e..904097e25b7dc 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -122,7 +122,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto InstrIt = BB.begin();
     for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
-      while (HintIt != PrefetchHints.end() && NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
+      while (HintIt != PrefetchHints.end() &&
+             NumCallsInBB >= HintIt->SiteID.SubblockIndex) {
         auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
             getPrefetchTargetSymbolName(HintIt->TargetFunction,
                                         HintIt->TargetID.BBID,

>From 8d6b7efabe985e91ee4128cd310e4b4a69ef539b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 06:39:40 +0000
Subject: [PATCH 23/23] Remove some unwanted changes.

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 6 ------
 llvm/include/llvm/CodeGen/MachineInstr.h      | 3 +--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 8 +-------
 3 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 24445a2fe8ece..837b63636528f 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
-struct PrefetchTarget {
-  StringRef TargetFunction;
-  UniqueBBID TargetBBID;
-  unsigned TargetBBOffset;
-};
-
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 1aba9ca962b16..4fcb7f36e0238 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -123,9 +123,8 @@ class MachineInstr
     NoUSWrap = 1 << 20,      // Instruction supports geps
                              // no unsigned signed wrap.
     SameSign = 1 << 21,      // Both operands have the same sign.
-    InBounds = 1 << 22,      // Pointer arithmetic remains inbounds.
+    InBounds = 1 << 22       // Pointer arithmetic remains inbounds.
                              // Implies NoUSWrap.
-    Prefetch = 1 << 23,      // Instruction is a prefetch.
   };
 
 private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 9be5d5885e257..8156dc29d6e12 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -120,7 +120,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -180,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> InsertNoopsForPrefetch(
-    "insert-noops-for-prefetch",
-    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
-    cl::Hidden);
-
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -2131,7 +2125,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-         emitInstruction(&MI);
+        emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.



More information about the llvm-commits mailing list