[llvm] Insert symbols for prefetch targets read from basic blocks section profile. (PR #168439)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 23:09:12 PST 2025


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/168439

>From f9e6f528e7bb9904e32f64355ab4624a7cdcd777 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 01/18] feat(AsmPrinter): Add support for emitting prefetch
 target symbols

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 45 ++++++++++++++++-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +++++++++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 50 ++++++++++++++++++-
 .../BasicBlockSectionsProfileReader.cpp       | 44 ++++++++++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 13 +++++
 5 files changed, 174 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index ee1f28377f7e4..5b230db30aec4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,6 +42,17 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
+struct BBPosition {
+  UniqueBBID BBID;
+  unsigned BBOffset;
+};
+
+struct PrefetchHint {
+  BBPosition SitePosition;
+  StringRef TargetFunctionName;
+  BBPosition TargetPosition;
+};
+
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -50,9 +61,11 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
+  SmallVector<PrefetchHint> PrefetchHints;
+  DenseSet<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
-  // Edge counts for each edge, stored as a nested map.
+  // Edge counts for each edge.
   DenseMap<UniqueBBID, DenseMap<UniqueBBID, uint64_t>> EdgeCounts;
   // Hash for each basic block. The Hashes are stored for every original block
   // (not cloned blocks), hence the map key being unsigned instead of
@@ -60,6 +73,27 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
+// Provides DenseMapInfo BBPosition.
+template <> struct DenseMapInfo<BBPosition> {
+  static inline BBPosition getEmptyKey() {
+    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
+            DenseMapInfo<unsigned>::getEmptyKey()};
+  }
+  static inline BBPosition getTombstoneKey() {
+    return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
+                      DenseMapInfo<unsigned>::getTombstoneKey()};
+  }
+  static unsigned getHashValue(const BBPosition &Val) {
+    std::pair<unsigned, unsigned> PairVal = std::make_pair(
+        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
+    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
+  }
+  static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
+    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
+           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
+  }
+};
+
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -86,6 +120,11 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -194,6 +233,10 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
+  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..e6c6bc26ae9e6 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,6 +100,12 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
+struct PrefetchTarget {
+  StringRef TargetFunction;
+  UniqueBBID TargetBBID;
+  unsigned TargetBBOffset;
+};
+
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -213,6 +219,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -229,6 +237,8 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
+  mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
 
@@ -710,6 +720,14 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
+  const SmallVector<unsigned> &getPrefetchTargets() const {
+    return PrefetchTargets;
+  }
+
+  void setPrefetchTargets(const SmallVector<unsigned> &V) {
+    PrefetchTargets = V;
+  }
+
   /// Returns the section ID of this basic block.
   MBBSectionID getSectionID() const { return SectionID; }
 
@@ -1275,6 +1293,12 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
+  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
+
+  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
+    return CallInstSymbols;
+  }
+
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3aa245b7f3f1e..a204bba5789a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -178,6 +179,11 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
+static cl::opt<bool> InsertNoopsForPrefetch(
+    "insert-noops-for-prefetch",
+    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
+    cl::Hidden);
+
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -1982,10 +1988,34 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
+    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
+    unsigned NumCallsInBlock = 0;
     for (auto &MI : MBB) {
+      if (NextPrefetchTargetIndex != -1 &&
+          NumCallsInBlock >=  MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
+
+        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+            Twine("_") +
+            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+        if (MF->getFunction().isWeakForLinker()) {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
+          errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        } else {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+          errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        }
+        // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+       // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+        OutStreamer->emitLabel(PrefetchTargetSymbol);
+        ++NextPrefetchTargetIndex;
+        if (NextPrefetchTargetIndex >=
+            static_cast<int>(MBB.getPrefetchTargets().size()))
+          NextPrefetchTargetIndex = -1;
+      }
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugInstr()) {
@@ -2099,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-        emitInstruction(&MI);
+         emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.
@@ -2136,6 +2166,24 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
+   while (NextPrefetchTargetIndex != -1) {
+        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
+            Twine("_") +
+            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
+        if (MF->getFunction().hasWeakLinkage()) {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
+        } else {
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
+        }
+        OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
+        OutStreamer->emitLabel(PrefetchTargetSymbol);
+        ++NextPrefetchTargetIndex;
+        if (NextPrefetchTargetIndex >=
+            static_cast<int>(MBB.getPrefetchTargets().size()))
+          NextPrefetchTargetIndex = -1;
+      }
+
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c234c0f1b0b34..de146e172c174 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,6 +93,19 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
+      .PrefetchTargets;
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -308,6 +321,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       }
       continue;
     }
+    case 't': { // Prefetch target specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramPathAndClusterInfo.end())
+        continue;
+      assert(Values.size() == 1);
+      SmallVector<StringRef, 2> PrefetchTargetStr;
+      Values[0].split(PrefetchTargetStr, '@');
+      assert(PrefetchTargetStr.size() == 2);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long TargetBBOffset;
+      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[1]);
+      FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
@@ -514,6 +546,18 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
+DenseSet<BBPosition>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchTargetsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index ba0b025167307..19b218a2879dd 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,6 +90,19 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
+MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
+  if (CallInstSymbols.size() <= CallInstNumber) {
+    const MachineFunction *MF = getParent();
+    MCContext &Ctx = MF->getContext();
+    CallInstSymbols.resize(CallInstNumber + 1);
+    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
+        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
+            Twine(CallInstNumber),
+        /*AlwaysEmit=*/true);
+  }
+  return CallInstSymbols[CallInstNumber];
+}
+
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();

>From 13d979bfa843054924c0563631a574ebc9b06995 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 11 Nov 2025 21:30:47 +0000
Subject: [PATCH 02/18] feat: Add prefetch-profile.txt for testing

---
 build-release/prefetch-profile.txt | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 build-release/prefetch-profile.txt

diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
new file mode 100644
index 0000000000000..294f57a46920b
--- /dev/null
+++ b/build-release/prefetch-profile.txt
@@ -0,0 +1,3 @@
+v1
+f f
+t 0 at 1

>From e56b3efa23d818aed91f2f35acd9bf0c26184762 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:18 +0000
Subject: [PATCH 03/18] Everything else.

---
 build-release/prefetch-profile.txt            |  3 --
 .../CodeGen/BasicBlockSectionsProfileReader.h | 29 +++----------------
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 12 +++++++-
 .../BasicBlockSectionsProfileReader.cpp       | 10 +++----
 4 files changed, 20 insertions(+), 34 deletions(-)
 delete mode 100644 build-release/prefetch-profile.txt

diff --git a/build-release/prefetch-profile.txt b/build-release/prefetch-profile.txt
deleted file mode 100644
index 294f57a46920b..0000000000000
--- a/build-release/prefetch-profile.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-v1
-f f
-t 0 at 1
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 5b230db30aec4..fbf9b89754cd7 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -44,7 +44,7 @@ struct BBClusterInfo {
 
 struct BBPosition {
   UniqueBBID BBID;
-  unsigned BBOffset;
+  unsigned CallsiteIndex;
 };
 
 struct PrefetchHint {
@@ -62,7 +62,7 @@ struct FunctionPathAndClusterInfo {
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
   SmallVector<PrefetchHint> PrefetchHints;
-  DenseSet<BBPosition> PrefetchTargets;
+  SmallVector<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -73,27 +73,6 @@ struct FunctionPathAndClusterInfo {
   DenseMap<unsigned, uint64_t> BBHashes;
 };
 
-// Provides DenseMapInfo BBPosition.
-template <> struct DenseMapInfo<BBPosition> {
-  static inline BBPosition getEmptyKey() {
-    return {DenseMapInfo<UniqueBBID>::getEmptyKey(),
-            DenseMapInfo<unsigned>::getEmptyKey()};
-  }
-  static inline BBPosition getTombstoneKey() {
-    return BBPosition{DenseMapInfo<UniqueBBID>::getTombstoneKey(),
-                      DenseMapInfo<unsigned>::getTombstoneKey()};
-  }
-  static unsigned getHashValue(const BBPosition &Val) {
-    std::pair<unsigned, unsigned> PairVal = std::make_pair(
-        DenseMapInfo<UniqueBBID>::getHashValue(Val.BBID), Val.BBOffset);
-    return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
-  }
-  static bool isEqual(const BBPosition &LHS, const BBPosition &RHS) {
-    return DenseMapInfo<UniqueBBID>::isEqual(LHS.BBID, RHS.BBID) &&
-           DenseMapInfo<unsigned>::isEqual(LHS.BBOffset, RHS.BBOffset);
-  }
-};
-
 class BasicBlockSectionsProfileReader {
 public:
   friend class BasicBlockSectionsProfileReaderWrapperPass;
@@ -123,7 +102,7 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -236,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseSet<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index a204bba5789a8..90445fedd5db3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -485,6 +485,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
   if (EmitBBHash)
     AU.addRequired<MachineBlockHashInfo>();
+  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -1987,7 +1988,16 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-  for (auto &MBB : *MF) {
+  DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
+  if (auto *BBSPRPass =
+          getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
+    FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
+}
+
+   for (auto &MBB : *MF) {
+
+    SmallVector<unsigned> BBPrefetchTargets;
+    = FunctionPrefetchTargets.lookup(MBB.g);
     int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index de146e172c174..c4784a6039c09 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -99,7 +99,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
 }
 
-DenseSet<BBPosition>
+SmallVector<BBPosition>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -333,11 +333,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();
-      unsigned long long TargetBBOffset;
-      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetBBOffset))
+      unsigned long long TargetCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
-      FI->second.PrefetchTargets.insert(BBPosition{*TargetBBID, static_cast<unsigned>(TargetBBOffset)});
+      FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
@@ -552,7 +552,7 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
   return BBSPR.getPrefetchHintsForFunction(FuncName);
 }
 
-DenseSet<BBPosition>
+SmallVector<BBPosition>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return BBSPR.getPrefetchTargetsForFunction(FuncName);

>From 089bd27a8dab1847b1a999c7eafaac9354777af6 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 12 Nov 2025 18:58:27 +0000
Subject: [PATCH 04/18] Add test.

---
 llvm/test/CodeGen/X86/prefetch-symbols.ll | 42 +++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/prefetch-symbols.ll

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
new file mode 100644
index 0000000000000..979db7942ff2c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -0,0 +1,42 @@
+;; Check that a prefetch target directive ('t') in the basic block sections
+;; profile causes a prefetch target symbol to be emitted for the function.
+;;
+;; Specify the bb sections profile:
+; RUN: echo 'v1' > %t
+; RUN: echo 'f _Z3foob' >> %t
+; RUN: echo 't 0@0' >> %t
+;;
+; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t  | FileCheck %s
+
+define i32 @_Z3foob(i1 zeroext %0) nounwind {
+  %2 = alloca i32, align 4
+  %3 = alloca i8, align 1
+  %4 = zext i1 %0 to i8
+  store i8 %4, ptr %3, align 1
+  %5 = load i8, ptr %3, align 1
+  %6 = trunc i8 %5 to i1
+  %7 = zext i1 %6 to i32
+  %8 = icmp sgt i32 %7, 0
+  br i1 %8, label %9, label %11
+
+9:                                                ; preds = %1
+  %10 = call i32 @_Z3barv()
+  store i32 %10, ptr %2, align 4
+  br label %13
+
+11:                                               ; preds = %1
+  %12 = call i32 @_Z3bazv()
+  store i32 %12, ptr %2, align 4
+  br label %13
+
+13:                                               ; preds = %11, %9
+  %14 = load i32, ptr %2, align 4
+  ret i32 %14
+}
+
+declare i32 @_Z3barv() #1
+declare i32 @_Z3bazv() #1
+
+
+; CHECK: _Z3foob
+; CHECK: llvm_prefetch_target

>From da19d56d10952aed3f40f921a897eb1acb1914c7 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:17 +0000
Subject: [PATCH 05/18] Fix everything

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |  4 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 24 +----
 llvm/include/llvm/CodeGen/Passes.h            |  2 +
 llvm/include/llvm/InitializePasses.h          |  1 +
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 73 ++++----------
 llvm/lib/CodeGen/CMakeLists.txt               |  1 +
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 96 +++++++++++++++++++
 llvm/lib/CodeGen/MachineBasicBlock.cpp        | 13 ---
 llvm/lib/CodeGen/TargetPassConfig.cpp         |  1 +
 9 files changed, 127 insertions(+), 88 deletions(-)
 create mode 100644 llvm/lib/CodeGen/InsertCodePrefetch.cpp

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index fbf9b89754cd7..1fd904d64ab9d 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,7 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +215,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  DenseMap<UniqueBBID, SmallVector<unsigned>> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index e6c6bc26ae9e6..4be008bbf4bf1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -100,12 +100,6 @@ template <> struct DenseMapInfo<MBBSectionID> {
   }
 };
 
-struct PrefetchTarget {
-  StringRef TargetFunction;
-  UniqueBBID TargetBBID;
-  unsigned TargetBBOffset;
-};
-
 template <> struct ilist_traits<MachineInstr> {
 private:
   friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
@@ -219,8 +213,6 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
-  SmallVector<unsigned> PrefetchTargets;
-
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
@@ -237,7 +229,7 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  mutable SmallVector<MCSymbol *, 4> CallInstSymbols;
+  SmallVector<unsigned> PrefetchTargetIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -720,12 +712,12 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
-  const SmallVector<unsigned> &getPrefetchTargets() const {
-    return PrefetchTargets;
+  const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
+    return PrefetchTargetIndexes;
   }
 
-  void setPrefetchTargets(const SmallVector<unsigned> &V) {
-    PrefetchTargets = V;
+  void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
+    PrefetchTargetIndexes = V;
   }
 
   /// Returns the section ID of this basic block.
@@ -1293,12 +1285,6 @@ class MachineBasicBlock
   /// Return the MCSymbol for this basic block.
   LLVM_ABI MCSymbol *getSymbol() const;
 
-  MCSymbol *getCallInstSymbol(unsigned CallInstNumber) const;
-
-  const SmallVector<MCSymbol *, 4>& getCallInstSymbols() const {
-    return CallInstSymbols;
-  }
-
   /// Return the Windows EH Continuation Symbol for this basic block.
   LLVM_ABI MCSymbol *getEHContSymbol() const;
 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index a8525554b142e..f148d050a5772 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -69,6 +69,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
 
 LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 
+LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
+
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
 LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 10a4d8525a9e8..35d5ab14dc226 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -56,6 +56,7 @@ LLVM_ABI void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
 LLVM_ABI void initializeAssumptionCacheTrackerPass(PassRegistry &);
 LLVM_ABI void initializeAtomicExpandLegacyPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockPathCloningPass(PassRegistry &);
+LLVM_ABI void initializeInsertCodePrefetchPass(PassRegistry &);
 LLVM_ABI void
 initializeBasicBlockSectionsProfileReaderWrapperPassPass(PassRegistry &);
 LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 90445fedd5db3..933fe6f7d177f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -179,11 +179,6 @@ static cl::opt<bool> EmitJumpTableSizesSection(
     cl::desc("Emit a section containing jump table addresses and sizes"),
     cl::Hidden, cl::init(false));
 
-static cl::opt<bool> InsertNoopsForPrefetch(
-    "insert-noops-for-prefetch",
-    cl::desc("Whether to insert noops instead of prefetches."), cl::init(false),
-    cl::Hidden);
-
 // This isn't turned on by default, since several of the scheduling models are
 // not completely accurate, and we don't want to be misleading.
 static cl::opt<bool> PrintLatency(
@@ -485,7 +480,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
   if (EmitBBHash)
     AU.addRequired<MachineBlockHashInfo>();
-  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -1988,44 +1982,29 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-  DenseMap<UniqueBBID, SmallVector<unsigned>> FunctionPrefetchTargets;
-  if (auto *BBSPRPass =
-          getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>()) {
-    FunctionPrefetchTargets = BBSPRPass->getBBSPR().getPrefetchTargetsForFunction(MF->getName());
-}
 
    for (auto &MBB : *MF) {
-
-    SmallVector<unsigned> BBPrefetchTargets;
-    = FunctionPrefetchTargets.lookup(MBB.g);
-    int NextPrefetchTargetIndex = MBB.getPrefetchTargets().empty() ? -1 : 0;
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
-    unsigned NumCallsInBlock = 0;
-    for (auto &MI : MBB) {
-      if (NextPrefetchTargetIndex != -1 &&
-          NumCallsInBlock >=  MBB.getPrefetchTargets()[NextPrefetchTargetIndex]) {
 
-        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
+    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+    auto PrefetchTargetIt = PrefetchTargets.begin();
+    unsigned NumCalls = 0;
+    auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
+      if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+        return;
+      MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
             Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
             Twine("_") +
-            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
-        if (MF->getFunction().isWeakForLinker()) {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Weak);
-          errs() << "Emitting weak symbol: " << PrefetchTargetSymbol->getName() << "\n";
-        } else {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
-          errs() << "Emitting global symbol: " << PrefetchTargetSymbol->getName() << "\n";
-        }
-        // OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
-       // errs() << "Emitting symbol: " << PrefetchTargetSymbol->getName() << "\n";
+            utostr(*PrefetchTargetIt));
+          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
         OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++NextPrefetchTargetIndex;
-        if (NextPrefetchTargetIndex >=
-            static_cast<int>(MBB.getPrefetchTargets().size()))
-          NextPrefetchTargetIndex = -1;
-      }
+        ++PrefetchTargetIt;
+    };
+
+    for (auto &MI : MBB) {
+      EmitPrefetchTargetSymbolIfNeeded();
       // Print the assembly for the instruction.
       if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
           !MI.isDebugInstr()) {
@@ -2163,8 +2142,11 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
 
-      if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
+      if (MI.isCall()) {
+        if (MF->getTarget().Options.BBAddrMap)
         OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+        ++NumCalls;
+      }
 
       if (TM.Options.EmitCallGraphSection && MI.isCall())
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
@@ -2176,24 +2158,7 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
-   while (NextPrefetchTargetIndex != -1) {
-        MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
-            Twine("_") +
-            utostr(MBB.getPrefetchTargets()[NextPrefetchTargetIndex]));
-        if (MF->getFunction().hasWeakLinkage()) {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_WeakDefinition);
-        } else {
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Global);
-        }
-        OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MCSA_Extern);
-        OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++NextPrefetchTargetIndex;
-        if (NextPrefetchTargetIndex >=
-            static_cast<int>(MBB.getPrefetchTargets().size()))
-          NextPrefetchTargetIndex = -1;
-      }
-
+    EmitPrefetchTargetSymbolIfNeeded();
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 1cf0b4964760b..fcf28247179ca 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCodeGen
   IndirectBrExpandPass.cpp
   InitUndef.cpp
   InlineSpiller.cpp
+  InsertCodePrefetch.cpp
   InterferenceCache.cpp
   InterleavedAccessPass.cpp
   InterleavedLoadCombinePass.cpp
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
new file mode 100644
index 0000000000000..7cb52302ac7db
--- /dev/null
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -0,0 +1,96 @@
+//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Prefetch insertion pass implementation.
+//===----------------------------------------------------------------------===//
+/// Prefetch insertion pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "prefetchinsertion"
+
+namespace {
+class InsertCodePrefetch : public MachineFunctionPass {
+public:
+  static char ID;
+
+  InsertCodePrefetch() : MachineFunctionPass(ID) {
+    initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Identify basic blocks that need separate sections and prepare to emit them
+  /// accordingly.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
+
+char InsertCodePrefetch::ID = 0;
+INITIALIZE_PASS_BEGIN(
+    InsertCodePrefetch, DEBUG_TYPE,
+    "Reads prefetch", true,
+    false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
+INITIALIZE_PASS_END(
+    InsertCodePrefetch, DEBUG_TYPE,
+    "Reads prefetch", true,
+    false)
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+  SmallVector<BBPosition> PrefetchTargets =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchTargetsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
+  for (const auto &Target: PrefetchTargets)
+    PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+  for (auto &MBB: MF) {
+    auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
+    if (R == PrefetchTargetsByBBID.end()) continue;
+    MBB.setPrefetchTargetIndexes(R->second);
+  }
+
+  return false;
+}
+
+void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicBlockSectionsProfileReaderWrapperPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineFunctionPass *llvm::createInsertCodePrefetchPass() {
+  return new InsertCodePrefetch();
+}
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 19b218a2879dd..ba0b025167307 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -90,19 +90,6 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
   return CachedMCSymbol;
 }
 
-MCSymbol *MachineBasicBlock::getCallInstSymbol(unsigned CallInstNumber) const {
-  if (CallInstSymbols.size() <= CallInstNumber) {
-    const MachineFunction *MF = getParent();
-    MCContext &Ctx = MF->getContext();
-    CallInstSymbols.resize(CallInstNumber + 1);
-    CallInstSymbols[CallInstNumber] = Ctx.createBlockSymbol(
-        "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber()) + "_" +
-            Twine(CallInstNumber),
-        /*AlwaysEmit=*/true);
-  }
-  return CallInstSymbols[CallInstNumber];
-}
-
 MCSymbol *MachineBasicBlock::getEHContSymbol() const {
   if (!CachedEHContMCSymbol) {
     const MachineFunction *MF = getParent();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index ceae0d29eea90..5334c5596d018 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1291,6 +1291,7 @@ void TargetPassConfig::addMachinePasses() {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
       addPass(llvm::createBasicBlockPathCloningPass());
+      addPass(llvm::createInsertCodePrefetchPass());
     }
     addPass(llvm::createBasicBlockSectionsPass());
   }

>From 7a3e96795c2c1830e64ed87ba9ffbf2195d37ff6 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 00:10:36 +0000
Subject: [PATCH 06/18] clang-format.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h |  6 +++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 25 +++++++++++--------
 .../BasicBlockSectionsProfileReader.cpp       |  3 ++-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 23 ++++++++---------
 4 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 1fd904d64ab9d..2b8ee578cd917 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -102,7 +102,8 @@ class BasicBlockSectionsProfileReader {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
   StringRef getAliasName(StringRef FuncName) const {
@@ -215,7 +216,8 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<PrefetchHint>
   getPrefetchHintsForFunction(StringRef FuncName) const;
 
-  SmallVector<BBPosition> getPrefetchTargetsForFunction(StringRef FuncName) const;
+  SmallVector<BBPosition>
+  getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 933fe6f7d177f..4368cd4d256c9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,7 +18,6 @@
 #include "WasmException.h"
 #include "WinCFGuard.h"
 #include "WinException.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/BitmaskEnum.h"
@@ -120,6 +119,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1983,7 +1983,7 @@ void AsmPrinter::emitFunctionBody() {
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
 
-   for (auto &MBB : *MF) {
+  for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
@@ -1992,15 +1992,18 @@ void AsmPrinter::emitFunctionBody() {
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end() || NumCalls < *PrefetchTargetIt)
+      if (PrefetchTargetIt == PrefetchTargets.end() ||
+          NumCalls < *PrefetchTargetIt)
         return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-            Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") + utostr(MBB.getBBID()->BaseID) +
-            Twine("_") +
-            utostr(*PrefetchTargetIt));
-          OutStreamer->emitSymbolAttribute(PrefetchTargetSymbol, MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
-        OutStreamer->emitLabel(PrefetchTargetSymbol);
-        ++PrefetchTargetIt;
+          Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
+          utostr(MBB.getBBID()->BaseID) + Twine("_") +
+          utostr(*PrefetchTargetIt));
+      OutStreamer->emitSymbolAttribute(
+          PrefetchTargetSymbol,
+          MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
+      OutStreamer->emitLabel(PrefetchTargetSymbol);
+      ++PrefetchTargetIt;
     };
 
     for (auto &MI : MBB) {
@@ -2118,7 +2121,7 @@ void AsmPrinter::emitFunctionBody() {
         break;
       }
       default:
-         emitInstruction(&MI);
+        emitInstruction(&MI);
 
         auto CountInstruction = [&](const MachineInstr &MI) {
           // Skip Meta instructions inside bundles.
@@ -2144,7 +2147,7 @@ void AsmPrinter::emitFunctionBody() {
 
       if (MI.isCall()) {
         if (MF->getTarget().Options.BBAddrMap)
-        OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
+          OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
         ++NumCalls;
       }
 
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index c4784a6039c09..9b54dd6803cf6 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -337,7 +337,8 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (getAsUnsignedInteger(PrefetchTargetStr[1], 10, TargetCallsiteIndex))
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
-      FI->second.PrefetchTargets.push_back(BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+      FI->second.PrefetchTargets.push_back(
+          BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 7cb52302ac7db..91cb6e599215d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -39,7 +39,9 @@ class InsertCodePrefetch : public MachineFunctionPass {
     initializeInsertCodePrefetchPass(*PassRegistry::getPassRegistry());
   }
 
-  StringRef getPassName() const override { return "X86 Cide Prefetch Inserter Pass"; }
+  StringRef getPassName() const override {
+    return "X86 Cide Prefetch Inserter Pass";
+  }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -55,15 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(
-    InsertCodePrefetch, DEBUG_TYPE,
-    "Reads prefetch", true,
-    false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+                      false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(
-    InsertCodePrefetch, DEBUG_TYPE,
-    "Reads prefetch", true,
-    false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+                    false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
@@ -74,11 +72,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
-  for (const auto &Target: PrefetchTargets)
+  for (const auto &Target : PrefetchTargets)
     PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
-  for (auto &MBB: MF) {
+  for (auto &MBB : MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
-    if (R == PrefetchTargetsByBBID.end()) continue;
+    if (R == PrefetchTargetsByBBID.end())
+      continue;
     MBB.setPrefetchTargetIndexes(R->second);
   }
 

>From 10a165ff7ce1e15ce8a7b34eca43ef9f438ebfb4 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:46:13 +0000
Subject: [PATCH 07/18] Fix the prefetch test.

---
 llvm/test/CodeGen/X86/prefetch-symbols.ll | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/prefetch-symbols.ll
index 979db7942ff2c..3eb91dfdabd27 100644
--- a/llvm/test/CodeGen/X86/prefetch-symbols.ll
+++ b/llvm/test/CodeGen/X86/prefetch-symbols.ll
@@ -1,12 +1,14 @@
-;; Check that specifying the function in the basic block sections profile
-;; without any other directives is a noop.
+;; Check prefetch directives in basic block section profiles.
 ;;
 ;; Specify the bb sections profile:
 ; RUN: echo 'v1' > %t
 ; RUN: echo 'f _Z3foob' >> %t
 ; RUN: echo 't 0@0' >> %t
+; RUN: echo 't 1@0' >> %t
+; RUN: echo 't 1@1' >> %t
+; RUN: echo 't 2@1' >> %t
 ;;
-; RUN: llc < %s -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t  | FileCheck
+; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 
 define i32 @_Z3foob(i1 zeroext %0) nounwind {
   %2 = alloca i32, align 4
@@ -18,16 +20,27 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
   %7 = zext i1 %6 to i32
   %8 = icmp sgt i32 %7, 0
   br i1 %8, label %9, label %11
+; CHECK:      _Z3foob:
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_0_0:
 
 9:                                                ; preds = %1
   %10 = call i32 @_Z3barv()
   store i32 %10, ptr %2, align 4
   br label %13
+; CHECK:        .globl __llvm_prefetch_target__Z3foob_1_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_0:
+; CHECK-NEXT:   callq _Z3barv@PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_1_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_1_1:
 
 11:                                               ; preds = %1
   %12 = call i32 @_Z3bazv()
   store i32 %12, ptr %2, align 4
   br label %13
+; CHECK:        callq _Z3bazv@PLT
+; CHECK-NEXT:   .globl __llvm_prefetch_target__Z3foob_2_1
+; CHECK-NEXT: __llvm_prefetch_target__Z3foob_2_1:
 
 13:                                               ; preds = %11, %9
   %14 = load i32, ptr %2, align 4
@@ -36,7 +49,3 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
 
 declare i32 @_Z3barv() #1
 declare i32 @_Z3bazv() #1
-
-
-; CHECK: _Z3foob
-; CHECK: llvm_prefetch_target

>From bf344122a3a6a26f8ef68b0173f20a841c3bec3f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 03:48:53 +0000
Subject: [PATCH 08/18] Rename the test.

---
 ...{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename llvm/test/CodeGen/X86/{prefetch-symbols.ll => basic-block-sections-code-prefetch.ll} (100%)

diff --git a/llvm/test/CodeGen/X86/prefetch-symbols.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
similarity index 100%
rename from llvm/test/CodeGen/X86/prefetch-symbols.ll
rename to llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll

>From fbf0f84a50b7d8f0c43364dfb3d29ec9a16f871a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 06:59:43 +0000
Subject: [PATCH 09/18] Remove unrelated changes.

---
 .../llvm/CodeGen/BasicBlockSectionsProfileReader.h   | 12 ------------
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 ------------
 2 files changed, 24 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 2b8ee578cd917..801588509d340 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -47,12 +47,6 @@ struct BBPosition {
   unsigned CallsiteIndex;
 };
 
-struct PrefetchHint {
-  BBPosition SitePosition;
-  StringRef TargetFunctionName;
-  BBPosition TargetPosition;
-};
-
 // This represents the raw input profile for one function.
 struct FunctionPathAndClusterInfo {
   // BB Cluster information specified by `UniqueBBID`s.
@@ -61,7 +55,6 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
-  SmallVector<PrefetchHint> PrefetchHints;
   SmallVector<BBPosition> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
@@ -99,9 +92,6 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
-  SmallVector<PrefetchHint>
-  getPrefetchHintsForFunction(StringRef FuncName) const;
-
   SmallVector<BBPosition>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
@@ -213,8 +203,6 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
 
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
-  SmallVector<PrefetchHint>
-  getPrefetchHintsForFunction(StringRef FuncName) const;
 
   SmallVector<BBPosition>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9b54dd6803cf6..5b12c85f7eeef 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,12 +93,6 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
-    StringRef FuncName) const {
-  return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName)).PrefetchHints;
-}
-
 SmallVector<BBPosition>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
@@ -547,12 +541,6 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
-SmallVector<PrefetchHint>
-BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
-    StringRef FuncName) const {
-  return BBSPR.getPrefetchHintsForFunction(FuncName);
-}
-
 SmallVector<BBPosition>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {

>From 8eef602d8fa4c4498ef25a7a325a379337065977 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:09 +0000
Subject: [PATCH 10/18] Add some comments.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 19 ++++++++++----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  2 --
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 26 +++++++++----------
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 801588509d340..784bf8dd8f2a9 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -42,9 +42,14 @@ struct BBClusterInfo {
   unsigned PositionInCluster;
 };
 
-struct BBPosition {
+// Assuming a block is split into subblocks across its callsites, this struct
+// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
+// the beginning of the block if `SubblockIndex` is zero) to the call number
+// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
+// calls in the basic block).
+struct SubblockID {
   UniqueBBID BBID;
-  unsigned CallsiteIndex;
+  unsigned SubblockIndex;
 };
 
 // This represents the raw input profile for one function.
@@ -55,7 +60,9 @@ struct FunctionPathAndClusterInfo {
   // the edge a -> b (a is not cloned). The index of the path in this vector
   // determines the `UniqueBBID::CloneID` of the cloned blocks in that path.
   SmallVector<SmallVector<unsigned>> ClonePaths;
-  SmallVector<BBPosition> PrefetchTargets;
+  // Code prefetch targets, specified by the ID of the subblock whose beginning
+  // must be targeted for prefetching.
+  SmallVector<SubblockID> PrefetchTargets;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -92,7 +99,9 @@ class BasicBlockSectionsProfileReader {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &SinkBBID) const;
 
-  SmallVector<BBPosition>
+  // Returns the prefetch targets (identified by their containing subblocks) for
+  // function `FuncName`.
+  SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
 private:
@@ -204,7 +213,7 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   uint64_t getEdgeCount(StringRef FuncName, const UniqueBBID &SrcBBID,
                         const UniqueBBID &DestBBID) const;
 
-  SmallVector<BBPosition>
+  SmallVector<SubblockID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
   // Initializes the FunctionNameToDIFilename map for the current module and
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4368cd4d256c9..2d6f6687fe456 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -119,7 +119,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/SMLoc.h"
 #include "llvm/Support/VCSRevision.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1982,7 +1981,6 @@ void AsmPrinter::emitFunctionBody() {
 
   FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
-
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
     emitBasicBlockStart(MBB);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 91cb6e599215d..df3d63098390b 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -1,4 +1,4 @@
-//===-- InsertCodePrefetch.cpp ---=========-----------------------------===//
+//===-- InsertCodePrefetch.cpp ---=========--------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -7,9 +7,14 @@
 //===----------------------------------------------------------------------===//
 //
 /// \file
-/// Prefetch insertion pass implementation.
+/// Code Prefetch Insertion Pass.
 //===----------------------------------------------------------------------===//
-/// Prefetch insertion pass.
+/// This pass inserts code prefetch instructions according to the prefetch
+/// directives in the basic block section profile. The target of a prefetch can
+/// be the beginning of any dynamic basic block, that is the beginning of a
+/// machine basic block, or immediately after a callsite. A global symbol will
+/// be emitted at the position of the target so it can be addressed from the
+/// prefetch instruction.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
@@ -20,15 +25,11 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
-#define DEBUG_TYPE "prefetchinsertion"
+#define DEBUG_TYPE "insert-code-prefetch"
 
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {
@@ -40,13 +41,12 @@ class InsertCodePrefetch : public MachineFunctionPass {
   }
 
   StringRef getPassName() const override {
-    return "X86 Cide Prefetch Inserter Pass";
+    return "Code Prefetch Inserter Pass";
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
-  /// Identify basic blocks that need separate sections and prepare to emit them
-  /// accordingly.
+  // Sets prefetch targets based on the bb section profile.
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
@@ -57,10 +57,10 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
                       false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Reads prefetch", true,
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
                     false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {

>From c6b1fa075fb066b38e16b060e896a42aeb5a1245 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 18:58:22 +0000
Subject: [PATCH 11/18] clang-format.

---
 .../llvm/CodeGen/BasicBlockSectionsProfileReader.h        | 8 ++++----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp                   | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 784bf8dd8f2a9..88f3e8b620bce 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -43,10 +43,10 @@ struct BBClusterInfo {
 };
 
 // Assuming a block is split into subblocks across its callsites, this struct
-// uniquely identifies the subblock in block `BBID` which starts from right after call number `SubblockIndex` (or
-// the beginning of the block if `SubblockIndex` is zero) to the call number
-// `SubblockIndex+1` (or the end of the block if there are are `SubblockIndex`
-// calls in the basic block).
+// uniquely identifies the subblock in block `BBID` which starts from right
+// after call number `SubblockIndex` (or the beginning of the block if
+// `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
+// the block if there are are `SubblockIndex` calls in the basic block).
 struct SubblockID {
   UniqueBBID BBID;
   unsigned SubblockIndex;
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index df3d63098390b..29afb46a317a8 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -57,11 +57,11 @@ class InsertCodePrefetch : public MachineFunctionPass {
 //===----------------------------------------------------------------------===//
 
 char InsertCodePrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
-                      false)
+INITIALIZE_PASS_BEGIN(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                      true, false)
 INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
-INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion", true,
-                    false)
+INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
+                    true, false)
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
   assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&

>From e78fae53cea5066319ce1da32531b252c7f42726 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:16:54 +0000
Subject: [PATCH 12/18] Add comments and rename functions.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h     |  2 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h     | 15 ++++++++++-----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp        | 13 +++++++++----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp           | 11 ++++++-----
 4 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 88f3e8b620bce..c2bc7559b9fb4 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -46,7 +46,7 @@ struct BBClusterInfo {
 // uniquely identifies the subblock in block `BBID` which starts from right
 // after call number `SubblockIndex` (or the beginning of the block if
 // `SubblockIndex` is zero) to the call number `SubblockIndex+1` (or the end of
-// the block if there are are `SubblockIndex` calls in the basic block).
+// the block if `SubblockIndex` is the last call in the block).
 struct SubblockID {
   UniqueBBID BBID;
   unsigned SubblockIndex;
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 4be008bbf4bf1..20427954d22e4 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,7 +229,12 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  SmallVector<unsigned> PrefetchTargetIndexes;
+  /// Contains the subblock indices in this block that are targets of code prefetching.
+  /// The subblock indexed `i` specifies that region after the `i`th call (or the
+  /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
+  /// end of the block). The prefetch target is always the beginning of the
+  /// subblock.
+  SmallVector<unsigned> PrefetchTargetSubblockIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
   mutable MCSymbol *CachedEHContMCSymbol = nullptr;
@@ -712,12 +717,12 @@ class MachineBasicBlock
 
   std::optional<UniqueBBID> getBBID() const { return BBID; }
 
-  const SmallVector<unsigned> &getPrefetchTargetIndexes() const {
-    return PrefetchTargetIndexes;
+  const SmallVector<unsigned> &getPrefetchTargetSubblockIndexes() const {
+    return PrefetchTargetSubblockIndexes;
   }
 
-  void setPrefetchTargetIndexes(const SmallVector<unsigned> &V) {
-    PrefetchTargetIndexes = V;
+  void setPrefetchTargetSubblockIndexes(const SmallVector<unsigned> &V) {
+    PrefetchTargetSubblockIndexes = V;
   }
 
   /// Returns the section ID of this basic block.
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 2d6f6687fe456..72cf557d51e03 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,17 +1986,20 @@ void AsmPrinter::emitFunctionBody() {
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
 
-    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetIndexes();
+    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
+    // Helper to emit a symbol for the prefetch target and proceed to the next
+    // one.
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end() ||
-          NumCalls < *PrefetchTargetIt)
-        return;
+      if (PrefetchTargetIt == PrefetchTargets.end()) return;
+      if (NumCalls < *PrefetchTargetIt) return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
           Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
           utostr(MBB.getBBID()->BaseID) + Twine("_") +
           utostr(*PrefetchTargetIt));
+      // If the function is weak-linkage it may be replaced by a strong version,
+      // in which case the prefetch targets should also be replaced.
       OutStreamer->emitSymbolAttribute(
           PrefetchTargetSymbol,
           MF->getFunction().isWeakForLinker() ? MCSA_Weak : MCSA_Global);
@@ -2159,6 +2162,8 @@ void AsmPrinter::emitFunctionBody() {
       for (auto &Handler : Handlers)
         Handler->endInstruction();
     }
+    // If the block ends with a call, we may need to emit a prefetch target
+    // at the end.
     EmitPrefetchTargetSymbolIfNeeded();
 
     // We must emit temporary symbol for the end of this basic block, if either
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 29afb46a317a8..e241ccbbee263 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -12,9 +12,9 @@
 /// This pass inserts code prefetch instructions according to the prefetch
 /// directives in the basic block section profile. The target of a prefetch can
 /// be the beginning of any dynamic basic block, that is the beginning of a
-/// machine basic block, or immediately after a callsite. A global symbol will
-/// be emitted at the position of the target so it can be addressed from the
-/// prefetch instruction.
+/// machine basic block, or immediately after a callsite. A global symbol is
+/// emitted at the position of the target so it can be addressed from the
+/// prefetch instruction from any module.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
@@ -68,6 +68,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
          "BB Sections list not enabled!");
   if (hasInstrProfHashMismatch(MF))
     return false;
+  // Set each block's prefetch targets so AsmPrinter can emit a special symbol
+  // there.
   SmallVector<BBPosition> PrefetchTargets =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
@@ -78,9 +80,8 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
     if (R == PrefetchTargetsByBBID.end())
       continue;
-    MBB.setPrefetchTargetIndexes(R->second);
+    MBB.setPrefetchTargetSubblockIndexes(R->second);
   }
-
   return false;
 }
 

>From ca67c4197caaee4cdf574cb347842ca47b938304 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:17:04 +0000
Subject: [PATCH 13/18] clang-format.

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 +++++-----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  9 ++++++---
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 20427954d22e4..a13fcb2bb841d 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -229,11 +229,11 @@ class MachineBasicBlock
   /// is only computed once and is cached.
   mutable MCSymbol *CachedMCSymbol = nullptr;
 
-  /// Contains the subblock indices in this block that are targets of code prefetching.
-  /// The subblock indexed `i` specifies that region after the `i`th call (or the
-  /// beginning of the block if `i==0`) to before the`i+1`th callsite (or the
-  /// end of the block). The prefetch target is always the beginning of the
-  /// subblock.
+  /// Contains the subblock indices in this block that are targets of code
+  /// prefetching. The subblock indexed `i` specifies the region after the
+  /// `i`th call (or the beginning of the block if `i==0`) to before the `i+1`th
+  /// callsite (or the end of the block). The prefetch target is always the
+  /// beginning of the subblock.
   SmallVector<unsigned> PrefetchTargetSubblockIndexes;
 
   /// Cached MCSymbol for this block (used if IsEHContTarget).
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 72cf557d51e03..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1986,14 +1986,17 @@ void AsmPrinter::emitFunctionBody() {
     emitBasicBlockStart(MBB);
     DenseMap<StringRef, unsigned> MnemonicCounts;
 
-    SmallVector<unsigned> PrefetchTargets = MBB.getPrefetchTargetSubblockIndexes();
+    SmallVector<unsigned> PrefetchTargets =
+        MBB.getPrefetchTargetSubblockIndexes();
     auto PrefetchTargetIt = PrefetchTargets.begin();
     unsigned NumCalls = 0;
     // Helper to emit a symbol for the prefetch target and proceed to the next
     // one.
     auto EmitPrefetchTargetSymbolIfNeeded = [&]() {
-      if (PrefetchTargetIt == PrefetchTargets.end()) return;
-      if (NumCalls < *PrefetchTargetIt) return;
+      if (PrefetchTargetIt == PrefetchTargets.end())
+        return;
+      if (NumCalls < *PrefetchTargetIt)
+        return;
       MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
           Twine("__llvm_prefetch_target_") + MF->getName() + Twine("_") +
           utostr(MBB.getBBID()->BaseID) + Twine("_") +

>From 46275792dcf22f9d43b9dc9950aa9740bdda0a43 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 19:34:26 +0000
Subject: [PATCH 14/18] Add optimization remarks for when prefetch targets
 cannot be mapped.

---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp           | 8 ++++++++
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 6 +++---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp              | 4 ++--
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fb250c4b5308a..97234f3859ca7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,6 +2168,14 @@ void AsmPrinter::emitFunctionBody() {
     // If the block ends with a call, we may need to emit a prefetch target
     // at the end.
     EmitPrefetchTargetSymbolIfNeeded();
+    if (PrefetchTargetIt != PrefetchTargets.end()) {
+      MachineOptimizationRemarkMissed R(
+          "insert-code-prefetch", "MissingPrefetchTarget",
+          MF->getFunction().getSubprogram(), &MBB);
+      R << "failed to map "
+        << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
+        << " prefetch targets";
+    }
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5b12c85f7eeef..9319854f53289 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -93,7 +93,7 @@ uint64_t BasicBlockSectionsProfileReader::getEdgeCount(
   return EdgeIt->second;
 }
 
-SmallVector<BBPosition>
+SmallVector<SubblockID>
 BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return ProgramPathAndClusterInfo.lookup(getAliasName(FuncName))
@@ -332,7 +332,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
         return createProfileParseError(Twine("unsigned integer expected: '") +
                                        PrefetchTargetStr[1]);
       FI->second.PrefetchTargets.push_back(
-          BBPosition{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
+          SubblockID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)});
       continue;
     }
     default:
@@ -541,7 +541,7 @@ uint64_t BasicBlockSectionsProfileReaderWrapperPass::getEdgeCount(
   return BBSPR.getEdgeCount(FuncName, SrcBBID, SinkBBID);
 }
 
-SmallVector<BBPosition>
+SmallVector<SubblockID>
 BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
     StringRef FuncName) const {
   return BBSPR.getPrefetchTargetsForFunction(FuncName);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index e241ccbbee263..57037fd818479 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -70,12 +70,12 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     return false;
   // Set each block's prefetch targets so AsmPrinter can emit a special symbol
   // there.
-  SmallVector<BBPosition> PrefetchTargets =
+  SmallVector<SubblockID> PrefetchTargets =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
           .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
   for (const auto &Target : PrefetchTargets)
-    PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
+    PrefetchTargetsByBBID[Target.BBID].push_back(Target.SubblockIndex);
   for (auto &MBB : MF) {
     auto R = PrefetchTargetsByBBID.find(*MBB.getBBID());
     if (R == PrefetchTargetsByBBID.end())

>From 90be932f9d24e27f51fdb837ac78c422f0e6e190 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 13 Nov 2025 22:01:34 +0000
Subject: [PATCH 15/18] Expand test to weak symbols.

---
 .../X86/basic-block-sections-code-prefetch.ll        | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 3eb91dfdabd27..35e25952aa2f8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -7,6 +7,9 @@
 ; RUN: echo 't 1@0' >> %t
 ; RUN: echo 't 1@1' >> %t
 ; RUN: echo 't 2@1' >> %t
+; RUN: echo 'f _Z3barv' >> %t
+; RUN: echo 't 0@0' >> %t
+; RUN: echo 't 21@1' >> %t
 ;;
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 
@@ -47,5 +50,12 @@ define i32 @_Z3foob(i1 zeroext %0) nounwind {
   ret i32 %14
 }
 
-declare i32 @_Z3barv() #1
+define weak i32 @_Z3barv() nounwind {
+  %1 = call i32 @_Z3bazv()
+  ret i32 %1
+; CHECK:      _Z3barv:
+; CHECK-NEXT:   .weak __llvm_prefetch_target__Z3barv_0_0
+; CHECK-NEXT: __llvm_prefetch_target__Z3barv_0_0:
+}
+
 declare i32 @_Z3bazv() #1

>From 1ec1b83c0d0d3fe1cf258181b1fd8e570ff4cc1c Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 21:56:48 +0000
Subject: [PATCH 16/18] Change prefetch directive format to use , instead of @

---
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp |  7 ++++---
 .../X86/basic-block-sections-code-prefetch.ll        | 12 ++++++------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 9319854f53289..3a37982387f59 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -320,10 +320,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       // past-the-end element.
       if (FI == ProgramPathAndClusterInfo.end())
         continue;
-      assert(Values.size() == 1);
       SmallVector<StringRef, 2> PrefetchTargetStr;
-      Values[0].split(PrefetchTargetStr, '@');
-      assert(PrefetchTargetStr.size() == 2);
+      Values[0].split(PrefetchTargetStr, ',');
+      if (PrefetchTargetStr.size() != 2)
+        return createProfileParseError(
+            Twine("Prefetch target target expected: ") + Value);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 35e25952aa2f8..280bfef1c79b6 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -3,13 +3,13 @@
 ;; Specify the bb sections profile:
 ; RUN: echo 'v1' > %t
 ; RUN: echo 'f _Z3foob' >> %t
-; RUN: echo 't 0@0' >> %t
-; RUN: echo 't 1@0' >> %t
-; RUN: echo 't 1@1' >> %t
-; RUN: echo 't 2@1' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 1,0' >> %t
+; RUN: echo 't 1,1' >> %t
+; RUN: echo 't 2,1' >> %t
 ; RUN: echo 'f _Z3barv' >> %t
-; RUN: echo 't 0@0' >> %t
-; RUN: echo 't 21@1' >> %t
+; RUN: echo 't 0,0' >> %t
+; RUN: echo 't 21,1' >> %t
 ;;
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t  | FileCheck %s
 

>From 6be8fa7bb880932421a236407d57c92ee37f269b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 17 Nov 2025 22:05:27 +0000
Subject: [PATCH 17/18] Fix the error.

---
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 3a37982387f59..05b6c1c3917e5 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -324,7 +324,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       Values[0].split(PrefetchTargetStr, ',');
       if (PrefetchTargetStr.size() != 2)
         return createProfileParseError(
-            Twine("Prefetch target target expected: ") + Value);
+            Twine("Prefetch target target expected: ") + Values[0]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[0]);
       if (!TargetBBID)
         return TargetBBID.takeError();

>From b3202a17711982fa0c6a5d2a948e0bb124e533e9 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 19 Nov 2025 06:42:17 +0000
Subject: [PATCH 18/18] Remove optimization remarks.

---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 97234f3859ca7..fb250c4b5308a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2168,14 +2168,6 @@ void AsmPrinter::emitFunctionBody() {
     // If the block ends with a call, we may need to emit a prefetch target
     // at the end.
     EmitPrefetchTargetSymbolIfNeeded();
-    if (PrefetchTargetIt != PrefetchTargets.end()) {
-      MachineOptimizationRemarkMissed R(
-          "insert-code-prefetch", "MissingPrefetchTarget",
-          MF->getFunction().getSubprogram(), &MBB);
-      R << "failed to map "
-        << ore::NV("NumMissedTargets", PrefetchTargets.end() - PrefetchTargetIt)
-        << " prefetch targets";
-    }
 
     // We must emit temporary symbol for the end of this basic block, if either
     // we have BBLabels enabled or if this basic blocks marks the end of a



More information about the llvm-commits mailing list