[llvm] [Codegen, X86] Add prefetch insertion based on Propeller profile (PR #166324)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 1 00:46:46 PDT 2026


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324

>From 67c42daa8b5989ff608022e6c0cdf7f6cfdb29e2 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 2 Mar 2026 22:02:10 +0000
Subject: [PATCH 01/23] [CodeGen] Centralize prefetch target storage in
 MachineFunction

- Move prefetch target callsite indexes from MachineBasicBlock to a
  centralized DenseMap in MachineFunction, renamed to PrefetchTargets.
- Change PrefetchTargets type to DenseMap<UniqueBBID, SmallVector<unsigned>>
  to support both mapped and dangling targets.
- Refactor AsmPrinter to use member functions emitPrefetchTargetSymbol
  and emitDanglingPrefetchTargets for consistent emission.
- Update MIR serialization to use the new PrefetchTargets map and
  rename the YAML field to 'prefetch-targets'.
- Update the MIR format for bb_id to 'bb_id <base-id>, <clone-id>, <callsite-index>'
  for improved readability and to avoid ambiguity with floating point.
- Update related tests and add MIR parsing error test cases.
---
 llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index 023919ee003c9..a79faa068b09c 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,8 +239,10 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
 bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
                                StringRef Src, SMDiagnostic &Error);
 
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
-                         StringRef Src, SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
+                          CallsiteID &Target, StringRef Src,
+                          SMDiagnostic &Error);
+
 bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
                  SMDiagnostic &Error);
 

>From 60be74d601a12b31b1e7697cf5616772377e0291 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 3 Mar 2026 01:22:12 +0000
Subject: [PATCH 02/23] clang-format.

---
 llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index a79faa068b09c..011e0fa5d4cdb 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,9 +239,8 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
 bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
                                StringRef Src, SMDiagnostic &Error);
 
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
-                          CallsiteID &Target, StringRef Src,
-                          SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
+                         StringRef Src, SMDiagnostic &Error);
 
 bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
                  SMDiagnostic &Error);

>From 921de88cfb4142719b1ab1c74efaf3e3638da3eb Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 03/23] feat(AsmPrinter): Add support for inserting code
 prefetch instructions.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 30 +++++++
 .../include/llvm/CodeGen/InsertCodePrefetch.h | 25 ++++++
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |  2 +
 llvm/include/llvm/CodeGen/Passes.h            |  2 +
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |  8 ++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  1 +
 llvm/lib/CodeGen/BasicBlockSections.cpp       |  3 +-
 .../BasicBlockSectionsProfileReader.cpp       | 87 +++++++++++++++++--
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  1 +
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 68 ++++++++++++++-
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 20 +++++
 llvm/lib/Target/X86/X86InstrInfo.h            |  4 +
 .../X86/basic-block-sections-code-prefetch.ll |  7 +-
 13 files changed, 246 insertions(+), 12 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 830f99cd661bc..f852fc7216e97 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,10 +71,30 @@ struct CFGProfile {
   }
 };
 
+<<<<<<< HEAD
+=======
+// The prefetch symbol is emitted immediately after the call of the given index,
+// in block `BBID` (First call has an index of 1). Zero callsite index means the
+// start of the block.
+struct CallsiteID {
+  UniqueBBID BBID;
+  unsigned CallsiteIndex;
+};
+
+// This represents a prefetch hint to be injected at site `SiteID`, targetting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+  CallsiteID SiteID;
+  StringRef TargetFunction;
+  CallsiteID TargetID;
+};
+
+>>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
 // This struct represents the raw optimization profile for a function,
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
 struct FunctionOptimizationProfile {
+// This represents the raw input profile for one function.
   // BB Cluster information specified by `UniqueBBID`s.
   SmallVector<BBClusterInfo> ClusterInfo;
   // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
@@ -86,6 +106,9 @@ struct FunctionOptimizationProfile {
   // Code prefetch targets, specified by the callsite ID. The target is the code
   // immediately following this callsite.
   SmallVector<CallsiteID> PrefetchTargets;
+  // Code prefetch hints, specified by the injection site ID, the target
+  // function and the target site ID.
+  SmallVector<PrefetchHint> PrefetchHints;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -134,6 +157,10 @@ class BasicBlockSectionsProfileReader {
   SmallVector<CallsiteID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  // Returns the prefetch hints to be injected in function `FuncName`.
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -240,6 +267,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<CallsiteID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
   bool doInitialization(Module &M) override;
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
new file mode 100644
index 0000000000000..99241248862d3
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -0,0 +1,25 @@
+//===- InsertCodePrefetch.h - Utilities for code prefetch insertion -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
+#define LLVM_CODEGEN_INSERTCODEPREFETCH_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/UniqueBBID.h"
+
+namespace llvm {
+
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned SubblockIndex);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 0308cdc1e64b9..a7e072c273efb 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -218,6 +218,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 396c40d360eef..4ded3431e961d 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -73,6 +73,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 /// and inference when using propeller.
 LLVM_ABI MachineFunctionPass *createBasicBlockMatchingAndInferencePass();
 
+/// createInsertCodePrefetchPass - This pass enables inserting code prefetch
+/// hints based on the basic block section profile.
 LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
 
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ed968d7bd4593..d5b7bd5f928d2 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2370,6 +2370,14 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
     llvm_unreachable("unknown number of operands necessary");
   }
 
+  /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
+  /// targeting `GV`. Returns true if a prefetch instruction was inserted.
+  virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator InsertBefore,
+                                       const GlobalValue *GV) const {
+    return false;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 38514b05e0c97..718f492e8d34b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockHashInfo.h"
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 69e752e43a70d..901d2ef703176 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,7 +108,8 @@ class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
 
-  BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+  // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+  // nullptr;
 
   BasicBlockSections() : MachineFunctionPass(ID) {}
 
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 693a9bdf74e60..45942deacc05c 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -101,6 +101,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
                                                : SmallVector<CallsiteID>();
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  auto R = ProgramOptimizationProfile.find(getAliasName(FuncName));
+  return R != ProgramOptimizationProfile.end() ? R->second.PrefetchHints
+                                               : SmallVector<PrefetchHint>();
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -157,35 +165,53 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
 //                                ....
 // ****************************************************************************
 // This profile can also specify prefetch targets (starting with 't') which
-// instruct the compiler to emit a prefetch symbol for the given target.
+// instruct the compiler to emit a prefetch symbol for the given target and
+// prefetch hints (starting with 'i') which instruct the compiler to insert a
+// prefetch hint instruction at the given site for the given target.
+//
 // A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
 // bbid specifies the target basic block and subblock_index is a zero-based
-// index. Subblock 0 refers to the region at the beginning of the block up to
-// the first callsite. Subblock `i > 0` refers to the region immediately after
+// index. Callsite 0 refers to the region at the beginning of the block up to
+// the first callsite. Callsite `i > 0` refers to the region immediately after
 // the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
 // The prefetch target is always emitted at the beginning of the subblock.
 // This is the beginning of the basic block for `i = 0` and immediately after
 // the `i`-th call for every `i > 0`.
 //
+// A prefetch hint is specified by a pair "site target", where site is
+// specified as a pair "<bbid>,<callsite_index>" similar to prefetch
+// targets, and target is specified as a triple
+// "<function_name>,<bbid>,<callsite_index>".
+//
 // Example: A basic block in function "foo" with BBID 10 and two call
 // instructions (call_A, call_B). This block is conceptually split into
-// subblocks, with the prefetch target symbol emitted at the beginning of each
-// subblock.
+// subblocks, with the prefetch target symbol emitted at the beginning of
+// each subblock.
 //
 // +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
 // |  Instruction 1                   |
 // |  Instruction 2                   |
 // |  call_A (Callsite 0)             |
-// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
 // |                                  |                  before call_B)
 // |  Instruction 3                   |
 // |  call_B (Callsite 1)             |
-// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
 // |                                  |                  before call_C)
 // |  Instruction 4                   |
 // +----------------------------------+
 //
+// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results
+// in a hint inserted after the first call in block #120 of bar:
+// B
+// +----------------------------------------------------+
+// | Instruction 1                                      |
+// | call_C (Callsite 1)                                |
+// | code_prefetch __llvm_prefetch_target_foo_10_2      |
+// | Instruction 2                                      |
+// +----------------------------------------------------+
+//
 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
   auto FI = ProgramOptimizationProfile.end();
 
@@ -367,6 +393,45 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
           CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
       continue;
     }
+
+    case 'i': { // Prefetch hint specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramOptimizationProfile.end())
+        continue;
+      if (Values.size() != 2)
+        return createProfileParseError(Twine("Prefetch hint expected: " + S));
+      SmallVector<StringRef, 2> PrefetchSiteStr;
+      Values[0].split(PrefetchSiteStr, ',');
+      if (PrefetchSiteStr.size() != 2)
+        return createProfileParseError(Twine("Prefetch site expected: ") +
+                                       Values[0]);
+      auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+      if (!SiteBBID)
+        return SiteBBID.takeError();
+      unsigned long long SiteCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteCallsiteIndex))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchSiteStr[1]);
+
+      SmallVector<StringRef, 3> PrefetchTargetStr;
+      Values[1].split(PrefetchTargetStr, ',');
+      if (PrefetchTargetStr.size() != 3)
+        return createProfileParseError(
+            Twine("Prefetch target target expected: ") + Values[1]);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long TargetCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetCallsiteIndex))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[2]);
+      FI->second.PrefetchHints.push_back(PrefetchHint{
+          CallsiteID{*SiteBBID, static_cast<unsigned>(SiteCallsiteIndex)},
+          PrefetchTargetStr[0],
+          CallsiteID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)}});
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
@@ -585,6 +650,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
   return BBSPR.getPrefetchTargetsForFunction(FuncName);
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 56ee6e8b43304..f6914722b83a5 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/FloatingPointPredicateUtils.h"
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index cd220347257a8..87e2c776af19e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -14,9 +14,13 @@
 /// be the beginning of any dynamic basic block, that is the beginning of a
 /// machine basic block, or immediately after a callsite. A global symbol is
 /// emitted at the position of the target so it can be addressed from the
-/// prefetch instruction from any module.
+/// prefetch instruction from any module. In order to insert prefetch hints,
+/// `TargetInstrInfo::insertCodePrefetchInstr` must be implemented by the
+/// target.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/InsertCodePrefetch.h"
+
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -27,11 +31,26 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
 
+namespace llvm {
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned CallsiteIndex) {
+  SmallString<128> R("__llvm_prefetch_target_");
+  R += FunctionName;
+  R += "_";
+  R += utostr(BBID.BaseID);
+  R += "_";
+  R += utostr(CallsiteIndex);
+  return R;
+}
+} // namespace llvm
+
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {
 public:
@@ -81,7 +100,52 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     V.erase(llvm::unique(V), V.end());
   }
   MF.setPrefetchTargets(PrefetchTargetsByBBID);
-  return false;
+
+  SmallVector<PrefetchHint> PrefetchHints =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchHintsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
+  for (const auto &H : PrefetchHints)
+    PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
+  // Sort prefetch hints by their callsite index so we can insert them by one
+  // pass over the block's instructions.
+  for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) {
+    llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+      return H1.SiteID.CallsiteIndex < H2.SiteID.CallsiteIndex;
+    });
+  }
+  auto PtrTy =
+      PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  for (auto &BB : MF) {
+    auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
+    if (It == PrefetchHintsBySiteBBID.end())
+      continue;
+    const auto &PrefetchHints = It->second;
+    unsigned NumCallsInBB = 0;
+    auto InstrIt = BB.begin();
+    for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+      auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
+      // Insert all the prefetch hints which must be placed after this call (or
+      // at the beginning of the block if `NumCallsInBB` is zero).
+      while (HintIt != PrefetchHints.end() &&
+             NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+            getPrefetchTargetSymbolName(HintIt->TargetFunction,
+                                        HintIt->TargetID.BBID,
+                                        HintIt->TargetID.CallsiteIndex),
+            PtrTy);
+        TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        ++HintIt;
+      }
+      if (InstrIt == BB.end())
+        break;
+      if (InstrIt->isCall())
+        ++NumCallsInBB;
+      InstrIt = NextInstrIt;
+    }
+  }
+  return true;
 }
 
 void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c862d902b218a..694656b85504b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10798,5 +10798,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
+bool X86InstrInfo::insertCodePrefetchInstr(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+    const GlobalValue *GV) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
+      get(X86::PREFETCHIT1),
+      InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore)
+                                      : InsertBefore->getDebugLoc(),
+      true);
+  MachineInstrBuilder MIB(MF, PrefetchInstr);
+  MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV),
+                                            MachineMemOperand::MOLoad, /*s=*/8,
+                                            /*base_alignment=*/llvm::Align(1)));
+  MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+  MIB.addGlobalAddress(GV);
+  MIB.addReg(X86::NoRegister);
+  MBB.insert(InsertBefore, PrefetchInstr);
+  return true;
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index de8ccb44578a3..e0638d75ae295 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -766,6 +766,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// \returns the index of operand that is commuted with \p Idx1. If the method
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
+
+  bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator InsertBefore,
+                               const GlobalValue *GV) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index bef121aecf483..d8559c317c32e 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -9,9 +9,12 @@
 ; RUN: echo 't 2,1' >> %t
 ; RUN: echo 't 3,0' >> %t
 ; RUN: echo 't 3,1' >> %t
+; RUN: echo 'i 3,0 bar,0,0' >> %t
+; RUN: echo 'i 2,1 foo,1,0' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
+; RUN: echo 'i 0,1 foo,0,0' >> %t
 ; RUN: echo 'f qux' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 0,1' >> %t
@@ -39,6 +42,7 @@ cond.false:                                          ; preds = %1
 ; CHECK:        callq baz at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_2_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_2_1:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_foo_1_0(%rip)
 
 end:                                             ; preds = %11, %9
   ret void
@@ -47,6 +51,7 @@ end:                                             ; preds = %11, %9
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
 ; CHECK:        .globl	__llvm_prefetch_target_foo_3_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 }
 
 define weak i32 @bar() nounwind {
@@ -58,6 +63,7 @@ define weak i32 @bar() nounwind {
 ; CHECK-NEXT:   .weak __llvm_prefetch_target_bar_0_0
 ; CHECK-NEXT: __llvm_prefetch_target_bar_0_0:
 ; CHECK:        callq baz at PLT
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_foo_0_0(%rip)
 }
 
 define internal i32 @qux() nounwind {
@@ -72,4 +78,3 @@ define internal i32 @qux() nounwind {
 }
 
 declare i32 @baz()
-declare i32 @dummy()

>From 5997d4341ed25856bdf6c9f3edc5cc4f2a57532f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:08:05 +0000
Subject: [PATCH 04/23] Update tests.

---
 ...asic-block-sections-code-prefetch-call-terminates-block.ll | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
index bf98076770b5b..b3c8069d96f03 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
@@ -1,9 +1,10 @@
-;; Check prefetch directives properly handle a block terminating with a call.
+;; Check that prefetch directives properly handle prefetch targets and instructions after a call which terminates a block.
 ;;
 ;; Specify the bb sections profile:
 ; RUN: echo 'v1' > %t
 ; RUN: echo 'f foo' >> %t
 ; RUN: echo 't 0,1' >> %t
+; RUN: echo 'i 0,1 other,30,30' >> %t
 ;;
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O1 | FileCheck %s
 
@@ -15,6 +16,7 @@ entry:
 ; CHECK:        callq explode at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_0_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_0_1:
+; CHECK-NEXT:   prefetchit1 __llvm_prefetch_target_other_30_30(%rip)
 
 continue:
   ret i32 0

>From baec3e086e31ac998f74c7ada46eb2d722ce2127 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:13:04 +0000
Subject: [PATCH 05/23] clang-format.

---
 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index f852fc7216e97..18c4e94bb1b5b 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -94,7 +94,7 @@ struct PrefetchHint {
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
 struct FunctionOptimizationProfile {
-// This represents the raw input profile for one function.
+  // This represents the raw input profile for one function.
   // BB Cluster information specified by `UniqueBBID`s.
   SmallVector<BBClusterInfo> ClusterInfo;
   // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along

>From 95b801947043a7d6f6fef534f08ae4ab58db7745 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:28:28 +0000
Subject: [PATCH 06/23] Remove unwanted changes.

---
 llvm/lib/CodeGen/BasicBlockSections.cpp              |  7 +++----
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 10 +++++-----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 901d2ef703176..9972d431aae8b 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,10 +108,9 @@ class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
 
-  // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
-  // nullptr;
-
-  BasicBlockSections() : MachineFunctionPass(ID) {}
+  BasicBlockSections() : MachineFunctionPass(ID) {
+    initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
+  }
 
   StringRef getPassName() const override {
     return "Basic Block Sections Analysis";
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 45942deacc05c..0da1adc08f0a0 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -171,8 +171,8 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 //
 // A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
 // bbid specifies the target basic block and subblock_index is a zero-based
-// index. Callsite 0 refers to the region at the beginning of the block up to
-// the first callsite. Callsite `i > 0` refers to the region immediately after
+// index. Subblock 0 refers to the region at the beginning of the block up to
+// the first callsite. Subblock `i > 0` refers to the region immediately after
 // the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
 // The prefetch target is always emitted at the beginning of the subblock.
 // This is the beginning of the basic block for `i = 0` and immediately after
@@ -189,15 +189,15 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 // each subblock.
 //
 // +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <--- Subblock 0 (before call_A)
 // |  Instruction 1                   |
 // |  Instruction 2                   |
 // |  call_A (Callsite 0)             |
-// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
 // |                                  |                  before call_B)
 // |  Instruction 3                   |
 // |  call_B (Callsite 1)             |
-// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
 // |                                  |                  before call_C)
 // |  Instruction 4                   |
 // +----------------------------------+

>From 3ea2ffe3652f1f3a4a0ff05b5f3b36d96b2ee35a Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 22:50:41 +0000
Subject: [PATCH 07/23] Additional changes.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 19 -------------------
 llvm/include/llvm/Support/UniqueBBID.h        |  8 ++++++++
 .../X86/basic-block-sections-code-prefetch.ll |  2 +-
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 18c4e94bb1b5b..c4b2fd59793bd 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,25 +71,6 @@ struct CFGProfile {
   }
 };
 
-<<<<<<< HEAD
-=======
-// The prefetch symbol is emitted immediately after the call of the given index,
-// in block `BBID` (First call has an index of 1). Zero callsite index means the
-// start of the block.
-struct CallsiteID {
-  UniqueBBID BBID;
-  unsigned CallsiteIndex;
-};
-
-// This represents a prefetch hint to be injected at site `SiteID`, targetting
-// `TargetID` in function `TargetFunction`.
-struct PrefetchHint {
-  CallsiteID SiteID;
-  StringRef TargetFunction;
-  CallsiteID TargetID;
-};
-
->>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
 // This struct represents the raw optimization profile for a function,
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
diff --git a/llvm/include/llvm/Support/UniqueBBID.h b/llvm/include/llvm/Support/UniqueBBID.h
index 23a9467534318..bee4cf9158f45 100644
--- a/llvm/include/llvm/Support/UniqueBBID.h
+++ b/llvm/include/llvm/Support/UniqueBBID.h
@@ -33,6 +33,14 @@ struct CallsiteID {
   unsigned CallsiteIndex;
 };
 
+// This represents a prefetch hint to be injected at site `SiteID`, targeting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+  CallsiteID SiteID;
+  StringRef TargetFunction;
+  CallsiteID TargetID;
+};
+
 // Provides DenseMapInfo for UniqueBBID.
 template <> struct DenseMapInfo<UniqueBBID> {
   static inline UniqueBBID getEmptyKey() {
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index d8559c317c32e..b530846f02a4a 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -49,9 +49,9 @@ end:                                             ; preds = %11, %9
 ; CHECK:      .LBB0_3:
 ; CHECK-NEXT:   .globl	__llvm_prefetch_target_foo_3_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 ; CHECK:        .globl	__llvm_prefetch_target_foo_3_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
-; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 }
 
 define weak i32 @bar() nounwind {

>From ac6a49a4ee825109efbccd1906ec733a53ec5be0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:44 +0000
Subject: [PATCH 08/23] Split setPrefetchTargets and insertPrefetchHints into
 separate functions.

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 36 +++++++++++++++----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 87e2c776af19e..880c4bec8451d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -81,16 +81,10 @@ INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
 INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
                     true, false)
 
-bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
-  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
-         "BB Sections list not enabled!");
-  if (hasInstrProfHashMismatch(MF))
-    return false;
+static void setPrefetchTargets(MachineFunction &MF,
+                               const SmallVector<CallsiteID> &PrefetchTargets) {
   // Set each block's prefetch targets so AsmPrinter can emit a special symbol
   // there.
-  SmallVector<CallsiteID> PrefetchTargets =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
   for (const auto &Target : PrefetchTargets)
     PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
@@ -100,10 +94,11 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     V.erase(llvm::unique(V), V.end());
   }
   MF.setPrefetchTargets(PrefetchTargetsByBBID);
+}
 
-  SmallVector<PrefetchHint> PrefetchHints =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchHintsForFunction(MF.getName());
+static void
+insertPrefetchHints(MachineFunction &MF,
+                    const SmallVector<PrefetchHint> &PrefetchHints) {
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -121,14 +116,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
     if (It == PrefetchHintsBySiteBBID.end())
       continue;
-    const auto &PrefetchHints = It->second;
+    const auto &BBHints = It->second;
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
-    for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+    for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       // Insert all the prefetch hints which must be placed after this call (or
       // at the beginning of the block if `NumCallsInBB` is zero.
-      while (HintIt != PrefetchHints.end() &&
+      while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
         auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
             getPrefetchTargetSymbolName(HintIt->TargetFunction,
@@ -145,6 +140,19 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
       InstrIt = NextInstrIt;
     }
   }
+}
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+
+  auto &ProfileReader =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
+  setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+  insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+
   return true;
 }
 

>From bca8548cd9db59d9ef85c2fc2b66d5f7d54ceb1e Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:57 +0000
Subject: [PATCH 09/23] clang-format.

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 880c4bec8451d..6f6d6efdc49c3 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -150,8 +150,10 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
 
   auto &ProfileReader =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
-  setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
-  insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+  setPrefetchTargets(MF,
+                     ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+  insertPrefetchHints(MF,
+                      ProfileReader.getPrefetchHintsForFunction(MF.getName()));
 
   return true;
 }

>From 844c7e12b83fe5da730a0f68e74359d8da3209cc Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:07 +0000
Subject: [PATCH 10/23] Implement fallback support.

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  2 +-
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |  4 ++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 17 +++++++------
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 24 +++++++++++++++----
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  4 ++--
 llvm/lib/Target/X86/X86InstrInfo.h            |  2 +-
 .../X86/basic-block-sections-code-prefetch.ll |  4 ++++
 7 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index b160de2f060d6..daffdc5f41bd5 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -491,7 +491,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// Helper to emit a symbol for the prefetch target associated with the given
   /// BBID and callsite index. The symbol is emitted as a label and its linkage
   /// is set based on the function's linkage.
-  void emitPrefetchTargetSymbol(unsigned BaseID, unsigned CallsiteIndex);
+  void emitPrefetchTargetSymbol(const UniqueBBID &BBID, unsigned CallsiteIndex);
 
   /// Emit prefetch targets that were not mapped to any basic block. These
   /// targets are emitted at the beginning of the function body.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d5b7bd5f928d2..c2d879a177a00 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,10 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
 
   /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
   /// targetting `GV`.
-  virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+  virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator InsertBefore,
                                        const GlobalValue *GV) const {
-    return false;
+    llvm_unreachable("target did not implement");
   }
 
 private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 718f492e8d34b..64d49b4713eb4 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2020,13 +2020,12 @@ void AsmPrinter::handleCallsiteForCallgraph(
 
 /// Helper to emit a symbol for the prefetch target associated with the given
 /// BBID and callsite index.
-void AsmPrinter::emitPrefetchTargetSymbol(unsigned BaseID,
+void AsmPrinter::emitPrefetchTargetSymbol(const UniqueBBID &BBID,
                                           unsigned CallsiteIndex) {
   SmallString<128> FunctionName;
   getNameWithPrefix(FunctionName, &MF->getFunction());
   MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-      "__llvm_prefetch_target_" + FunctionName + "_" + Twine(BaseID) + "_" +
-      Twine(CallsiteIndex));
+      getPrefetchTargetSymbolName(FunctionName, BBID, CallsiteIndex));
   // If the function is weak-linkage it may be replaced by a strong
   // version, in which case the prefetch targets should also be replaced.
   OutStreamer->emitSymbolAttribute(
@@ -2050,7 +2049,7 @@ void AsmPrinter::emitDanglingPrefetchTargets() {
     if (MFBBIDs.contains(BBID))
       continue;
     for (unsigned CallsiteIndex : CallsiteIndexes)
-      emitPrefetchTargetSymbol(BBID.BaseID, CallsiteIndex);
+      emitPrefetchTargetSymbol(BBID, CallsiteIndex);
   }
 }
 
@@ -2125,7 +2124,7 @@ void AsmPrinter::emitFunctionBody() {
     for (auto &MI : MBB) {
       if (PrefetchTargetIt != PrefetchTargetEnd &&
           *PrefetchTargetIt == LastCallsiteIndex) {
-        emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+        emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
         ++PrefetchTargetIt;
       }
 
@@ -2276,8 +2275,12 @@ void AsmPrinter::emitFunctionBody() {
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
 
       // If there is a post-instruction symbol, emit a label for it here.
-      if (MCSymbol *S = MI.getPostInstrSymbol())
+      if (MCSymbol *S = MI.getPostInstrSymbol()) {
+        if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+          if (ESym->isWeakref())
+            OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
         OutStreamer->emitLabel(S);
+      }
 
       for (auto &Handler : Handlers)
         Handler->endInstruction();
@@ -2285,7 +2288,7 @@ void AsmPrinter::emitFunctionBody() {
     // Emit the remaining prefetch targets for this block. This includes
     // nonexisting callsite indexes.
     while (PrefetchTargetIt != PrefetchTargetEnd) {
-      emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+      emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
       ++PrefetchTargetIt;
     }
 
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 6f6d6efdc49c3..9bc4980424474 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCSymbolELF.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
@@ -99,6 +100,8 @@ static void setPrefetchTargets(MachineFunction &MF,
 static void
 insertPrefetchHints(MachineFunction &MF,
                     const SmallVector<PrefetchHint> &PrefetchHints) {
+  bool IsELF = MF.getTarget().getTargetTriple().isOSBinFormatELF();
+  const Module *M = MF.getFunction().getParent();
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -120,17 +123,30 @@ insertPrefetchHints(MachineFunction &MF,
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
     for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
+      bool TargetFunctionDefined = false;
+      if (Function *TargetFunction = M->getFunction(HintIt->TargetFunction))
+        TargetFunctionDefined = !TargetFunction->isDeclaration();
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       // Insert all the prefetch hints which must be placed after this call (or
       // at the beginning of the block if `NumCallsInBB` is zero.
       while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
-        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
-            getPrefetchTargetSymbolName(HintIt->TargetFunction,
+        auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
                                         HintIt->TargetID.BBID,
-                                        HintIt->TargetID.CallsiteIndex),
+                                        HintIt->TargetID.CallsiteIndex);
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
             PtrTy);
-        TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        if (!TargetFunctionDefined && IsELF) {
+          // If the target function is not defined in this module, we guard
+          // against undefined prefetch target symbol by emitting a fallback
+          // symbol with weak linkage right after the prefetch instruction. If
+          // there is no strong symbol, the fallback will be used and we
+          // prefetch the next address.
+          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+          WeakFallbackSym->setIsWeakref();
+          PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
+        }
         ++HintIt;
       }
       if (InstrIt == BB.end())
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 694656b85504b..ddc8e6783f410 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10798,7 +10798,7 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
-bool X86InstrInfo::insertCodePrefetchInstr(
+MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     const GlobalValue *GV) const {
   MachineFunction &MF = *MBB.getParent();
@@ -10815,7 +10815,7 @@ bool X86InstrInfo::insertCodePrefetchInstr(
   MIB.addGlobalAddress(GV);
   MIB.addReg(X86::NoRegister);
   MBB.insert(InsertBefore, PrefetchInstr);
-  return true;
+  return PrefetchInstr;
 }
 
 #define GET_INSTRINFO_HELPERS
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e0638d75ae295..b416e43250eb5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,7 +767,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
 
-  bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+  MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator InsertBefore,
                                const GlobalValue *GV) const override;
 };
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index b530846f02a4a..50b5fbf50dec7 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,6 +11,7 @@
 ; RUN: echo 't 3,1' >> %t
 ; RUN: echo 'i 3,0 bar,0,0' >> %t
 ; RUN: echo 'i 2,1 foo,1,0' >> %t
+; RUN: echo 'i 1,1 undef,5,5' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
@@ -33,8 +34,11 @@ cond.true:                                           ; preds = %1
 ; CHECK:        .globl __llvm_prefetch_target_foo_1_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
 ; CHECK-NEXT:   callq bar at PLT
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
+; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
 
 cond.false:                                          ; preds = %1
   call i32 @baz()

>From 1ea25c456f1ad142920b1a3d0290fe8098b85d20 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:21 +0000
Subject: [PATCH 11/23] clang-format.

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h |  7 ++++---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  |  2 +-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp     | 16 +++++++++-------
 llvm/lib/Target/X86/X86InstrInfo.cpp        |  7 ++++---
 llvm/lib/Target/X86/X86InstrInfo.h          |  7 ++++---
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index c2d879a177a00..0d2c931ac5e55 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,9 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
 
   /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
   /// targetting `GV`.
-  virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator InsertBefore,
-                                       const GlobalValue *GV) const {
+  virtual MachineInstr *
+  insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator InsertBefore,
+                          const GlobalValue *GV) const {
     llvm_unreachable("target did not implement");
   }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 64d49b4713eb4..00de0a4ca3f48 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2276,7 +2276,7 @@ void AsmPrinter::emitFunctionBody() {
 
       // If there is a post-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPostInstrSymbol()) {
-        if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+        if (MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S))
           if (ESym->isWeakref())
             OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
         OutStreamer->emitLabel(S);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 9bc4980424474..496a672c734cb 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -131,19 +131,21 @@ insertPrefetchHints(MachineFunction &MF,
       // at the beginning of the block if `NumCallsInBB` is zero.
       while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
-        auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
-                                        HintIt->TargetID.BBID,
-                                        HintIt->TargetID.CallsiteIndex);
-        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
-            PtrTy);
-        MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        auto TargetSymbolName = getPrefetchTargetSymbolName(
+            HintIt->TargetFunction, HintIt->TargetID.BBID,
+            HintIt->TargetID.CallsiteIndex);
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+            TargetSymbolName, PtrTy);
+        MachineInstr *PrefetchInstr =
+            TII->insertCodePrefetchInstr(BB, InstrIt, GV);
         if (!TargetFunctionDefined && IsELF) {
           // If the target function is not defined in this module, we guard
           // against undefined prefetch target symbol by emitting a fallback
           // symbol with weak linkage right after the prefetch instruction. If
           // there is no strong symbol, the fallback will be used and we
           // prefetch the next address.
-          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(
+              MF.getContext().getOrCreateSymbol(TargetSymbolName));
           WeakFallbackSym->setIsWeakref();
           PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
         }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ddc8e6783f410..1a48bfa8629b3 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10798,9 +10798,10 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
-MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
-    const GlobalValue *GV) const {
+MachineInstr *
+X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator InsertBefore,
+                                      const GlobalValue *GV) const {
   MachineFunction &MF = *MBB.getParent();
   MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
       get(X86::PREFETCHIT1),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b416e43250eb5..9b77ff0c62237 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,9 +767,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
 
-  MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator InsertBefore,
-                               const GlobalValue *GV) const override;
+  MachineInstr *
+  insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator InsertBefore,
+                          const GlobalValue *GV) const override;
 };
 } // namespace llvm
 

>From d29b1770511d9bb21d82b2258c3b00dbb68ee9e8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:58:35 +0000
Subject: [PATCH 12/23] fix the prefetch test.

---
 llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 50b5fbf50dec7..991e3d37fc366 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -34,9 +34,9 @@ cond.true:                                           ; preds = %1
 ; CHECK:        .globl __llvm_prefetch_target_foo_1_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
 ; CHECK-NEXT:   callq bar at PLT
-; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
 ; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
 

>From 7710be5610c9a086178186d1892d6c93514853ef Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 7 Mar 2026 01:15:30 +0000
Subject: [PATCH 13/23] Change undef to extfunc.

---
 .../CodeGen/X86/basic-block-sections-code-prefetch.ll     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 991e3d37fc366..acf6c588fafe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,7 +11,7 @@
 ; RUN: echo 't 3,1' >> %t
 ; RUN: echo 'i 3,0 bar,0,0' >> %t
 ; RUN: echo 'i 2,1 foo,1,0' >> %t
-; RUN: echo 'i 1,1 undef,5,5' >> %t
+; RUN: echo 'i 1,1 extfunc,5,5' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
@@ -36,9 +36,9 @@ cond.true:                                           ; preds = %1
 ; CHECK-NEXT:   callq bar at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
-; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
-; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
-; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_extfunc_5_5(%rip)
+; CHECK-NEXT:   .weak __llvm_prefetch_target_extfunc_5_5
+; CHECK-NEXT: __llvm_prefetch_target_extfunc_5_5:
 
 cond.false:                                          ; preds = %1
   call i32 @baz()

>From c82fd58a5ad3621caf44e6febf3d95823173e7b8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 9 Mar 2026 20:47:08 +0000
Subject: [PATCH 14/23] Add include for MCContext.h

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 496a672c734cb..0b230fec3eff0 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbolELF.h"
 
 using namespace llvm;

>From 6ba32b3ff72138f32eb627c591c85ff4c32bd5ed Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 10 Mar 2026 17:42:42 +0000
Subject: [PATCH 15/23] nits.

---
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 3 ++-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp              | 6 +++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 0da1adc08f0a0..752948a19dbb5 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -203,7 +203,8 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 // +----------------------------------+
 //
 // A prefetch hint specified in function "bar" as "120,1 foo,10,2" results
-// in a a hint inserted after the first call in block #120 of bar:
+// in a hint inserted after the first call in block #120 of bar:
+//
 // B
 // +----------------------------------------------------+
 // | Instruction 1                                      |
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 0b230fec3eff0..75b4bfd35c35f 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -144,7 +144,11 @@ insertPrefetchHints(MachineFunction &MF,
           // against undefined prefetch target symbol by emitting a fallback
           // symbol with weak linkage right after the prefetch instruction. If
           // there is no strong symbol, the fallback will be used and we
-          // prefetch the next address.
+          // prefetch the next address:
+          //
+          // prefetchit1 __llvm_prefetch_target_foo_x_y(%rip)
+          // .weak __llvm_prefetch_target_foo_x_y
+          // __llvm_prefetch_target_foo_x_y:
           MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(
               MF.getContext().getOrCreateSymbol(TargetSymbolName));
           WeakFallbackSym->setIsWeakref();

>From 587595e0ec6bf80d3ce6fc1e9ab167b0eada6398 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 10 Mar 2026 22:25:23 +0000
Subject: [PATCH 16/23] Address comments.

---
 llvm/include/llvm/CodeGen/InsertCodePrefetch.h       | 6 ++++--
 llvm/include/llvm/CodeGen/MIRParser/MIParser.h       | 1 -
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 8 ++++----
 llvm/lib/CodeGen/CodeGenPrepare.cpp                  | 1 -
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
index 99241248862d3..6bb5b6e55947d 100644
--- a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -1,4 +1,4 @@
-//===- BasicBlockSectionUtils.h - Utilities for basic block sections     --===//
+//===- InsertCodePrefetch.h - Common utilities for code prefetching      --===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -16,9 +16,11 @@
 
 namespace llvm {
 
+// Returns the symbol name for a prefetch target at function `FunctionName`,
+// basic block `BBID` and callsite index `CallsiteIndex`.
 SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
                                              const UniqueBBID &BBID,
-                                             unsigned SubblockIndex);
+                                             unsigned CallsiteIndex);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index 011e0fa5d4cdb..023919ee003c9 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -241,7 +241,6 @@ bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
 
 bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
                          StringRef Src, SMDiagnostic &Error);
-
 bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
                  SMDiagnostic &Error);
 
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 752948a19dbb5..a5006318af1db 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -104,8 +104,8 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
 SmallVector<PrefetchHint>
 BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
     StringRef FuncName) const {
-  auto R = ProgramOptimizationProfile.find(getAliasName(FuncName));
-  return R != ProgramOptimizationProfile.end() ? R->second.PrefetchHints
+  StringMap<FunctionOptimizationProfile>::const_iterator It = ProgramOptimizationProfile.find(getAliasName(FuncName));
+  return It != ProgramOptimizationProfile.end() ? It->second.PrefetchHints
                                                : SmallVector<PrefetchHint>();
 }
 
@@ -178,7 +178,7 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 // This is the beginning of the basic block for `i = 0` and immediately after
 // the `i`-th call for every `i > 0`.
 //
-// A prefetch int is specified by a pair "site target", where site is
+// A prefetch hint is specified by a pair "site target", where site is
 // specified as a pair "<bbid>,<callsite_index>" similar to prefetch
 // targets, and target is specified as a triple
 // "<function_name>,<bbid>,<callsite_index>".
@@ -396,7 +396,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
     }
 
     case 'i': { // Prefetch hint specifier.
-      // Skip the profile when we the profile iterator (FI) refers to the
+      // Skip the profile when the profile iterator (FI) refers to the
       // past-the-end element.
       if (FI == ProgramOptimizationProfile.end())
         continue;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index f6914722b83a5..56ee6e8b43304 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,7 +22,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/FloatingPointPredicateUtils.h"

>From ffbc5f90d0355199a98d3f1f1e44beade77d9d93 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 10 Mar 2026 22:25:37 +0000
Subject: [PATCH 17/23] clang-format.

---
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index a5006318af1db..81de52a30580b 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -104,9 +104,10 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
 SmallVector<PrefetchHint>
 BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
     StringRef FuncName) const {
-  StringMap<FunctionOptimizationProfile>::const_iterator It = ProgramOptimizationProfile.find(getAliasName(FuncName));
+  StringMap<FunctionOptimizationProfile>::const_iterator It =
+      ProgramOptimizationProfile.find(getAliasName(FuncName));
   return It != ProgramOptimizationProfile.end() ? It->second.PrefetchHints
-                                               : SmallVector<PrefetchHint>();
+                                                : SmallVector<PrefetchHint>();
 }
 
 // Reads the version 1 basic block sections profile. Profile for each function

>From f3616630863c4c54a6dd8717d2293174c55735e8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 18 Mar 2026 19:56:36 +0000
Subject: [PATCH 18/23] Address comments.

---
 llvm/include/llvm/Support/UniqueBBID.h               |  2 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp           | 10 ++++++----
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 11 ++++++-----
 llvm/lib/CodeGen/InsertCodePrefetch.cpp              |  3 ++-
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/include/llvm/Support/UniqueBBID.h b/llvm/include/llvm/Support/UniqueBBID.h
index bee4cf9158f45..87c89ff7eb0cc 100644
--- a/llvm/include/llvm/Support/UniqueBBID.h
+++ b/llvm/include/llvm/Support/UniqueBBID.h
@@ -33,7 +33,7 @@ struct CallsiteID {
   unsigned CallsiteIndex;
 };
 
-// This represents a prefetch hint to be injected at site `SiteID`, targetting
+// This represents a prefetch hint to be injected at site `SiteID`, targeting
 // `TargetID` in function `TargetFunction`.
 struct PrefetchHint {
   CallsiteID SiteID;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 00de0a4ca3f48..7c3efa64ecbb0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2275,11 +2275,13 @@ void AsmPrinter::emitFunctionBody() {
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
 
       // If there is a post-instruction symbol, emit a label for it here.
-      if (MCSymbol *S = MI.getPostInstrSymbol()) {
-        if (MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S))
-          if (ESym->isWeakref())
+      if (TM.getTargetTriple().isOSBinFormatELF()) {
+        if (MCSymbol *S = MI.getPostInstrSymbol()) {
+          MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S);
+          if (ESym->getBinding() == ELF::STB_WEAK)
             OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
-        OutStreamer->emitLabel(S);
+          OutStreamer->emitLabel(S);
+        }
       }
 
       for (auto &Handler : Handlers)
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 81de52a30580b..39b93331314a9 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -204,13 +204,14 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 // +----------------------------------+
 //
 // A prefetch hint specified in function "bar" as "120,1 foo,10,2" results
-// in a hint inserted after the first call in block #120 of bar:
+// in a hint inserted after the first call in block #120 of bar targeting the
+// address immediately after the second call in block #10 of function foo.
 //
 // B
 // +----------------------------------------------------+
 // | Instruction 1                                      |
 // | call_C (Callsite 1)                                |
-// | code_prefetch __llvm_prfetch_target_foo_10         |
+// | code_prefetch __llvm_prefetch_target_foo_10_2       |
 // | Instruction 2                                      |
 // +----------------------------------------------------+
 //
@@ -402,11 +403,11 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (FI == ProgramOptimizationProfile.end())
         continue;
       if (Values.size() != 2)
-        return createProfileParseError(Twine("Prefetch hint expected: " + S));
+        return createProfileParseError(Twine("Prefetch hint expected of format '<prefetch-site> <prefetch-target>': " + S));
       SmallVector<StringRef, 2> PrefetchSiteStr;
       Values[0].split(PrefetchSiteStr, ',');
       if (PrefetchSiteStr.size() != 2)
-        return createProfileParseError(Twine("Prefetch site expected: ") +
+        return createProfileParseError(Twine("Prefetch site expected of format '<block-id>,<callsite-id>': ") +
                                        Values[0]);
       auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
       if (!SiteBBID)
@@ -420,7 +421,7 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       Values[1].split(PrefetchTargetStr, ',');
       if (PrefetchTargetStr.size() != 3)
         return createProfileParseError(
-            Twine("Prefetch target target expected: ") + Values[1]);
+            Twine("Prefetch target expected of format '<function-name>,<block-id>,<callsite-id>': ") + Values[1]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
       if (!TargetBBID)
         return TargetBBID.takeError();
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 75b4bfd35c35f..c49ccd65ba1d7 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -35,6 +35,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Object/ELFTypes.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
@@ -151,7 +152,7 @@ insertPrefetchHints(MachineFunction &MF,
           // __llvm_prefetch_target_foo_x_y:
           MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(
               MF.getContext().getOrCreateSymbol(TargetSymbolName));
-          WeakFallbackSym->setIsWeakref();
+          WeakFallbackSym->setBinding(ELF::STB_WEAK);
           PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
         }
         ++HintIt;

>From 4a04d20d8a791040f53d231c5c241d91a4bc7669 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 18 Mar 2026 19:56:46 +0000
Subject: [PATCH 19/23] clang-format.

---
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 39b93331314a9..e5935c8d2ea8f 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -403,11 +403,15 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       if (FI == ProgramOptimizationProfile.end())
         continue;
       if (Values.size() != 2)
-        return createProfileParseError(Twine("Prefetch hint expected of format '<prefetch-site> <prefetch-target>': " + S));
+        return createProfileParseError(
+            Twine("Prefetch hint expected of format '<prefetch-site> "
+                  "<prefetch-target>': " +
+                  S));
       SmallVector<StringRef, 2> PrefetchSiteStr;
       Values[0].split(PrefetchSiteStr, ',');
       if (PrefetchSiteStr.size() != 2)
-        return createProfileParseError(Twine("Prefetch site expected of format '<block-id>,<callsite-id>': ") +
+        return createProfileParseError(Twine("Prefetch site expected of format "
+                                             "'<block-id>,<callsite-id>': ") +
                                        Values[0]);
       auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
       if (!SiteBBID)
@@ -421,7 +425,9 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
       Values[1].split(PrefetchTargetStr, ',');
       if (PrefetchTargetStr.size() != 3)
         return createProfileParseError(
-            Twine("Prefetch target expected of format '<function-name>,<block-id>,<callsite-id>': ") + Values[1]);
+            Twine("Prefetch target expected of format "
+                  "'<function-name>,<block-id>,<callsite-id>': ") +
+            Values[1]);
       auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
       if (!TargetBBID)
         return TargetBBID.takeError();

>From a509ffe04ce48ccaabfd90967c367f11ab343d56 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <r.lavaee at gmail.com>
Date: Tue, 24 Mar 2026 08:46:24 -0700
Subject: [PATCH 20/23] Fix post-instruction symbol handling

The nesting order of the if statements had been reversed, which was incorrect:
the post-instruction label was emitted only for ELF targets.
---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7c3efa64ecbb0..2612e61cf4b8d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2275,13 +2275,14 @@ void AsmPrinter::emitFunctionBody() {
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
 
       // If there is a post-instruction symbol, emit a label for it here.
-      if (TM.getTargetTriple().isOSBinFormatELF()) {
-        if (MCSymbol *S = MI.getPostInstrSymbol()) {
+      if (MCSymbol *S = MI.getPostInstrSymbol()) {
+        // The weak symbol attribute is used for the prefetch target fallback.
+        if (TM.getTargetTriple().isOSBinFormatELF()) {
           MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S);
           if (ESym->getBinding() == ELF::STB_WEAK)
             OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
-          OutStreamer->emitLabel(S);
         }
+        OutStreamer->emitLabel(S);
       }
 
       for (auto &Handler : Handlers)

>From 5773027e8baf23cb517fb7d3a6204fab440b0f0b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <r.lavaee at gmail.com>
Date: Wed, 25 Mar 2026 11:17:12 -0700
Subject: [PATCH 21/23] Use llvm:: for the getPrefetchTargetSymbolName function
 definition

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index c49ccd65ba1d7..53e0aaf5d1089 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -40,10 +40,9 @@
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
 
-namespace llvm {
-SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
-                                             const UniqueBBID &BBID,
-                                             unsigned CallsiteIndex) {
+SmallString<128> llvm::getPrefetchTargetSymbolName(StringRef FunctionName,
+                                                   const UniqueBBID &BBID,
+                                                   unsigned CallsiteIndex) {
   SmallString<128> R("__llvm_prefetch_target_");
   R += FunctionName;
   R += "_";
@@ -52,7 +51,6 @@ SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
   R += utostr(CallsiteIndex);
   return R;
 }
-} // namespace llvm
 
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {

>From f88f246afc0b33383fe89983b41c2b0e119f8343 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 30 Mar 2026 18:26:10 +0000
Subject: [PATCH 22/23] Add new header style and set runOnMachineFunction's
 return value.

---
 .../include/llvm/CodeGen/InsertCodePrefetch.h |  7 ++++++-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 21 ++++++++++++-------
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
index 6bb5b6e55947d..a04bc75e87f5a 100644
--- a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -1,10 +1,15 @@
-//===- InsertCodePrefetch.h - Common utilities for code prefetching      --===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains common utilities for code prefetch insertion.
+///
+//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
 #define LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 53e0aaf5d1089..badfdf66ada9e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -82,8 +82,10 @@ INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
 INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
                     true, false)
 
-static void setPrefetchTargets(MachineFunction &MF,
+static bool setPrefetchTargets(MachineFunction &MF,
                                const SmallVector<CallsiteID> &PrefetchTargets) {
+  if (PrefetchTargets.empty())
+    return false;
   // Set each block's prefetch targets so AsmPrinter can emit a special symbol
   // there.
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
@@ -95,11 +97,13 @@ static void setPrefetchTargets(MachineFunction &MF,
     V.erase(llvm::unique(V), V.end());
   }
   MF.setPrefetchTargets(PrefetchTargetsByBBID);
+  return true;
 }
 
-static void
+static bool
 insertPrefetchHints(MachineFunction &MF,
                     const SmallVector<PrefetchHint> &PrefetchHints) {
+  bool PrefetchInserted = false;
   bool IsELF = MF.getTarget().getTargetTriple().isOSBinFormatELF();
   const Module *M = MF.getFunction().getParent();
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
@@ -153,6 +157,7 @@ insertPrefetchHints(MachineFunction &MF,
           WeakFallbackSym->setBinding(ELF::STB_WEAK);
           PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
         }
+        PrefetchInserted = true;
         ++HintIt;
       }
       if (InstrIt == BB.end())
@@ -162,6 +167,7 @@ insertPrefetchHints(MachineFunction &MF,
       InstrIt = NextInstrIt;
     }
   }
+  return PrefetchInserted;
 }
 
 bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
@@ -172,12 +178,11 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
 
   auto &ProfileReader =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
-  setPrefetchTargets(MF,
-                     ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
-  insertPrefetchHints(MF,
-                      ProfileReader.getPrefetchHintsForFunction(MF.getName()));
-
-  return true;
+  bool R = setPrefetchTargets(
+      MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+  bool S = insertPrefetchHints(
+      MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+  return R || S;
 }
 
 void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {

>From b95027daddc5716d747821885c9fd3b36eb80dd8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 1 Apr 2026 07:39:31 +0000
Subject: [PATCH 23/23] Fix the defined function target check.

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp                  | 9 +++++----
 .../CodeGen/X86/basic-block-sections-code-prefetch.ll    | 2 ++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index badfdf66ada9e..7e06db3974f69 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -127,14 +127,15 @@ insertPrefetchHints(MachineFunction &MF,
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
     for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
-      bool TargetFunctionDefined = false;
-      if (Function *TargetFunction = M->getFunction(HintIt->TargetFunction))
-        TargetFunctionDefined = !TargetFunction->isDeclaration();
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       // Insert all the prefetch hints which must be placed after this call (or
       // at the beginning of the block if `NumCallsInBB` is zero.
       while (HintIt != BBHints.end() &&
-             NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
+             HintIt->SiteID.CallsiteIndex == NumCallsInBB) {
+        bool TargetFunctionDefined = false;
+        if (Function *TargetFunction = M->getFunction(HintIt->TargetFunction))
+          TargetFunctionDefined = !TargetFunction->isDeclaration();
+
         auto TargetSymbolName = getPrefetchTargetSymbolName(
             HintIt->TargetFunction, HintIt->TargetID.BBID,
             HintIt->TargetID.CallsiteIndex);
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index acf6c588fafe8..dec7557181090 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -12,6 +12,7 @@
 ; RUN: echo 'i 3,0 bar,0,0' >> %t
 ; RUN: echo 'i 2,1 foo,1,0' >> %t
 ; RUN: echo 'i 1,1 extfunc,5,5' >> %t
+; RUN: echo 'i 1,1 bar,0,0' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
@@ -39,6 +40,7 @@ cond.true:                                           ; preds = %1
 ; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_extfunc_5_5(%rip)
 ; CHECK-NEXT:   .weak __llvm_prefetch_target_extfunc_5_5
 ; CHECK-NEXT: __llvm_prefetch_target_extfunc_5_5:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 
 cond.false:                                          ; preds = %1
   call i32 @baz()



More information about the llvm-commits mailing list