[llvm] X86: Add prefetch insertion based on Propeller profile (PR #166324)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 9 12:47:44 PDT 2026


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/166324

>From 468b6b835936f5b056364b1020d38af4252387e8 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 2 Mar 2026 22:02:10 +0000
Subject: [PATCH 01/13] [CodeGen] Centralize prefetch target storage in
 MachineFunction

- Move prefetch target callsite indexes from MachineBasicBlock to a
  centralized DenseMap in MachineFunction, renamed to PrefetchTargets.
- Change PrefetchTargets type to DenseMap<UniqueBBID, SmallVector<unsigned>>
  to support both mapped and dangling targets.
- Refactor AsmPrinter to use member functions emitPrefetchTargetSymbol
  and emitDanglingPrefetchTargets for consistent emission.
- Update MIR serialization to use the new PrefetchTargets map and
  rename the YAML field to 'prefetch-targets'.
- Update the MIR format for bb_id to 'bb_id <base-id>, <clone-id>, <callsite-index>'
  for improved readability and to avoid ambiguity with floating point.
- Update related tests and add MIR parsing error test cases.
---
 llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index 023919ee003c9..a79faa068b09c 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,8 +239,10 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
 bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
                                StringRef Src, SMDiagnostic &Error);
 
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
-                         StringRef Src, SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
+                          CallsiteID &Target, StringRef Src,
+                          SMDiagnostic &Error);
+
 bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
                  SMDiagnostic &Error);
 

>From 857077936af08b81bf29ee56cf9fb0ad63b6b2d5 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 3 Mar 2026 01:22:12 +0000
Subject: [PATCH 02/13] clang-format.

---
 llvm/include/llvm/CodeGen/MIRParser/MIParser.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
index a79faa068b09c..011e0fa5d4cdb 100644
--- a/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
+++ b/llvm/include/llvm/CodeGen/MIRParser/MIParser.h
@@ -239,9 +239,8 @@ bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
 bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
                                StringRef Src, SMDiagnostic &Error);
 
-bool parsePrefetchTarget(PerFunctionMIParsingState &PFS,
-                          CallsiteID &Target, StringRef Src,
-                          SMDiagnostic &Error);
+bool parsePrefetchTarget(PerFunctionMIParsingState &PFS, CallsiteID &Target,
+                         StringRef Src, SMDiagnostic &Error);
 
 bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
                  SMDiagnostic &Error);

>From e3bec371a2da1fc397b796b7699225561ea3a85f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 8 Nov 2025 19:54:21 +0000
Subject: [PATCH 03/13] feat(AsmPrinter): Add support for inserting code
 prefetch instructions.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 30 +++++++
 .../include/llvm/CodeGen/InsertCodePrefetch.h | 25 ++++++
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |  2 +
 llvm/include/llvm/CodeGen/Passes.h            |  2 +
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |  8 ++
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  1 +
 llvm/lib/CodeGen/BasicBlockSections.cpp       |  3 +-
 .../BasicBlockSectionsProfileReader.cpp       | 87 +++++++++++++++++--
 llvm/lib/CodeGen/CodeGenPrepare.cpp           |  1 +
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 68 ++++++++++++++-
 llvm/lib/Target/X86/X86InstrInfo.cpp          | 20 +++++
 llvm/lib/Target/X86/X86InstrInfo.h            |  4 +
 .../X86/basic-block-sections-code-prefetch.ll |  7 +-
 13 files changed, 246 insertions(+), 12 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/InsertCodePrefetch.h

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 830f99cd661bc..f852fc7216e97 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,10 +71,30 @@ struct CFGProfile {
   }
 };
 
+<<<<<<< HEAD
+=======
+// The prefetch symbol is emitted immediately after the call of the given index,
+// in block `BBID` (First call has an index of 1). Zero callsite index means the
+// start of the block.
+struct CallsiteID {
+  UniqueBBID BBID;
+  unsigned CallsiteIndex;
+};
+
+// This represents a prefetch hint to be injected at site `SiteID`, targetting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+  CallsiteID SiteID;
+  StringRef TargetFunction;
+  CallsiteID TargetID;
+};
+
+>>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
 // This struct represents the raw optimization profile for a function,
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
 struct FunctionOptimizationProfile {
+// This represents the raw input profile for one function.
   // BB Cluster information specified by `UniqueBBID`s.
   SmallVector<BBClusterInfo> ClusterInfo;
   // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along
@@ -86,6 +106,9 @@ struct FunctionOptimizationProfile {
   // Code prefetch targets, specified by the callsite ID. The target is the code
   // immediately following this callsite.
   SmallVector<CallsiteID> PrefetchTargets;
+  // Code prefetch hints, specified by the injection site ID, the target
+  // function and the target site ID.
+  SmallVector<PrefetchHint> PrefetchHints;
   // Node counts for each basic block.
   DenseMap<UniqueBBID, uint64_t> NodeCounts;
   // Edge counts for each edge.
@@ -134,6 +157,10 @@ class BasicBlockSectionsProfileReader {
   SmallVector<CallsiteID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  // Returns the prefetch hints to be injected in function `FuncName`.
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
 private:
   StringRef getAliasName(StringRef FuncName) const {
     auto R = FuncAliasMap.find(FuncName);
@@ -240,6 +267,9 @@ class BasicBlockSectionsProfileReaderWrapperPass : public ImmutablePass {
   SmallVector<CallsiteID>
   getPrefetchTargetsForFunction(StringRef FuncName) const;
 
+  SmallVector<PrefetchHint>
+  getPrefetchHintsForFunction(StringRef FuncName) const;
+
   // Initializes the FunctionNameToDIFilename map for the current module and
   // then reads the profile for the matching functions.
   bool doInitialization(Module &M) override;
diff --git a/llvm/include/llvm/CodeGen/InsertCodePrefetch.h b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
new file mode 100644
index 0000000000000..99241248862d3
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/InsertCodePrefetch.h
@@ -0,0 +1,25 @@
+//===- InsertCodePrefetch.h - Prefetch target symbol utilities --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_INSERTCODEPREFETCH_H
+#define LLVM_CODEGEN_INSERTCODEPREFETCH_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/UniqueBBID.h"
+
+namespace llvm {
+
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned SubblockIndex);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_INSERTCODEPREFETCH_H
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 029e3695b2cda..739c4b7dee07b 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -215,6 +215,8 @@ class MachineBasicBlock
   /// basic block sections and basic block labels.
   std::optional<UniqueBBID> BBID;
 
+  SmallVector<unsigned> PrefetchTargets;
+
   /// With basic block sections, this stores the Section ID of the basic block.
   MBBSectionID SectionID{0};
 
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index 6f7c3bd177cb0..a6fd7e7524638 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -73,6 +73,8 @@ LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
 /// and inference when using propeller.
 LLVM_ABI MachineFunctionPass *createBasicBlockMatchingAndInferencePass();
 
+/// createInsertCodePrefetchPass - This pass enables inserting code prefetch
+/// hints based on the basic block section profile.
 LLVM_ABI MachineFunctionPass *createInsertCodePrefetchPass();
 
 /// createMachineBlockHashInfoPass - This pass computes basic block hashes.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ed968d7bd4593..d5b7bd5f928d2 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2370,6 +2370,14 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
     llvm_unreachable("unknown number of operands necessary");
   }
 
+  /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
+  /// targeting `GV`.
+  virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator InsertBefore,
+                                       const GlobalValue *GV) const {
+    return false;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c77d2425d4ff4..204347d0ad395 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -39,6 +39,7 @@
 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/InsertCodePrefetch.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockHashInfo.h"
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 583e755fc1e15..7a532b44c8c10 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,7 +108,8 @@ class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
 
-  BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader = nullptr;
+  // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
+  // nullptr;
 
   BasicBlockSections() : MachineFunctionPass(ID) {}
 
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 693a9bdf74e60..45942deacc05c 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -101,6 +101,14 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
                                                : SmallVector<CallsiteID>();
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  auto R = ProgramOptimizationProfile.find(getAliasName(FuncName));
+  return R != ProgramOptimizationProfile.end() ? R->second.PrefetchHints
+                                               : SmallVector<PrefetchHint>();
+}
+
 // Reads the version 1 basic block sections profile. Profile for each function
 // is encoded as follows:
 //   m <module_name>
@@ -157,35 +165,53 @@ BasicBlockSectionsProfileReader::getPrefetchTargetsForFunction(
 //                                ....
 // ****************************************************************************
 // This profile can also specify prefetch targets (starting with 't') which
-// instruct the compiler to emit a prefetch symbol for the given target.
+// instruct the compiler to emit a prefetch symbol for the given target and
+// prefetch hints (starting with 'i') which instruct the compiler to insert a
+// prefetch hint instruction at the given site for the given target.
+//
 // A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
 // bbid specifies the target basic block and subblock_index is a zero-based
-// index. Subblock 0 refers to the region at the beginning of the block up to
-// the first callsite. Subblock `i > 0` refers to the region immediately after
+// index. Callsite 0 refers to the region at the beginning of the block up to
+// the first callsite. Callsite `i > 0` refers to the region immediately after
 // the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
 // The prefetch target is always emitted at the beginning of the subblock.
 // This is the beginning of the basic block for `i = 0` and immediately after
 // the `i`-th call for every `i > 0`.
 //
+// A prefetch hint is specified by a pair "site target", where site is
+// specified as a pair "<bbid>,<callsite_index>" similar to prefetch
+// targets, and target is specified as a triple
+// "<function_name>,<bbid>,<callsite_index>".
+//
 // Example: A basic block in function "foo" with BBID 10 and two call
 // instructions (call_A, call_B). This block is conceptually split into
-// subblocks, with the prefetch target symbol emitted at the beginning of each
-// subblock.
+// subblocks, with the prefetch target symbol emitted at the beginning of
+// each subblock.
 //
 // +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Subblock 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
 // |  Instruction 1                   |
 // |  Instruction 2                   |
 // |  call_A (Callsite 0)             |
-// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
 // |                                  |                  before call_B)
 // |  Instruction 3                   |
 // |  call_B (Callsite 1)             |
-// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
 // |                                  |                  before call_C)
 // |  Instruction 4                   |
 // +----------------------------------+
 //
+// A prefetch hint specified in function "bar" as "120,1 foo,10,2" results
+// in a hint inserted after the first call in block #120 of bar:
+// B
+// +----------------------------------------------------+
+// | Instruction 1                                      |
+// | call_C (Callsite 1)                                |
+// | code_prefetch __llvm_prefetch_target_foo_10_2      |
+// | Instruction 2                                      |
+// +----------------------------------------------------+
+//
 Error BasicBlockSectionsProfileReader::ReadV1Profile() {
   auto FI = ProgramOptimizationProfile.end();
 
@@ -367,6 +393,45 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() {
           CallsiteID{*TargetBBID, static_cast<unsigned>(CallsiteIndex)});
       continue;
     }
+
+    case 'i': { // Prefetch hint specifier.
+      // Skip the profile when the profile iterator (FI) refers to the
+      // past-the-end element.
+      if (FI == ProgramOptimizationProfile.end())
+        continue;
+      if (Values.size() != 2)
+        return createProfileParseError(Twine("Prefetch hint expected: " + S));
+      SmallVector<StringRef, 2> PrefetchSiteStr;
+      Values[0].split(PrefetchSiteStr, ',');
+      if (PrefetchSiteStr.size() != 2)
+        return createProfileParseError(Twine("Prefetch site expected: ") +
+                                       Values[0]);
+      auto SiteBBID = parseUniqueBBID(PrefetchSiteStr[0]);
+      if (!SiteBBID)
+        return SiteBBID.takeError();
+      unsigned long long SiteCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchSiteStr[1], 10, SiteCallsiteIndex))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchSiteStr[1]);
+
+      SmallVector<StringRef, 3> PrefetchTargetStr;
+      Values[1].split(PrefetchTargetStr, ',');
+      if (PrefetchTargetStr.size() != 3)
+        return createProfileParseError(
+            Twine("Prefetch target target expected: ") + Values[1]);
+      auto TargetBBID = parseUniqueBBID(PrefetchTargetStr[1]);
+      if (!TargetBBID)
+        return TargetBBID.takeError();
+      unsigned long long TargetCallsiteIndex;
+      if (getAsUnsignedInteger(PrefetchTargetStr[2], 10, TargetCallsiteIndex))
+        return createProfileParseError(Twine("unsigned integer expected: '") +
+                                       PrefetchTargetStr[2]);
+      FI->second.PrefetchHints.push_back(PrefetchHint{
+          CallsiteID{*SiteBBID, static_cast<unsigned>(SiteCallsiteIndex)},
+          PrefetchTargetStr[0],
+          CallsiteID{*TargetBBID, static_cast<unsigned>(TargetCallsiteIndex)}});
+      continue;
+    }
     default:
       return createProfileParseError(Twine("invalid specifier: '") +
                                      Twine(Specifier) + "'");
@@ -585,6 +650,12 @@ BasicBlockSectionsProfileReaderWrapperPass::getPrefetchTargetsForFunction(
   return BBSPR.getPrefetchTargetsForFunction(FuncName);
 }
 
+SmallVector<PrefetchHint>
+BasicBlockSectionsProfileReaderWrapperPass::getPrefetchHintsForFunction(
+    StringRef FuncName) const {
+  return BBSPR.getPrefetchHintsForFunction(FuncName);
+}
+
 BasicBlockSectionsProfileReader &
 BasicBlockSectionsProfileReaderWrapperPass::getBBSPR() {
   return BBSPR;
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 537b91bc39e0e..50904cc30bc98 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/FloatingPointPredicateUtils.h"
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index cd220347257a8..87e2c776af19e 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -14,9 +14,13 @@
 /// be the beginning of any dynamic basic block, that is the beginning of a
 /// machine basic block, or immediately after a callsite. A global symbol is
 /// emitted at the position of the target so it can be addressed from the
-/// prefetch instruction from any module.
+/// prefetch instruction from any module. In order to insert prefetch hints,
+/// `TargetInstrInfo::insertCodePrefetchInstr` must be implemented by the
+/// target.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/InsertCodePrefetch.h"
+
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -27,11 +31,26 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
 
+namespace llvm {
+SmallString<128> getPrefetchTargetSymbolName(StringRef FunctionName,
+                                             const UniqueBBID &BBID,
+                                             unsigned CallsiteIndex) {
+  SmallString<128> R("__llvm_prefetch_target_");
+  R += FunctionName;
+  R += "_";
+  R += utostr(BBID.BaseID);
+  R += "_";
+  R += utostr(CallsiteIndex);
+  return R;
+}
+} // namespace llvm
+
 namespace {
 class InsertCodePrefetch : public MachineFunctionPass {
 public:
@@ -81,7 +100,52 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     V.erase(llvm::unique(V), V.end());
   }
   MF.setPrefetchTargets(PrefetchTargetsByBBID);
-  return false;
+
+  SmallVector<PrefetchHint> PrefetchHints =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
+          .getPrefetchHintsForFunction(MF.getName());
+  DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
+  for (const auto &H : PrefetchHints)
+    PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
+  // Sort prefetch hints by their callsite index so we can insert them by one
+  // pass over the block's instructions.
+  for (auto &[SiteBBID, Hints] : PrefetchHintsBySiteBBID) {
+    llvm::sort(Hints, [](const PrefetchHint &H1, const PrefetchHint &H2) {
+      return H1.SiteID.CallsiteIndex < H2.SiteID.CallsiteIndex;
+    });
+  }
+  auto PtrTy =
+      PointerType::getUnqual(MF.getFunction().getParent()->getContext());
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  for (auto &BB : MF) {
+    auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
+    if (It == PrefetchHintsBySiteBBID.end())
+      continue;
+    const auto &PrefetchHints = It->second;
+    unsigned NumCallsInBB = 0;
+    auto InstrIt = BB.begin();
+    for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+      auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
+      // Insert all the prefetch hints which must be placed after this call (or
+      // at the beginning of the block if `NumCallsInBB` is zero).
+      while (HintIt != PrefetchHints.end() &&
+             NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+            getPrefetchTargetSymbolName(HintIt->TargetFunction,
+                                        HintIt->TargetID.BBID,
+                                        HintIt->TargetID.CallsiteIndex),
+            PtrTy);
+        TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        ++HintIt;
+      }
+      if (InstrIt == BB.end())
+        break;
+      if (InstrIt->isCall())
+        ++NumCallsInBB;
+      InstrIt = NextInstrIt;
+    }
+  }
+  return true;
 }
 
 void InsertCodePrefetch::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2479a8dccfb00..edcc074f2db87 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,5 +10785,25 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
+bool X86InstrInfo::insertCodePrefetchInstr(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
+    const GlobalValue *GV) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
+      get(X86::PREFETCHIT1),
+      InsertBefore == MBB.instr_end() ? MBB.findPrevDebugLoc(InsertBefore)
+                                      : InsertBefore->getDebugLoc(),
+      true);
+  MachineInstrBuilder MIB(MF, PrefetchInstr);
+  MIB.addMemOperand(MF.getMachineMemOperand(MachinePointerInfo(GV),
+                                            MachineMemOperand::MOLoad, /*s=*/8,
+                                            /*base_alignment=*/llvm::Align(1)));
+  MIB.addReg(X86::RIP).addImm(1).addReg(X86::NoRegister);
+  MIB.addGlobalAddress(GV);
+  MIB.addReg(X86::NoRegister);
+  MBB.insert(InsertBefore, PrefetchInstr);
+  return true;
+}
+
 #define GET_INSTRINFO_HELPERS
 #include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index de8ccb44578a3..e0638d75ae295 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -766,6 +766,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// \returns the index of operand that is commuted with \p Idx1. If the method
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
+
+  bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator InsertBefore,
+                               const GlobalValue *GV) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index bef121aecf483..d8559c317c32e 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -9,9 +9,12 @@
 ; RUN: echo 't 2,1' >> %t
 ; RUN: echo 't 3,0' >> %t
 ; RUN: echo 't 3,1' >> %t
+; RUN: echo 'i 3,0 bar,0,0' >> %t
+; RUN: echo 'i 2,1 foo,1,0' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
+; RUN: echo 'i 0,1 foo,0,0' >> %t
 ; RUN: echo 'f qux' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 0,1' >> %t
@@ -39,6 +42,7 @@ cond.false:                                          ; preds = %1
 ; CHECK:        callq baz at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_2_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_2_1:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_foo_1_0(%rip)
 
 end:                                             ; preds = %11, %9
   ret void
@@ -47,6 +51,7 @@ end:                                             ; preds = %11, %9
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
 ; CHECK:        .globl	__llvm_prefetch_target_foo_3_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 }
 
 define weak i32 @bar() nounwind {
@@ -58,6 +63,7 @@ define weak i32 @bar() nounwind {
 ; CHECK-NEXT:   .weak __llvm_prefetch_target_bar_0_0
 ; CHECK-NEXT: __llvm_prefetch_target_bar_0_0:
 ; CHECK:        callq baz at PLT
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_foo_0_0(%rip)
 }
 
 define internal i32 @qux() nounwind {
@@ -72,4 +78,3 @@ define internal i32 @qux() nounwind {
 }
 
 declare i32 @baz()
-declare i32 @dummy()

>From 79f337c8cf2b6d294c1d6a424e01f3519b3b8574 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:08:05 +0000
Subject: [PATCH 04/13] Update tests.

---
 ...asic-block-sections-code-prefetch-call-terminates-block.ll | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
index bf98076770b5b..b3c8069d96f03 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch-call-terminates-block.ll
@@ -1,9 +1,10 @@
-;; Check prefetch directives properly handle a block terminating with a call.
+;; Check prefetch directives properly handle prefetch targets and instructions after a call which terminates a block.
 ;;
 ;; Specify the bb sections profile:
 ; RUN: echo 'v1' > %t
 ; RUN: echo 'f foo' >> %t
 ; RUN: echo 't 0,1' >> %t
+; RUN: echo 'i 0,1 other,30,30' >> %t
 ;;
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -asm-verbose=false -function-sections -basic-block-sections=%t -O1 | FileCheck %s
 
@@ -15,6 +16,7 @@ entry:
 ; CHECK:        callq explode at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_0_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_0_1:
+; CHECK-NEXT:   prefetchit1 __llvm_prefetch_target_other_30_30(%rip)
 
 continue:
   ret i32 0

>From c312e726ea59745728b6d91d0bc6676019ba94de Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:13:04 +0000
Subject: [PATCH 05/13] clang-format.

---
 llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index f852fc7216e97..18c4e94bb1b5b 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -94,7 +94,7 @@ struct PrefetchHint {
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
 struct FunctionOptimizationProfile {
-// This represents the raw input profile for one function.
+  // This represents the raw input profile for one function.
   // BB Cluster information specified by `UniqueBBID`s.
   SmallVector<BBClusterInfo> ClusterInfo;
   // Paths to clone. A path a -> b -> c -> d implies cloning b, c, and d along

>From e9cb9bf7d688a2ee0ca3b6a1c9097fa08a3a7ec0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 6 Jan 2026 08:28:28 +0000
Subject: [PATCH 06/13] Remove unwanted changes.

---
 llvm/lib/CodeGen/BasicBlockSections.cpp              |  7 +++----
 llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp | 10 +++++-----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 7a532b44c8c10..1f82ba7e0821e 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -108,10 +108,9 @@ class BasicBlockSections : public MachineFunctionPass {
 public:
   static char ID;
 
-  // BasicBlockSectionsProfileReaderWrapperPass *BBSectionsProfileReader =
-  // nullptr;
-
-  BasicBlockSections() : MachineFunctionPass(ID) {}
+  BasicBlockSections() : MachineFunctionPass(ID) {
+    initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
+  }
 
   StringRef getPassName() const override {
     return "Basic Block Sections Analysis";
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 45942deacc05c..0da1adc08f0a0 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -171,8 +171,8 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 //
 // A prefetch target is specified by a pair "<bbid>,<subblock_index>" where
 // bbid specifies the target basic block and subblock_index is a zero-based
-// index. Callsite 0 refers to the region at the beginning of the block up to
-// the first callsite. Callsite `i > 0` refers to the region immediately after
+// index. Subblock 0 refers to the region at the beginning of the block up to
+// the first callsite. Subblock `i > 0` refers to the region immediately after
 // the `i`-th callsite up to the `i+1`-th callsite (or the end of the block).
 // The prefetch target is always emitted at the beginning of the subblock.
 // This is the beginning of the basic block for `i = 0` and immediately after
@@ -189,15 +189,15 @@ BasicBlockSectionsProfileReader::getPrefetchHintsForFunction(
 // each subblock.
 //
 // +----------------------------------+
-// | __llvm_prefetch_target_foo_10_0: | <- Callsite 0 (before call_A)
+// | __llvm_prefetch_target_foo_10_0: | <--- Subblock 0 (before call_A)
 // |  Instruction 1                   |
 // |  Instruction 2                   |
 // |  call_A (Callsite 0)             |
-// | __llvm_prefetch_target_foo_10_1: | <--- Callsite 1 (after call_A,
+// | __llvm_prefetch_target_foo_10_1: | <--- Subblock 1 (after call_A,
 // |                                  |                  before call_B)
 // |  Instruction 3                   |
 // |  call_B (Callsite 1)             |
-// | __llvm_prefetch_target_foo_10_2: | <--- Callsite 2 (after call_B,
+// | __llvm_prefetch_target_foo_10_2: | <--- Subblock 2 (after call_B,
 // |                                  |                  before call_C)
 // |  Instruction 4                   |
 // +----------------------------------+

>From 8ac63b928bc29e2a87cbcbf4af5d99ece73c709f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 22:50:41 +0000
Subject: [PATCH 07/13] Additional changes.

---
 .../CodeGen/BasicBlockSectionsProfileReader.h | 19 -------------------
 llvm/include/llvm/Support/UniqueBBID.h        |  8 ++++++++
 .../X86/basic-block-sections-code-prefetch.ll |  2 +-
 3 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index 18c4e94bb1b5b..c4b2fd59793bd 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -71,25 +71,6 @@ struct CFGProfile {
   }
 };
 
-<<<<<<< HEAD
-=======
-// The prefetch symbol is emitted immediately after the call of the given index,
-// in block `BBID` (First call has an index of 1). Zero callsite index means the
-// start of the block.
-struct CallsiteID {
-  UniqueBBID BBID;
-  unsigned CallsiteIndex;
-};
-
-// This represents a prefetch hint to be injected at site `SiteID`, targetting
-// `TargetID` in function `TargetFunction`.
-struct PrefetchHint {
-  CallsiteID SiteID;
-  StringRef TargetFunction;
-  CallsiteID TargetID;
-};
-
->>>>>>> 5fb1ac189131 (feat(AsmPrinter): Add support for inserting code prefetch instructions.)
 // This struct represents the raw optimization profile for a function,
 // including CFG data (block and edge counts) and layout directives (clustering
 // and cloning paths).
diff --git a/llvm/include/llvm/Support/UniqueBBID.h b/llvm/include/llvm/Support/UniqueBBID.h
index 23a9467534318..bee4cf9158f45 100644
--- a/llvm/include/llvm/Support/UniqueBBID.h
+++ b/llvm/include/llvm/Support/UniqueBBID.h
@@ -33,6 +33,14 @@ struct CallsiteID {
   unsigned CallsiteIndex;
 };
 
+// This represents a prefetch hint to be injected at site `SiteID`, targeting
+// `TargetID` in function `TargetFunction`.
+struct PrefetchHint {
+  CallsiteID SiteID;
+  StringRef TargetFunction;
+  CallsiteID TargetID;
+};
+
 // Provides DenseMapInfo for UniqueBBID.
 template <> struct DenseMapInfo<UniqueBBID> {
   static inline UniqueBBID getEmptyKey() {
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index d8559c317c32e..b530846f02a4a 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -49,9 +49,9 @@ end:                                             ; preds = %11, %9
 ; CHECK:      .LBB0_3:
 ; CHECK-NEXT:   .globl	__llvm_prefetch_target_foo_3_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_0:
+; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 ; CHECK:        .globl	__llvm_prefetch_target_foo_3_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_3_1:
-; CHECK-NEXT:   prefetchit1	__llvm_prefetch_target_bar_0_0(%rip)
 }
 
 define weak i32 @bar() nounwind {

>From 4390ae1edd160d2620ffdbe1c18a1710ba72932d Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:44 +0000
Subject: [PATCH 08/13] Split setPrefetchTargets and insertPrefetchHints into
 separate functions.

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 36 +++++++++++++++----------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 87e2c776af19e..880c4bec8451d 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -81,16 +81,10 @@ INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReaderWrapperPass)
 INITIALIZE_PASS_END(InsertCodePrefetch, DEBUG_TYPE, "Code prefetch insertion",
                     true, false)
 
-bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
-  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
-         "BB Sections list not enabled!");
-  if (hasInstrProfHashMismatch(MF))
-    return false;
+static void setPrefetchTargets(MachineFunction &MF,
+                               const SmallVector<CallsiteID> &PrefetchTargets) {
   // Set each block's prefetch targets so AsmPrinter can emit a special symbol
   // there.
-  SmallVector<CallsiteID> PrefetchTargets =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchTargetsForFunction(MF.getName());
   DenseMap<UniqueBBID, SmallVector<unsigned>> PrefetchTargetsByBBID;
   for (const auto &Target : PrefetchTargets)
     PrefetchTargetsByBBID[Target.BBID].push_back(Target.CallsiteIndex);
@@ -100,10 +94,11 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     V.erase(llvm::unique(V), V.end());
   }
   MF.setPrefetchTargets(PrefetchTargetsByBBID);
+}
 
-  SmallVector<PrefetchHint> PrefetchHints =
-      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>()
-          .getPrefetchHintsForFunction(MF.getName());
+static void
+insertPrefetchHints(MachineFunction &MF,
+                    const SmallVector<PrefetchHint> &PrefetchHints) {
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -121,14 +116,14 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
     auto It = PrefetchHintsBySiteBBID.find(*BB.getBBID());
     if (It == PrefetchHintsBySiteBBID.end())
       continue;
-    const auto &PrefetchHints = It->second;
+    const auto &BBHints = It->second;
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
-    for (auto HintIt = PrefetchHints.begin(); HintIt != PrefetchHints.end();) {
+    for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       // Insert all the prefetch hints which must be placed after this call (or
       // at the beginning of the block if `NumCallsInBB` is zero.
-      while (HintIt != PrefetchHints.end() &&
+      while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
         auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
             getPrefetchTargetSymbolName(HintIt->TargetFunction,
@@ -145,6 +140,19 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
       InstrIt = NextInstrIt;
     }
   }
+}
+
+bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
+  assert(MF.getTarget().getBBSectionsType() == BasicBlockSection::List &&
+         "BB Sections list not enabled!");
+  if (hasInstrProfHashMismatch(MF))
+    return false;
+
+  auto &ProfileReader =
+      getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
+  setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+  insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+
   return true;
 }
 

>From 235c35163b6de12c5a359a7fdb6cceae8258b059 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 4 Mar 2026 23:07:57 +0000
Subject: [PATCH 09/13] clang-format.

---
 llvm/lib/CodeGen/InsertCodePrefetch.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 880c4bec8451d..6f6d6efdc49c3 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -150,8 +150,10 @@ bool InsertCodePrefetch::runOnMachineFunction(MachineFunction &MF) {
 
   auto &ProfileReader =
       getAnalysis<BasicBlockSectionsProfileReaderWrapperPass>();
-  setPrefetchTargets(MF, ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
-  insertPrefetchHints(MF, ProfileReader.getPrefetchHintsForFunction(MF.getName()));
+  setPrefetchTargets(MF,
+                     ProfileReader.getPrefetchTargetsForFunction(MF.getName()));
+  insertPrefetchHints(MF,
+                      ProfileReader.getPrefetchHintsForFunction(MF.getName()));
 
   return true;
 }

>From eb4169916bde2572f611d24c24060b3ea159d0c3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:07 +0000
Subject: [PATCH 10/13] Implement fallback support.

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  2 +-
 llvm/include/llvm/CodeGen/TargetInstrInfo.h   |  4 ++--
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 17 +++++++------
 llvm/lib/CodeGen/InsertCodePrefetch.cpp       | 24 +++++++++++++++----
 llvm/lib/Target/X86/X86InstrInfo.cpp          |  4 ++--
 llvm/lib/Target/X86/X86InstrInfo.h            |  2 +-
 .../X86/basic-block-sections-code-prefetch.ll |  4 ++++
 7 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index b160de2f060d6..daffdc5f41bd5 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -491,7 +491,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// Helper to emit a symbol for the prefetch target associated with the given
   /// BBID and callsite index. The symbol is emitted as a label and its linkage
   /// is set based on the function's linkage.
-  void emitPrefetchTargetSymbol(unsigned BaseID, unsigned CallsiteIndex);
+  void emitPrefetchTargetSymbol(const UniqueBBID &BBID, unsigned CallsiteIndex);
 
   /// Emit prefetch targets that were not mapped to any basic block. These
   /// targets are emitted at the beginning of the function body.
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index d5b7bd5f928d2..c2d879a177a00 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,10 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
 
   /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
   /// targetting `GV`.
-  virtual bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+  virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator InsertBefore,
                                        const GlobalValue *GV) const {
-    return false;
+    llvm_unreachable("target did not implement");
   }
 
 private:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 204347d0ad395..10c17ead558de 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2020,13 +2020,12 @@ void AsmPrinter::handleCallsiteForCallgraph(
 
 /// Helper to emit a symbol for the prefetch target associated with the given
 /// BBID and callsite index.
-void AsmPrinter::emitPrefetchTargetSymbol(unsigned BaseID,
+void AsmPrinter::emitPrefetchTargetSymbol(const UniqueBBID &BBID,
                                           unsigned CallsiteIndex) {
   SmallString<128> FunctionName;
   getNameWithPrefix(FunctionName, &MF->getFunction());
   MCSymbol *PrefetchTargetSymbol = OutContext.getOrCreateSymbol(
-      "__llvm_prefetch_target_" + FunctionName + "_" + Twine(BaseID) + "_" +
-      Twine(CallsiteIndex));
+      getPrefetchTargetSymbolName(FunctionName, BBID, CallsiteIndex));
   // If the function is weak-linkage it may be replaced by a strong
   // version, in which case the prefetch targets should also be replaced.
   OutStreamer->emitSymbolAttribute(
@@ -2050,7 +2049,7 @@ void AsmPrinter::emitDanglingPrefetchTargets() {
     if (MFBBIDs.contains(BBID))
       continue;
     for (unsigned CallsiteIndex : CallsiteIndexes)
-      emitPrefetchTargetSymbol(BBID.BaseID, CallsiteIndex);
+      emitPrefetchTargetSymbol(BBID, CallsiteIndex);
   }
 }
 
@@ -2125,7 +2124,7 @@ void AsmPrinter::emitFunctionBody() {
     for (auto &MI : MBB) {
       if (PrefetchTargetIt != PrefetchTargetEnd &&
           *PrefetchTargetIt == LastCallsiteIndex) {
-        emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+        emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
         ++PrefetchTargetIt;
       }
 
@@ -2276,8 +2275,12 @@ void AsmPrinter::emitFunctionBody() {
         handleCallsiteForCallgraph(FuncCGInfo, CallSitesInfoMap, MI);
 
       // If there is a post-instruction symbol, emit a label for it here.
-      if (MCSymbol *S = MI.getPostInstrSymbol())
+      if (MCSymbol *S = MI.getPostInstrSymbol()) {
+        if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+          if (ESym->isWeakref())
+            OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
         OutStreamer->emitLabel(S);
+      }
 
       for (auto &Handler : Handlers)
         Handler->endInstruction();
@@ -2285,7 +2288,7 @@ void AsmPrinter::emitFunctionBody() {
     // Emit the remaining prefetch targets for this block. This includes
     // nonexisting callsite indexes.
     while (PrefetchTargetIt != PrefetchTargetEnd) {
-      emitPrefetchTargetSymbol(MBB.getBBID()->BaseID, *PrefetchTargetIt);
+      emitPrefetchTargetSymbol(*MBB.getBBID(), *PrefetchTargetIt);
       ++PrefetchTargetIt;
     }
 
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 6f6d6efdc49c3..9bc4980424474 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/MC/MCSymbolELF.h"
 
 using namespace llvm;
 #define DEBUG_TYPE "insert-code-prefetch"
@@ -99,6 +100,8 @@ static void setPrefetchTargets(MachineFunction &MF,
 static void
 insertPrefetchHints(MachineFunction &MF,
                     const SmallVector<PrefetchHint> &PrefetchHints) {
+  bool IsELF = MF.getTarget().getTargetTriple().isOSBinFormatELF();
+  const Module *M = MF.getFunction().getParent();
   DenseMap<UniqueBBID, SmallVector<PrefetchHint>> PrefetchHintsBySiteBBID;
   for (const auto &H : PrefetchHints)
     PrefetchHintsBySiteBBID[H.SiteID.BBID].push_back(H);
@@ -120,17 +123,30 @@ insertPrefetchHints(MachineFunction &MF,
     unsigned NumCallsInBB = 0;
     auto InstrIt = BB.begin();
     for (auto HintIt = BBHints.begin(); HintIt != BBHints.end();) {
+      bool TargetFunctionDefined = false;
+      if (Function *TargetFunction = M->getFunction(HintIt->TargetFunction))
+        TargetFunctionDefined = !TargetFunction->isDeclaration();
       auto NextInstrIt = InstrIt == BB.end() ? BB.end() : std::next(InstrIt);
       // Insert all the prefetch hints which must be placed after this call (or
       // at the beginning of the block if `NumCallsInBB` is zero.
       while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
-        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
-            getPrefetchTargetSymbolName(HintIt->TargetFunction,
+        auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
                                         HintIt->TargetID.BBID,
-                                        HintIt->TargetID.CallsiteIndex),
+                                        HintIt->TargetID.CallsiteIndex);
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
             PtrTy);
-        TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        if (!TargetFunctionDefined && IsELF) {
+          // If the target function is not defined in this module, we guard
+          // against undefined prefetch target symbol by emitting a fallback
+          // symbol with weak linkage right after the prefetch instruction. If
+          // there is no strong symbol, the fallback will be used and we
+          // prefetch the next address.
+          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+          WeakFallbackSym->setIsWeakref();
+          PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
+        }
         ++HintIt;
       }
       if (InstrIt == BB.end())
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index edcc074f2db87..0ccd234b97f51 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,7 +10785,7 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
-bool X86InstrInfo::insertCodePrefetchInstr(
+MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
     const GlobalValue *GV) const {
   MachineFunction &MF = *MBB.getParent();
@@ -10802,7 +10802,7 @@ bool X86InstrInfo::insertCodePrefetchInstr(
   MIB.addGlobalAddress(GV);
   MIB.addReg(X86::NoRegister);
   MBB.insert(InsertBefore, PrefetchInstr);
-  return true;
+  return PrefetchInstr;
 }
 
 #define GET_INSTRINFO_HELPERS
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e0638d75ae295..b416e43250eb5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,7 +767,7 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
 
-  bool insertCodePrefetchInstr(MachineBasicBlock &MBB,
+  MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator InsertBefore,
                                const GlobalValue *GV) const override;
 };
diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index b530846f02a4a..50b5fbf50dec7 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,6 +11,7 @@
 ; RUN: echo 't 3,1' >> %t
 ; RUN: echo 'i 3,0 bar,0,0' >> %t
 ; RUN: echo 'i 2,1 foo,1,0' >> %t
+; RUN: echo 'i 1,1 undef,5,5' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
@@ -33,8 +34,11 @@ cond.true:                                           ; preds = %1
 ; CHECK:        .globl __llvm_prefetch_target_foo_1_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
 ; CHECK-NEXT:   callq bar at PLT
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
+; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
 
 cond.false:                                          ; preds = %1
   call i32 @baz()

>From 642f25c7057fbe4a736d7c9700110a7d93c7fba3 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:46:21 +0000
Subject: [PATCH 11/13] clang-format.

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h |  7 ++++---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp  |  2 +-
 llvm/lib/CodeGen/InsertCodePrefetch.cpp     | 16 +++++++++-------
 llvm/lib/Target/X86/X86InstrInfo.cpp        |  7 ++++---
 llvm/lib/Target/X86/X86InstrInfo.h          |  7 ++++---
 5 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index c2d879a177a00..0d2c931ac5e55 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -2372,9 +2372,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo {
 
   /// Inserts a code prefetch instruction before `InsertBefore` in block `MBB`
   /// targetting `GV`.
-  virtual MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                                       MachineBasicBlock::iterator InsertBefore,
-                                       const GlobalValue *GV) const {
+  virtual MachineInstr *
+  insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator InsertBefore,
+                          const GlobalValue *GV) const {
     llvm_unreachable("target did not implement");
   }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 10c17ead558de..8173f5ff7d79a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2276,7 +2276,7 @@ void AsmPrinter::emitFunctionBody() {
 
       // If there is a post-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPostInstrSymbol()) {
-        if (MCSymbolELF *ESym = static_cast<MCSymbolELF*>(S))
+        if (MCSymbolELF *ESym = static_cast<MCSymbolELF *>(S))
           if (ESym->isWeakref())
             OutStreamer->emitSymbolAttribute(S, MCSA_Weak);
         OutStreamer->emitLabel(S);
diff --git a/llvm/lib/CodeGen/InsertCodePrefetch.cpp b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
index 9bc4980424474..496a672c734cb 100644
--- a/llvm/lib/CodeGen/InsertCodePrefetch.cpp
+++ b/llvm/lib/CodeGen/InsertCodePrefetch.cpp
@@ -131,19 +131,21 @@ insertPrefetchHints(MachineFunction &MF,
       // at the beginning of the block if `NumCallsInBB` is zero.
       while (HintIt != BBHints.end() &&
              NumCallsInBB >= HintIt->SiteID.CallsiteIndex) {
-        auto TargetSymbolName = getPrefetchTargetSymbolName(HintIt->TargetFunction,
-                                        HintIt->TargetID.BBID,
-                                        HintIt->TargetID.CallsiteIndex);
-        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(TargetSymbolName,
-            PtrTy);
-        MachineInstr *PrefetchInstr = TII->insertCodePrefetchInstr(BB, InstrIt, GV);
+        auto TargetSymbolName = getPrefetchTargetSymbolName(
+            HintIt->TargetFunction, HintIt->TargetID.BBID,
+            HintIt->TargetID.CallsiteIndex);
+        auto *GV = MF.getFunction().getParent()->getOrInsertGlobal(
+            TargetSymbolName, PtrTy);
+        MachineInstr *PrefetchInstr =
+            TII->insertCodePrefetchInstr(BB, InstrIt, GV);
         if (!TargetFunctionDefined && IsELF) {
           // If the target function is not defined in this module, we guard
           // against undefined prefetch target symbol by emitting a fallback
           // symbol with weak linkage right after the prefetch instruction. If
           // there is no strong symbol, the fallback will be used and we
           // prefetch the next address.
-          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(MF.getContext().getOrCreateSymbol(TargetSymbolName));
+          MCSymbolELF *WeakFallbackSym = static_cast<MCSymbolELF *>(
+              MF.getContext().getOrCreateSymbol(TargetSymbolName));
           WeakFallbackSym->setIsWeakref();
           PrefetchInstr->setPostInstrSymbol(MF, WeakFallbackSym);
         }
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 0ccd234b97f51..f41d65db086cc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -10785,9 +10785,10 @@ void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
   M.getFullAddress(Ops);
 }
 
-MachineInstr* X86InstrInfo::insertCodePrefetchInstr(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
-    const GlobalValue *GV) const {
+MachineInstr *
+X86InstrInfo::insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator InsertBefore,
+                                      const GlobalValue *GV) const {
   MachineFunction &MF = *MBB.getParent();
   MachineInstr *PrefetchInstr = MF.CreateMachineInstr(
       get(X86::PREFETCHIT1),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b416e43250eb5..9b77ff0c62237 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -767,9 +767,10 @@ class X86InstrInfo final : public X86GenInstrInfo {
   /// fails to commute the operands, it will return \p Idx1.
   unsigned commuteOperandsForFold(MachineInstr &MI, unsigned Idx1) const;
 
-  MachineInstr* insertCodePrefetchInstr(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator InsertBefore,
-                               const GlobalValue *GV) const override;
+  MachineInstr *
+  insertCodePrefetchInstr(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator InsertBefore,
+                          const GlobalValue *GV) const override;
 };
 } // namespace llvm
 

>From 7f8c3acd4ce33dd064ba64c7794d216e60bc0ee0 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Thu, 5 Mar 2026 18:58:35 +0000
Subject: [PATCH 12/13] Fix the prefetch test.

---
 llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 50b5fbf50dec7..991e3d37fc366 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -34,9 +34,9 @@ cond.true:                                           ; preds = %1
 ; CHECK:        .globl __llvm_prefetch_target_foo_1_0
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_0:
 ; CHECK-NEXT:   callq bar at PLT
-; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
 ; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
 ; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
 

>From effda96d041e43dfeea8d8eeba5cb09408381959 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Sat, 7 Mar 2026 01:15:30 +0000
Subject: [PATCH 13/13] Change undef to extfunc.

---
 .../CodeGen/X86/basic-block-sections-code-prefetch.ll     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
index 991e3d37fc366..acf6c588fafe8 100644
--- a/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
+++ b/llvm/test/CodeGen/X86/basic-block-sections-code-prefetch.ll
@@ -11,7 +11,7 @@
 ; RUN: echo 't 3,1' >> %t
 ; RUN: echo 'i 3,0 bar,0,0' >> %t
 ; RUN: echo 'i 2,1 foo,1,0' >> %t
-; RUN: echo 'i 1,1 undef,5,5' >> %t
+; RUN: echo 'i 1,1 extfunc,5,5' >> %t
 ; RUN: echo 'f bar' >> %t
 ; RUN: echo 't 0,0' >> %t
 ; RUN: echo 't 21,1' >> %t
@@ -36,9 +36,9 @@ cond.true:                                           ; preds = %1
 ; CHECK-NEXT:   callq bar at PLT
 ; CHECK-NEXT:   .globl __llvm_prefetch_target_foo_1_1
 ; CHECK-NEXT: __llvm_prefetch_target_foo_1_1:
-; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_undef_5_5(%rip)
-; CHECK-NEXT:   .weak __llvm_prefetch_target_undef_5_5
-; CHECK-NEXT: __llvm_prefetch_target_undef_5_5:
+; CHECK-NEXT:   prefetchit1     __llvm_prefetch_target_extfunc_5_5(%rip)
+; CHECK-NEXT:   .weak __llvm_prefetch_target_extfunc_5_5
+; CHECK-NEXT: __llvm_prefetch_target_extfunc_5_5:
 
 cond.false:                                          ; preds = %1
   call i32 @baz()



More information about the llvm-commits mailing list