[llvm] [SHT_LLVM_BB_ADDR_MAP] Emit callsite offsets in the `SHT_LLVM_BB_ADDR_MAP` section. (PR #146563)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 10:53:28 PDT 2025


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/146563

>From 827535e97c5fee0a9000154e4db1c39360eb750b Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 1 Jul 2025 15:24:04 +0000
Subject: [PATCH 1/8] [SHT_LLVM_BB_ADDR_MAP] Emit callsite offsets in the
 `SHT_LLVM_BB_ADDR_MAP` section.

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        |  4 +-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 13 +++++
 llvm/include/llvm/MC/MCContext.h              |  4 +-
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 37 +++++++++----
 llvm/lib/CodeGen/MachineBasicBlock.cpp        |  9 ++++
 .../basic-block-address-map-empty-function.ll |  2 +-
 ...sic-block-address-map-function-sections.ll |  8 +--
 .../basic-block-address-map-pgo-features.ll   | 26 ++++++----
 ...k-address-map-with-basic-block-sections.ll | 52 +++++++++++--------
 .../X86/basic-block-address-map-with-mfs.ll   | 25 ++++++---
 .../CodeGen/X86/basic-block-address-map.ll    | 18 +++++--
 11 files changed, 139 insertions(+), 59 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 6ad54fcd6d0e5..9defc88a65f23 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -173,6 +173,8 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   // function. This is used to calculate the size of the BB section.
   MCSymbol *CurrentSectionBeginSym = nullptr;
 
+  bool HasAnyCallsitesForBBAddrMap = false;
+
   /// This map keeps track of which symbol is being used for the specified basic
   /// block's address of label.
   std::unique_ptr<AddrLabelMap> AddrLabelSymbols;
@@ -426,7 +428,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
 
   void emitStackUsage(const MachineFunction &MF);
 
-  void emitBBAddrMapSection(const MachineFunction &MF);
+  void emitBBAddrMapSection(const MachineFunction &MF, bool HasCalls);
 
   void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
   virtual void emitKCFITypeId(const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 3d2da01f2c856..9f4c078cd45a1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -242,6 +242,10 @@ class MachineBasicBlock
   /// calculate the size of the basic block, or the BB section ending with it.
   mutable MCSymbol *CachedEndMCSymbol = nullptr;
 
+  /// Vector of symbols marking the position of callsites in the basic
+  /// block, stored in the order they appear in the basic block.
+  mutable SmallVector<MCSymbol *, 1> CallsiteSymbols;
+
   // Intrusive list support
   MachineBasicBlock() = default;
 
@@ -325,6 +329,12 @@ class MachineBasicBlock
   /// its label be emitted.
   void setLabelMustBeEmitted() { LabelMustBeEmitted = true; }
 
+  /// Returns the symbols marking callsites in the order they appear in the
+  /// basic block.
+  const SmallVectorImpl<MCSymbol *> &getCallsiteSymbols() const {
+    return CallsiteSymbols;
+  }
+
   /// Return the MachineFunction containing this basic block.
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
@@ -725,6 +735,9 @@ class MachineBasicBlock
   /// Returns the MCSymbol marking the end of this basic block.
   LLVM_ABI MCSymbol *getEndSymbol() const;
 
+  /// Returns a temporary MCSymbol marking the beginning of a callsite.
+  LLVM_ABI MCSymbol *createCallsiteSymbol() const;
+
   /// Returns true if this block may have an INLINEASM_BR (overestimate, by
   /// checking if any of the successors are indirect targets of any inlineasm_br
   /// in the function).
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d7b81af4a785a..636966dc09157 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -175,8 +175,8 @@ class MCContext {
   /// for the LocalLabelVal and adds it to the map if needed.
   unsigned GetInstance(unsigned LocalLabelVal);
 
-  /// LLVM_BB_ADDR_MAP version to emit.
-  uint8_t BBAddrMapVersion = 2;
+  /// SHT_LLVM_BB_ADDR_MAP version to emit.
+  uint8_t BBAddrMapVersion = 3;
 
   /// The file name of the log file from the environment variable
   /// AS_SECURE_LOG_FILE.  Which must be set before the .secure_log_unique
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 07d9380a02c43..39f7be6b13250 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1391,7 +1391,8 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
 }
 
 static llvm::object::BBAddrMap::Features
-getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
+getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
+                    bool HasCalls) {
   // Ensure that the user has not passed in additional options while also
   // specifying all or none.
   if ((PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None) ||
@@ -1424,10 +1425,11 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
           BrProbEnabled,
           MF.hasBBSections() && NumMBBSectionRanges > 1,
           static_cast<bool>(BBAddrMapSkipEmitBBEntries),
-          false};
+          HasCalls};
 }
 
-void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF,
+                                      bool HasCalls) {
   MCSection *BBAddrMapSection =
       getObjFileLowering().getBBAddrMapSection(*MF.getSection());
   assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
@@ -1440,7 +1442,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
   uint8_t BBAddrMapVersion = OutStreamer->getContext().getBBAddrMapVersion();
   OutStreamer->emitInt8(BBAddrMapVersion);
   OutStreamer->AddComment("feature");
-  auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size());
+  auto Features = getBBAddrMapFeature(MF, MBBSectionRanges.size(), HasCalls);
   OutStreamer->emitInt8(Features.encode());
   // Emit BB Information for each basic block in the function.
   if (Features.MultiBBRange) {
@@ -1493,13 +1495,24 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
       // Emit the basic block offset relative to the end of the previous block.
       // This is zero unless the block is padded due to alignment.
       emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
-      // Emit the basic block size. When BBs have alignments, their size cannot
-      // always be computed from their offsets.
-      emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), MBBSymbol);
+      const MCSymbol *CurrentLabel = MBBSymbol;
+      if (HasCalls) {
+        const SmallVectorImpl<MCSymbol *> &CallsiteSymbols =
+            MBB.getCallsiteSymbols();
+        OutStreamer->AddComment("number of callsites");
+        OutStreamer->emitULEB128IntValue(CallsiteSymbols.size());
+        for (const MCSymbol *CallsiteSymbol : CallsiteSymbols) {
+          // Emit the callsite offset.
+          emitLabelDifferenceAsULEB128(CallsiteSymbol, CurrentLabel);
+          CurrentLabel = CallsiteSymbol;
+        }
+      }
+      // Emit the offset to the end of the block, which can be used to compute
+      // the total block size.
+      emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel);
       // Emit the Metadata.
       OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
     }
-
     PrevMBBEndSymbol = MBB.getEndSymbol();
   }
 
@@ -1802,6 +1815,7 @@ void AsmPrinter::emitFunctionBody() {
 
   // Print out code for the function.
   bool HasAnyRealCode = false;
+  bool HasCalls = false;
   int NumInstsInFunction = 0;
   bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
 
@@ -1828,6 +1842,11 @@ void AsmPrinter::emitFunctionBody() {
           !MI.isDebugInstr()) {
         HasAnyRealCode = true;
       }
+      if (MI.isCall()) {
+        HasCalls = true;
+        if (MF->getTarget().Options.BBAddrMap)
+          OutStreamer->emitLabel(MBB.createCallsiteSymbol());
+      }
 
       // If there is a pre-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPreInstrSymbol())
@@ -2114,7 +2133,7 @@ void AsmPrinter::emitFunctionBody() {
   // BB labels are requested for this function. Skip empty functions.
   if (HasAnyRealCode) {
     if (MF->getTarget().Options.BBAddrMap)
-      emitBBAddrMapSection(*MF);
+      emitBBAddrMapSection(*MF, HasCalls);
     else if (PgoAnalysisMapFeatures.getBits() != 0)
       MF->getContext().reportWarning(
           SMLoc(), "pgo-analysis-map is enabled for function " + MF->getName() +
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c3c5a0f5102d7..97f992c75d2cb 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -112,6 +112,15 @@ MCSymbol *MachineBasicBlock::getEndSymbol() const {
   return CachedEndMCSymbol;
 }
 
+MCSymbol *MachineBasicBlock::createCallsiteSymbol() const {
+  const MachineFunction *MF = getParent();
+  MCContext &Ctx = MF->getContext();
+  CallsiteSymbols.push_back(
+      Ctx.createTempSymbol("BB" + Twine(MF->getFunctionNumber()) + "_" +
+                           Twine(getNumber()) + "_CS"));
+  return CallsiteSymbols.back();
+}
+
 raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   MBB.print(OS);
   return OS;
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll
index 444655fc5299f..4e76262156b42 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-empty-function.ll
@@ -19,7 +19,7 @@ entry:
 ; CHECK:	func:
 ; CHECK:	.Lfunc_begin1:
 ; CHECK:		.section	.llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text{{$}}
-; CHECK-NEXT:		.byte 2			# version
+; CHECK-NEXT:		.byte 3			# version
 ; BASIC-NEXT:		.byte 0			# feature
 ; PGO-NEXT:		.byte 3			# feature
 ; CHECK-NEXT:		.quad	.Lfunc_begin1	# function address
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll
index 9ff96381c2053..f610b043021a7 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-function-sections.ll
@@ -10,7 +10,7 @@ define dso_local i32 @_Z3barv() {
 ; CHECK-LABEL:	_Z3barv:
 ; CHECK-NEXT:	[[BAR_BEGIN:.Lfunc_begin[0-9]+]]:
 ; CHECK:		.section .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3barv{{$}}
-; CHECK-NEXT:		.byte 2			# version
+; CHECK-NEXT:		.byte 3			# version
 ; CHECK-NEXT:		.byte 0			# feature
 ; CHECK-NEXT:		.quad [[BAR_BEGIN]]	# function address
 
@@ -23,8 +23,8 @@ define dso_local i32 @_Z3foov() {
 ; CHECK-LABEL:	_Z3foov:
 ; CHECK-NEXT:	[[FOO_BEGIN:.Lfunc_begin[0-9]+]]:
 ; CHECK:		.section  .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3foov{{$}}
-; CHECK-NEXT:		.byte 2			# version
-; CHECK-NEXT:		.byte 0			# feature
+; CHECK-NEXT:		.byte 3			# version
+; CHECK-NEXT:		.byte 32                # feature
 ; CHECK-NEXT:		.quad [[FOO_BEGIN]]	# function address
 
 
@@ -36,6 +36,6 @@ define linkonce_odr dso_local i32 @_Z4fooTIiET_v() comdat {
 ; CHECK-LABEL:	_Z4fooTIiET_v:
 ; CHECK-NEXT:	[[FOOCOMDAT_BEGIN:.Lfunc_begin[0-9]+]]:
 ; CHECK:		.section .llvm_bb_addr_map,"oG",@llvm_bb_addr_map,.text._Z4fooTIiET_v,_Z4fooTIiET_v,comdat{{$}}
-; CHECK-NEXT:		.byte 2				# version
+; CHECK-NEXT:		.byte 3				# version
 ; CHECK-NEXT:		.byte 0				# feature
 ; CHECK-NEXT:		.quad [[FOOCOMDAT_BEGIN]]	# function address
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
index 63779727ec72c..ba76f3eab6134 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
@@ -69,36 +69,44 @@ declare i32 @__gxx_personality_v0(...)
 ; CHECK-LABEL:	.Lfunc_end0:
 
 ; CHECK: 	.section	.llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}}
-; CHECK-NEXT:	.byte	2		# version
-; BASIC-NEXT:	.byte	0		# feature
-; PGO-ALL-NEXT:	.byte	7		# feature
-; FEC-ONLY-NEXT:.byte	1		# feature
-; BBF-ONLY-NEXT:.byte	2		# feature
-; BRP-ONLY-NEXT:.byte	4		# feature
+; CHECK-NEXT:	.byte	3		# version
+; BASIC-NEXT:	.byte	32		# feature
+; PGO-ALL-NEXT:	.byte	39		# feature
+; FEC-ONLY-NEXT:.byte	33		# feature
+; BBF-ONLY-NEXT:.byte	34		# feature
+; BRP-ONLY-NEXT:.byte	36		# feature
 ; CHECK-NEXT:	.quad	.Lfunc_begin0	# function address
 ; CHECK-NEXT:	.byte	6		# number of basic blocks
 ; CHECK-NEXT:	.byte	0		# BB id
 ; CHECK-NEXT:	.uleb128 .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT:   .byte   0               # number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_0-.Lfunc_begin0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:	.byte	1		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_1-.LBB_END0_0
-; CHECK-NEXT:	.uleb128 .LBB_END0_1-.LBB0_1
+; CHECK-NEXT:   .byte   1               # number of callsites
+; CHECK-NEXT:	.uleb128 .LBB0_1_CS0-.LBB0_1
+; CHECK-NEXT:	.uleb128 .LBB_END0_1-.LBB0_1_CS0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:	.byte	3		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_2-.LBB_END0_1
-; CHECK-NEXT:	.uleb128 .LBB_END0_2-.LBB0_2
+; CHECK-NEXT:   .byte   1               # number of callsites
+; CHECK-NEXT:	.uleb128 .LBB0_2_CS0-.LBB0_2
+; CHECK-NEXT:	.uleb128 .LBB_END0_2-.LBB0_2_CS0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:	.byte	5		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_3-.LBB_END0_2
+; CHECK-NEXT:   .byte   0               # number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_3-.LBB0_3
 ; CHECK-NEXT:	.byte	1
 ; CHECK-NEXT:	.byte	4		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_4-.LBB_END0_3
+; CHECK-NEXT:   .byte   0               # number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_4-.LBB0_4
 ; CHECK-NEXT:	.byte	16
 ; CHECK-NEXT:	.byte	2		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_5-.LBB_END0_4
+; CHECK-NEXT:   .byte   0               # number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_5-.LBB0_5
 ; CHECK-NEXT:	.byte	4
 
@@ -138,7 +146,7 @@ declare i32 @__gxx_personality_v0(...)
 ; PGO-BRP-NEXT:	.byte	5		# successor BB ID
 ; PGO-BRP-NEXT:	.ascii	"\200\200\200\200\b"	# successor branch probability
 
-; SKIP-BB-ENTRIES:      .byte	17                              # feature
+; SKIP-BB-ENTRIES:      .byte	49                              # feature
 ; SKIP-BB-ENTRIES-NEXT:	.quad	.Lfunc_begin0                   # function address
 ; SKIP-BB-ENTRIES-NEXT:	.byte	6                               # number of basic blocks
 ; SKIP-BB-ENTRIES-NEXT:	.byte	100                             # function entry count
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll b/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll
index b897cf4853cac..6157f1a727ab5 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-with-basic-block-sections.ll
@@ -39,33 +39,41 @@ declare i32 @__gxx_personality_v0(...)
 ; CHECK-LABEL:	.LBB_END0_1:
 ; CHECK:		.section .text.split._Z3bazb,"ax",@progbits
 ; CHECK-LABEL:	_Z3bazb.cold:
+; CHECK-LABEL:  .LBB0_2_CS0:
 ; CHECK-LABEL:	.LBB_END0_2:
 ; CHECK-LABEL:	.LBB0_3:
+; CHECK-LABEL:  .LBB0_3_CS0:
 ; CHECK-LABEL:	.LBB_END0_3:
 ; CHECK-LABEL:	.Lfunc_end0:
 
 ; CHECK:		.section	.llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.hot._Z3bazb
-; CHECK-NEXT:   .byte   2               # version
-; CHECK-NEXT:   .byte   8               # feature
-; CHECK-NEXT:   .byte   2               # number of basic block ranges
-; CHECK-NEXT:	.quad	.Lfunc_begin0   # base address
-; CHECK-NEXT:	.byte	2               # number of basic blocks
-; CHECK-NEXT:	.byte	0               # BB id
-; CHECK-NEXT:	.uleb128 .Lfunc_begin0-.Lfunc_begin0
-; CHECK-NEXT:	.uleb128 .LBB_END0_0-.Lfunc_begin0
-; CHECK-NEXT:	.byte	0
-; CHECK-NEXT:	.byte	2               # BB id
-; CHECK-NEXT:	.uleb128 .LBB0_1-.LBB_END0_0
-; CHECK-NEXT:	.uleb128 .LBB_END0_1-.LBB0_1
-; CHECK-NEXT:	.byte	5
-; CHECK-NEXT:	.quad	_Z3bazb.cold    # base address
-; CHECK-NEXT:	.byte	2               # number of basic blocks
-; CHECK-NEXT:	.byte	1               # BB id
-; CHECK-NEXT:	.uleb128 _Z3bazb.cold-_Z3bazb.cold
-; CHECK-NEXT:	.uleb128 .LBB_END0_2-_Z3bazb.cold
-; CHECK-NEXT:	.byte	8
-; CHECK-NEXT:	.byte	3               # BB id
-; CHECK-NEXT:	.uleb128 .LBB0_3-.LBB_END0_2
-; CHECK-NEXT:	.uleb128 .LBB_END0_3-.LBB0_3
+; CHECK-NEXT:   .byte   3                       # version
+; CHECK-NEXT:   .byte   40                      # feature
+; CHECK-NEXT:   .byte   2                       # number of basic block ranges
+; CHECK-NEXT:	.quad	.Lfunc_begin0           # base address
+; CHECK-NEXT:	.byte	2                       # number of basic blocks
+; CHECK-NEXT:   .byte	0                       # BB id
+; CHECK-NEXT:   .uleb128 .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT:   .byte   0                       # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB_END0_0-.Lfunc_begin0
+; CHECK-NEXT:   .byte   0
+; CHECK-NEXT:   .byte   2                       # BB id
+; CHECK-NEXT:   .uleb128 .LBB0_1-.LBB_END0_0
+; CHECK-NEXT:   .byte   0                       # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB_END0_1-.LBB0_1
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .quad	_Z3bazb.cold            # base address
+; CHECK-NEXT:   .byte	2                       # number of basic blocks
+; CHECK-NEXT:   .byte	1                       # BB id
+; CHECK-NEXT:   .uleb128 _Z3bazb.cold-_Z3bazb.cold
+; CHECK-NEXT:   .byte	1                       # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB0_2_CS0-_Z3bazb.cold
+; CHECK-NEXT:   .uleb128 .LBB_END0_2-.LBB0_2_CS0
+; CHECK-NEXT:   .byte	8
+; CHECK-NEXT:   .byte	3                       # BB id
+; CHECK-NEXT:   .uleb128 .LBB0_3-.LBB_END0_2
+; CHECK-NEXT:   .byte	1                       # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB0_3_CS0-.LBB0_3
+; CHECK-NEXT:   .uleb128 .LBB_END0_3-.LBB0_3_CS0
 ; CHECK-NEXT:	.byte	1
 
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll b/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll
index 2565db23c0249..1e8cee4dc8b65 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-with-mfs.ll
@@ -47,32 +47,43 @@ declare i32 @qux()
 ; CHECK-LABEL:  .Lfunc_begin0:
 ; CHECK-LABEL:  .LBB_END0_0:
 ; CHECK-LABEL:  .LBB0_1:
+; CHECK-LABEL:  .LBB0_1_CS0:
+; CHECK-LABEL:  .LBB0_1_CS1:
 ; CHECK-LABEL:  .LBB_END0_1:
 ; CHECK:          .section .text.split.foo,"ax",@progbits
 ; CHECK-LABEL:  foo.cold:
+; CHECK-LABEL:  .LBB0_2_CS0:
+; CHECK-LABEL:  .LBB0_2_CS1:
 ; CHECK-LABEL:  .LBB_END0_2:
 ; CHECK-LABEL:  .Lfunc_end0:
 
 ; CHECK:                .section        .llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text.hot.foo
-; CHECK-NEXT:   .byte   2               # version
-; BASIC-NEXT:   .byte   8               # feature
-; PGO-NEXT:     .byte   15              # feature
+; CHECK-NEXT:   .byte   3               # version
+; BASIC-NEXT:   .byte   40              # feature
+; PGO-NEXT:     .byte   47              # feature
 ; CHECK-NEXT:   .byte   2               # number of basic block ranges
 ; CHECK-NEXT:   .quad   .Lfunc_begin0   # base address
 ; CHECK-NEXT:   .byte   2               # number of basic blocks
 ; CHECK-NEXT:   .byte   0               # BB id
 ; CHECK-NEXT:   .uleb128 .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT:   .byte   0               # number of callsites
 ; CHECK-NEXT:   .uleb128 .LBB_END0_0-.Lfunc_begin0
 ; CHECK-NEXT:   .byte   8
 ; CHECK-NEXT:   .byte   1               # BB id
 ; CHECK-NEXT:   .uleb128 .LBB0_1-.LBB_END0_0
-; CHECK-NEXT:   .uleb128 .LBB_END0_1-.LBB0_1
+; CHECK-NEXT:   .byte   2               # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB0_1_CS0-.LBB0_1
+; CHECK-NEXT:   .uleb128 .LBB0_1_CS1-.LBB0_1_CS0
+; CHECK-NEXT:   .uleb128 .LBB_END0_1-.LBB0_1_CS1
 ; CHECK-NEXT:   .byte   3
-; CHECK-NEXT:   .quad   foo.cold    # base address
+; CHECK-NEXT:   .quad   foo.cold        # base address
 ; CHECK-NEXT:   .byte   1               # number of basic blocks
 ; CHECK-NEXT:   .byte   2               # BB id
 ; CHECK-NEXT:   .uleb128 foo.cold-foo.cold
-; CHECK-NEXT:   .uleb128 .LBB_END0_2-foo.cold
+; CHECK-NEXT:   .byte   2               # number of callsites
+; CHECK-NEXT:   .uleb128 .LBB0_2_CS0-foo.cold
+; CHECK-NEXT:   .uleb128 .LBB0_2_CS1-.LBB0_2_CS0
+; CHECK-NEXT:   .uleb128 .LBB_END0_2-.LBB0_2_CS1
 ; CHECK-NEXT:   .byte   3
 
 ;; PGO Analysis Map
@@ -84,6 +95,6 @@ declare i32 @qux()
 ; PGO-NEXT:    .byte   2                                  # successor BB ID
 ; PGO-NEXT:    .byte   0                                  # successor branch probability
 ; PGO-NEXT:    .ascii  "\200\200\200\374\377\377\377\037" # basic block frequency
-; PGO-NEXT:    .byte   0		                       # basic block successor count
+; PGO-NEXT:    .byte   0		                  # basic block successor count
 ; PGO-NEXT:    .ascii  "\200\200\200\004"                 # basic block frequency
 ; PGO-NEXT:    .byte   0                                  # basic block successor count
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map.ll b/llvm/test/CodeGen/X86/basic-block-address-map.ll
index 4f12258eeeea0..5c8f3a645c34a 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map.ll
@@ -40,8 +40,10 @@ declare i32 @__gxx_personality_v0(...)
 ; CHECK-LABEL:	.Lfunc_begin0:
 ; CHECK-LABEL:	.LBB_END0_0:
 ; CHECK-LABEL:	.LBB0_1:
+; CHECK-LABEL:  .LBB0_1_CS0:
 ; CHECK-LABEL:	.LBB_END0_1:
 ; CHECK-LABEL:	.LBB0_2:
+; CHECK-LABEL:  .LBB0_2_CS0:
 ; CHECK-LABEL:	.LBB_END0_2:
 ; CHECK-LABEL:	.LBB0_3:
 ; CHECK-LABEL:	.LBB_END0_3:
@@ -50,31 +52,39 @@ declare i32 @__gxx_personality_v0(...)
 ; UNIQ:			.section	.llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text._Z3bazb{{$}}
 ;; Verify that with -unique-section-names=false, the unique id of the text section gets assigned to the llvm_bb_addr_map section.
 ; NOUNIQ:		.section	.llvm_bb_addr_map,"o",@llvm_bb_addr_map,.text,unique,1
-; CHECK-NEXT:   .byte   2		# version
-; CHECK-NEXT:   .byte   0		# feature
+; CHECK-NEXT:   .byte   3		# version
+; CHECK-NEXT:   .byte   32		# feature
 ; CHECK-NEXT:	.quad	.Lfunc_begin0	# function address
 ; CHECK-NEXT:	.byte	6		# number of basic blocks
 ; CHECK-NEXT:   .byte	0		# BB id
 ; CHECK-NEXT:	.uleb128 .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT:   .byte	0		# number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_0-.Lfunc_begin0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:   .byte	1		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_1-.LBB_END0_0
-; CHECK-NEXT:	.uleb128 .LBB_END0_1-.LBB0_1
+; CHECK-NEXT:   .byte	1		# number of callsites
+; CHECK-NEXT:	.uleb128 .LBB0_1_CS0-.LBB0_1
+; CHECK-NEXT:	.uleb128 .LBB_END0_1-.LBB0_1_CS0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:   .byte	3		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_2-.LBB_END0_1
-; CHECK-NEXT:	.uleb128 .LBB_END0_2-.LBB0_2
+; CHECK-NEXT:   .byte	1		# number of callsites
+; CHECK-NEXT:	.uleb128 .LBB0_2_CS0-.LBB0_2
+; CHECK-NEXT:	.uleb128 .LBB_END0_2-.LBB0_2_CS0
 ; CHECK-NEXT:	.byte	8
 ; CHECK-NEXT:   .byte	4		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_3-.LBB_END0_2
+; CHECK-NEXT:   .byte	0		# number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_3-.LBB0_3
 ; CHECK-NEXT:	.byte	16
 ; CHECK-NEXT:   .byte	5		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_4-.LBB_END0_3
+; CHECK-NEXT:   .byte	0		# number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_4-.LBB0_4
 ; CHECK-NEXT:	.byte	1
 ; CHECK-NEXT:   .byte	2		# BB id
 ; CHECK-NEXT:	.uleb128 .LBB0_5-.LBB_END0_4
+; CHECK-NEXT:   .byte	0		# number of callsites
 ; CHECK-NEXT:	.uleb128 .LBB_END0_5-.LBB0_5
 ; CHECK-NEXT:	.byte	5

>From 91d11426b5291077ca681e1b34b38aefc3d03182 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 2 Jul 2025 18:22:46 +0000
Subject: [PATCH 2/8] Updated Extensions.rst.

---
 llvm/docs/Extensions.rst | 59 ++++++++++++++++++++++++++++++++++------
 1 file changed, 50 insertions(+), 9 deletions(-)

diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst
index f92eac3209c21..fd822b3477e88 100644
--- a/llvm/docs/Extensions.rst
+++ b/llvm/docs/Extensions.rst
@@ -405,31 +405,72 @@ This section is emitted with ``-basic-block-address-map`` and will contain
 a BB address map table for every function.
 
 The ``SHT_LLVM_BB_ADDR_MAP`` type provides backward compatibility to allow
-reading older versions of the BB address map generated by older compilers. Each
-function entry starts with a version byte which specifies the encoding version
-to use. The following versioning schemes are currently supported.
+reading older versions of the BB address map generated by older compilers (up to
+two years old). Each function entry starts with a version byte which specifies
+the encoding version to use. This follows by a feature byte which specifies the
+features specific to this particular entry. The function base address is stored
+as a full address. Other addresses in the entry (block begin and end addresses
+and callsite addresses) are stored in a running-offset fashion, as offset
+relative to the prior address.
 
-Version 1 (newest): basic block address offsets are computed relative to the end
-of previous blocks.
+The following versioning schemes are currently supported (newer versions support
+features of the older versions).
+
+Version 3 (newest): Capable of encoding callsite offsets. Enabled by the 6th bit
+of the feature byte.
 
 Example:
 
 .. code-block:: gas
 
   .section  ".llvm_bb_addr_map","",@llvm_bb_addr_map
-  .byte     1                             # version number
-  .byte     0                             # feature byte (reserved for future use)
+  .byte     3                             # version number
+  .byte     32                            # feature byte
   .quad     .Lfunc_begin0                 # address of the function
   .byte     2                             # number of basic blocks
   # BB record for BB_0
-   .uleb128  .Lfunc_beign0-.Lfunc_begin0  # BB_0 offset relative to function entry (always zero)
+   .byte     0                            # BB_0 ID
+   .uleb128  .Lfunc_begin0-.Lfunc_begin0  # BB_0 offset relative to function entry (always zero)
+   .byte     0                            # number of callsites in this block
    .uleb128  .LBB_END0_0-.Lfunc_begin0    # BB_0 size
    .byte     x                            # BB_0 metadata
   # BB record for BB_1
+   .byte     1                            # BB_1 ID
    .uleb128  .LBB0_1-.LBB_END0_0          # BB_1 offset relative to the end of last block (BB_0).
-   .uleb128  .LBB_END0_1-.LBB0_1          # BB_1 size
+   .byte     2                            # number of callsites in this block
+   .uleb128  .LBB0_1_CS0-.LBB0_1          # offset of callsite relative to the previous offset (.LBB0_1)
+   .uleb128  .LBB0_1_CS1-.LBB0_1_CS0      # offset of callsite relative to the previous offset (.LBB0_1_CS0)
+   .uleb128  .LBB_END0_1-.LBB0_1_CS1      # BB_1 size offset (Offset of the block end relative to the previous offset).
    .byte     y                            # BB_1 metadata
 
+Version 2: Capable of encoding split functions. Enabled by the 4th bit of the
+feature byte. The base address of each split range is stored as a full address.
+The first range corresponds to the function entry.
+
+Example:
+
+.. code-block:: gas
+  .section  ".llvm_bb_addr_map","",@llvm_bb_addr_map
+  .byte     2                             # version number
+  .byte     8                             # feature byte
+  .byte     2                             # number of basic block ranges
+  # 1st BB range (corresponding to the function entry)
+   .quad     .Lfunc_begin0                 # base address
+   .byte     1                             # number of basic blocks in this range
+    # BB record for BB_0
+    .byte     0                            # BB_0 ID
+    .uleb128  .Lfunc_begin0-.Lfunc_begin0  # BB_0 offset relative to function entry (always zero)
+    .uleb128  .LBB_END0_0-.Lfunc_begin0    # BB_0 size
+    .byte     x                            # BB_0 metadata
+  # 2nd BB range
+   .quad     func.part.1
+   .byte     1                             # number of basic blocks in this range
+    # BB record for BB_1
+    .byte     1                            # BB_1 ID
+    .uleb128  func.part.1-func.part.1      # BB_1 offset relative to the range begin (always zero)
+    .uleb128  .LBB_END0_1-func.part.1      # BB_1 size
+    .byte     1                            # BB_1 metadata
+
 PGO Analysis Map
 """"""""""""""""
 

>From 42cfc42e4e344e34fc45bd6adfd242c1954f4f98 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 2 Jul 2025 20:02:41 +0000
Subject: [PATCH 3/8] Update ELFTypes.h to reflect that callsite offset refers
 to the beginning of the call.

---
 llvm/include/llvm/Object/ELFTypes.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index d7a468f1116d7..d2d0f22309fd0 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -917,8 +917,8 @@ struct BBAddrMap {
     uint32_t Size = 0;   // Size of the basic block.
     Metadata MD = {false, false, false, false,
                    false}; // Metdata for this basic block.
-    // Offsets of callsites (end of call instructions), relative to the basic
-    // block start.
+    // Offsets of callsites (beginning of call instructions), relative to the
+    // basic block start.
     SmallVector<uint32_t, 1> CallsiteOffsets;
 
     BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD,

>From 8927b7b042fad342a9fa44701a9917d9bc43b521 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 2 Jul 2025 21:56:38 +0000
Subject: [PATCH 4/8] Remove LLVM_ABI from function declaration.

---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 9f4c078cd45a1..24a3fdc0201eb 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -735,8 +735,9 @@ class MachineBasicBlock
   /// Returns the MCSymbol marking the end of this basic block.
   LLVM_ABI MCSymbol *getEndSymbol() const;
 
-  /// Returns a temporary MCSymbol marking the beginning of a callsite.
-  LLVM_ABI MCSymbol *createCallsiteSymbol() const;
+  /// Returns a temporary MCSymbol marking the beginning of a callsite, and
+  /// appends it to `CallsiteSymbols`.
+  MCSymbol *createCallsiteSymbol() const;
 
   /// Returns true if this block may have an INLINEASM_BR (overestimate, by
   /// checking if any of the successors are indirect targets of any inlineasm_br

>From 3ccff1e4ae9fcae6ecca7a5013bb3f5499ddcd73 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 7 Jul 2025 18:24:38 +0000
Subject: [PATCH 5/8] Fix Extensions.rst issue.

---
 llvm/docs/Extensions.rst | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/Extensions.rst b/llvm/docs/Extensions.rst
index fd822b3477e88..bad72c6ca8295 100644
--- a/llvm/docs/Extensions.rst
+++ b/llvm/docs/Extensions.rst
@@ -407,11 +407,11 @@ a BB address map table for every function.
 The ``SHT_LLVM_BB_ADDR_MAP`` type provides backward compatibility to allow
 reading older versions of the BB address map generated by older compilers (up to
 two years old). Each function entry starts with a version byte which specifies
-the encoding version to use. This follows by a feature byte which specifies the
-features specific to this particular entry. The function base address is stored
-as a full address. Other addresses in the entry (block begin and end addresses
-and callsite addresses) are stored in a running-offset fashion, as offset
-relative to the prior address.
+the encoding version to use. This is followed by a feature byte which specifies
+the features specific to this particular entry. The function base address is
+stored as a full address. Other addresses in the entry (block begin and end
+addresses and callsite addresses) are stored in a running-offset fashion, as
+offsets relative to prior addresses.
 
 The following versioning schemes are currently supported (newer versions support
 features of the older versions).
@@ -450,6 +450,7 @@ The first range corresponds to the function entry.
 Example:
 
 .. code-block:: gas
+
   .section  ".llvm_bb_addr_map","",@llvm_bb_addr_map
   .byte     2                             # version number
   .byte     8                             # feature byte

>From f473eb570c42d38a603010823631dce93b09c7a5 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Mon, 7 Jul 2025 22:59:21 +0000
Subject: [PATCH 6/8] Remove unused AsmPrinter field.

---
 llvm/include/llvm/CodeGen/AsmPrinter.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 9defc88a65f23..efd9b77ebc68a 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -173,8 +173,6 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   // function. This is used to calculate the size of the BB section.
   MCSymbol *CurrentSectionBeginSym = nullptr;
 
-  bool HasAnyCallsitesForBBAddrMap = false;
-
   /// This map keeps track of which symbol is being used for the specified basic
   /// block's address of label.
   std::unique_ptr<AddrLabelMap> AddrLabelSymbols;

>From 997717a877233416d9ef364ef2f7cef6d1a29382 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 8 Jul 2025 02:10:37 +0000
Subject: [PATCH 7/8] Move the implementation logic to AsmPrinter.

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        | 12 ++++++++-
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 10 --------
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 25 +++++++++++--------
 llvm/lib/CodeGen/MachineBasicBlock.cpp        |  9 -------
 4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index efd9b77ebc68a..4c366c750c2ec 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -135,6 +135,12 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
+  /// Vector of symbols marking the position of callsites in each basic block.
+  /// The callsite symbols of each block are stored in the order they appear
+  /// in that block.
+  DenseMap<const MachineBasicBlock *, SmallVector<MCSymbol *, 1>>
+      CurrentFnCallsiteSymbols;
+
   /// Provides the profile information for constants.
   const StaticDataProfileInfo *SDPI = nullptr;
 
@@ -295,6 +301,10 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// to emit them as well, return the whole set.
   ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
 
+  /// Creates a new symbol to be used for the beginning of a callsite at th
+  /// specified basic block.
+  MCSymbol *createCallsiteSymbol(const MachineBasicBlock &MBB);
+
   /// If the specified function has had any references to address-taken blocks
   /// generated, but the block got deleted, return the symbol now so we can
   /// emit it.  This prevents emitting a reference to a symbol that has no
@@ -426,7 +436,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
 
   void emitStackUsage(const MachineFunction &MF);
 
-  void emitBBAddrMapSection(const MachineFunction &MF, bool HasCalls);
+  void emitBBAddrMapSection(const MachineFunction &MF);
 
   void emitKCFITrapEntry(const MachineFunction &MF, const MCSymbol *Symbol);
   virtual void emitKCFITypeId(const MachineFunction &MF);
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 24a3fdc0201eb..bcf2a602189c1 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -242,10 +242,6 @@ class MachineBasicBlock
   /// calculate the size of the basic block, or the BB section ending with it.
   mutable MCSymbol *CachedEndMCSymbol = nullptr;
 
-  /// Vector of symbols marking the position of callsites in the basic
-  /// block, stored in the order they appear in the basic block.
-  mutable SmallVector<MCSymbol *, 1> CallsiteSymbols;
-
   // Intrusive list support
   MachineBasicBlock() = default;
 
@@ -329,12 +325,6 @@ class MachineBasicBlock
   /// its label be emitted.
   void setLabelMustBeEmitted() { LabelMustBeEmitted = true; }
 
-  /// Returns the symbols marking callsites in the order they appear in the
-  /// basic block.
-  const SmallVectorImpl<MCSymbol *> &getCallsiteSymbols() const {
-    return CallsiteSymbols;
-  }
-
   /// Return the MachineFunction containing this basic block.
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 39f7be6b13250..11ef0e365faa6 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1428,11 +1428,11 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
           HasCalls};
 }
 
-void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF,
-                                      bool HasCalls) {
+void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
   MCSection *BBAddrMapSection =
       getObjFileLowering().getBBAddrMapSection(*MF.getSection());
   assert(BBAddrMapSection && ".llvm_bb_addr_map section is not initialized.");
+  bool HasCalls = !CurrentFnCallsiteSymbols.empty();
 
   const MCSymbol *FunctionSymbol = getFunctionBegin();
 
@@ -1498,7 +1498,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF,
       const MCSymbol *CurrentLabel = MBBSymbol;
       if (HasCalls) {
         const SmallVectorImpl<MCSymbol *> &CallsiteSymbols =
-            MBB.getCallsiteSymbols();
+            CurrentFnCallsiteSymbols.lookup(&MBB);
         OutStreamer->AddComment("number of callsites");
         OutStreamer->emitULEB128IntValue(CallsiteSymbols.size());
         for (const MCSymbol *CallsiteSymbol : CallsiteSymbols) {
@@ -1815,7 +1815,6 @@ void AsmPrinter::emitFunctionBody() {
 
   // Print out code for the function.
   bool HasAnyRealCode = false;
-  bool HasCalls = false;
   int NumInstsInFunction = 0;
   bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
 
@@ -1842,11 +1841,8 @@ void AsmPrinter::emitFunctionBody() {
           !MI.isDebugInstr()) {
         HasAnyRealCode = true;
       }
-      if (MI.isCall()) {
-        HasCalls = true;
-        if (MF->getTarget().Options.BBAddrMap)
-          OutStreamer->emitLabel(MBB.createCallsiteSymbol());
-      }
+      if (MI.isCall() && MF->getTarget().Options.BBAddrMap)
+        OutStreamer->emitLabel(createCallsiteSymbol(MBB));
 
       // If there is a pre-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPreInstrSymbol())
@@ -2133,7 +2129,7 @@ void AsmPrinter::emitFunctionBody() {
   // BB labels are requested for this function. Skip empty functions.
   if (HasAnyRealCode) {
     if (MF->getTarget().Options.BBAddrMap)
-      emitBBAddrMapSection(*MF, HasCalls);
+      emitBBAddrMapSection(*MF);
     else if (PgoAnalysisMapFeatures.getBits() != 0)
       MF->getContext().reportWarning(
           SMLoc(), "pgo-analysis-map is enabled for function " + MF->getName() +
@@ -2794,6 +2790,14 @@ MCSymbol *AsmPrinter::getMBBExceptionSym(const MachineBasicBlock &MBB) {
   return Res.first->second;
 }
 
+MCSymbol *AsmPrinter::createCallsiteSymbol(const MachineBasicBlock &MBB) {
+  MCContext &Ctx = MF->getContext();
+  MCSymbol *Sym = Ctx.createTempSymbol("BB" + Twine(MF->getFunctionNumber()) +
+                                       "_" + Twine(MBB.getNumber()) + "_CS");
+  CurrentFnCallsiteSymbols[&MBB].push_back(Sym);
+  return Sym;
+}
+
 void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   this->MF = &MF;
   const Function &F = MF.getFunction();
@@ -2828,6 +2832,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
   CurrentFnBegin = nullptr;
   CurrentFnBeginLocal = nullptr;
   CurrentSectionBeginSym = nullptr;
+  CurrentFnCallsiteSymbols.clear();
   MBBSectionRanges.clear();
   MBBSectionExceptionSyms.clear();
   bool NeedsLocalForSize = MAI->needsLocalForSize();
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 97f992c75d2cb..c3c5a0f5102d7 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -112,15 +112,6 @@ MCSymbol *MachineBasicBlock::getEndSymbol() const {
   return CachedEndMCSymbol;
 }
 
-MCSymbol *MachineBasicBlock::createCallsiteSymbol() const {
-  const MachineFunction *MF = getParent();
-  MCContext &Ctx = MF->getContext();
-  CallsiteSymbols.push_back(
-      Ctx.createTempSymbol("BB" + Twine(MF->getFunctionNumber()) + "_" +
-                           Twine(getNumber()) + "_CS"));
-  return CallsiteSymbols.back();
-}
-
 raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   MBB.print(OS);
   return OS;

>From dfc089914646396eae6934bee38dbf54cecc1a29 Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Tue, 8 Jul 2025 21:14:47 +0000
Subject: [PATCH 8/8] Revert change in MachineBasicBlock.h

---
 llvm/include/llvm/CodeGen/AsmPrinter.h        | 5 +++--
 llvm/include/llvm/CodeGen/MachineBasicBlock.h | 4 ----
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    | 3 +--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 4c366c750c2ec..faab2503ced50 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -135,7 +135,8 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// default, this is equal to CurrentFnSym.
   MCSymbol *CurrentFnSymForSize = nullptr;
 
-  /// Vector of symbols marking the position of callsites in each basic block.
+  /// Vector of symbols marking the position of callsites in the current
+  /// function, keyed by their containing basic block.
   /// The callsite symbols of each block are stored in the order they appear
   /// in that block.
   DenseMap<const MachineBasicBlock *, SmallVector<MCSymbol *, 1>>
@@ -301,7 +302,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
   /// to emit them as well, return the whole set.
   ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
 
-  /// Creates a new symbol to be used for the beginning of a callsite at th
+  /// Creates a new symbol to be used for the beginning of a callsite at the
   /// specified basic block.
   MCSymbol *createCallsiteSymbol(const MachineBasicBlock &MBB);
 
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index bcf2a602189c1..3d2da01f2c856 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -725,10 +725,6 @@ class MachineBasicBlock
   /// Returns the MCSymbol marking the end of this basic block.
   LLVM_ABI MCSymbol *getEndSymbol() const;
 
-  /// Returns a temporary MCSymbol marking the beginning of a callsite, and
-  /// appends it to `CallsiteSymbols`.
-  MCSymbol *createCallsiteSymbol() const;
-
   /// Returns true if this block may have an INLINEASM_BR (overestimate, by
   /// checking if any of the successors are indirect targets of any inlineasm_br
   /// in the function).
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 11ef0e365faa6..a42b14d9d329d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1497,8 +1497,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
       emitLabelDifferenceAsULEB128(MBBSymbol, PrevMBBEndSymbol);
       const MCSymbol *CurrentLabel = MBBSymbol;
       if (HasCalls) {
-        const SmallVectorImpl<MCSymbol *> &CallsiteSymbols =
-            CurrentFnCallsiteSymbols.lookup(&MBB);
+        auto CallsiteSymbols = CurrentFnCallsiteSymbols.lookup(&MBB);
         OutStreamer->AddComment("number of callsites");
         OutStreamer->emitULEB128IntValue(CallsiteSymbols.size());
         for (const MCSymbol *CallsiteSymbol : CallsiteSymbols) {



More information about the llvm-commits mailing list