[llvm] [llvm][AsmPrinter] Call graph section format. (PR #159866)

Prabhu Rajasekaran via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 19 15:53:10 PDT 2025


https://github.com/Prabhuk created https://github.com/llvm/llvm-project/pull/159866

Make the .callgraph section's layout more space-efficient, and document
the layout of the section.


>From baff0b90be4453c3e4962363c4b65fd214ac894c Mon Sep 17 00:00:00 2001
From: prabhukr <prabhukr at google.com>
Date: Fri, 19 Sep 2025 15:01:59 -0700
Subject: [PATCH] [llvm][AsmPrinter] Call graph section format.

Make .callgraph section's layout efficient in space. Document the layout
of the section.
---
 llvm/docs/CallGraphSection.md              | 26 +++++++
 llvm/include/llvm/CodeGen/AsmPrinter.h     | 17 ++---
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 88 ++++++++++------------
 3 files changed, 75 insertions(+), 56 deletions(-)
 create mode 100644 llvm/docs/CallGraphSection.md

diff --git a/llvm/docs/CallGraphSection.md b/llvm/docs/CallGraphSection.md
new file mode 100644
index 0000000000000..eb673b8db0bd5
--- /dev/null
+++ b/llvm/docs/CallGraphSection.md
@@ -0,0 +1,26 @@
+# .callgraph Section Layout
+
+The `.callgraph` section is used to store call graph information for each function, which can be used for post-link analyses and optimizations. The section contains a series of records, with each record corresponding to a single function.
+
+## Per Function Record Layout
+
+Each record in the `.callgraph` section has the following binary layout:
+
+| Field                        | Type          | Size (bits) | Description                                                                                             |
+| ---------------------------- | ------------- | ----------- | ------------------------------------------------------------------------------------------------------- |
+| Format Version               | `uint32_t`    | 32          | The version of the record format. The current version is 0.                                             |
+| Function Entry PC            | `uintptr_t`   | 32/64       | The address of the function's entry point.                                                              |
+| Function Kind                | `uint8_t`     | 8           | An enum indicating the function's properties (e.g., if it's an indirect call target).                   |
+| Function Type ID             | `uint64_t`    | 64          | The type ID of the function. This field is **only** present if `Function Kind` is `INDIRECT_TARGET_KNOWN_TID`. |
+| Number of Indirect Callsites | `uint32_t`    | 32          | The number of indirect call sites within the function.                                                  |
+| Indirect Callsites Array     | `Callsite[]`  | Variable    | An array of `Callsite` records, with a length of `Number of Indirect Callsites`.                        |
+
+
+### Indirect Callsite Record Layout
+
+Each record in the `Indirect Callsites Array` has the following layout:
+
+| Field             | Type        | Size (bits) | Description                               |
+| ----------------- | ----------- | ----------- | ----------------------------------------- |
+| Type ID           | `uint64_t`  | 64          | The type ID of the indirect call target.  |
+| Callsite PC       | `uintptr_t` | 32/64       | The address of the indirect call site.    |
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 71317619098ad..c76693fb9a95d 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -192,15 +192,20 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
 
   /// Store symbols and type identifiers used to create callgraph section
   /// entries related to a function.
-  struct FunctionInfo {
+  struct FunctionCallGraphInfo {
     /// Numeric type identifier used in callgraph section for indirect calls
     /// and targets.
     using CGTypeId = uint64_t;
 
+    /// Pairs of (callee type identifier, callsite label). One entry is added
+    /// for each indirect callsite in the function that gets a label emitted.
+    SmallVector<std::pair<CGTypeId, MCSymbol *>> IndirectCallsites;
+  };
+
     /// Enumeration of function kinds, and their mapping to function kind values
     /// stored in callgraph section entries.
     /// Must match the enum in llvm/tools/llvm-objdump/llvm-objdump.cpp.
-    enum class FunctionKind : uint64_t {
+    enum class FunctionKind : uint8_t {
       /// Function cannot be target to indirect calls.
       NOT_INDIRECT_TARGET = 0,
 
@@ -210,13 +215,7 @@ class LLVM_ABI AsmPrinter : public MachineFunctionPass {
       /// Function may be target to indirect calls and its type id is known.
       INDIRECT_TARGET_KNOWN_TID = 2,
     };
-
-    /// Map type identifiers to callsite labels. Labels are generated for each
-    /// indirect callsite in the function.
-    SmallVector<std::pair<CGTypeId, MCSymbol *>> CallSiteLabels;
-  };
-
-  enum CallGraphSectionFormatVersion : uint64_t {
+  enum CallGraphSectionFormatVersion : uint32_t {
     V_0 = 0,
   };
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cd14a4f57f760..e4edcafc666c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1672,7 +1672,7 @@ static ConstantInt *extractNumericCGTypeId(const Function &F) {
 
 /// Emits .callgraph section.
 void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
-                                      FunctionInfo &FuncInfo) {
+                                      FunctionCallGraphInfo &FuncCGInfo) {
   if (!MF.getTarget().Options.EmitCallGraphSection)
     return;
 
@@ -1683,55 +1683,49 @@ void AsmPrinter::emitCallGraphSection(const MachineFunction &MF,
   OutStreamer->pushSection();
   OutStreamer->switchSection(FuncCGSection);
 
-  // Emit format version number.
-  OutStreamer->emitInt64(CallGraphSectionFormatVersion::V_0);
-
-  // Emit function's self information, which is composed of:
-  //  1) FunctionEntryPc
-  //  2) FunctionKind: Whether the function is indirect target, and if so,
-  //     whether its type id is known.
-  //  3) FunctionTypeId: Emit only when the function is an indirect target
-  //     and its type id is known.
-
-  // Emit function entry pc.
-  const MCSymbol *FunctionSymbol = getFunctionBegin();
-  OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+  auto EmitFunctionKindAndTypeId = [&]() {
+    const Function &F = MF.getFunction();
+    // If this function has external linkage or has its address taken and
+    // it is not a callback, then anything could call it.
+    bool IsIndirectTarget = !F.hasLocalLinkage() ||
+                            F.hasAddressTaken(nullptr,
+                                              /*IgnoreCallbackUses=*/true,
+                                              /*IgnoreAssumeLikeCalls=*/true,
+                                              /*IgnoreLLVMUsed=*/false);
+    if (!IsIndirectTarget) {
+      OutStreamer->emitInt8(static_cast<uint8_t>(FunctionKind::NOT_INDIRECT_TARGET));
+      return;
+    }
 
-  // If this function has external linkage or has its address taken and
-  // it is not a callback, then anything could call it.
-  const Function &F = MF.getFunction();
-  bool IsIndirectTarget =
-      !F.hasLocalLinkage() || F.hasAddressTaken(nullptr,
-                                                /*IgnoreCallbackUses=*/true,
-                                                /*IgnoreAssumeLikeCalls=*/true,
-                                                /*IgnoreLLVMUsed=*/false);
-
-  // FIXME: FunctionKind takes a few values but emitted as a 64-bit value.
-  // Can be optimized to occupy 2 bits instead.
-  // Emit function kind, and type id if available.
-  if (!IsIndirectTarget) {
-    OutStreamer->emitInt64(
-        static_cast<uint64_t>(FunctionInfo::FunctionKind::NOT_INDIRECT_TARGET));
-  } else {
     if (const auto *TypeId = extractNumericCGTypeId(F)) {
-      OutStreamer->emitInt64(static_cast<uint64_t>(
-          FunctionInfo::FunctionKind::INDIRECT_TARGET_KNOWN_TID));
+      OutStreamer->emitInt8(static_cast<uint8_t>(FunctionKind::INDIRECT_TARGET_KNOWN_TID));
       OutStreamer->emitInt64(TypeId->getZExtValue());
-    } else {
-      OutStreamer->emitInt64(static_cast<uint64_t>(
-          FunctionInfo::FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
+      return;
     }
-  }
+    OutStreamer->emitInt8(static_cast<uint8_t>(FunctionKind::INDIRECT_TARGET_UNKNOWN_TID));
+  };
 
-  // Emit callsite labels, where each element is a pair of type id and
-  // indirect callsite pc.
-  const auto &CallSiteLabels = FuncInfo.CallSiteLabels;
-  OutStreamer->emitInt64(CallSiteLabels.size());
-  for (const auto &[TypeId, Label] : CallSiteLabels) {
+  // Emit function's call graph information.
+  // 1) CallGraphSectionFormatVersion
+  // 2) Function entry PC.
+  // 3) FunctionKind: Whether the function is indirect target, and if so,
+  //    whether its type id is known.
+  // 4) FunctionTypeID if the function is indirect target, and its type id is
+  //    known.
+  // 5) Number of indirect callsites.
+  // 6) For each indirect callsite, the callee's type id and the callsite PC.
+
+  OutStreamer->emitInt32(CallGraphSectionFormatVersion::V_0);
+  const MCSymbol *FunctionSymbol = getFunctionBegin();
+  OutStreamer->emitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+  EmitFunctionKindAndTypeId();
+  const auto &IndirectCallsites = FuncCGInfo.IndirectCallsites;
+  OutStreamer->emitInt32(IndirectCallsites.size());
+  for (const auto &[TypeId, Label] : IndirectCallsites) {
     OutStreamer->emitInt64(TypeId);
     OutStreamer->emitSymbolValue(Label, TM.getProgramPointerSize());
   }
-  FuncInfo.CallSiteLabels.clear();
+  FuncCGInfo.IndirectCallsites.clear();
 
   OutStreamer->popSection();
 }
@@ -1867,7 +1861,7 @@ static StringRef getMIMnemonic(const MachineInstr &MI, MCStreamer &Streamer) {
 }
 
 void AsmPrinter::emitIndirectCalleeLabels(
-    FunctionInfo &FuncInfo,
+    FunctionCallGraphInfo &FuncCGInfo,
     const MachineFunction::CallSiteInfoMap &CallSitesInfoMap,
     const MachineInstr &MI) {
   // Only indirect calls have type identifiers set.
@@ -1879,7 +1873,7 @@ void AsmPrinter::emitIndirectCalleeLabels(
     MCSymbol *S = MF->getContext().createTempSymbol();
     OutStreamer->emitLabel(S);
     uint64_t CalleeTypeIdVal = CalleeTypeId->getZExtValue();
-    FuncInfo.CallSiteLabels.emplace_back(CalleeTypeIdVal, S);
+    FuncCGInfo.IndirectCallsites.emplace_back(CalleeTypeIdVal, S);
   }
 }
 
@@ -1929,7 +1923,7 @@ void AsmPrinter::emitFunctionBody() {
     MBBSectionRanges[MF->front().getSectionID()] =
         MBBSectionRange{CurrentFnBegin, nullptr};
 
-  FunctionInfo FuncInfo;
+  FunctionCallGraphInfo FuncCGInfo;
   const auto &CallSitesInfoMap = MF->getCallSitesInfo();
   for (auto &MBB : *MF) {
     // Print a label for the basic block.
@@ -2066,7 +2060,7 @@ void AsmPrinter::emitFunctionBody() {
         OutStreamer->emitLabel(createCallsiteEndSymbol(MBB));
 
       if (TM.Options.EmitCallGraphSection && MI.isCall())
-        emitIndirectCalleeLabels(FuncInfo, CallSitesInfoMap, MI);
+        emitIndirectCalleeLabels(FuncCGInfo, CallSitesInfoMap, MI);
 
       // If there is a post-instruction symbol, emit a label for it here.
       if (MCSymbol *S = MI.getPostInstrSymbol())
@@ -2248,7 +2242,7 @@ void AsmPrinter::emitFunctionBody() {
   emitStackSizeSection(*MF);
 
   // Emit section containing call graph metadata.
-  emitCallGraphSection(*MF, FuncInfo);
+  emitCallGraphSection(*MF, FuncCGInfo);
 
   // Emit .su file containing function stack size information.
   emitStackUsage(*MF);



More information about the llvm-commits mailing list