[llvm] [llvm-objdump] Handle .callgraph section (PR #151009)

Paul Kirth via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 28 13:48:25 PDT 2025


================
@@ -3131,6 +3225,207 @@ void Dumper::printSymbol(const SymbolRef &Symbol,
   outs() << ' ' << SymName << '\n';
 }
 
+static void printCallGraphInfo(ObjectFile *Obj) {
+  // Get function info through disassembly.
+  disassembleObject(Obj, /*InlineRelocs=*/false, outs());
+
+  // Get the .callgraph section.
+  StringRef CallGraphSectionName(".callgraph");
+  std::optional<object::SectionRef> CallGraphSection;
+  for (auto Sec : ToolSectionFilter(*Obj)) {
+    StringRef Name;
+    if (Expected<StringRef> NameOrErr = Sec.getName())
+      Name = *NameOrErr;
+    else
+      consumeError(NameOrErr.takeError());
+
+    if (Name == CallGraphSectionName) {
+      CallGraphSection = Sec;
+      break;
+    }
+  }
+  if (!CallGraphSection)
+    reportWarning("there is no .callgraph section", Obj->getFileName());
+
+  // Map type id to indirect call sites.
+  MapVector<uint64_t, SmallVector<uint64_t>> TypeIdToIndirCallSites;
+  // Map type id to indirect targets.
+  MapVector<uint64_t, SmallVector<uint64_t>> TypeIdToIndirTargets;
+  // Instructions that are not indirect calls but have a type id are ignored.
+  uint64_t IgnoredICallIdCount = 0;
+  // Number of valid indirect calls with type ids.
+  uint64_t ICallWithTypeIdCount = 0;
+  if (CallGraphSection) {
+    StringRef CGSecContents = unwrapOrError(
+        CallGraphSection.value().getContents(), Obj->getFileName());
+    // TODO: some entries are written in pointer size. are they always 64-bit?
+    if (CGSecContents.size() % sizeof(uint64_t))
+      reportError(Obj->getFileName(), "Malformed .callgraph section.");
+
+    size_t Size = CGSecContents.size() / sizeof(uint64_t);
+    auto *It = reinterpret_cast<const uint64_t *>(CGSecContents.data());
+    const auto *const End = It + Size;
+
+    auto CGHasNext = [&]() { return It < End; };
+    auto CGNext = [&]() -> uint64_t {
+      if (!CGHasNext())
+        reportError(Obj->getFileName(), "Malformed .callgraph section.");
+      return *It++;
+    };
+
+    // Parse the content
+    while (CGHasNext()) {
+      // Format version number.
+      uint64_t FormatVersionNumber = CGNext();
+      if (FormatVersionNumber != 0)
+        reportError(Obj->getFileName(),
+                    "Unknown format version in .callgraph section.");
+
+      // Function entry pc.
+      uint64_t FuncEntryPc = CGNext();
+      if (!FuncInfo.count(FuncEntryPc))
+        reportError(Obj->getFileName(),
+                    "Invalid function entry pc in .callgraph section.");
+
+      // Function kind.
+      uint64_t Kind = CGNext();
+      switch (Kind) {
+      case 0: // not an indirect target
+        FuncInfo[FuncEntryPc].Kind = NOT_INDIRECT_TARGET;
+        break;
+      case 1: // indirect target with unknown type id
+        FuncInfo[FuncEntryPc].Kind = INDIRECT_TARGET_UNKNOWN_TID;
+        break;
+      case 2: // indirect target with known type id
+        FuncInfo[FuncEntryPc].Kind = INDIRECT_TARGET_KNOWN_TID;
+        TypeIdToIndirTargets[CGNext()].push_back(FuncEntryPc);
+        break;
+      default:
+        reportError(Obj->getFileName(),
+                    "Unknown function kind in .callgraph section.");
+      }
+
+      // Read call sites.
+      uint64_t CallSiteCount = CGNext();
+      for (unsigned long I = 0; I < CallSiteCount; I++) {
+        uint64_t TypeId = CGNext();
+        uint64_t CallSitePc = CGNext();
+        if (IndirectCallSites.count(CallSitePc)) {
+          TypeIdToIndirCallSites[TypeId].push_back(CallSitePc);
+          ICallWithTypeIdCount++;
+        } else {
+          // FIXME: .callgraph may have type ids for calls that are lowered to
+          // jump, which is why ignoring indirect call type ids are expected.
+          // However, actual indirect call site references stored as relocation
+          // entries here may also be discarded.
+          IgnoredICallIdCount++;
+        }
+      }
+    }
+
+    // Print any required warnings regarding the callgraph section.
+    if (IgnoredICallIdCount)
+      reportWarning("callgraph section has type ids for " +
+                        std::to_string(IgnoredICallIdCount) + " instructions " +
+                        "which are not indirect calls",
+                    Obj->getFileName());
+
+    if (auto ICallWithoutTypeIdCount =
+            IndirectCallSites.size() - ICallWithTypeIdCount)
+      reportWarning("callgraph section does not have type ids for " +
+                        std::to_string(ICallWithoutTypeIdCount) +
+                        " indirect calls",
+                    Obj->getFileName());
+
+    uint64_t NotListedCount = 0;
+    uint64_t UnknownCount = 0;
+    for (const auto &El : FuncInfo) {
+      NotListedCount += El.second.Kind == NOT_LISTED;
+      UnknownCount += El.second.Kind == INDIRECT_TARGET_UNKNOWN_TID;
+    }
+    if (NotListedCount)
+      reportWarning("callgraph section does not have information for " +
+                        std::to_string(NotListedCount) + " functions",
+                    Obj->getFileName());
+    if (UnknownCount)
+      reportWarning("callgraph section has unknown type id for " +
+                        std::to_string(UnknownCount) + " indirect targets",
+                    Obj->getFileName());
+
+    // Print indirect targets
+    outs() << "\nINDIRECT TARGET TYPES (TYPEID [FUNC_ADDR,])";
+
+    // Print indirect targets with unknown type.
+    // For completeness, functions for which the call graph section does not
+    // provide information are included.
+    if (NotListedCount || UnknownCount) {
+      outs() << "\nUNKNOWN";
+      for (const auto &El : FuncInfo) {
+        uint64_t FuncEntryPc = El.first;
+        FunctionKind FuncKind = El.second.Kind;
+        if (FuncKind == NOT_LISTED || FuncKind == INDIRECT_TARGET_UNKNOWN_TID)
+          outs() << " " << format("%lx", FuncEntryPc);
+      }
+    }
+
+    // Print indirect targets to type id mapping.
+    for (const auto &El : TypeIdToIndirTargets) {
+      uint64_t TypeId = El.first;
+      outs() << "\n" << format("%lx", TypeId);
+      for (uint64_t IndirTargetPc : El.second)
+        outs() << " " << format("%lx", IndirTargetPc);
+    }
+
+    // Print indirect calls to type id mapping. Any indirect call without a
+    // type id can be deduced by comparing this list to indirect call sites
+    // list.
+    outs() << "\n\nINDIRECT CALL TYPES (TYPEID [CALL_SITE_ADDR,])";
+    for (const auto &El : TypeIdToIndirCallSites) {
+      uint64_t TypeId = El.first;
+      outs() << "\n" << format("%lx", TypeId);
+      for (uint64_t IndirCallSitePc : El.second)
+        outs() << " " << format("%lx", IndirCallSitePc);
+    }
+  }
+
+  // Print function entry to indirect call site addresses mapping from disasm.
+  outs() << "\n\nINDIRECT CALL SITES (CALLER_ADDR [CALL_SITE_ADDR,])";
+  for (const auto &El : FuncInfo) {
+    auto CallerPc = El.first;
+    auto FuncIndirCallSites = El.second.IndirectCallSites;
+    if (!FuncIndirCallSites.empty()) {
+      outs() << "\n" << format("%lx", CallerPc);
+      for (auto IndirCallSitePc : FuncIndirCallSites)
+        outs() << " " << format("%lx", IndirCallSitePc);
+    }
+  }
----------------
ilovepi wrote:

I think all these printing routines should be factored out into separate helper functions, so that you can change them more easily or support additional output formats.

https://github.com/llvm/llvm-project/pull/151009


More information about the llvm-commits mailing list