[llvm] [GSYM] Callsites: Add data format support and loading from YAML (PR #109781)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 11:52:12 PDT 2024


================
@@ -0,0 +1,293 @@
+//===- CallSiteInfo.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+llvm::Error CallSiteInfo::encode(FileWriter &O) const {
+  O.writeU64(ReturnAddress);
+  O.writeU8(Flags);
+  O.writeU32(MatchRegex.size());
+  for (uint32_t Entry : MatchRegex)
+    O.writeU32(Entry);
+  return llvm::Error::success();
+}
+
+llvm::Expected<CallSiteInfo>
+CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) {
+  CallSiteInfo CSI;
+
+  // Read ReturnAddress
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing ReturnAddress", Offset);
+  CSI.ReturnAddress = Data.getU64(&Offset);
+
+  // Read Flags
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing Flags", Offset);
+  CSI.Flags = Data.getU8(&Offset);
+
+  // Read number of MatchRegex entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing MatchRegex count",
+                             Offset);
+  uint32_t NumEntries = Data.getU32(&Offset);
+
+  CSI.MatchRegex.reserve(NumEntries);
+  for (uint32_t i = 0; i < NumEntries; ++i) {
+    if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+      return createStringError(std::errc::io_error,
+                               "0x%8.8" PRIx64 ": missing MatchRegex entry",
+                               Offset);
+    uint32_t Entry = Data.getU32(&Offset);
+    CSI.MatchRegex.push_back(Entry);
+  }
+
+  return CSI;
+}
+
+llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const {
+  O.writeU32(CallSites.size());
+  for (const CallSiteInfo &CSI : CallSites) {
+    if (llvm::Error Err = CSI.encode(O))
+      return Err;
+  }
+  return llvm::Error::success();
+}
+
+llvm::Expected<CallSiteInfoCollection>
+CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) {
+  CallSiteInfoCollection CSC;
+  uint64_t Offset = 0;
+
+  // Read number of CallSiteInfo entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing CallSiteInfo count",
+                             Offset);
+  uint32_t NumCallSites = Data.getU32(&Offset);
+
+  CSC.CallSites.reserve(NumCallSites);
+  for (uint32_t i = 0; i < NumCallSites; ++i) {
+    llvm::Expected<CallSiteInfo> ECSI =
+        CallSiteInfo::decode(Data, Offset, BaseAddr);
+    if (!ECSI)
+      return ECSI.takeError();
+    CSC.CallSites.emplace_back(*ECSI);
+  }
+
+  return CSC;
+}
+
+/// Structures necessary for reading CallSiteInfo from YAML.
+namespace llvm {
+namespace yaml {
+
+struct CallSiteYAML {
+  // The offset of the return address of the call site - relative to the start
+  // of the function.
+  llvm::yaml::Hex64 return_offset;
+  std::vector<std::string> match_regex;
+  std::vector<std::string> flags;
+};
+
+struct FunctionYAML {
+  std::string name;
+  std::vector<CallSiteYAML> callsites;
+};
+
+struct FunctionsYAML {
+  std::vector<FunctionYAML> functions;
+};
+
+template <> struct MappingTraits<CallSiteYAML> {
+  static void mapping(IO &io, CallSiteYAML &callsite) {
+    io.mapRequired("return_offset", callsite.return_offset);
+    io.mapRequired("match_regex", callsite.match_regex);
+    io.mapOptional("flags", callsite.flags);
+  }
+};
+
+template <> struct MappingTraits<FunctionYAML> {
+  static void mapping(IO &io, FunctionYAML &func) {
+    io.mapRequired("name", func.name);
+    io.mapOptional("callsites", func.callsites);
+  }
+};
+
+template <> struct MappingTraits<FunctionsYAML> {
+  static void mapping(IO &io, FunctionsYAML &functionsYAML) {
+    io.mapRequired("functions", functionsYAML.functions);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML)
+LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML)
+
+// Implementation of CallSiteInfoLoader
+StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const {
+  assert(StringOffsetMap.count(offset) &&
+         "expected function name offset to already be in StringOffsetMap");
+  return StringOffsetMap.find(offset)->second.val();
+}
+
+uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) {
+  return StrTab.add(StringStorage.insert(str).first->getKey());
+}
+
+llvm::Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
+                                         StringRef YAMLFile) {
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  // Step 1: Read YAML file
+  if (auto Err = readYAMLFile(YAMLFile, Buffer))
+    return Err;
+
+  // Step 2: Parse YAML content
+  llvm::yaml::FunctionsYAML functionsYAML;
+  if (auto Err = parseYAML(*Buffer, functionsYAML))
+    return Err;
+
+  // Step 3: Build function map from Funcs
+  auto FuncMap = buildFunctionMap(Funcs);
+
+  // Step 4: Process parsed YAML functions and update FuncMap
+  return processYAMLFunctions(functionsYAML, FuncMap, YAMLFile);
+}
+
+llvm::Error
+CallSiteInfoLoader::readYAMLFile(StringRef YAMLFile,
+                                 std::unique_ptr<llvm::MemoryBuffer> &Buffer) {
+  auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile);
+  if (!BufferOrError)
+    return errorCodeToError(BufferOrError.getError());
+  Buffer = std::move(*BufferOrError);
+  return llvm::Error::success();
+}
+
+llvm::Error
+CallSiteInfoLoader::parseYAML(llvm::MemoryBuffer &Buffer,
+                              llvm::yaml::FunctionsYAML &functionsYAML) {
+  // Use the MemoryBufferRef constructor
+  llvm::yaml::Input yin(Buffer.getMemBufferRef());
+  yin >> functionsYAML;
+  if (yin.error()) {
+    return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n",
+                                   Buffer.getBufferIdentifier().str().c_str());
+  }
+  return llvm::Error::success();
+}
+
+std::unordered_map<std::string, FunctionInfo *>
+CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
+  std::unordered_map<std::string, FunctionInfo *> FuncMap;
+  auto insertFunc = [&](auto &Function) {
+    std::string FuncName = stringFromOffset(Function.Name).str();
+    // If the function name is already in the map, don't update it. This way we
+    // preferentially use the first encountered function. Since symbols are
+    // loaded from dSYM first, we end up preferring keeping track of symbols
+    // from dSYM rather than from the symbol table - which is what we want to
+    // do.
+    if (FuncMap.count(FuncName))
+      return;
+    FuncMap[FuncName] = &Function;
+  };
+  for (auto &Func : Funcs) {
+    insertFunc(Func);
+    if (Func.MergedFunctions.has_value())
+      for (auto &MFunc : Func.MergedFunctions->MergedFunctions)
+        insertFunc(MFunc);
+  }
+  return FuncMap;
+}
+
+llvm::Error CallSiteInfoLoader::processYAMLFunctions(
+    const llvm::yaml::FunctionsYAML &functionsYAML,
+    std::unordered_map<std::string, FunctionInfo *> &FuncMap,
+    StringRef YAMLFile) {
+  // For each function in the YAML file
+  for (const auto &FuncYAML : functionsYAML.functions) {
+    auto it = FuncMap.find(FuncYAML.name);
+    if (it == FuncMap.end()) {
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "Can't find function '%s' specified in callsite YAML\n",
+          FuncYAML.name.c_str());
+    }
+    FunctionInfo *FuncInfo = it->second;
+    // Create a CallSiteInfoCollection if not already present
+    if (!FuncInfo->CallSites)
+      FuncInfo->CallSites = CallSiteInfoCollection();
+    for (const auto &CallSiteYAML : FuncYAML.callsites) {
+      CallSiteInfo CSI;
+      // Since YAML has specifies relative return offsets, add the function
+      // start address to make the offset absolute.
+      CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset;
+      for (const auto &regex : CallSiteYAML.match_regex) {
+        CSI.MatchRegex.push_back(offsetFromString(regex));
+      }
+      // Initialize flags to None
+      CSI.Flags = CallSiteInfo::None;
+      // Parse flags and combine them
+      for (const auto &FlagStr : CallSiteYAML.flags) {
+        if (FlagStr == "InternalCall") {
+          CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::InternalCall);
+        } else if (FlagStr == "ExternalCall") {
+          CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::ExternalCall);
+        } else {
+          return llvm::createStringError(std::errc::invalid_argument,
+                                         "Unknown flag in callsite YAML: %s\n",
+                                         FlagStr.c_str());
+        }
+      }
+      FuncInfo->CallSites->CallSites.push_back(CSI);
+    }
+  }
+  return llvm::Error::success();
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
+  OS << "  Return=" << HEX64(CSI.ReturnAddress);
+  OS << "  Flags=" << HEX8(CSI.Flags);
+
+  OS << "  RegEx=";
+  for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
+    if (i > 0)
+      OS << ",";
+    OS << CSI.MatchRegex[i];
----------------
kyulee-com wrote:

Looks like it prints out offset to the string table. It could be more useful to print the actual content, but not sure how we could do here.

https://github.com/llvm/llvm-project/pull/109781


More information about the llvm-commits mailing list