[llvm] [GSYM] Callsites: Add data format support and loading from YAML (PR #109781)

Kyungwoo Lee via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 2 19:26:59 PDT 2024


================
@@ -0,0 +1,275 @@
+//===- CallSiteInfo.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+/// Writes this call site to \p O as: a 64-bit ReturnAddress, an 8-bit Flags
+/// value, a 32-bit count of MatchRegex entries, then each entry as 32 bits.
+///
+/// \param O Writer that receives the encoded fields.
+/// \returns Always llvm::Error::success().
+llvm::Error CallSiteInfo::encode(FileWriter &O) const {
+  // Fixed-size header fields.
+  O.writeU64(ReturnAddress);
+  O.writeU8(Flags);
+
+  // Length-prefixed list of MatchRegex entries.
+  O.writeU32(MatchRegex.size());
+  for (uint32_t RegexEntry : MatchRegex) {
+    O.writeU32(RegexEntry);
+  }
+
+  return llvm::Error::success();
+}
+
+/// Decodes one CallSiteInfo from \p Data, starting at \p Offset.
+///
+/// The fields are read in the order CallSiteInfo::encode() writes them: a
+/// 64-bit ReturnAddress, an 8-bit Flags value, a 32-bit entry count, and then
+/// that many 32-bit MatchRegex entries.
+///
+/// \param Data Extractor over the encoded bytes.
+/// \param Offset In/out cursor; advanced past every field that is read.
+/// \param BaseAddr Not consulted by this decoder.
+/// \returns The decoded CallSiteInfo, or an io_error naming the first field
+/// that does not fit in the remaining data.
+llvm::Expected<CallSiteInfo>
+CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) {
+  CallSiteInfo CSI;
+
+  // Read ReturnAddress
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing ReturnAddress", Offset);
+  CSI.ReturnAddress = Data.getU64(&Offset);
+
+  // Read Flags
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing Flags", Offset);
+  CSI.Flags = Data.getU8(&Offset);
+
+  // Read number of MatchRegex entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing MatchRegex count",
+                             Offset);
+  uint32_t NumEntries = Data.getU32(&Offset);
+
+  // The count comes straight from the input, so a corrupt value must not be
+  // allowed to trigger a huge up-front allocation. Pre-allocate only when the
+  // remaining data can really hold that many entries; otherwise let the loop
+  // below report the first missing entry exactly as before.
+  const uint64_t EntriesSize =
+      static_cast<uint64_t>(NumEntries) * sizeof(uint32_t);
+  if (Data.isValidOffsetForDataOfSize(Offset, EntriesSize))
+    CSI.MatchRegex.reserve(NumEntries);
+
+  for (uint32_t i = 0; i < NumEntries; ++i) {
+    if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+      return createStringError(std::errc::io_error,
+                               "0x%8.8" PRIx64 ": missing MatchRegex entry",
+                               Offset);
+    uint32_t Entry = Data.getU32(&Offset);
+    CSI.MatchRegex.push_back(Entry);
+  }
+
+  return CSI;
+}
+
+/// Writes the collection to \p O as a 32-bit count of call sites followed by
+/// each call site's own encoding, in order.
+///
+/// \param O Writer that receives the encoded collection.
+/// \returns The first error produced while encoding a call site, or success.
+llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const {
+  O.writeU32(CallSites.size());
+
+  for (const CallSiteInfo &Site : CallSites) {
+    llvm::Error SiteErr = Site.encode(O);
+    if (SiteErr)
+      return SiteErr;
+  }
+
+  return llvm::Error::success();
+}
+
+/// Decodes a CallSiteInfoCollection from the start of \p Data.
+///
+/// Reads a 32-bit call-site count at offset 0 and then decodes that many
+/// CallSiteInfo records back to back via CallSiteInfo::decode().
+///
+/// \param Data Extractor over the encoded collection.
+/// \param BaseAddr Forwarded unchanged to CallSiteInfo::decode().
+/// \returns The decoded collection, or the first error encountered.
+llvm::Expected<CallSiteInfoCollection>
+CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) {
+  CallSiteInfoCollection CSC;
+  uint64_t Offset = 0;
+
+  // Read number of CallSiteInfo entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing CallSiteInfo count",
+                             Offset);
+  uint32_t NumCallSites = Data.getU32(&Offset);
+
+  // Every encoded CallSiteInfo occupies at least its fixed-size header
+  // (ReturnAddress, Flags and the MatchRegex count). The count is read from
+  // the input, so only pre-allocate when the remaining data could hold that
+  // many records; a corrupt count then cannot force a huge allocation, and
+  // the per-record decode below still reports the precise error.
+  constexpr uint64_t MinEncodedCallSiteSize =
+      sizeof(uint64_t) + sizeof(uint8_t) + sizeof(uint32_t);
+  if (Data.isValidOffsetForDataOfSize(
+          Offset, static_cast<uint64_t>(NumCallSites) * MinEncodedCallSiteSize))
+    CSC.CallSites.reserve(NumCallSites);
+
+  for (uint32_t i = 0; i < NumCallSites; ++i) {
+    llvm::Expected<CallSiteInfo> ECSI =
+        CallSiteInfo::decode(Data, Offset, BaseAddr);
+    if (!ECSI)
+      return ECSI.takeError();
+    // Move the decoded record (and its MatchRegex vector) instead of copying.
+    CSC.CallSites.emplace_back(std::move(*ECSI));
+  }
+
+  return CSC;
+}
+
+/// Plain structs that mirror the YAML document read for call-site
+/// information, together with the MappingTraits that bind YAML keys to them.
+namespace llvm {
+namespace yaml {
+
+/// One entry of a function's "callsites" list.
+struct CallSiteYAML {
+  // The offset of the return address of the call site - relative to the start
+  // of the function.
+  llvm::yaml::Hex64 return_offset;
+  // Strings read from the required "match_regex" key.
+  std::vector<std::string> match_regex;
+  // Strings read from the optional "flags" key.
+  std::vector<std::string> flags;
+};
+
+/// One entry of the top-level "functions" list.
+struct FunctionYAML {
+  // Value of the required "name" key.
+  std::string name;
+  // Entries of the optional "callsites" key.
+  std::vector<CallSiteYAML> callsites;
+};
+
+/// Root of the YAML document.
+struct FunctionsYAML {
+  // Entries of the required "functions" key.
+  std::vector<FunctionYAML> functions;
+};
+
+/// Key bindings for CallSiteYAML.
+template <> struct MappingTraits<CallSiteYAML> {
+  static void mapping(IO &IO, CallSiteYAML &CallSite) {
+    IO.mapRequired("return_offset", CallSite.return_offset);
+    IO.mapRequired("match_regex", CallSite.match_regex);
+    IO.mapOptional("flags", CallSite.flags);
+  }
+};
+
+/// Key bindings for FunctionYAML.
+template <> struct MappingTraits<FunctionYAML> {
+  static void mapping(IO &IO, FunctionYAML &Func) {
+    IO.mapRequired("name", Func.name);
+    IO.mapOptional("callsites", Func.callsites);
+  }
+};
+
+/// Key bindings for the document root.
+template <> struct MappingTraits<FunctionsYAML> {
+  static void mapping(IO &IO, FunctionsYAML &Root) {
+    IO.mapRequired("functions", Root.functions);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML)
+LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML)
+
+// Implementation of CallSiteInfoLoader
+/// Returns the string recorded in StringOffsetMap for \p offset.
+///
+/// The offset must already be present in the map; this is asserted rather
+/// than handled.
+StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const {
+  auto Found = StringOffsetMap.find(offset);
+  assert(Found != StringOffsetMap.end() &&
+         "expected function name offset to already be in StringOffsetMap");
+  return Found->second.val();
+}
+
+/// Inserts \p str into StringStorage, adds the stored key to StrTab, and
+/// returns the value StrTab.add() yields for it.
+uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) {
+  // Hand StrTab the key held by StringStorage rather than the caller's
+  // argument, exactly as the single-expression form did.
+  auto StoredKey = StringStorage.insert(str).first->getKey();
+  return StrTab.add(StoredKey);
+}
+
+llvm::Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
+                                         StringRef YAMLFile) {
+  // Step 1: Read YAML file
+  auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile);
+  if (!BufferOrError)
+    return errorCodeToError(BufferOrError.getError());
+
+  std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*BufferOrError);
+
+  // Step 2: Parse YAML content
+  llvm::yaml::FunctionsYAML functionsYAML;
+  llvm::yaml::Input yin(Buffer->getMemBufferRef());
+  yin >> functionsYAML;
----------------
kyulee-com wrote:

I still think the single `functions:` field in the YAML seems wasteful unless you plan to add other fields that cover more than function scope, such as module-level data. Normally, with function-level entities like these at the top level, merging is easier because they can simply be appended.

Also, please capitalize the first letter of local variable names. I know this differs from the LLD convention, but it is the common convention in LLVM.

https://github.com/llvm/llvm-project/pull/109781


More information about the llvm-commits mailing list