[llvm] [GSYM] Callsites: Add data format support and loading from YAML (PR #109781)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 16:00:55 PST 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/109781

>From ac48f2c4d76cefd27ce40d39762ce54633fee3f4 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Tue, 24 Sep 2024 03:16:47 -0700
Subject: [PATCH 1/6] [GSYM] Callsites: Add data format support and loading
 from YAML

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        | 224 +++++
 .../llvm/DebugInfo/GSYM/FunctionInfo.h        |  10 +-
 .../include/llvm/DebugInfo/GSYM/GsymCreator.h |  11 +-
 llvm/include/llvm/DebugInfo/GSYM/GsymReader.h |  20 +
 llvm/lib/DebugInfo/GSYM/CMakeLists.txt        |   1 +
 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp      | 293 ++++++
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      |  31 +-
 llvm/lib/DebugInfo/GSYM/GsymCreator.cpp       |   6 +
 llvm/lib/DebugInfo/GSYM/GsymReader.cpp        |  45 +
 .../macho-gsym-callsite-info-dsym.yaml        | 950 ++++++++++++++++++
 .../macho-gsym-callsite-info-exe.yaml         | 558 ++++++++++
 .../macho-gsym-callsite-info-obj.test         | 304 ++++++
 llvm/tools/llvm-gsymutil/Opts.td              |   2 +
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    |  19 +
 llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp    |   4 +-
 15 files changed, 2471 insertions(+), 7 deletions(-)
 create mode 100644 llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
 create mode 100644 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
 create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
 create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
 create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
new file mode 100644
index 00000000000000..45257f0e11578e
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -0,0 +1,224 @@
+//===- CallSiteInfo.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
+#define LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
+#include "llvm/Support/YAMLParser.h"
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+class DataExtractor;
+class raw_ostream;
+class StringTableBuilder;
+class CachedHashStringRef;
+
+namespace yaml {
+struct CallSiteYAML;
+struct FunctionYAML;
+struct FunctionsYAML;
+} // namespace yaml
+
+namespace gsym {
+class FileWriter;
+struct FunctionInfo;
+struct CallSiteInfo {
+public:
+  enum Flags : uint8_t {
+    None = 0,
+    // This flag specifies that the call site can only call a function within
+    // the same link unit as the call site.
+    InternalCall = 1 << 0,
+    // This flag specifies that the call site can only call a function outside
+    // the link unit that the call site is in.
+    ExternalCall = 1 << 1,
+  };
+
+  /// The return address of the call site.
+  uint64_t ReturnAddress;
+
+  /// Offsets into the string table for function name regex patterns.
+  std::vector<uint32_t> MatchRegex;
+
+  /// Bitwise OR of CallSiteInfo::Flags values
+  uint8_t Flags;
+
+  /// Decode a CallSiteInfo object from a binary data stream.
+  ///
+  /// \param Data The binary stream to read the data from.
+  /// \param Offset The current offset within the data stream.
+  /// \param BaseAddr The base address for decoding (unused here but included
+  /// for consistency).
+  ///
+  /// \returns A CallSiteInfo or an error describing the issue.
+  static llvm::Expected<CallSiteInfo>
+  decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr);
+
+  /// Encode this CallSiteInfo object into a FileWriter stream.
+  ///
+  /// \param O The binary stream to write the data to.
+  /// \returns An error object that indicates success or failure.
+  llvm::Error encode(FileWriter &O) const;
+};
+
+struct CallSiteInfoCollection {
+public:
+  std::vector<CallSiteInfo> CallSites;
+
+  void clear() { CallSites.clear(); }
+
+  /// Query if a CallSiteInfoCollection object is valid.
+  ///
+  /// \returns True if the collection is not empty.
+  bool isValid() const { return !CallSites.empty(); }
+
+  /// Decode a CallSiteInfoCollection object from a binary data stream.
+  ///
+  /// \param Data The binary stream to read the data from.
+  /// \param BaseAddr The base address for decoding (unused here but included
+  /// for consistency).
+  ///
+  /// \returns A CallSiteInfoCollection or an error describing the issue.
+  static llvm::Expected<CallSiteInfoCollection> decode(DataExtractor &Data,
+                                                       uint64_t BaseAddr);
+
+  /// Encode this CallSiteInfoCollection object into a FileWriter stream.
+  ///
+  /// \param O The binary stream to write the data to.
+  /// \returns An error object that indicates success or failure.
+  llvm::Error encode(FileWriter &O) const;
+};
+
+bool operator==(const CallSiteInfoCollection &LHS,
+                const CallSiteInfoCollection &RHS);
+
+bool operator==(const CallSiteInfo &LHS, const CallSiteInfo &RHS);
+
+class CallSiteInfoLoader {
+public:
+  /// Constructor that initializes the CallSiteInfoLoader with necessary data
+  /// structures.
+  ///
+  /// \param StringOffsetMap A reference to a DenseMap that maps existing string
+  /// offsets to CachedHashStringRef.
+  /// \param StrTab A reference to a StringTableBuilder used for looking up and
+  /// creating new strings.
+  /// \param StringStorage A StringSet storing the data for generated strings.
+  CallSiteInfoLoader(DenseMap<uint64_t, CachedHashStringRef> &StringOffsetMap,
+                     StringTableBuilder &StrTab, StringSet<> &StringStorage)
+      : StringOffsetMap(StringOffsetMap), StrTab(StrTab),
+        StringStorage(StringStorage) {}
+
+  /// Loads call site information from a YAML file and populates the provided
+  /// FunctionInfo vector.
+  ///
+  /// This method reads the specified YAML file, parses its content, and updates
+  /// the `Funcs` vector with call site information based on the YAML data.
+  ///
+  /// \param Funcs A reference to a vector of FunctionInfo objects to be
+  /// populated.
+  /// \param YAMLFile A StringRef representing the path to the YAML
+  /// file to be loaded.
+  ///
+  /// \returns An `llvm::Error` indicating success or describing any issues
+  /// encountered during the loading process.
+  llvm::Error loadYAML(std::vector<FunctionInfo> &Funcs, StringRef YAMLFile);
+
+private:
+  /// Retrieves an existing string from the StringOffsetMap using the provided
+  /// offset.
+  ///
+  /// \param offset A 32-bit unsigned integer representing the offset of the
+  /// string.
+  ///
+  /// \returns A StringRef corresponding to the string for the given offset.
+  ///
+  /// \note This method asserts that the offset exists in the StringOffsetMap.
+  StringRef stringFromOffset(uint32_t offset) const;
+
+  /// Obtains the offset corresponding to a given string in the StrTab. If the
+  /// string does not already exist, it is created.
+  ///
+  /// \param str A StringRef representing the string for which the offset is
+  /// requested.
+  ///
+  /// \returns A 32-bit unsigned integer representing the offset of the string.
+  uint32_t offsetFromString(StringRef str);
+
+  /// Reads the content of the YAML file specified by `YAMLFile` into
+  /// `Buffer`.
+  ///
+  /// \param YAMLFile A StringRef representing the path to the YAML file.
+  /// \param Buffer Receives the memory buffer holding the file's content.
+  ///
+  /// \returns An `llvm::Error` indicating success or describing any issues
+  /// encountered while reading the file.
+  llvm::Error readYAMLFile(StringRef YAMLFile,
+                           std::unique_ptr<llvm::MemoryBuffer> &Buffer);
+
+  /// Parses the YAML content and populates `functionsYAML` with the parsed
+  /// data.
+  ///
+  /// \param Buffer The memory buffer containing the YAML content.
+  /// \param functionsYAML A reference to an llvm::yaml::FunctionsYAML object to
+  /// be populated.
+  ///
+  /// \returns An `llvm::Error` indicating success or describing any issues
+  /// encountered during parsing.
+  llvm::Error parseYAML(llvm::MemoryBuffer &Buffer,
+                        llvm::yaml::FunctionsYAML &functionsYAML);
+
+  /// Builds a map from function names to FunctionInfo pointers based on the
+  /// provided `Funcs` vector.
+  ///
+  /// \param Funcs A reference to a vector of FunctionInfo objects.
+  ///
+  /// \returns An unordered_map mapping function names (std::string) to their
+  /// corresponding FunctionInfo pointers.
+  std::unordered_map<std::string, FunctionInfo *>
+  buildFunctionMap(std::vector<FunctionInfo> &Funcs);
+
+  /// Processes the parsed YAML functions and updates the `FuncMap` accordingly.
+  ///
+  /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML
+  /// object containing parsed YAML data.
+  /// \param FuncMap A reference to an unordered_map mapping function names to
+  /// FunctionInfo pointers.
+  /// \param YAMLFile A StringRef representing the name of the YAML file (used
+  /// for error messages).
+  ///
+  /// \returns An `llvm::Error` indicating success or describing any issues
+  /// encountered during processing.
+  llvm::Error
+  processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML,
+                       std::unordered_map<std::string, FunctionInfo *> &FuncMap,
+                       StringRef YAMLFile);
+
+  /// Map of existing string offsets to CachedHashStringRef.
+  DenseMap<uint64_t, CachedHashStringRef> &StringOffsetMap;
+
+  /// The gSYM string table builder.
+  StringTableBuilder &StrTab;
+
+  /// The gSYM string storage - we store generated strings here.
+  StringSet<> &StringStorage;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI);
+raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfoCollection &CSIC);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 71209b6b5c9cd1..fd4ac3164c686d 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -10,6 +10,7 @@
 #define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
 
 #include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
 #include "llvm/DebugInfo/GSYM/ExtractRanges.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
@@ -63,7 +64,9 @@ class GsymReader;
 ///   enum InfoType {
 ///     EndOfList = 0u,
 ///     LineTableInfo = 1u,
-///     InlineInfo = 2u
+///     InlineInfo = 2u,
+///     MergedFunctionsInfo = 3u,
+///     CallSiteInfo = 4u
 ///   };
 ///
 /// This stream of tuples is terminated by a "InfoType" whose value is
@@ -73,7 +76,7 @@ class GsymReader;
 /// clients to still parse the format and skip over any data that they don't
 /// understand or want to parse.
 ///
-/// So the function information encoding essientially looks like:
+/// So the function information encoding essentially looks like:
 ///
 /// struct {
 ///   uint32_t Size;
@@ -92,6 +95,7 @@ struct FunctionInfo {
   std::optional<LineTable> OptLineTable;
   std::optional<InlineInfo> Inline;
   std::optional<MergedFunctionsInfo> MergedFunctions;
+  std::optional<CallSiteInfoCollection> CallSites;
   /// If we encode a FunctionInfo during segmenting so we know its size, we can
   /// cache that encoding here so we don't need to re-encode it when saving the
   /// GSYM file.
@@ -107,7 +111,7 @@ struct FunctionInfo {
   /// debug info, we might end up with multiple FunctionInfo objects for the
   /// same range and we need to be able to tell which one is the better object
   /// to use.
-  bool hasRichInfo() const { return OptLineTable || Inline; }
+  bool hasRichInfo() const { return OptLineTable || Inline || CallSites; }
 
   /// Query if a FunctionInfo object is valid.
   ///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 48808fb7b71e18..9e5b3c1f8d92de 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -352,13 +352,22 @@ class GsymCreator {
   /// \param   FI The function info object to emplace into our functions list.
   void addFunctionInfo(FunctionInfo &&FI);
 
+  /// Load call site information from a YAML file.
+  ///
+  /// This function reads call site information from a specified YAML file and
+  /// adds it to the GSYM data.
+  ///
+  /// \param YAMLFile The path to the YAML file containing call site
+  /// information.
+  llvm::Error loadCallSitesFromYAML(StringRef YAMLFile);
+
   /// Organize merged FunctionInfo's
   ///
   /// This method processes the list of function infos (Funcs) to identify and
   /// group functions with overlapping address ranges.
   ///
   /// \param  Out Output stream to report information about how merged
-  /// FunctionInfo's were handeled.
+  /// FunctionInfo's were handled.
   void prepareMergedFunctions(OutputAggregator &Out);
 
   /// Finalize the data in the GSYM creator prior to saving the data out.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 89f8c043b91519..72b7f3e7bfc42e 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -181,6 +181,26 @@ class GsymReader {
   /// \param MFI The object to dump.
   void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
 
+  /// Dump a CallSiteInfo object.
+  ///
+  /// This function will output the details of a CallSiteInfo object in a
+  /// human-readable format.
+  ///
+  /// \param OS The output stream to dump to.
+  ///
+  /// \param CSI The CallSiteInfo object to dump.
+  void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+
+  /// Dump a CallSiteInfoCollection object.
+  ///
+  /// This function will iterate over a collection of CallSiteInfo objects and
+  /// dump each one.
+  ///
+  /// \param OS The output stream to dump to.
+  ///
+  /// \param CSIC The CallSiteInfoCollection object to dump.
+  void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC);
+
   /// Dump a LineTable object.
   ///
   /// This function will convert any string table indexes and file indexes
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index be90bfdaa7fd2b..c27d648db62f61 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
   InlineInfo.cpp
   LineTable.cpp
   LookupResult.cpp
+  CallSiteInfo.cpp
   MergedFunctionsInfo.cpp
   ObjectFileTransformer.cpp
   ExtractRanges.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
new file mode 100644
index 00000000000000..4ed3d3f67a44fd
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -0,0 +1,293 @@
+//===- CallSiteInfo.cpp ----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
+#include "llvm/ADT/CachedHashString.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+llvm::Error CallSiteInfo::encode(FileWriter &O) const {
+  O.writeU64(ReturnAddress);
+  O.writeU8(Flags);
+  O.writeU32(MatchRegex.size());
+  for (uint32_t Entry : MatchRegex)
+    O.writeU32(Entry);
+  return llvm::Error::success();
+}
+
+llvm::Expected<CallSiteInfo>
+CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) {
+  CallSiteInfo CSI;
+
+  // Read ReturnAddress
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing ReturnAddress", Offset);
+  CSI.ReturnAddress = Data.getU64(&Offset);
+
+  // Read Flags
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing Flags", Offset);
+  CSI.Flags = Data.getU8(&Offset);
+
+  // Read number of MatchRegex entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing MatchRegex count",
+                             Offset);
+  uint32_t NumEntries = Data.getU32(&Offset);
+
+  CSI.MatchRegex.reserve(NumEntries);
+  for (uint32_t i = 0; i < NumEntries; ++i) {
+    if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+      return createStringError(std::errc::io_error,
+                               "0x%8.8" PRIx64 ": missing MatchRegex entry",
+                               Offset);
+    uint32_t Entry = Data.getU32(&Offset);
+    CSI.MatchRegex.push_back(Entry);
+  }
+
+  return CSI;
+}
+
+llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const {
+  O.writeU32(CallSites.size());
+  for (const CallSiteInfo &CSI : CallSites) {
+    if (llvm::Error Err = CSI.encode(O))
+      return Err;
+  }
+  return llvm::Error::success();
+}
+
+llvm::Expected<CallSiteInfoCollection>
+CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) {
+  CallSiteInfoCollection CSC;
+  uint64_t Offset = 0;
+
+  // Read number of CallSiteInfo entries
+  if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint32_t)))
+    return createStringError(std::errc::io_error,
+                             "0x%8.8" PRIx64 ": missing CallSiteInfo count",
+                             Offset);
+  uint32_t NumCallSites = Data.getU32(&Offset);
+
+  CSC.CallSites.reserve(NumCallSites);
+  for (uint32_t i = 0; i < NumCallSites; ++i) {
+    llvm::Expected<CallSiteInfo> ECSI =
+        CallSiteInfo::decode(Data, Offset, BaseAddr);
+    if (!ECSI)
+      return ECSI.takeError();
+    CSC.CallSites.emplace_back(*ECSI);
+  }
+
+  return CSC;
+}
+
+/// Structures necessary for reading CallSiteInfo from YAML.
+namespace llvm {
+namespace yaml {
+
+struct CallSiteYAML {
+  // The offset of the return address of the call site - relative to the start
+  // of the function.
+  llvm::yaml::Hex64 return_offset;
+  std::vector<std::string> match_regex;
+  std::vector<std::string> flags;
+};
+
+struct FunctionYAML {
+  std::string name;
+  std::vector<CallSiteYAML> callsites;
+};
+
+struct FunctionsYAML {
+  std::vector<FunctionYAML> functions;
+};
+
+template <> struct MappingTraits<CallSiteYAML> {
+  static void mapping(IO &io, CallSiteYAML &callsite) {
+    io.mapRequired("return_offset", callsite.return_offset);
+    io.mapRequired("match_regex", callsite.match_regex);
+    io.mapOptional("flags", callsite.flags);
+  }
+};
+
+template <> struct MappingTraits<FunctionYAML> {
+  static void mapping(IO &io, FunctionYAML &func) {
+    io.mapRequired("name", func.name);
+    io.mapOptional("callsites", func.callsites);
+  }
+};
+
+template <> struct MappingTraits<FunctionsYAML> {
+  static void mapping(IO &io, FunctionsYAML &functionsYAML) {
+    io.mapRequired("functions", functionsYAML.functions);
+  }
+};
+
+} // namespace yaml
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML)
+LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML)
+
+// Implementation of CallSiteInfoLoader
+StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const {
+  assert(StringOffsetMap.count(offset) &&
+         "expected function name offset to already be in StringOffsetMap");
+  return StringOffsetMap.find(offset)->second.val();
+}
+
+uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) {
+  return StrTab.add(StringStorage.insert(str).first->getKey());
+}
+
+llvm::Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
+                                         StringRef YAMLFile) {
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  // Step 1: Read YAML file
+  if (auto Err = readYAMLFile(YAMLFile, Buffer))
+    return Err;
+
+  // Step 2: Parse YAML content
+  llvm::yaml::FunctionsYAML functionsYAML;
+  if (auto Err = parseYAML(*Buffer, functionsYAML))
+    return Err;
+
+  // Step 3: Build function map from Funcs
+  auto FuncMap = buildFunctionMap(Funcs);
+
+  // Step 4: Process parsed YAML functions and update FuncMap
+  return processYAMLFunctions(functionsYAML, FuncMap, YAMLFile);
+}
+
+llvm::Error
+CallSiteInfoLoader::readYAMLFile(StringRef YAMLFile,
+                                 std::unique_ptr<llvm::MemoryBuffer> &Buffer) {
+  auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile);
+  if (!BufferOrError)
+    return errorCodeToError(BufferOrError.getError());
+  Buffer = std::move(*BufferOrError);
+  return llvm::Error::success();
+}
+
+llvm::Error
+CallSiteInfoLoader::parseYAML(llvm::MemoryBuffer &Buffer,
+                              llvm::yaml::FunctionsYAML &functionsYAML) {
+  // Use the MemoryBufferRef constructor
+  llvm::yaml::Input yin(Buffer.getMemBufferRef());
+  yin >> functionsYAML;
+  if (yin.error()) {
+    return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n",
+                                   Buffer.getBufferIdentifier().str().c_str());
+  }
+  return llvm::Error::success();
+}
+
+std::unordered_map<std::string, FunctionInfo *>
+CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
+  std::unordered_map<std::string, FunctionInfo *> FuncMap;
+  auto insertFunc = [&](auto &Function) {
+    std::string FuncName = stringFromOffset(Function.Name).str();
+    // If the function name is already in the map, don't update it. This way we
+    // preferentially use the first encountered function. Since symbols are
+    // loaded from dSYM first, we end up preferring keeping track of symbols
+    // from dSYM rather than from the symbol table - which is what we want to
+    // do.
+    if (FuncMap.count(FuncName))
+      return;
+    FuncMap[FuncName] = &Function;
+  };
+  for (auto &Func : Funcs) {
+    insertFunc(Func);
+    if (Func.MergedFunctions.has_value())
+      for (auto &MFunc : Func.MergedFunctions->MergedFunctions)
+        insertFunc(MFunc);
+  }
+  return FuncMap;
+}
+
+llvm::Error CallSiteInfoLoader::processYAMLFunctions(
+    const llvm::yaml::FunctionsYAML &functionsYAML,
+    std::unordered_map<std::string, FunctionInfo *> &FuncMap,
+    StringRef YAMLFile) {
+  // For each function in the YAML file
+  for (const auto &FuncYAML : functionsYAML.functions) {
+    auto it = FuncMap.find(FuncYAML.name);
+    if (it == FuncMap.end()) {
+      return llvm::createStringError(
+          std::errc::invalid_argument,
+          "Can't find function '%s' specified in callsite YAML\n",
+          FuncYAML.name.c_str());
+    }
+    FunctionInfo *FuncInfo = it->second;
+    // Create a CallSiteInfoCollection if not already present
+    if (!FuncInfo->CallSites)
+      FuncInfo->CallSites = CallSiteInfoCollection();
+    for (const auto &CallSiteYAML : FuncYAML.callsites) {
+      CallSiteInfo CSI;
+      // Since the YAML specifies relative return offsets, add the function
+      // start address to make the offset absolute.
+      CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset;
+      for (const auto &regex : CallSiteYAML.match_regex) {
+        CSI.MatchRegex.push_back(offsetFromString(regex));
+      }
+      // Initialize flags to None
+      CSI.Flags = CallSiteInfo::None;
+      // Parse flags and combine them
+      for (const auto &FlagStr : CallSiteYAML.flags) {
+        if (FlagStr == "InternalCall") {
+          CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::InternalCall);
+        } else if (FlagStr == "ExternalCall") {
+          CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::ExternalCall);
+        } else {
+          return llvm::createStringError(std::errc::invalid_argument,
+                                         "Unknown flag in callsite YAML: %s\n",
+                                         FlagStr.c_str());
+        }
+      }
+      FuncInfo->CallSites->CallSites.push_back(CSI);
+    }
+  }
+  return llvm::Error::success();
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
+  OS << "  Return=" << HEX64(CSI.ReturnAddress);
+  OS << "  Flags=" << HEX8(CSI.Flags);
+
+  OS << "  RegEx=";
+  for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
+    if (i > 0)
+      OS << ",";
+    OS << CSI.MatchRegex[i];
+  }
+  return OS;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS,
+                                    const CallSiteInfoCollection &CSIC) {
+  for (const auto &CS : CSIC.CallSites) {
+    OS << CS;
+    OS << "\n";
+  }
+  return OS;
+}
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 2cd85ef2398f91..9dc9c241168b26 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -24,6 +24,7 @@ enum InfoType : uint32_t {
   LineTableInfo = 1u,
   InlineInfo = 2u,
   MergedFunctionsInfo = 3u,
+  CallSiteInfo = 4u,
 };
 
 raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
@@ -32,6 +33,8 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
     OS << FI.OptLineTable << '\n';
   if (FI.Inline)
     OS << FI.Inline << '\n';
+  if (FI.CallSites)
+    OS << *FI.CallSites << '\n';
   return OS;
 }
 
@@ -95,6 +98,14 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
           return MI.takeError();
         break;
 
+      case InfoType::CallSiteInfo:
+        if (Expected<llvm::gsym::CallSiteInfoCollection> CI =
+                llvm::gsym::CallSiteInfoCollection::decode(InfoData, BaseAddr))
+          FI.CallSites = std::move(CI.get());
+        else
+          return CI.takeError();
+        break;
+
       default:
         return createStringError(std::errc::io_error,
                                  "0x%8.8" PRIx64 ": unsupported InfoType %u",
@@ -200,7 +211,25 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
     Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
   }
 
-  // Terminate the data chunks with and end of list with zero size
+  // Write out the call sites if we have any.
+  if (CallSites) {
+    Out.writeU32(InfoType::CallSiteInfo);
+    // Write a uint32_t length as zero for now, we will fix this up after
+    // writing the CallSites out with the number of bytes that were written.
+    Out.writeU32(0);
+    const auto StartOffset = Out.tell();
+    llvm::Error err = CallSites->encode(Out);
+    if (err)
+      return std::move(err);
+    const auto Length = Out.tell() - StartOffset;
+    if (Length > UINT32_MAX)
+      return createStringError(std::errc::invalid_argument,
+                               "CallSites length is greater than UINT32_MAX");
+    // Fixup the size of the CallSites data with the correct size.
+    Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+  }
+
+  // Terminate the data chunks with an end of list with zero size.
   Out.writeU32(InfoType::EndOfList);
   Out.writeU32(0);
   return FuncInfoOffset;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 3227fa5400fb5c..0df84ee256aef9 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -189,6 +189,12 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
   return ErrorSuccess();
 }
 
+llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) {
+  // Use the loader to load call site information from the YAML file.
+  CallSiteInfoLoader Loader(StringOffsetMap, StrTab, StringStorage);
+  return Loader.loadYAML(Funcs, YAMLFile);
+}
+
 void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
   // Nothing to do if we have less than 2 functions.
   if (Funcs.size() < 2)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index ddfc92e1a8a403..4f645714480e6d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -411,6 +411,10 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
     assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
     dump(OS, *FI.MergedFunctions);
   }
+
+  if (FI.CallSites) {
+    dump(OS, *FI.CallSites);
+  }
 }
 
 void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
@@ -420,6 +424,47 @@ void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
   }
 }
 
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+  OS << HEX64(CSI.ReturnAddress);
+
+  std::string Flags;
+  auto addFlag = [&](const char *Flag) {
+    if (!Flags.empty())
+      Flags += " | ";
+    Flags += Flag;
+  };
+
+  if (CSI.Flags == CallSiteInfo::Flags::None)
+    Flags = "None";
+  else {
+    if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
+      addFlag("InternalCall");
+
+    if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
+      addFlag("ExternalCall");
+  }
+  OS << " Flags[" << Flags << "]";
+
+  if (!CSI.MatchRegex.empty()) {
+    OS << " MatchRegex[";
+    for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
+      if (i > 0)
+        OS << ";";
+      OS << getString(CSI.MatchRegex[i]);
+    }
+    OS << "]";
+  }
+}
+
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC) {
+  OS << "CallSites (by return address):\n";
+  for (const auto &CS : CSIC.CallSites) {
+    OS.indent(2);
+    dump(OS, CS);
+    OS << "\n";
+  }
+}
+
 void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
   OS.indent(Indent);
   OS << "LineTable:\n";
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
new file mode 100644
index 00000000000000..5c31d609626694
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-dsym.yaml
@@ -0,0 +1,950 @@
+## Test that reconstructs a dSYM file from YAML and generates a callsite-enabled gsym from it - and then verifies the gsym.
+## See llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test for the script to generate this yaml file
+
+# RUN: split-file %s %t
+# RUN: yaml2obj %t/call_sites.dSYM.yaml -o %t/call_sites.dSYM
+
+# RUN: llvm-gsymutil --convert=%t/call_sites.dSYM --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+
+# Dump the GSYM file and check the output for callsite information
+# RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-GSYM %s
+
+
+# CHECK-GSYM:      FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything"
+# CHECK-GSYM-NEXT: LineTable:
+#                    // func_mainBin_dec_call_everything() {
+# CHECK-GSYM-NEXT:   0x[[#%x,]] {{.*}}/call_sites.cpp:16
+#                    // func_mainBin_dec_01();
+# CHECK-GSYM-NEXT:   0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17
+#                    // func_mainBin_dec_02();
+# CHECK-GSYM-NEXT:   0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18
+#                    // func_mainBin_dec_03();
+# CHECK-GSYM-NEXT:   [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19
+#                    // func_mainBin_inc_01();
+# CHECK-GSYM-NEXT:   [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21
+#                    // func_mainBin_inc_02();
+# CHECK-GSYM-NEXT:   [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22
+#                    // func_mainBin_inc_03();
+# CHECK-GSYM-NEXT:   [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23
+#                    // g_func_ptr();
+# CHECK-GSYM-NEXT:   [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25
+#                    // g_extern_func_ptr();
+# CHECK-GSYM-NEXT:   [[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26
+#                    // g_volatile_var = 0;
+# CHECK-GSYM-NEXT:   [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28
+#                    // }
+# CHECK-GSYM-NEXT:   [[#%x,]] {{.*}}/call_sites.cpp:29
+# CHECK-GSYM-NEXT: CallSites (by return address):
+# CHECK-GSYM-NEXT:   [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01]
+# CHECK-GSYM-NEXT:   [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02]
+# CHECK-GSYM-NEXT:   [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03]
+# CHECK-GSYM-NEXT:   [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01]
+# CHECK-GSYM-NEXT:   [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02]
+# CHECK-GSYM-NEXT:   [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03]
+# CHECK-GSYM-NEXT:   [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*]
+# CHECK-GSYM-NEXT:   [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*]
+
+
+#--- callsites.yaml
+functions:
+  - name: func_mainBin_dec_call_everything
+    callsites:
+      - return_offset: 0x0C
+        match_regex: ["func_mainBin_dec_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x10
+        match_regex: ["func_mainBin_dec_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x14
+        match_regex: ["func_mainBin_dec_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 24
+        match_regex: ["func_mainBin_inc_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 28
+        match_regex: ["func_mainBin_inc_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 32
+        match_regex: ["func_mainBin_inc_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 44
+        match_regex: [".*func.*"]
+      - return_offset: 56
+        match_regex: [".*extern_func.*"]
+        flags:
+          - "ExternalCall"
+
+
+#--- call_sites.dSYM.yaml
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x100000C
+  cpusubtype:      0x0
+  filetype:        0xA
+  ncmds:           8
+  sizeofcmds:      1392
+  flags:           0x0
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            4C4C44E9-5555-3144-A1D3-328233D00078
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           720896
+    sdk:             720896
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          4096
+    nsyms:           12
+    stroff:          4288
+    strsize:         235
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __PAGEZERO
+    vmaddr:          0
+    vmsize:          4294967296
+    fileoff:         0
+    filesize:        0
+    maxprot:         0
+    initprot:        0
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          4294967296
+    vmsize:          16384
+    fileoff:         0
+    filesize:        0
+    maxprot:         5
+    initprot:        5
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x100000338
+        size:            216
+        offset:          0x0
+        align:           2
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         CFFAEDFE0C000001000000000A000000080000007005000000000000000000001B000000180000004C4C44E955553144A1D3328233D0007832000000180000000100000000000B0000000B00000000000200000018000000001000000C000000C0100000EB00000019000000480000005F5F504147455A45524F00000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000019000000980000005F5F544558540000000000000000000000000000010000000040000000000000
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __DATA
+    vmaddr:          4294983680
+    vmsize:          16384
+    fileoff:         0
+    filesize:        0
+    maxprot:         3
+    initprot:        3
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __common
+        segname:         __DATA
+        addr:            0x100004000
+        size:            24
+        offset:          0x0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x1
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          4295000064
+    vmsize:          4096
+    fileoff:         4096
+    filesize:        427
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         872
+    segname:         __DWARF
+    vmaddr:          4295004160
+    vmsize:          4096
+    fileoff:         8192
+    filesize:        1894
+    maxprot:         7
+    initprot:        3
+    nsects:          10
+    flags:           0
+    Sections:
+      - sectname:        __debug_line
+        segname:         __DWARF
+        addr:            0x100009000
+        size:            150
+        offset:          0x2000
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+      - sectname:        __debug_aranges
+        segname:         __DWARF
+        addr:            0x100009096
+        size:            48
+        offset:          0x2096
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+      - sectname:        __debug_info
+        segname:         __DWARF
+        addr:            0x1000090C6
+        size:            424
+        offset:          0x20C6
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+      - sectname:        __debug_frame
+        segname:         __DWARF
+        addr:            0x10000926E
+        size:            232
+        offset:          0x226E
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         14000000FFFFFFFF0400080001781E0C1F0000000000000014000000000000003803000001000000140000000000000014000000000000004C03000001000000140000000000000014000000000000006003000001000000140000000000000014000000000000007403000001000000140000000000000014000000000000008803000001000000140000000000000014000000000000009C0300000100000014000000000000001C00000000000000B0030000010000004800000000000000480C1D109E019D021C00000000000000F8030000010000001800000000000000480C1D109E019D02
+      - sectname:        __debug_abbrev
+        segname:         __DWARF
+        addr:            0x100009356
+        size:            171
+        offset:          0x2356
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+      - sectname:        __debug_str
+        segname:         __DWARF
+        addr:            0x100009401
+        size:            378
+        offset:          0x2401
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+      - sectname:        __apple_namespac
+        segname:         __DWARF
+        addr:            0x10000957B
+        size:            36
+        offset:          0x257B
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF
+      - sectname:        __apple_names
+        segname:         __DWARF
+        addr:            0x10000959F
+        size:            340
+        offset:          0x259F
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         48534148010000000B0000000B0000000C000000000000000100000001000600FFFFFFFF00000000FFFFFFFFFFFFFFFFFFFFFFFF01000000FFFFFFFF0300000004000000060000000900000073A83B36215E623FACBB81686A7F9A7C1939EE6AC7E03A771A39EE6AC8E03A773856D6801B39EE6AC9E03A77A4000000B4000000C4000000D4000000E4000000F40000000401000014010000240100003401000044010000AC000000010000002E00000000000000CA000000010000007300000000000000BF000000010000004F0000000000000075010000010000008001000000000000DC0000000100000088000000000000001801000001000000C700000000000000F0000000010000009D000000000000002C01000001000000DC00000000000000540100000100000006010000000000000401000001000000B2000000000000004001000001000000F100000000000000
+      - sectname:        __apple_types
+        segname:         __DWARF
+        addr:            0x1000096F3
+        size:            79
+        offset:          0x26F3
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         48534148010000000100000001000000180000000000000004000000010006000300050005000B0006000600000000003080880B38000000BB0000000100000048000000240000A4283A0C00000000
+      - sectname:        __apple_objc
+        segname:         __DWARF
+        addr:            0x100009742
+        size:            36
+        offset:          0x2742
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF
+LinkEditData:
+  NameList:
+    - n_strx:          2
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968312
+    - n_strx:          8
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968120
+    - n_strx:          29
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968140
+    - n_strx:          50
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968160
+    - n_strx:          71
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968180
+    - n_strx:          92
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968200
+    - n_strx:          113
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968220
+    - n_strx:          134
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968240
+    - n_strx:          168
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983680
+    - n_strx:          184
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983688
+    - n_strx:          196
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983696
+    - n_strx:          215
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          16
+      n_value:         4294967296
+  StringTable:
+    - ''
+    - ''
+    - _main
+    - _func_mainBin_dec_01
+    - _func_mainBin_dec_02
+    - _func_mainBin_dec_03
+    - _func_mainBin_inc_01
+    - _func_mainBin_inc_02
+    - _func_mainBin_inc_03
+    - _func_mainBin_dec_call_everything
+    - _g_volatile_var
+    - _g_func_ptr
+    - _g_extern_func_ptr
+    - __mh_execute_header
+DWARF:
+  debug_str:
+    - ''
+    - 'clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)'
+    - call_sites.cpp
+    - '/'
+    - '/tmp/___________________________________/tst'
+    - g_volatile_var
+    - int
+    - g_func_ptr
+    - g_extern_func_ptr
+    - func_mainBin_dec_01
+    - func_mainBin_dec_02
+    - func_mainBin_dec_03
+    - func_mainBin_inc_01
+    - func_mainBin_inc_02
+    - func_mainBin_inc_03
+    - func_mainBin_dec_call_everything
+    - main
+  debug_abbrev:
+    - ID:              0
+      Table:
+        - Code:            0x1
+          Tag:             DW_TAG_compile_unit
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_producer
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_language
+              Form:            DW_FORM_data2
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_LLVM_sysroot
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_stmt_list
+              Form:            DW_FORM_sec_offset
+            - Attribute:       DW_AT_comp_dir
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_APPLE_optimized
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_data4
+        - Code:            0x2
+          Tag:             DW_TAG_variable
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref_addr
+            - Attribute:       DW_AT_external
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_decl_file
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_decl_line
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_location
+              Form:            DW_FORM_exprloc
+        - Code:            0x3
+          Tag:             DW_TAG_volatile_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref_addr
+        - Code:            0x4
+          Tag:             DW_TAG_base_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_encoding
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_byte_size
+              Form:            DW_FORM_data1
+        - Code:            0x5
+          Tag:             DW_TAG_pointer_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref_addr
+        - Code:            0x6
+          Tag:             DW_TAG_subroutine_type
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref_addr
+        - Code:            0x7
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_data4
+            - Attribute:       DW_AT_APPLE_omit_frame_ptr
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_frame_base
+              Form:            DW_FORM_exprloc
+            - Attribute:       DW_AT_call_all_calls
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_decl_file
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_decl_line
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_external
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_APPLE_optimized
+              Form:            DW_FORM_flag_present
+        - Code:            0x8
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_data4
+            - Attribute:       DW_AT_frame_base
+              Form:            DW_FORM_exprloc
+            - Attribute:       DW_AT_call_all_calls
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_decl_file
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_decl_line
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_external
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_APPLE_optimized
+              Form:            DW_FORM_flag_present
+        - Code:            0x9
+          Tag:             DW_TAG_call_site
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_call_origin
+              Form:            DW_FORM_ref4
+            - Attribute:       DW_AT_call_return_pc
+              Form:            DW_FORM_addr
+        - Code:            0xA
+          Tag:             DW_TAG_call_site
+          Children:        DW_CHILDREN_no
+          Attributes:
+            - Attribute:       DW_AT_call_target
+              Form:            DW_FORM_exprloc
+            - Attribute:       DW_AT_call_return_pc
+              Form:            DW_FORM_addr
+        - Code:            0xB
+          Tag:             DW_TAG_subprogram
+          Children:        DW_CHILDREN_yes
+          Attributes:
+            - Attribute:       DW_AT_low_pc
+              Form:            DW_FORM_addr
+            - Attribute:       DW_AT_high_pc
+              Form:            DW_FORM_data4
+            - Attribute:       DW_AT_frame_base
+              Form:            DW_FORM_exprloc
+            - Attribute:       DW_AT_call_all_calls
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_name
+              Form:            DW_FORM_strp
+            - Attribute:       DW_AT_decl_file
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_decl_line
+              Form:            DW_FORM_data1
+            - Attribute:       DW_AT_type
+              Form:            DW_FORM_ref_addr
+            - Attribute:       DW_AT_external
+              Form:            DW_FORM_flag_present
+            - Attribute:       DW_AT_APPLE_optimized
+              Form:            DW_FORM_flag_present
+  debug_aranges:
+    - Length:          0x2C
+      Version:         2
+      CuOffset:        0x0
+      AddressSize:     0x8
+      Descriptors:
+        - Address:         0x100000338
+          Length:          0xD8
+  debug_info:
+    - Length:          0x1A4
+      Version:         4
+      AbbrevTableID:   0
+      AbbrOffset:      0x0
+      AddrSize:        8
+      Entries:
+        - AbbrCode:        0x1
+          Values:
+            - Value:           0x1
+            - Value:           0x21
+            - Value:           0x6E
+            - Value:           0x7D
+            - Value:           0x0
+            - Value:           0x7F
+            - Value:           0x1
+            - Value:           0x100000338
+            - Value:           0xD8
+        - AbbrCode:        0x2
+          Values:
+            - Value:           0xAC
+            - Value:           0x43
+            - Value:           0x1
+            - Value:           0x1
+            - Value:           0x4
+            - Value:           0x9
+              BlockData:       [ 0x3, 0x0, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0,
+                                 0x0 ]
+        - AbbrCode:        0x3
+          Values:
+            - Value:           0x48
+        - AbbrCode:        0x4
+          Values:
+            - Value:           0xBB
+            - Value:           0x5
+            - Value:           0x4
+        - AbbrCode:        0x2
+          Values:
+            - Value:           0xBF
+            - Value:           0x64
+            - Value:           0x1
+            - Value:           0x1
+            - Value:           0x5
+            - Value:           0x9
+              BlockData:       [ 0x3, 0x8, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0,
+                                 0x0 ]
+        - AbbrCode:        0x3
+          Values:
+            - Value:           0x69
+        - AbbrCode:        0x5
+          Values:
+            - Value:           0x6E
+        - AbbrCode:        0x6
+          Values:
+            - Value:           0x48
+        - AbbrCode:        0x2
+          Values:
+            - Value:           0xCA
+            - Value:           0x64
+            - Value:           0x1
+            - Value:           0x1
+            - Value:           0x6
+            - Value:           0x9
+              BlockData:       [ 0x3, 0x10, 0x40, 0x0, 0x0, 0x1, 0x0, 0x0,
+                                 0x0 ]
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x100000338
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0xDC
+            - Value:           0x1
+            - Value:           0x8
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x10000034C
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0xF0
+            - Value:           0x1
+            - Value:           0x9
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x100000360
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0x104
+            - Value:           0x1
+            - Value:           0xA
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x100000374
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0x118
+            - Value:           0x1
+            - Value:           0xC
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x100000388
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0x12C
+            - Value:           0x1
+            - Value:           0xD
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x7
+          Values:
+            - Value:           0x10000039C
+            - Value:           0x14
+            - Value:           0x1
+            - Value:           0x1
+              BlockData:       [ 0x6F ]
+            - Value:           0x1
+            - Value:           0x140
+            - Value:           0x1
+            - Value:           0xE
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x8
+          Values:
+            - Value:           0x1000003B0
+            - Value:           0x48
+            - Value:           0x1
+              BlockData:       [ 0x6D ]
+            - Value:           0x1
+            - Value:           0x154
+            - Value:           0x1
+            - Value:           0x10
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0x88
+            - Value:           0x1000003BC
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0x9D
+            - Value:           0x1000003C0
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0xB2
+            - Value:           0x1000003C4
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0xC7
+            - Value:           0x1000003C8
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0xDC
+            - Value:           0x1000003CC
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0xF1
+            - Value:           0x1000003D0
+        - AbbrCode:        0xA
+          Values:
+            - Value:           0x1
+              BlockData:       [ 0x58 ]
+            - Value:           0x1000003DC
+        - AbbrCode:        0xA
+          Values:
+            - Value:           0x1
+              BlockData:       [ 0x58 ]
+            - Value:           0x1000003E8
+        - AbbrCode:        0x0
+        - AbbrCode:        0xB
+          Values:
+            - Value:           0x1000003F8
+            - Value:           0x18
+            - Value:           0x1
+              BlockData:       [ 0x6D ]
+            - Value:           0x1
+            - Value:           0x175
+            - Value:           0x1
+            - Value:           0x1F
+            - Value:           0x48
+            - Value:           0x1
+            - Value:           0x1
+        - AbbrCode:        0x9
+          Values:
+            - Value:           0x106
+            - Value:           0x100000404
+        - AbbrCode:        0x0
+        - AbbrCode:        0x0
+  debug_line:
+    - Length:          146
+      Version:         4
+      PrologueLength:  38
+      MinInstLength:   1
+      MaxOpsPerInst:   1
+      DefaultIsStmt:   1
+      LineBase:        251
+      LineRange:       14
+      OpcodeBase:      13
+      StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+      Files:
+        - Name:            call_sites.cpp
+          DirIdx:          0
+          ModTime:         0
+          Length:          0
+      Opcodes:
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          9
+          SubOpcode:       DW_LNE_set_address
+          Data:            4294968120
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x19
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x4C
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            54
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            58
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0xF2
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            0
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          0x4C
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            5
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x83
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          0x4C
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          0x4C
+          Data:            0
+        - Opcode:          0xBB
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            20
+        - Opcode:          0xBC
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            1
+        - Opcode:          DW_LNS_set_epilogue_begin
+          Data:            0
+        - Opcode:          0x83
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            0
+        - Opcode:          0x84
+          Data:            0
+        - Opcode:          DW_LNS_set_column
+          Data:            3
+        - Opcode:          DW_LNS_set_prologue_end
+          Data:            0
+        - Opcode:          0x83
+          Data:            0
+        - Opcode:          0x4B
+          Data:            0
+        - Opcode:          DW_LNS_negate_stmt
+          Data:            0
+        - Opcode:          DW_LNS_set_epilogue_begin
+          Data:            0
+        - Opcode:          0x4A
+          Data:            0
+        - Opcode:          DW_LNS_advance_pc
+          Data:            8
+        - Opcode:          DW_LNS_extended_op
+          ExtLen:          1
+          SubOpcode:       DW_LNE_end_sequence
+          Data:            0
+...
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
new file mode 100644
index 00000000000000..b454a9e14699a9
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-exe.yaml
@@ -0,0 +1,558 @@
+## Test that reconstructs a MachO binary from YAML and generates a callsite-enabled gsym from it - and then verifies the gsym.
+## See llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test for the script used to generate this YAML file.
+
+# RUN: split-file %s %t
+# RUN: yaml2obj %t/call_sites.exe.yaml -o %t/call_sites.exe
+
+# RUN: llvm-gsymutil --convert=%t/call_sites.exe --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_exe.gsym
+
+# Dump the GSYM file and check the output for callsite information
+# RUN: llvm-gsymutil %t/call_sites_exe.gsym | FileCheck --check-prefix=CHECK-GSYM %s
+
+
+# CHECK-GSYM:      FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything"
+# CHECK-GSYM-NEXT: CallSites (by return address):
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[None] MatchRegex[.*func.*]
+# CHECK-GSYM-NEXT:   0x[[#%x,]] Flags[ExternalCall] MatchRegex[.*extern_func.*]
+
+
+#--- callsites.yaml
+functions:
+  - name: func_mainBin_dec_call_everything
+    callsites:
+      - return_offset: 0x0C
+        match_regex: ["func_mainBin_dec_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x10
+        match_regex: ["func_mainBin_dec_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x14
+        match_regex: ["func_mainBin_dec_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 24
+        match_regex: ["func_mainBin_inc_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 28
+        match_regex: ["func_mainBin_inc_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 32
+        match_regex: ["func_mainBin_inc_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 44
+        match_regex: [".*func.*"]
+      - return_offset: 56
+        match_regex: [".*extern_func.*"]
+        flags:
+          - "ExternalCall"
+
+
+#--- call_sites.exe.yaml
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x100000C
+  cpusubtype:      0x0
+  filetype:        0x2
+  ncmds:           14
+  sizeofcmds:      760
+  flags:           0x200085
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __PAGEZERO
+    vmaddr:          0
+    vmsize:          4294967296
+    fileoff:         0
+    filesize:        0
+    maxprot:         0
+    initprot:        0
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          4294967296
+    vmsize:          16384
+    fileoff:         0
+    filesize:        16384
+    maxprot:         5
+    initprot:        5
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x100000338
+        size:            216
+        offset:          0x338
+        align:           2
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+        content:         28000090090140B929050051090100B9C0035FD628000090090140B929050051090100B9C0035FD628000090090140B929050051090100B9C0035FD628000090090140B929050011090100B9C0035FD628000090090140B929050011090100B9C0035FD628000090090140B929050011090100B9C0035FD6FD7BBFA9FD030091E0FFFF97E4FFFF97E8FFFF97ECFFFF97F0FFFF97F4FFFF971F2003D5A8E1015800013FD61F2003D588E1015800013FD6280000901F0100B9FD7BC1A8C0035FD6FD7BBFA9FD030091ECFFFF9700008052FD7BC1A8C0035FD6
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __DATA
+    vmaddr:          4294983680
+    vmsize:          16384
+    fileoff:         16384
+    filesize:        0
+    maxprot:         3
+    initprot:        3
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __common
+        segname:         __DATA
+        addr:            0x100004000
+        size:            24
+        offset:          0x0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x1
+        reserved1:       0x0
+        reserved2:       0x0
+        reserved3:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          4295000064
+    vmsize:          1648
+    fileoff:         16384
+    filesize:        1648
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_DYLD_INFO_ONLY
+    cmdsize:         48
+    rebase_off:      0
+    rebase_size:     0
+    bind_off:        0
+    bind_size:       0
+    weak_bind_off:   0
+    weak_bind_size:  0
+    lazy_bind_off:   0
+    lazy_bind_size:  0
+    export_off:      16384
+    export_size:     232
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          16632
+    nsyms:           34
+    stroff:          17176
+    strsize:         568
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       22
+    iextdefsym:      22
+    nextdefsym:      12
+    iundefsym:       34
+    nundefsym:       0
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LOAD_DYLINKER
+    cmdsize:         32
+    name:            12
+    Content:         '/usr/lib/dyld'
+    ZeroPadBytes:    7
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            4C4C44E9-5555-3144-A1D3-328233D00078
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        1
+    minos:           720896
+    sdk:             720896
+    ntools:          1
+    Tools:
+      - tool:            4
+        version:         1310720
+  - cmd:             LC_MAIN
+    cmdsize:         24
+    entryoff:        1016
+    stacksize:       0
+  - cmd:             LC_FUNCTION_STARTS
+    cmdsize:         16
+    dataoff:         16616
+    datasize:        16
+  - cmd:             LC_DATA_IN_CODE
+    cmdsize:         16
+    dataoff:         16632
+    datasize:        0
+  - cmd:             LC_CODE_SIGNATURE
+    cmdsize:         16
+    dataoff:         17744
+    datasize:        288
+LinkEditData:
+  ExportTrie:
+    TerminalSize:    0
+    NodeOffset:      0
+    Name:            ''
+    Flags:           0x0
+    Address:         0x0
+    Other:           0x0
+    ImportName:      ''
+    Children:
+      - TerminalSize:    0
+        NodeOffset:      5
+        Name:            _
+        Flags:           0x0
+        Address:         0x0
+        Other:           0x0
+        ImportName:      ''
+        Children:
+          - TerminalSize:    2
+            NodeOffset:      52
+            Name:            _mh_execute_header
+            Flags:           0x0
+            Address:         0x0
+            Other:           0x0
+            ImportName:      ''
+          - TerminalSize:    3
+            NodeOffset:      56
+            Name:            main
+            Flags:           0x0
+            Address:         0x3F8
+            Other:           0x0
+            ImportName:      ''
+          - TerminalSize:    0
+            NodeOffset:      61
+            Name:            g_
+            Flags:           0x0
+            Address:         0x0
+            Other:           0x0
+            ImportName:      ''
+            Children:
+              - TerminalSize:    4
+                NodeOffset:      104
+                Name:            volatile_var
+                Flags:           0x0
+                Address:         0x4000
+                Other:           0x0
+                ImportName:      ''
+              - TerminalSize:    4
+                NodeOffset:      110
+                Name:            func_ptr
+                Flags:           0x0
+                Address:         0x4008
+                Other:           0x0
+                ImportName:      ''
+              - TerminalSize:    4
+                NodeOffset:      116
+                Name:            extern_func_ptr
+                Flags:           0x0
+                Address:         0x4010
+                Other:           0x0
+                ImportName:      ''
+          - TerminalSize:    0
+            NodeOffset:      122
+            Name:            func_mainBin_
+            Flags:           0x0
+            Address:         0x0
+            Other:           0x0
+            ImportName:      ''
+            Children:
+              - TerminalSize:    0
+                NodeOffset:      139
+                Name:            dec_
+                Flags:           0x0
+                Address:         0x0
+                Other:           0x0
+                ImportName:      ''
+                Children:
+                  - TerminalSize:    3
+                    NodeOffset:      163
+                    Name:            call_everything
+                    Flags:           0x0
+                    Address:         0x3B0
+                    Other:           0x0
+                    ImportName:      ''
+                  - TerminalSize:    0
+                    NodeOffset:      168
+                    Name:            '0'
+                    Flags:           0x0
+                    Address:         0x0
+                    Other:           0x0
+                    ImportName:      ''
+                    Children:
+                      - TerminalSize:    3
+                        NodeOffset:      182
+                        Name:            '1'
+                        Flags:           0x0
+                        Address:         0x338
+                        Other:           0x0
+                        ImportName:      ''
+                      - TerminalSize:    3
+                        NodeOffset:      187
+                        Name:            '3'
+                        Flags:           0x0
+                        Address:         0x360
+                        Other:           0x0
+                        ImportName:      ''
+                      - TerminalSize:    3
+                        NodeOffset:      192
+                        Name:            '2'
+                        Flags:           0x0
+                        Address:         0x34C
+                        Other:           0x0
+                        ImportName:      ''
+              - TerminalSize:    0
+                NodeOffset:      197
+                Name:            inc_0
+                Flags:           0x0
+                Address:         0x0
+                Other:           0x0
+                ImportName:      ''
+                Children:
+                  - TerminalSize:    3
+                    NodeOffset:      211
+                    Name:            '2'
+                    Flags:           0x0
+                    Address:         0x388
+                    Other:           0x0
+                    ImportName:      ''
+                  - TerminalSize:    3
+                    NodeOffset:      216
+                    Name:            '1'
+                    Flags:           0x0
+                    Address:         0x374
+                    Other:           0x0
+                    ImportName:      ''
+                  - TerminalSize:    3
+                    NodeOffset:      221
+                    Name:            '3'
+                    Flags:           0x0
+                    Address:         0x39C
+                    Other:           0x0
+                    ImportName:      ''
+  NameList:
+    - n_strx:          235
+      n_type:          0x64
+      n_sect:          0
+      n_desc:          0
+      n_value:         0
+    - n_strx:          295
+      n_type:          0x66
+      n_sect:          0
+      n_desc:          1
+      n_value:         0
+    - n_strx:          353
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968312
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         24
+    - n_strx:          359
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968120
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          380
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968140
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          401
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968160
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          422
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968180
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          443
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968200
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          464
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968220
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         20
+    - n_strx:          485
+      n_type:          0x24
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968240
+    - n_strx:          1
+      n_type:          0x24
+      n_sect:          0
+      n_desc:          0
+      n_value:         72
+    - n_strx:          519
+      n_type:          0x20
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983680
+    - n_strx:          535
+      n_type:          0x20
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983688
+    - n_strx:          547
+      n_type:          0x20
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983696
+    - n_strx:          1
+      n_type:          0x64
+      n_sect:          1
+      n_desc:          0
+      n_value:         0
+    - n_strx:          2
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968312
+    - n_strx:          8
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968120
+    - n_strx:          29
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968140
+    - n_strx:          50
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968160
+    - n_strx:          71
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968180
+    - n_strx:          92
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968200
+    - n_strx:          113
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968220
+    - n_strx:          134
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294968240
+    - n_strx:          168
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983680
+    - n_strx:          184
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983688
+    - n_strx:          196
+      n_type:          0xF
+      n_sect:          2
+      n_desc:          0
+      n_value:         4294983696
+    - n_strx:          215
+      n_type:          0xF
+      n_sect:          1
+      n_desc:          16
+      n_value:         4294967296
+  StringTable:
+    - ' '
+    - _main
+    - _func_mainBin_dec_01
+    - _func_mainBin_dec_02
+    - _func_mainBin_dec_03
+    - _func_mainBin_inc_01
+    - _func_mainBin_inc_02
+    - _func_mainBin_inc_03
+    - _func_mainBin_dec_call_everything
+    - _g_volatile_var
+    - _g_func_ptr
+    - _g_extern_func_ptr
+    - __mh_execute_header
+    - '/tmp/_______________________________________/call_sites.cpp'
+    - '/tmp/_______________________________________/call_sites.o'
+    - _main
+    - _func_mainBin_dec_01
+    - _func_mainBin_dec_02
+    - _func_mainBin_dec_03
+    - _func_mainBin_inc_01
+    - _func_mainBin_inc_02
+    - _func_mainBin_inc_03
+    - _func_mainBin_dec_call_everything
+    - _g_volatile_var
+    - _g_func_ptr
+    - _g_extern_func_ptr
+    - ''
+    - ''
+  FunctionStarts:  [ 0x338, 0x34C, 0x360, 0x374, 0x388, 0x39C, 0x3B0, 0x3F8 ]
+...
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
new file mode 100644
index 00000000000000..255b68a343b8db
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
@@ -0,0 +1,304 @@
+// RUN: split-file %s %t
+
+// Compile the input LLVM IR into an object file
+// RUN: llc -enable-machine-outliner=never -mtriple arm64-apple-darwin -filetype=obj %t/call_sites.ll -o %t/call_sites.o
+// RUN: llvm-gsymutil --convert=%t/call_sites.o --callsites-from-yaml=%t/callsites.yaml -o %t/call_sites_obj.gsym
+
+// Dump the GSYM file and check the output for callsite information
+// RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s
+
+
+// CHECK-GSYM:      FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything"
+// CHECK-GSYM-NEXT: LineTable:
+                      // func_mainBin_dec_call_everything() {
+// CHECK-GSYM-NEXT:   0x[[#%x,]] {{.*}}/call_sites.cpp:16
+                      // func_mainBin_dec_01();
+// CHECK-GSYM-NEXT:   0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17
+                      // func_mainBin_dec_02();
+// CHECK-GSYM-NEXT:   0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18
+                      // func_mainBin_dec_03();
+// CHECK-GSYM-NEXT:   [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19
+                      // func_mainBin_inc_01();
+// CHECK-GSYM-NEXT:   [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21
+                      // func_mainBin_inc_02();
+// CHECK-GSYM-NEXT:   [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22
+                      // func_mainBin_inc_03();
+// CHECK-GSYM-NEXT:   [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23
+                      // g_func_ptr();
+// CHECK-GSYM-NEXT:   [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25
+                      // g_extern_func_ptr();
+// CHECK-GSYM-NEXT:   [[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26
+                      // g_volatile_var = 0;
+// CHECK-GSYM-NEXT:   [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28
+                      // }
+// CHECK-GSYM-NEXT:   [[#%x,]] {{.*}}/call_sites.cpp:29
+// CHECK-GSYM-NEXT: CallSites (by return address):
+// CHECK-GSYM-NEXT:   [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01]
+// CHECK-GSYM-NEXT:   [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02]
+// CHECK-GSYM-NEXT:   [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03]
+// CHECK-GSYM-NEXT:   [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01]
+// CHECK-GSYM-NEXT:   [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02]
+// CHECK-GSYM-NEXT:   [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03]
+// CHECK-GSYM-NEXT:   [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*]
+// CHECK-GSYM-NEXT:   [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*]
+
+
+//--- callsites.yaml
+functions:
+  - name: func_mainBin_dec_call_everything
+    callsites:
+      - return_offset: 0x0C
+        match_regex: ["func_mainBin_dec_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x10
+        match_regex: ["func_mainBin_dec_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 0x14
+        match_regex: ["func_mainBin_dec_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 24
+        match_regex: ["func_mainBin_inc_01"]
+        flags:
+          - "InternalCall"
+      - return_offset: 28
+        match_regex: ["func_mainBin_inc_02"]
+        flags:
+          - "InternalCall"
+      - return_offset: 32
+        match_regex: ["func_mainBin_inc_03"]
+        flags:
+          - "InternalCall"
+      - return_offset: 44
+        match_regex: [".*func.*"]
+      - return_offset: 56
+        match_regex: [".*extern_func.*"]
+        flags:
+          - "ExternalCall"
+
+
+//--- call_sites.ll
+; ModuleID = 'call_sites.cpp'
+source_filename = "call_sites.cpp"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-unknown-macosx10.4.0"
+
+ at g_volatile_var = global i32 0, align 4, !dbg !0
+ at g_func_ptr = global ptr null, align 8, !dbg !5
+ at g_extern_func_ptr = global ptr null, align 8, !dbg !12
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_dec_01() local_unnamed_addr #0 !dbg !21 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !24, !tbaa !25
+  %dec = add nsw i32 %0, -1, !dbg !24
+  store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !24, !tbaa !25
+  ret void, !dbg !29
+}
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_dec_02() local_unnamed_addr #0 !dbg !30 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !31, !tbaa !25
+  %dec = add nsw i32 %0, -1, !dbg !31
+  store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !31, !tbaa !25
+  ret void, !dbg !32
+}
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_dec_03() local_unnamed_addr #0 !dbg !33 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !34, !tbaa !25
+  %dec = add nsw i32 %0, -1, !dbg !34
+  store volatile i32 %dec, ptr @g_volatile_var, align 4, !dbg !34, !tbaa !25
+  ret void, !dbg !35
+}
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_inc_01() local_unnamed_addr #0 !dbg !36 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !37, !tbaa !25
+  %inc = add nsw i32 %0, 1, !dbg !37
+  store volatile i32 %inc, ptr @g_volatile_var, align 4, !dbg !37, !tbaa !25
+  ret void, !dbg !38
+}
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_inc_02() local_unnamed_addr #0 !dbg !39 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !40, !tbaa !25
+  %inc = add nsw i32 %0, 1, !dbg !40
+  store volatile i32 %inc, ptr @g_volatile_var, align 4, !dbg !40, !tbaa !25
+  ret void, !dbg !41
+}
+
+; Function Attrs: minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none)
+define void @func_mainBin_inc_03() local_unnamed_addr #0 !dbg !42 {
+entry:
+  %0 = load volatile i32, ptr @g_volatile_var, align 4, !dbg !43, !tbaa !25
+  %inc = add nsw i32 %0, 1, !dbg !43
+  store volatile i32 %inc, ptr @g_volatile_var, align 4, !dbg !43, !tbaa !25
+  ret void, !dbg !44
+}
+
+; Function Attrs: minsize mustprogress noinline nounwind optsize ssp
+define void @func_mainBin_dec_call_everything() local_unnamed_addr #1 !dbg !45 {
+entry:
+  tail call void @func_mainBin_dec_01() #3, !dbg !46
+  tail call void @func_mainBin_dec_02() #3, !dbg !47
+  tail call void @func_mainBin_dec_03() #3, !dbg !48
+  tail call void @func_mainBin_inc_01() #3, !dbg !49
+  tail call void @func_mainBin_inc_02() #3, !dbg !50
+  tail call void @func_mainBin_inc_03() #3, !dbg !51
+  %0 = load volatile ptr, ptr @g_func_ptr, align 8, !dbg !52, !tbaa !53
+  %call = tail call noundef i32 %0() #4, !dbg !52
+  %1 = load volatile ptr, ptr @g_extern_func_ptr, align 8, !dbg !55, !tbaa !53
+  %call1 = tail call noundef i32 %1() #4, !dbg !55
+  store volatile i32 0, ptr @g_volatile_var, align 4, !dbg !56, !tbaa !25
+  ret void, !dbg !57
+}
+
+; Function Attrs: minsize mustprogress norecurse nounwind optsize ssp
+define noundef i32 @main() local_unnamed_addr #2 !dbg !58 {
+entry:
+  tail call void @func_mainBin_dec_call_everything() #3, !dbg !59
+  ret i32 0, !dbg !60
+}
+
+attributes #0 = { minsize mustprogress nofree noinline norecurse nounwind optsize ssp memory(readwrite, argmem: none) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #1 = { minsize mustprogress noinline nounwind optsize ssp "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #2 = { minsize mustprogress norecurse nounwind optsize ssp "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+altnzcv,+ccdp,+ccidx,+complxnum,+crc,+dit,+dotprod,+flagm,+fp-armv8,+fp16fml,+fptoint,+fullfp16,+jsconv,+lse,+neon,+pauth,+perfmon,+predres,+ras,+rcpc,+rdm,+sb,+sha2,+sha3,+specrestrict,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,+zcm,+zcz" }
+attributes #3 = { minsize optsize }
+attributes #4 = { minsize nounwind optsize }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!15, !16, !17, !18, !19}
+!llvm.ident = !{!20}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "g_volatile_var", scope: !2, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None, sysroot: "/")
+!3 = !DIFile(filename: "call_sites.cpp", directory: "/tmp/tst")
+!4 = !{!0, !5, !12}
+!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
+!6 = distinct !DIGlobalVariable(name: "g_func_ptr", scope: !2, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true)
+!7 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !8)
+!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression())
+!13 = distinct !DIGlobalVariable(name: "g_extern_func_ptr", scope: !2, file: !3, line: 6, type: !7, isLocal: false, isDefinition: true)
+!14 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !11)
+!15 = !{i32 7, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 3}
+!17 = !{i32 1, !"wchar_size", i32 4}
+!18 = !{i32 8, !"PIC Level", i32 2}
+!19 = !{i32 7, !"frame-pointer", i32 1}
+!20 = !{!"clang version 20.0.0git (https://github.com/alx32/llvm-project.git f41f6ea1f33c4f5e7c94f3d155e44292d1809c50)"}
+!21 = distinct !DISubprogram(name: "func_mainBin_dec_01", scope: !3, file: !3, line: 8, type: !22, scopeLine: 8, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!22 = !DISubroutineType(types: !23)
+!23 = !{null}
+!24 = !DILocation(line: 8, column: 54, scope: !21)
+!25 = !{!26, !26, i64 0}
+!26 = !{!"int", !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C++ TBAA"}
+!29 = !DILocation(line: 8, column: 58, scope: !21)
+!30 = distinct !DISubprogram(name: "func_mainBin_dec_02", scope: !3, file: !3, line: 9, type: !22, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!31 = !DILocation(line: 9, column: 54, scope: !30)
+!32 = !DILocation(line: 9, column: 58, scope: !30)
+!33 = distinct !DISubprogram(name: "func_mainBin_dec_03", scope: !3, file: !3, line: 10, type: !22, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!34 = !DILocation(line: 10, column: 54, scope: !33)
+!35 = !DILocation(line: 10, column: 58, scope: !33)
+!36 = distinct !DISubprogram(name: "func_mainBin_inc_01", scope: !3, file: !3, line: 12, type: !22, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!37 = !DILocation(line: 12, column: 54, scope: !36)
+!38 = !DILocation(line: 12, column: 58, scope: !36)
+!39 = distinct !DISubprogram(name: "func_mainBin_inc_02", scope: !3, file: !3, line: 13, type: !22, scopeLine: 13, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!40 = !DILocation(line: 13, column: 54, scope: !39)
+!41 = !DILocation(line: 13, column: 58, scope: !39)
+!42 = distinct !DISubprogram(name: "func_mainBin_inc_03", scope: !3, file: !3, line: 14, type: !22, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!43 = !DILocation(line: 14, column: 54, scope: !42)
+!44 = !DILocation(line: 14, column: 58, scope: !42)
+!45 = distinct !DISubprogram(name: "func_mainBin_dec_call_everything", scope: !3, file: !3, line: 16, type: !22, scopeLine: 16, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!46 = !DILocation(line: 17, column: 5, scope: !45)
+!47 = !DILocation(line: 18, column: 5, scope: !45)
+!48 = !DILocation(line: 19, column: 5, scope: !45)
+!49 = !DILocation(line: 21, column: 5, scope: !45)
+!50 = !DILocation(line: 22, column: 5, scope: !45)
+!51 = !DILocation(line: 23, column: 5, scope: !45)
+!52 = !DILocation(line: 25, column: 5, scope: !45)
+!53 = !{!54, !54, i64 0}
+!54 = !{!"any pointer", !27, i64 0}
+!55 = !DILocation(line: 26, column: 5, scope: !45)
+!56 = !DILocation(line: 28, column: 20, scope: !45)
+!57 = !DILocation(line: 29, column: 1, scope: !45)
+!58 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!59 = !DILocation(line: 32, column: 3, scope: !58)
+!60 = !DILocation(line: 33, column: 3, scope: !58)
+
+
+//--- generate-callsite-test-data.sh
+#!/bin/bash
+## This script is provided for reference only, in case call_sites.cpp needs to be modified and the LLVM IR and YAML test inputs regenerated.
+set -ex
+
+TOOLCHAIN_DIR="llvm-project/build/Debug/bin"
+
+# Create call_sites.cpp
+cat > call_sites.cpp <<EOF
+
+#define FUNC_ATTR extern "C" __attribute__((noinline))
+
+volatile int g_volatile_var;
+int (*volatile g_func_ptr)();
+int (*volatile g_extern_func_ptr)();
+
+FUNC_ATTR void func_mainBin_dec_01() { g_volatile_var--; }
+FUNC_ATTR void func_mainBin_dec_02() { g_volatile_var--; }
+FUNC_ATTR void func_mainBin_dec_03() { g_volatile_var--; }
+
+FUNC_ATTR void func_mainBin_inc_01() { g_volatile_var++; }
+FUNC_ATTR void func_mainBin_inc_02() { g_volatile_var++; }
+FUNC_ATTR void func_mainBin_inc_03() { g_volatile_var++; }
+
+FUNC_ATTR void func_mainBin_dec_call_everything() {
+    func_mainBin_dec_01();
+    func_mainBin_dec_02();
+    func_mainBin_dec_03();
+
+    func_mainBin_inc_01();
+    func_mainBin_inc_02();
+    func_mainBin_inc_03();
+
+    g_func_ptr();
+    g_extern_func_ptr();
+
+    g_volatile_var = 0;
+}
+
+int main() {
+  func_mainBin_dec_call_everything();
+  return 0;
+}
+EOF
+
+# Generate IR from call_sites.cpp
+"$TOOLCHAIN_DIR"/clang++ -mno-outline -target arm64-apple-macos -g -Oz -fno-exceptions -S -emit-llvm call_sites.cpp -o call_sites.ll
+
+# Compile the LLVM IR into an object file
+"$TOOLCHAIN_DIR"/llc -filetype=obj call_sites.ll -o call_sites.o
+
+# Link the object file into an executable using lld directly
+"$TOOLCHAIN_DIR"/ld64.lld -arch arm64 -platform_version macos 11.0.0 11.0.0 -o call_sites call_sites.o -lSystem
+
+# Create a dSYM file
+"$TOOLCHAIN_DIR"/dsymutil call_sites -o call_sites.dSYM
+
+# Dump the binary to YAML
+"$TOOLCHAIN_DIR"/obj2yaml call_sites > call_sites_binary.yaml
+
+# Dump the dSYM to YAML
+"$TOOLCHAIN_DIR"/obj2yaml call_sites.dSYM/Contents/Resources/DWARF/call_sites > call_sites_dsym.yaml
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index e3001f3fe53f1d..8dcce1f7388750 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -18,6 +18,8 @@ defm convert :
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
   FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+defm callsites_from_yaml :
+  Eq<"callsites-from-yaml", "Load call site info from YAML file.">;
 defm arch :
   Eq<"arch",
      "Process debug information for the specified CPU architecture only.\nArchitectures may be specified by name or by number.\nThis option can be specified multiple times, once for each desired architecture">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 10bbdf215736d8..59350157463252 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -9,6 +9,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/MachOUniversal.h"
@@ -96,6 +97,7 @@ static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
 static bool StoreMergedFunctionInfo = false;
+static std::vector<std::string> CallSiteYamlPaths;
 
 static void parseArgs(int argc, char **argv) {
   GSYMUtilOptTable Tbl;
@@ -177,6 +179,16 @@ static void parseArgs(int argc, char **argv) {
 
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
   StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
+
+  for (const llvm::opt::Arg *A : Args.filtered(OPT_callsites_from_yaml_EQ))
+    if (A->getValue() && A->getValue()[0] != '\0')
+      CallSiteYamlPaths.emplace_back(A->getValue());
+    else {
+      llvm::errs()
+          << ToolName
+          << ": --callsites-from-yaml option requires a non-empty argument.\n";
+      std::exit(1);
+    }
 }
 
 /// @}
@@ -370,6 +382,13 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
   if (auto Err = ObjectFileTransformer::convert(Obj, Out, Gsym))
     return Err;
 
+  // If any call site YAML files were specified, load them now.
+  for (const auto &yamlPath : CallSiteYamlPaths) {
+    if (auto Err = Gsym.loadCallSitesFromYAML(yamlPath)) {
+      return Err;
+    }
+  }
+
   // Finalize the GSYM to make it ready to save to disk. This will remove
   // duplicate FunctionInfo entries where we might have found an entry from
   // debug info and also a symbol table entry from the object file.
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index c6218e5004d178..33f53de2e77bcd 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -214,10 +214,10 @@ TEST(GSYMTest, TestFunctionInfoDecodeErrors) {
   FW.writeU32(1); // InfoType::LineTableInfo.
   TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
       "0x0000000c: missing FunctionInfo InfoType length");
-  FW.fixup32(4, FixupOffset); // Write an invalid InfoType enumeration value
+  FW.fixup32(7, FixupOffset); // Write an invalid InfoType enumeration value
   FW.writeU32(0); // LineTableInfo InfoType data length.
   TestFunctionInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
-      "0x00000008: unsupported InfoType 4");
+                              "0x00000008: unsupported InfoType 7");
 }
 
 static void TestFunctionInfoEncodeError(llvm::endianness ByteOrder,

>From fe23779dd6ecb7acd835577d68824001a2dcc608 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 26 Sep 2024 16:10:23 -0700
Subject: [PATCH 2/6] Address Feedback nr.1

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        | 46 ++--------------
 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp      | 54 +++++++------------
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    |  3 +-
 3 files changed, 25 insertions(+), 78 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 45257f0e11578e..c4e97a599e9889 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -34,7 +34,6 @@ namespace gsym {
 class FileWriter;
 struct FunctionInfo;
 struct CallSiteInfo {
-public:
   enum Flags : uint8_t {
     None = 0,
     // This flag specifies that the call site can only call a function within
@@ -52,7 +51,7 @@ struct CallSiteInfo {
   std::vector<uint32_t> MatchRegex;
 
   /// Bitwise OR of CallSiteInfo::Flags values
-  uint8_t Flags;
+  uint8_t Flags = CallSiteInfo::Flags::None;
 
   /// Decode a CallSiteInfo object from a binary data stream.
   ///
@@ -73,16 +72,8 @@ struct CallSiteInfo {
 };
 
 struct CallSiteInfoCollection {
-public:
   std::vector<CallSiteInfo> CallSites;
 
-  void clear() { CallSites.clear(); }
-
-  /// Query if a CallSiteInfoCollection object is valid.
-  ///
-  /// \returns True if the collection is not empty.
-  bool isValid() const { return !CallSites.empty(); }
-
   /// Decode a CallSiteInfoCollection object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from.
@@ -156,54 +147,27 @@ class CallSiteInfoLoader {
   /// \returns A 32-bit unsigned integer representing the offset of the string.
   uint32_t offsetFromString(StringRef str);
 
-  /// Reads the content of the YAML file specified by `YAMLFile` into
-  /// `yamlContent`.
-  ///
-  /// \param YAMLFile A StringRef representing the path to the YAML file.
-  /// \param Buffer The memory buffer containing the YAML content.
-  ///
-  /// \returns An `llvm::Error` indicating success or describing any issues
-  /// encountered while reading the file.
-  llvm::Error readYAMLFile(StringRef YAMLFile,
-                           std::unique_ptr<llvm::MemoryBuffer> &Buffer);
-
-  /// Parses the YAML content and populates `functionsYAML` with the parsed
-  /// data.
-  ///
-  /// \param Buffer The memory buffer containing the YAML content.
-  /// \param functionsYAML A reference to an llvm::yaml::FunctionsYAML object to
-  /// be populated.
-  ///
-  /// \returns An `llvm::Error` indicating success or describing any issues
-  /// encountered during parsing.
-  llvm::Error parseYAML(llvm::MemoryBuffer &Buffer,
-                        llvm::yaml::FunctionsYAML &functionsYAML);
-
   /// Builds a map from function names to FunctionInfo pointers based on the
   /// provided `Funcs` vector.
   ///
   /// \param Funcs A reference to a vector of FunctionInfo objects.
   ///
-  /// \returns An unordered_map mapping function names (std::string) to their
+  /// \returns A StringMap mapping function names (StringRef) to their
   /// corresponding FunctionInfo pointers.
-  std::unordered_map<std::string, FunctionInfo *>
-  buildFunctionMap(std::vector<FunctionInfo> &Funcs);
+  StringMap<FunctionInfo *> buildFunctionMap(std::vector<FunctionInfo> &Funcs);
 
   /// Processes the parsed YAML functions and updates the `FuncMap` accordingly.
   ///
   /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML
   /// object containing parsed YAML data.
-  /// \param FuncMap A reference to an unordered_map mapping function names to
+  /// \param FuncMap A reference to a StringMap mapping function names to
   /// FunctionInfo pointers.
-  /// \param YAMLFile A StringRef representing the name of the YAML file (used
-  /// for error messages).
   ///
   /// \returns An `llvm::Error` indicating success or describing any issues
   /// encountered during processing.
   llvm::Error
   processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML,
-                       std::unordered_map<std::string, FunctionInfo *> &FuncMap,
-                       StringRef YAMLFile);
+                       StringMap<FunctionInfo *> &FuncMap);
 
   /// Map of existing string offsets to CachedHashStringRef.
   DenseMap<uint64_t, CachedHashStringRef> &StringOffsetMap;
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
index 4ed3d3f67a44fd..3fe7fb48cb0de3 100644
--- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -162,49 +162,32 @@ uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) {
 
 llvm::Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
                                          StringRef YAMLFile) {
-  std::unique_ptr<llvm::MemoryBuffer> Buffer;
   // Step 1: Read YAML file
-  if (auto Err = readYAMLFile(YAMLFile, Buffer))
-    return Err;
-
-  // Step 2: Parse YAML content
-  llvm::yaml::FunctionsYAML functionsYAML;
-  if (auto Err = parseYAML(*Buffer, functionsYAML))
-    return Err;
-
-  // Step 3: Build function map from Funcs
-  auto FuncMap = buildFunctionMap(Funcs);
-
-  // Step 4: Process parsed YAML functions and update FuncMap
-  return processYAMLFunctions(functionsYAML, FuncMap, YAMLFile);
-}
-
-llvm::Error
-CallSiteInfoLoader::readYAMLFile(StringRef YAMLFile,
-                                 std::unique_ptr<llvm::MemoryBuffer> &Buffer) {
   auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile);
   if (!BufferOrError)
     return errorCodeToError(BufferOrError.getError());
-  Buffer = std::move(*BufferOrError);
-  return llvm::Error::success();
-}
 
-llvm::Error
-CallSiteInfoLoader::parseYAML(llvm::MemoryBuffer &Buffer,
-                              llvm::yaml::FunctionsYAML &functionsYAML) {
-  // Use the MemoryBufferRef constructor
-  llvm::yaml::Input yin(Buffer.getMemBufferRef());
+  std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*BufferOrError);
+
+  // Step 2: Parse YAML content
+  llvm::yaml::FunctionsYAML functionsYAML;
+  llvm::yaml::Input yin(Buffer->getMemBufferRef());
   yin >> functionsYAML;
   if (yin.error()) {
     return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n",
-                                   Buffer.getBufferIdentifier().str().c_str());
+                                   Buffer->getBufferIdentifier().str().c_str());
   }
-  return llvm::Error::success();
+
+  // Step 3: Build function map from Funcs
+  auto FuncMap = buildFunctionMap(Funcs);
+
+  // Step 4: Process parsed YAML functions and update FuncMap
+  return processYAMLFunctions(functionsYAML, FuncMap);
 }
 
-std::unordered_map<std::string, FunctionInfo *>
+StringMap<FunctionInfo *>
 CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
-  std::unordered_map<std::string, FunctionInfo *> FuncMap;
+  StringMap<FunctionInfo *> FuncMap;
   auto insertFunc = [&](auto &Function) {
     std::string FuncName = stringFromOffset(Function.Name).str();
     // If the function name is already in the map, don't update it. This way we
@@ -227,8 +210,7 @@ CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
 
 llvm::Error CallSiteInfoLoader::processYAMLFunctions(
     const llvm::yaml::FunctionsYAML &functionsYAML,
-    std::unordered_map<std::string, FunctionInfo *> &FuncMap,
-    StringRef YAMLFile) {
+    StringMap<FunctionInfo *> &FuncMap) {
   // For each function in the YAML file
   for (const auto &FuncYAML : functionsYAML.functions) {
     auto it = FuncMap.find(FuncYAML.name);
@@ -247,9 +229,9 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions(
       // Since YAML has specifies relative return offsets, add the function
       // start address to make the offset absolute.
       CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset;
-      for (const auto &regex : CallSiteYAML.match_regex) {
-        CSI.MatchRegex.push_back(offsetFromString(regex));
-      }
+      for (const auto &Regex : CallSiteYAML.match_regex)
+        CSI.MatchRegex.push_back(offsetFromString(Regex));
+
       // Initialize flags to None
       CSI.Flags = CallSiteInfo::None;
       // Parse flags and combine them
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 59350157463252..bb107c17f6f96c 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -180,7 +180,7 @@ static void parseArgs(int argc, char **argv) {
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
   StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
 
-  for (const llvm::opt::Arg *A : Args.filtered(OPT_callsites_from_yaml_EQ))
+  if (const llvm::opt::Arg *A = Args.getLastArg(OPT_callsites_from_yaml_EQ)) {
     if (A->getValue() && A->getValue()[0] != '\0')
       CallSiteYamlPaths.emplace_back(A->getValue());
     else {
@@ -189,6 +189,7 @@ static void parseArgs(int argc, char **argv) {
           << ": --callsites-from-yaml option requires a non-empty argument.\n";
       std::exit(1);
     }
+  }
 }
 
 /// @}

>From cd7c30ab8cac2b2be6446c74c4542dd8321d7439 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 27 Sep 2024 14:00:36 -0700
Subject: [PATCH 3/6] Address feedback nr.2

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        | 72 +++------------
 .../include/llvm/DebugInfo/GSYM/GsymCreator.h |  9 ++
 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp      | 92 ++++++++-----------
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      |  2 +-
 llvm/lib/DebugInfo/GSYM/GsymCreator.cpp       |  8 +-
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    | 15 ++-
 6 files changed, 76 insertions(+), 122 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index c4e97a599e9889..496679fdf9bc8f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -32,6 +32,7 @@ struct FunctionsYAML;
 
 namespace gsym {
 class FileWriter;
+class GsymCreator;
 struct FunctionInfo;
 struct CallSiteInfo {
   enum Flags : uint8_t {
@@ -45,7 +46,7 @@ struct CallSiteInfo {
   };
 
   /// The return address of the call site.
-  uint64_t ReturnAddress;
+  uint64_t ReturnAddress = 0;
 
   /// Offsets into the string table for function names regex patterns.
   std::vector<uint32_t> MatchRegex;
@@ -57,12 +58,9 @@ struct CallSiteInfo {
   ///
   /// \param Data The binary stream to read the data from.
   /// \param Offset The current offset within the data stream.
-  /// \param BaseAddr The base address for decoding (unused here but included
-  /// for consistency).
-  ///
   /// \returns A CallSiteInfo or an error describing the issue.
-  static llvm::Expected<CallSiteInfo>
-  decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr);
+  static llvm::Expected<CallSiteInfo> decode(DataExtractor &Data,
+                                             uint64_t &Offset);
 
   /// Encode this CallSiteInfo object into a FileWriter stream.
   ///
@@ -77,12 +75,8 @@ struct CallSiteInfoCollection {
   /// Decode a CallSiteInfoCollection object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from.
-  /// \param BaseAddr The base address for decoding (unused here but included
-  /// for consistency).
-  ///
   /// \returns A CallSiteInfoCollection or an error describing the issue.
-  static llvm::Expected<CallSiteInfoCollection> decode(DataExtractor &Data,
-                                                       uint64_t BaseAddr);
+  static llvm::Expected<CallSiteInfoCollection> decode(DataExtractor &Data);
 
   /// Encode this CallSiteInfoCollection object into a FileWriter stream.
   ///
@@ -91,29 +85,18 @@ struct CallSiteInfoCollection {
   llvm::Error encode(FileWriter &O) const;
 };
 
-bool operator==(const CallSiteInfoCollection &LHS,
-                const CallSiteInfoCollection &RHS);
-
-bool operator==(const CallSiteInfo &LHS, const CallSiteInfo &RHS);
-
 class CallSiteInfoLoader {
 public:
   /// Constructor that initializes the CallSiteInfoLoader with necessary data
   /// structures.
   ///
   /// \param StringOffsetMap A reference to a DenseMap that maps existing string
-  /// offsets to CachedHashStringRef. \param StrTab A reference to a
-  /// StringTableBuilder used for managing looking up and creating new strings.
-  /// \param StringStorage A reference to a StringSet for storing the data for
-  /// generated strings.
-  CallSiteInfoLoader(DenseMap<uint64_t, CachedHashStringRef> &StringOffsetMap,
-                     StringTableBuilder &StrTab, StringSet<> &StringStorage)
-      : StringOffsetMap(StringOffsetMap), StrTab(StrTab),
-        StringStorage(StringStorage) {}
-
-  /// Loads call site information from a YAML file and populates the provided
-  /// FunctionInfo vector.
-  ///
+  /// offsets to CachedHashStringRef.
+  /// \param GCreator A reference to the parent GsymCreator, whose string table
+  /// is used to look up function names and to insert new call site regex
+  /// strings.
+  CallSiteInfoLoader(GsymCreator &GCreator) : GCreator(GCreator) {}
+
   /// This method reads the specified YAML file, parses its content, and updates
   /// the `Funcs` vector with call site information based on the YAML data.
   ///
@@ -121,37 +104,15 @@ class CallSiteInfoLoader {
   /// populated.
   /// \param YAMLFile A StringRef representing the path to the YAML
   /// file to be loaded.
-  ///
   /// \returns An `llvm::Error` indicating success or describing any issues
   /// encountered during the loading process.
   llvm::Error loadYAML(std::vector<FunctionInfo> &Funcs, StringRef YAMLFile);
 
 private:
-  /// Retrieves an existing string from the StringOffsetMap using the provided
-  /// offset.
-  ///
-  /// \param offset A 32-bit unsigned integer representing the offset of the
-  /// string.
-  ///
-  /// \returns A StringRef corresponding to the string for the given offset.
-  ///
-  /// \note This method asserts that the offset exists in the StringOffsetMap.
-  StringRef stringFromOffset(uint32_t offset) const;
-
-  /// Obtains the offset corresponding to a given string in the StrTab. If the
-  /// string does not already exist, it is created.
-  ///
-  /// \param str A StringRef representing the string for which the offset is
-  /// requested.
-  ///
-  /// \returns A 32-bit unsigned integer representing the offset of the string.
-  uint32_t offsetFromString(StringRef str);
-
   /// Builds a map from function names to FunctionInfo pointers based on the
   /// provided `Funcs` vector.
   ///
   /// \param Funcs A reference to a vector of FunctionInfo objects.
-  ///
   /// \returns A StringMap mapping function names (StringRef) to their
   /// corresponding FunctionInfo pointers.
   StringMap<FunctionInfo *> buildFunctionMap(std::vector<FunctionInfo> &Funcs);
@@ -162,21 +123,14 @@ class CallSiteInfoLoader {
   /// object containing parsed YAML data.
   /// \param FuncMap A reference to a StringMap mapping function names to
   /// FunctionInfo pointers.
-  ///
   /// \returns An `llvm::Error` indicating success or describing any issues
   /// encountered during processing.
   llvm::Error
   processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML,
                        StringMap<FunctionInfo *> &FuncMap);
 
-  /// Map of existing string offsets to CachedHashStringRef.
-  DenseMap<uint64_t, CachedHashStringRef> &StringOffsetMap;
-
-  /// The gSYM string table builder.
-  StringTableBuilder &StrTab;
-
-  /// The gSYM string storage - we store generated strings here.
-  StringSet<> &StringStorage;
+  /// Reference to the parent Gsym Creator object.
+  GsymCreator &GCreator;
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const CallSiteInfo &CSI);
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 9e5b3c1f8d92de..8e9e500f267c1f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -329,6 +329,15 @@ class GsymCreator {
   /// \returns The unique 32 bit offset into the string table.
   uint32_t insertString(StringRef S, bool Copy = true);
 
+  /// Retrieve a string from the GSYM string table given its offset.
+  ///
+  /// The offset is assumed to be a valid offset into the string table;
+  /// otherwise an assert will be triggered.
+  /// \param offset The offset of the string to retrieve, previously returned by
+  /// insertString.
+  /// \returns The string at the given offset in the string table.
+  StringRef getString(uint32_t offset);
+
   /// Insert a file into this GSYM creator.
   ///
   /// Inserts a file by adding a FileEntry into the "Files" member variable if
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
index 3fe7fb48cb0de3..5f8f622043bd8b 100644
--- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -10,6 +10,7 @@
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/YAMLParser.h"
@@ -23,17 +24,17 @@
 using namespace llvm;
 using namespace gsym;
 
-llvm::Error CallSiteInfo::encode(FileWriter &O) const {
+Error CallSiteInfo::encode(FileWriter &O) const {
   O.writeU64(ReturnAddress);
   O.writeU8(Flags);
   O.writeU32(MatchRegex.size());
   for (uint32_t Entry : MatchRegex)
     O.writeU32(Entry);
-  return llvm::Error::success();
+  return Error::success();
 }
 
-llvm::Expected<CallSiteInfo>
-CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) {
+Expected<CallSiteInfo> CallSiteInfo::decode(DataExtractor &Data,
+                                            uint64_t &Offset) {
   CallSiteInfo CSI;
 
   // Read ReturnAddress
@@ -68,17 +69,17 @@ CallSiteInfo::decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr) {
   return CSI;
 }
 
-llvm::Error CallSiteInfoCollection::encode(FileWriter &O) const {
+Error CallSiteInfoCollection::encode(FileWriter &O) const {
   O.writeU32(CallSites.size());
   for (const CallSiteInfo &CSI : CallSites) {
-    if (llvm::Error Err = CSI.encode(O))
+    if (Error Err = CSI.encode(O))
       return Err;
   }
-  return llvm::Error::success();
+  return Error::success();
 }
 
-llvm::Expected<CallSiteInfoCollection>
-CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) {
+Expected<CallSiteInfoCollection>
+CallSiteInfoCollection::decode(DataExtractor &Data) {
   CallSiteInfoCollection CSC;
   uint64_t Offset = 0;
 
@@ -91,8 +92,7 @@ CallSiteInfoCollection::decode(DataExtractor &Data, uint64_t BaseAddr) {
 
   CSC.CallSites.reserve(NumCallSites);
   for (uint32_t i = 0; i < NumCallSites; ++i) {
-    llvm::Expected<CallSiteInfo> ECSI =
-        CallSiteInfo::decode(Data, Offset, BaseAddr);
+    Expected<CallSiteInfo> ECSI = CallSiteInfo::decode(Data, Offset);
     if (!ECSI)
       return ECSI.takeError();
     CSC.CallSites.emplace_back(*ECSI);
@@ -108,7 +108,7 @@ namespace yaml {
 struct CallSiteYAML {
   // The offset of the return address of the call site - relative to the start
   // of the function.
-  llvm::yaml::Hex64 return_offset;
+  Hex64 return_offset;
   std::vector<std::string> match_regex;
   std::vector<std::string> flags;
 };
@@ -149,34 +149,22 @@ template <> struct MappingTraits<FunctionsYAML> {
 LLVM_YAML_IS_SEQUENCE_VECTOR(CallSiteYAML)
 LLVM_YAML_IS_SEQUENCE_VECTOR(FunctionYAML)
 
-// Implementation of CallSiteInfoLoader
-StringRef CallSiteInfoLoader::stringFromOffset(uint32_t offset) const {
-  assert(StringOffsetMap.count(offset) &&
-         "expected function name offset to already be in StringOffsetMap");
-  return StringOffsetMap.find(offset)->second.val();
-}
-
-uint32_t CallSiteInfoLoader::offsetFromString(StringRef str) {
-  return StrTab.add(StringStorage.insert(str).first->getKey());
-}
-
-llvm::Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
-                                         StringRef YAMLFile) {
+Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
+                                   StringRef YAMLFile) {
   // Step 1: Read YAML file
-  auto BufferOrError = llvm::MemoryBuffer::getFile(YAMLFile);
+  auto BufferOrError = MemoryBuffer::getFile(YAMLFile);
   if (!BufferOrError)
     return errorCodeToError(BufferOrError.getError());
 
-  std::unique_ptr<llvm::MemoryBuffer> Buffer = std::move(*BufferOrError);
+  std::unique_ptr<MemoryBuffer> Buffer = std::move(*BufferOrError);
 
   // Step 2: Parse YAML content
-  llvm::yaml::FunctionsYAML functionsYAML;
-  llvm::yaml::Input yin(Buffer->getMemBufferRef());
+  yaml::FunctionsYAML functionsYAML;
+  yaml::Input yin(Buffer->getMemBufferRef());
   yin >> functionsYAML;
-  if (yin.error()) {
-    return llvm::createStringError(yin.error(), "Error parsing YAML file: %s\n",
-                                   Buffer->getBufferIdentifier().str().c_str());
-  }
+  if (yin.error())
+    return createStringError(yin.error(), "Error parsing YAML file: %s\n",
+                             Buffer->getBufferIdentifier().str().c_str());
 
   // Step 3: Build function map from Funcs
   auto FuncMap = buildFunctionMap(Funcs);
@@ -189,7 +177,7 @@ StringMap<FunctionInfo *>
 CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
   StringMap<FunctionInfo *> FuncMap;
   auto insertFunc = [&](auto &Function) {
-    std::string FuncName = stringFromOffset(Function.Name).str();
+    StringRef FuncName = GCreator.getString(Function.Name);
     // If the function name is already in the map, don't update it. This way we
     // preferentially use the first encountered function. Since symbols are
     // loaded from dSYM first, we end up preferring keeping track of symbols
@@ -208,19 +196,19 @@ CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
   return FuncMap;
 }
 
-llvm::Error CallSiteInfoLoader::processYAMLFunctions(
-    const llvm::yaml::FunctionsYAML &functionsYAML,
+Error CallSiteInfoLoader::processYAMLFunctions(
+    const yaml::FunctionsYAML &functionsYAML,
     StringMap<FunctionInfo *> &FuncMap) {
   // For each function in the YAML file
   for (const auto &FuncYAML : functionsYAML.functions) {
-    auto it = FuncMap.find(FuncYAML.name);
-    if (it == FuncMap.end()) {
-      return llvm::createStringError(
+    auto It = FuncMap.find(FuncYAML.name);
+    if (It == FuncMap.end())
+      return createStringError(
           std::errc::invalid_argument,
           "Can't find function '%s' specified in callsite YAML\n",
           FuncYAML.name.c_str());
-    }
-    FunctionInfo *FuncInfo = it->second;
+
+    FunctionInfo *FuncInfo = It->second;
     // Create a CallSiteInfoCollection if not already present
     if (!FuncInfo->CallSites)
       FuncInfo->CallSites = CallSiteInfoCollection();
@@ -229,11 +217,11 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions(
       // Since YAML has specifies relative return offsets, add the function
       // start address to make the offset absolute.
       CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset;
-      for (const auto &Regex : CallSiteYAML.match_regex)
-        CSI.MatchRegex.push_back(offsetFromString(Regex));
+      for (const auto &Regex : CallSiteYAML.match_regex) {
+        uint32_t StrOffset = GCreator.insertString(Regex);
+        CSI.MatchRegex.push_back(StrOffset);
+      }
 
-      // Initialize flags to None
-      CSI.Flags = CallSiteInfo::None;
       // Parse flags and combine them
       for (const auto &FlagStr : CallSiteYAML.flags) {
         if (FlagStr == "InternalCall") {
@@ -241,18 +229,18 @@ llvm::Error CallSiteInfoLoader::processYAMLFunctions(
         } else if (FlagStr == "ExternalCall") {
           CSI.Flags |= static_cast<uint8_t>(CallSiteInfo::ExternalCall);
         } else {
-          return llvm::createStringError(std::errc::invalid_argument,
-                                         "Unknown flag in callsite YAML: %s\n",
-                                         FlagStr.c_str());
+          return createStringError(std::errc::invalid_argument,
+                                   "Unknown flag in callsite YAML: %s\n",
+                                   FlagStr.c_str());
         }
       }
       FuncInfo->CallSites->CallSites.push_back(CSI);
     }
   }
-  return llvm::Error::success();
+  return Error::success();
 }
 
-raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
+raw_ostream &gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
   OS << "  Return=" << HEX64(CSI.ReturnAddress);
   OS << "  Flags=" << HEX8(CSI.Flags);
 
@@ -265,8 +253,8 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
   return OS;
 }
 
-raw_ostream &llvm::gsym::operator<<(raw_ostream &OS,
-                                    const CallSiteInfoCollection &CSIC) {
+raw_ostream &gsym::operator<<(raw_ostream &OS,
+                              const CallSiteInfoCollection &CSIC) {
   for (const auto &CS : CSIC.CallSites) {
     OS << CS;
     OS << "\n";
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 9dc9c241168b26..facad95f8fdebc 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -100,7 +100,7 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
 
       case InfoType::CallSiteInfo:
         if (Expected<llvm::gsym::CallSiteInfoCollection> CI =
-                llvm::gsym::CallSiteInfoCollection::decode(InfoData, BaseAddr))
+                llvm::gsym::CallSiteInfoCollection::decode(InfoData))
           FI.CallSites = std::move(CI.get());
         else
           return CI.takeError();
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 0df84ee256aef9..3cabb27053a5c5 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -191,7 +191,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
 
 llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) {
   // Use the loader to load call site information from the YAML file.
-  CallSiteInfoLoader Loader(StringOffsetMap, StrTab, StringStorage);
+  CallSiteInfoLoader Loader(*this);
   return Loader.loadYAML(Funcs, YAMLFile);
 }
 
@@ -385,6 +385,12 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
   return StrOff;
 }
 
+StringRef GsymCreator::getString(uint32_t offset) {
+  assert(StringOffsetMap.count(offset) &&
+         "GsymCreator::getString expects a valid offset as parameter.");
+  return StringOffsetMap.find(offset)->second.val();
+}
+
 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
   std::lock_guard<std::mutex> Guard(Mutex);
   Funcs.emplace_back(std::move(FI));
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index bb107c17f6f96c..6c7506e51e649e 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -97,7 +97,7 @@ static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
 static bool StoreMergedFunctionInfo = false;
-static std::vector<std::string> CallSiteYamlPaths;
+static std::string CallSiteYamlPath;
 
 static void parseArgs(int argc, char **argv) {
   GSYMUtilOptTable Tbl;
@@ -180,10 +180,9 @@ static void parseArgs(int argc, char **argv) {
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
   StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
 
-  if (const llvm::opt::Arg *A = Args.getLastArg(OPT_callsites_from_yaml_EQ)) {
-    if (A->getValue() && A->getValue()[0] != '\0')
-      CallSiteYamlPaths.emplace_back(A->getValue());
-    else {
+  if (Args.hasArg(OPT_callsites_from_yaml_EQ)) {
+    CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_from_yaml_EQ);
+    if (CallSiteYamlPath.empty()) {
       llvm::errs()
           << ToolName
           << ": --callsites-from-yaml option requires a non-empty argument.\n";
@@ -384,11 +383,9 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
     return Err;
 
   // If a call site YAML file was specified, load it now.
-  for (const auto &yamlPath : CallSiteYamlPaths) {
-    if (auto Err = Gsym.loadCallSitesFromYAML(yamlPath)) {
+  if (!CallSiteYamlPath.empty())
+    if (auto Err = Gsym.loadCallSitesFromYAML(CallSiteYamlPath))
       return Err;
-    }
-  }
 
   // Finalize the GSYM to make it ready to save to disk. This will remove
   // duplicate FunctionInfo entries where we might have found an entry from

>From 087cdea260ddcaec7b952ff523b5b662b49e18cb Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 27 Sep 2024 14:04:16 -0700
Subject: [PATCH 4/6] Address Feedback Nr 2.1

---
 llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 4f645714480e6d..786633adc51202 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -412,9 +412,8 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
     dump(OS, *FI.MergedFunctions);
   }
 
-  if (FI.CallSites) {
+  if (FI.CallSites)
     dump(OS, *FI.CallSites);
-  }
 }
 
 void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {

>From d7cff1b16cb3edb26208b6cf6d5d2f53f01e34ef Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Thu, 17 Oct 2024 12:49:04 -0700
Subject: [PATCH 5/6] Address [Some] Feedback Nr 3 - Rest pending YAML format

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        | 19 +++++--------------
 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp      |  6 +++---
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      |  6 +++---
 3 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 496679fdf9bc8f..8043779f31724c 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -9,24 +9,17 @@
 #ifndef LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
 #define LLVM_DEBUGINFO_GSYM_CALLSITEINFO_H
 
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
-#include "llvm/Support/YAMLParser.h"
-#include <string>
-#include <unordered_map>
+#include "llvm/Support/Error.h"
 #include <vector>
 
 namespace llvm {
 class DataExtractor;
 class raw_ostream;
-class StringTableBuilder;
-class CachedHashStringRef;
 
 namespace yaml {
-struct CallSiteYAML;
-struct FunctionYAML;
 struct FunctionsYAML;
 } // namespace yaml
 
@@ -43,6 +36,8 @@ struct CallSiteInfo {
     // This flag specifies that the call site can only call a function outside
     // the link unit that the call site is in.
     ExternalCall = 1 << 1,
+
+    LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ ExternalCall),
   };
 
   /// The return address of the call site.
@@ -90,11 +85,7 @@ class CallSiteInfoLoader {
   /// Constructor that initializes the CallSiteInfoLoader with necessary data
   /// structures.
   ///
-  /// \param StringOffsetMap A reference to a DenseMap that maps existing string
-  /// offsets to CachedHashStringRef.
-  /// \param StrTab A reference to a StringTableBuilder used for managing
-  /// looking up and creating new strings. \param StringStorage A reference to a
-  /// StringSet for storing the data for generated strings.
+  /// \param GCreator A reference to the GsymCreator.
   CallSiteInfoLoader(GsymCreator &GCreator) : GCreator(GCreator) {}
 
   /// This method reads the specified YAML file, parses its content, and updates
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
index 5f8f622043bd8b..6439aad4814f54 100644
--- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -159,9 +159,9 @@ Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
   std::unique_ptr<MemoryBuffer> Buffer = std::move(*BufferOrError);
 
   // Step 2: Parse YAML content
-  yaml::FunctionsYAML functionsYAML;
+  yaml::FunctionsYAML FuncsYAML;
   yaml::Input yin(Buffer->getMemBufferRef());
-  yin >> functionsYAML;
+  yin >> FuncsYAML;
   if (yin.error())
     return createStringError(yin.error(), "Error parsing YAML file: %s\n",
                              Buffer->getBufferIdentifier().str().c_str());
@@ -170,7 +170,7 @@ Error CallSiteInfoLoader::loadYAML(std::vector<FunctionInfo> &Funcs,
   auto FuncMap = buildFunctionMap(Funcs);
 
   // Step 4: Process parsed YAML functions and update FuncMap
-  return processYAMLFunctions(functionsYAML, FuncMap);
+  return processYAMLFunctions(FuncsYAML, FuncMap);
 }
 
 StringMap<FunctionInfo *>
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index facad95f8fdebc..dd754c701f6240 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -218,9 +218,9 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
     // writing the CallSites out with the number of bytes that were written.
     Out.writeU32(0);
     const auto StartOffset = Out.tell();
-    llvm::Error err = CallSites->encode(Out);
-    if (err)
-      return std::move(err);
+    Error Err = CallSites->encode(Out);
+    if (Err)
+      return std::move(Err);
     const auto Length = Out.tell() - StartOffset;
     if (Length > UINT32_MAX)
       return createStringError(std::errc::invalid_argument,

>From cc1d3144e48788b7466a4e7a6869bb984c298568 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Wed, 13 Nov 2024 12:37:04 -0800
Subject: [PATCH 6/6] Switch to relative return offsets

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        | 11 +++--
 llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp      | 21 +++++-----
 llvm/lib/DebugInfo/GSYM/GsymReader.cpp        |  4 +-
 .../macho-gsym-callsite-info-obj.test         | 42 +++++++++----------
 4 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 8043779f31724c..8a9b3e55531216 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -40,8 +40,8 @@ struct CallSiteInfo {
     LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue*/ ExternalCall),
   };
 
-  /// The return address of the call site.
-  uint64_t ReturnAddress = 0;
+  /// The return offset of the call site - relative to the function start.
+  uint64_t ReturnOffset = 0;
 
   /// Offsets into the string table for function names regex patterns.
   std::vector<uint32_t> MatchRegex;
@@ -110,15 +110,14 @@ class CallSiteInfoLoader {
 
   /// Processes the parsed YAML functions and updates the `FuncMap` accordingly.
   ///
-  /// \param functionsYAML A constant reference to an llvm::yaml::FunctionsYAML
+  /// \param FuncYAMLs A constant reference to an llvm::yaml::FunctionsYAML
   /// object containing parsed YAML data.
   /// \param FuncMap A reference to a StringMap mapping function names to
   /// FunctionInfo pointers.
   /// \returns An `llvm::Error` indicating success or describing any issues
   /// encountered during processing.
-  llvm::Error
-  processYAMLFunctions(const llvm::yaml::FunctionsYAML &functionsYAML,
-                       StringMap<FunctionInfo *> &FuncMap);
+  llvm::Error processYAMLFunctions(const llvm::yaml::FunctionsYAML &FuncYAMLs,
+                                   StringMap<FunctionInfo *> &FuncMap);
 
   /// Reference to the parent Gsym Creator object.
   GsymCreator &GCreator;
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
index 6439aad4814f54..af2a374c908bc8 100644
--- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
 using namespace gsym;
 
 Error CallSiteInfo::encode(FileWriter &O) const {
-  O.writeU64(ReturnAddress);
+  O.writeU64(ReturnOffset);
   O.writeU8(Flags);
   O.writeU32(MatchRegex.size());
   for (uint32_t Entry : MatchRegex)
@@ -37,11 +37,11 @@ Expected<CallSiteInfo> CallSiteInfo::decode(DataExtractor &Data,
                                             uint64_t &Offset) {
   CallSiteInfo CSI;
 
-  // Read ReturnAddress
+  // Read ReturnOffset
   if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint64_t)))
     return createStringError(std::errc::io_error,
-                             "0x%8.8" PRIx64 ": missing ReturnAddress", Offset);
-  CSI.ReturnAddress = Data.getU64(&Offset);
+                             "0x%8.8" PRIx64 ": missing ReturnOffset", Offset);
+  CSI.ReturnOffset = Data.getU64(&Offset);
 
   // Read Flags
   if (!Data.isValidOffsetForDataOfSize(Offset, sizeof(uint8_t)))
@@ -138,8 +138,8 @@ template <> struct MappingTraits<FunctionYAML> {
 };
 
 template <> struct MappingTraits<FunctionsYAML> {
-  static void mapping(IO &io, FunctionsYAML &functionsYAML) {
-    io.mapRequired("functions", functionsYAML.functions);
+  static void mapping(IO &io, FunctionsYAML &FuncYAMLs) {
+    io.mapRequired("functions", FuncYAMLs.functions);
   }
 };
 
@@ -197,10 +197,9 @@ CallSiteInfoLoader::buildFunctionMap(std::vector<FunctionInfo> &Funcs) {
 }
 
 Error CallSiteInfoLoader::processYAMLFunctions(
-    const yaml::FunctionsYAML &functionsYAML,
-    StringMap<FunctionInfo *> &FuncMap) {
+    const yaml::FunctionsYAML &FuncYAMLs, StringMap<FunctionInfo *> &FuncMap) {
   // For each function in the YAML file
-  for (const auto &FuncYAML : functionsYAML.functions) {
+  for (const auto &FuncYAML : FuncYAMLs.functions) {
     auto It = FuncMap.find(FuncYAML.name);
     if (It == FuncMap.end())
       return createStringError(
@@ -216,7 +215,7 @@ Error CallSiteInfoLoader::processYAMLFunctions(
       CallSiteInfo CSI;
       // The YAML specifies return offsets relative to the function start,
       // which is also how they are stored, so no adjustment is needed.
-      CSI.ReturnAddress = FuncInfo->Range.start() + CallSiteYAML.return_offset;
+      CSI.ReturnOffset = CallSiteYAML.return_offset;
       for (const auto &Regex : CallSiteYAML.match_regex) {
         uint32_t StrOffset = GCreator.insertString(Regex);
         CSI.MatchRegex.push_back(StrOffset);
@@ -241,7 +240,7 @@ Error CallSiteInfoLoader::processYAMLFunctions(
 }
 
 raw_ostream &gsym::operator<<(raw_ostream &OS, const CallSiteInfo &CSI) {
-  OS << "  Return=" << HEX64(CSI.ReturnAddress);
+  OS << "  Return=" << HEX64(CSI.ReturnOffset);
   OS << "  Flags=" << HEX8(CSI.Flags);
 
   OS << "  RegEx=";
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 786633adc51202..cd92488e8b9cbd 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -424,7 +424,7 @@ void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
 }
 
 void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
-  OS << HEX64(CSI.ReturnAddress);
+  OS << HEX16(CSI.ReturnOffset);
 
   std::string Flags;
   auto addFlag = [&](const char *Flag) {
@@ -456,7 +456,7 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
 }
 
 void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC) {
-  OS << "CallSites (by return address):\n";
+  OS << "CallSites (by relative return offset):\n";
   for (const auto &CS : CSIC.CallSites) {
     OS.indent(2);
     dump(OS, CS);
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
index 255b68a343b8db..61304558ba63f3 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-callsite-info-obj.test
@@ -8,39 +8,39 @@
 // RUN: llvm-gsymutil %t/call_sites_obj.gsym | FileCheck --check-prefix=CHECK-GSYM %s
 
 
-// CHECK-GSYM:      FunctionInfo @ 0x[[#%x,]]: [0x[[#%x,]] - 0x[[#%x,]]) "func_mainBin_dec_call_everything"
+// CHECK-GSYM:      FunctionInfo @ 0x[[#%x,FUNC_INFO:]]: [0x[[#%x,FUNC_START:]] - 0x[[#%x,FUNC_END:]]) "func_mainBin_dec_call_everything"
 // CHECK-GSYM-NEXT: LineTable:
                       // func_mainBin_dec_call_everything() {
-// CHECK-GSYM-NEXT:   0x[[#%x,]] {{.*}}/call_sites.cpp:16
+// CHECK-GSYM-NEXT:   0x[[#%x,ENTRY:]] {{.*}}/call_sites.cpp:16
                       // func_mainBin_dec_01();
-// CHECK-GSYM-NEXT:   0x[[ADDR_dec_01_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:17
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_DEC_01_CALL:]] {{.*}}/call_sites.cpp:17
                       // func_mainBin_dec_02();
-// CHECK-GSYM-NEXT:   0x[[ADDR_dec_02_call:[0-9a-f]+]] {{.*}}/call_sites.cpp:18
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_DEC_02_CALL:]] {{.*}}/call_sites.cpp:18
                       // func_mainBin_dec_03();
-// CHECK-GSYM-NEXT:   [[ADDR_dec_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:19
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_DEC_03_CALL:]] {{.*}}/call_sites.cpp:19
                       // func_mainBin_inc_01();
-// CHECK-GSYM-NEXT:   [[ADDR_inc_01_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:21
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_INC_01_CALL:]] {{.*}}/call_sites.cpp:21
                       // func_mainBin_inc_02();
-// CHECK-GSYM-NEXT:   [[ADDR_inc_02_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:22
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_INC_02_CALL:]] {{.*}}/call_sites.cpp:22
                       // func_mainBin_inc_03();
-// CHECK-GSYM-NEXT:   [[ADDR_inc_03_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:23
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_INC_03_CALL:]] {{.*}}/call_sites.cpp:23
                       // g_func_ptr();
-// CHECK-GSYM-NEXT:   [[ADDR_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:25
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_FUNC_CALL:]] {{.*}}/call_sites.cpp:25
                       // g_extern_func_ptr();
-// CHECK-GSYM-NEXT:   [[ADDR_extern_func_call:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:26
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_EXTERN_FUNC_CALL:]] {{.*}}/call_sites.cpp:26
                       // g_volatile_var = 0;
-// CHECK-GSYM-NEXT:   [[ADDR_var_assign:0x[0-9a-f]+]] {{.*}}/call_sites.cpp:28
+// CHECK-GSYM-NEXT:   0x[[#%x,ADDR_VAR_ASSIGN:]] {{.*}}/call_sites.cpp:28
                       // }
-// CHECK-GSYM-NEXT:   [[#%x,]] {{.*}}/call_sites.cpp:29
-// CHECK-GSYM-NEXT: CallSites (by return address):
-// CHECK-GSYM-NEXT:   [[ADDR_dec_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01]
-// CHECK-GSYM-NEXT:   [[ADDR_dec_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02]
-// CHECK-GSYM-NEXT:   [[ADDR_inc_01_call]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03]
-// CHECK-GSYM-NEXT:   [[ADDR_inc_02_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01]
-// CHECK-GSYM-NEXT:   [[ADDR_inc_03_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02]
-// CHECK-GSYM-NEXT:   [[ADDR_func_call]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03]
-// CHECK-GSYM-NEXT:   [[ADDR_extern_func_call]] Flags[None] MatchRegex[.*func.*]
-// CHECK-GSYM-NEXT:   [[ADDR_var_assign]] Flags[ExternalCall] MatchRegex[.*extern_func.*]
+// CHECK-GSYM-NEXT:   0x[[#%x,]] {{.*}}/call_sites.cpp:29
+// CHECK-GSYM-NEXT: CallSites (by relative return offset):
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_DEC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_01]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_DEC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_02]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_INC_01_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_dec_03]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_INC_02_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_01]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_INC_03_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_02]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_FUNC_CALL,FUNC_START)]] Flags[InternalCall] MatchRegex[func_mainBin_inc_03]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_EXTERN_FUNC_CALL,FUNC_START)]] Flags[None] MatchRegex[.*func.*]
+// CHECK-GSYM-NEXT:   0x[[#%.4x,sub(ADDR_VAR_ASSIGN,FUNC_START)]] Flags[ExternalCall] MatchRegex[.*extern_func.*]
 
 
 //--- callsites.yaml



More information about the llvm-commits mailing list