[Lldb-commits] [lldb] cf3524a - [lldb] Introduce new SymbolFileJSON and ObjectFileJSON

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Wed Mar 8 20:56:17 PST 2023


Author: Jonas Devlieghere
Date: 2023-03-08T20:56:11-08:00
New Revision: cf3524a5746f9498280b3a9180b75575c0065d1a

URL: https://github.com/llvm/llvm-project/commit/cf3524a5746f9498280b3a9180b75575c0065d1a
DIFF: https://github.com/llvm/llvm-project/commit/cf3524a5746f9498280b3a9180b75575c0065d1a.diff

LOG: [lldb] Introduce new SymbolFileJSON and ObjectFileJSON

Introduce a new object and symbol file format with the goal of mapping
addresses to symbol names. I'd like to think of it as an extremely
simple textual symtab. The file format consists of a triple, a UUID and
a list of symbols. JSON is used for the encoding, but that's mostly an
implementation detail. The goal of the format was to be simple and human
readable.

The new file format is motivated by two use cases:

 - Stripped binaries: when a binary is stripped, you lose the ability to
   do things like setting symbolic breakpoints. You can keep the
   unstripped binary around, but if all you need is the stripped
   symbols then that's a lot of overhead. Instead, we could save the
   stripped symbols to a file and load them in the debugger when
   needed. I want to extend llvm-strip to have a mode where it emits
   this new file format.

 - Interactive crashlogs: with interactive crashlogs, if we don't have
   the binary or the dSYM for a particular module, we currently show an
   unnamed symbol for those frames. This is a regression compared to the
   textual format, which has these frames pre-symbolicated. Given that
   this information is available in the JSON crashlog, we need a way to
   tell LLDB about it. With the new symbol file format, we can easily
   synthesize a symbol file for each of those modules and load them to
   symbolicate those frames.

Here's an example of the file format:

 {
     "triple": "arm64-apple-macosx13.0.0",
     "uuid": "36D0CCE7-8ED2-3CA3-96B0-48C1764DA908",
     "symbols": [
         {
             "name": "main",
             "type": "code",
             "size": 32,
             "address": 4294983568
         },
         {
             "name": "foo",
             "type": "code",
             "size": 8,
             "address": 4294983560
         }
     ]
 }

Differential revision: https://reviews.llvm.org/D145180

Added: 
    lldb/source/Plugins/ObjectFile/JSON/CMakeLists.txt
    lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
    lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
    lldb/source/Plugins/SymbolFile/JSON/CMakeLists.txt
    lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.cpp
    lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h
    lldb/test/API/macosx/symbols/Makefile
    lldb/test/API/macosx/symbols/TestSymbolFileJSON.py
    lldb/test/API/macosx/symbols/main.c
    lldb/unittests/Symbol/JSONSymbolTest.cpp

Modified: 
    lldb/include/lldb/Symbol/Symbol.h
    lldb/source/Plugins/ObjectFile/CMakeLists.txt
    lldb/source/Plugins/SymbolFile/CMakeLists.txt
    lldb/source/Symbol/Symbol.cpp
    lldb/unittests/Symbol/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h
index 164fafd437dcb..44a2d560010fe 100644
--- a/lldb/include/lldb/Symbol/Symbol.h
+++ b/lldb/include/lldb/Symbol/Symbol.h
@@ -11,12 +11,23 @@
 
 #include "lldb/Core/AddressRange.h"
 #include "lldb/Core/Mangled.h"
+#include "lldb/Core/Section.h"
 #include "lldb/Symbol/SymbolContextScope.h"
 #include "lldb/Utility/UserID.h"
 #include "lldb/lldb-private.h"
+#include "llvm/Support/JSON.h"
 
 namespace lldb_private {
 
+struct JSONSymbol { // One symbol as described by a JSON symbol file.
+  std::optional<uint64_t> address; // File address; exclusive with `value`.
+  std::optional<uint64_t> value; // Absolute value; exclusive with `address`.
+  std::optional<uint64_t> size; // Byte size; FromJSON uses 0 when absent.
+  std::optional<uint64_t> id; // Symbol ID; FromJSON uses 0 when absent.
+  std::optional<lldb::SymbolType> type; // eSymbolTypeAny when absent.
+  std::string name; // Name that FromJSON wraps in a Mangled.
+};
+
 class Symbol : public SymbolContextScope {
 public:
   // ObjectFile readers can classify their symbol table entries and searches
@@ -39,6 +50,9 @@ class Symbol : public SymbolContextScope {
 
   const Symbol &operator=(const Symbol &rhs);
 
+  static llvm::Expected<Symbol> FromJSON(const JSONSymbol &symbol,
+                                         SectionList *section_list);
+
   void Clear();
 
   bool Compare(ConstString name, lldb::SymbolType type) const;
@@ -187,7 +201,7 @@ class Symbol : public SymbolContextScope {
 
   bool IsWeak() const { return m_is_weak; }
 
-  void SetIsWeak (bool b) { m_is_weak = b; }
+  void SetIsWeak(bool b) { m_is_weak = b; }
 
   bool GetByteSizeIsValid() const { return m_size_is_valid; }
 
@@ -332,4 +346,16 @@ class Symbol : public SymbolContextScope {
 
 } // namespace lldb_private
 
+namespace llvm {
+namespace json {
+
+bool fromJSON(const llvm::json::Value &value, lldb_private::JSONSymbol &symbol,
+              llvm::json::Path path); // Fills `symbol` from a JSON object.
+
+bool fromJSON(const llvm::json::Value &value, lldb::SymbolType &type,
+              llvm::json::Path path); // Parses a symbol type name string.
+
+} // namespace json
+} // namespace llvm
+
 #endif // LLDB_SYMBOL_SYMBOL_H

diff  --git a/lldb/source/Plugins/ObjectFile/CMakeLists.txt b/lldb/source/Plugins/ObjectFile/CMakeLists.txt
index 042ea826aa3b1..5cc1240e0768b 100644
--- a/lldb/source/Plugins/ObjectFile/CMakeLists.txt
+++ b/lldb/source/Plugins/ObjectFile/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_subdirectory(Breakpad)
 add_subdirectory(ELF)
 add_subdirectory(JIT)
+add_subdirectory(JSON)
 add_subdirectory(Mach-O)
 add_subdirectory(Minidump)
 add_subdirectory(PDB)

diff  --git a/lldb/source/Plugins/ObjectFile/JSON/CMakeLists.txt b/lldb/source/Plugins/ObjectFile/JSON/CMakeLists.txt
new file mode 100644
index 0000000000000..73a8697d13d15
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/JSON/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_lldb_library(lldbPluginObjectFileJSON PLUGIN
+  ObjectFileJSON.cpp
+
+  LINK_LIBS
+    lldbCore
+    lldbHost
+    lldbSymbol
+    lldbUtility
+  LINK_COMPONENTS
+    Support
+    TargetParser
+  )

diff  --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
new file mode 100644
index 0000000000000..7cd836ab2460c
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.cpp
@@ -0,0 +1,176 @@
+//===-- ObjectFileJSON.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Plugins/ObjectFile/JSON/ObjectFileJSON.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/ModuleSpec.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Core/Section.h"
+#include "lldb/Symbol/Symbol.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "llvm/ADT/DenseSet.h"
+#include <optional>
+
+using namespace llvm;
+using namespace lldb;
+using namespace lldb_private;
+
+LLDB_PLUGIN_DEFINE(ObjectFileJSON)
+
+char ObjectFileJSON::ID;
+
+void ObjectFileJSON::Initialize() { // Registers the plugin's three callbacks.
+  PluginManager::RegisterPlugin(GetPluginNameStatic(),
+                                GetPluginDescriptionStatic(), CreateInstance,
+                                CreateMemoryInstance, GetModuleSpecifications);
+}
+
+void ObjectFileJSON::Terminate() { // Unregisters using the same CreateInstance.
+  PluginManager::UnregisterPlugin(CreateInstance);
+}
+
+/// Create an ObjectFileJSON from \p data_sp, or from \p file when no data is
+/// supplied.
+///
+/// The data must start with '{', parse as JSON, carry the "triple" and "uuid"
+/// header keys and a well-formed "symbols" array. Every failure is logged to
+/// the symbols log channel and reported by returning nullptr.
+///
+/// \return A new ObjectFileJSON owned by the caller, or nullptr on failure.
+ObjectFile *
+ObjectFileJSON::CreateInstance(const ModuleSP &module_sp, DataBufferSP data_sp,
+                               offset_t data_offset, const FileSpec *file,
+                               offset_t file_offset, offset_t length) {
+  if (!data_sp) {
+    data_sp = MapFileData(*file, length, file_offset);
+    if (!data_sp)
+      return nullptr;
+    data_offset = 0;
+  }
+
+  if (!MagicBytesMatch(data_sp, 0, data_sp->GetByteSize()))
+    return nullptr;
+
+  // Make sure the buffer covers the whole file before parsing it.
+  if (data_sp->GetByteSize() < length) {
+    data_sp = MapFileData(*file, length, file_offset);
+    if (!data_sp)
+      return nullptr;
+    data_offset = 0;
+  }
+
+  Log *log = GetLog(LLDBLog::Symbols);
+
+  // Bound the text by the buffer size instead of relying on a terminating
+  // NUL that the buffer does not promise. Stopping at the first NUL keeps the
+  // result identical whenever a terminator is present.
+  llvm::StringRef text(reinterpret_cast<const char *>(data_sp->GetBytes()),
+                       data_sp->GetByteSize());
+  text = text.take_front(text.find('\0'));
+
+  Expected<json::Value> json = json::parse(text);
+  if (!json) {
+    LLDB_LOG_ERROR(log, json.takeError(),
+                   "failed to parse JSON object file: {0}");
+    return nullptr;
+  }
+
+  json::Path::Root root;
+  Header header;
+  if (!fromJSON(*json, header, root)) {
+    LLDB_LOG_ERROR(log, root.getError(),
+                   "failed to parse JSON object file header: {0}");
+    return nullptr;
+  }
+
+  ArchSpec arch(header.triple);
+  UUID uuid;
+  uuid.SetFromStringRef(header.uuid);
+
+  // A missing or malformed symbol list is an error, not an empty symtab.
+  Body body;
+  if (!fromJSON(*json, body, root)) {
+    LLDB_LOG_ERROR(log, root.getError(),
+                   "failed to parse JSON object file body: {0}");
+    return nullptr;
+  }
+
+  return new ObjectFileJSON(module_sp, data_sp, data_offset, file, file_offset,
+                            length, std::move(arch), std::move(uuid),
+                            std::move(body.symbols));
+}
+
+ObjectFile *ObjectFileJSON::CreateMemoryInstance(const ModuleSP &module_sp,
+                                                 WritableDataBufferSP data_sp,
+                                                 const ProcessSP &process_sp,
+                                                 addr_t header_addr) {
+  return nullptr; // Always null: no instance is ever built from memory.
+}
+
+/// Append a ModuleSpec (file, architecture and UUID) for \p file to \p specs
+/// when its contents are a JSON symbol file.
+///
+/// \p data_sp is replaced with a mapping of the whole file when the supplied
+/// buffer is shorter than \p length, mirroring CreateInstance. Parse failures
+/// are logged to the symbols log channel.
+///
+/// \return The number of specifications appended (0 or 1).
+size_t ObjectFileJSON::GetModuleSpecifications(
+    const FileSpec &file, DataBufferSP &data_sp, offset_t data_offset,
+    offset_t file_offset, offset_t length, ModuleSpecList &specs) {
+  if (!data_sp)
+    return 0;
+
+  if (!MagicBytesMatch(data_sp, data_offset, data_sp->GetByteSize()))
+    return 0;
+
+  // The supplied buffer may be shorter than the file; a truncated document
+  // can never parse, so map the whole file first.
+  if (data_sp->GetByteSize() < length) {
+    data_sp = MapFileData(file, length, file_offset);
+    if (!data_sp)
+      return 0;
+  }
+
+  Log *log = GetLog(LLDBLog::Symbols);
+
+  // Bound the text by the buffer size instead of relying on a terminating
+  // NUL that the buffer does not promise.
+  llvm::StringRef text(reinterpret_cast<const char *>(data_sp->GetBytes()),
+                       data_sp->GetByteSize());
+  text = text.take_front(text.find('\0'));
+
+  Expected<json::Value> json = json::parse(text);
+  if (!json) {
+    LLDB_LOG_ERROR(log, json.takeError(),
+                   "failed to parse JSON object file: {0}");
+    return 0;
+  }
+
+  json::Path::Root root;
+  Header header;
+  if (!fromJSON(*json, header, root)) {
+    LLDB_LOG_ERROR(log, root.getError(),
+                   "failed to parse JSON object file header: {0}");
+    return 0;
+  }
+
+  ArchSpec arch(header.triple);
+  UUID uuid;
+  uuid.SetFromStringRef(header.uuid);
+
+  ModuleSpec spec(file, std::move(arch));
+  spec.GetUUID() = std::move(uuid);
+  specs.Append(spec);
+  return 1;
+}
+
+ObjectFileJSON::ObjectFileJSON(const ModuleSP &module_sp, DataBufferSP &data_sp,
+                               offset_t data_offset, const FileSpec *file,
+                               offset_t offset, offset_t length, ArchSpec arch,
+                               UUID uuid, std::vector<JSONSymbol> symbols)
+    : ObjectFile(module_sp, file, offset, length, data_sp, data_offset),
+      m_arch(std::move(arch)), m_uuid(std::move(uuid)),
+      m_symbols(std::move(symbols)) {} // Takes over the pre-parsed contents.
+
+bool ObjectFileJSON::ParseHeader() {
+  // Nothing to do: CreateInstance parsed the header before constructing us.
+  return true;
+}
+
+/// Populate \p symtab with the symbols read from the JSON file.
+///
+/// Each stored JSONSymbol is converted with Symbol::FromJSON; symbols that
+/// fail to convert are logged to the symbols channel and skipped. The table
+/// is finalized before returning.
+void ObjectFileJSON::ParseSymtab(Symtab &symtab) {
+  Log *log = GetLog(LLDBLog::Symbols);
+
+  // Do not dereference a null module. Symbol::FromJSON reports a null
+  // section list as an error, so each symbol is then logged and skipped.
+  ModuleSP module_sp = GetModule();
+  SectionList *section_list =
+      module_sp ? module_sp->GetSectionList() : nullptr;
+
+  // Iterate by reference: JSONSymbol owns a std::string, so iterating by
+  // value would copy every name.
+  for (const JSONSymbol &json_symbol : m_symbols) {
+    llvm::Expected<Symbol> symbol = Symbol::FromJSON(json_symbol, section_list);
+    if (!symbol) {
+      LLDB_LOG_ERROR(log, symbol.takeError(), "invalid symbol");
+      continue;
+    }
+    symtab.AddSymbol(*symbol);
+  }
+  symtab.Finalize();
+}
+
+void ObjectFileJSON::CreateSections(SectionList &unified_section_list) {} // No-op: adds no sections.
+
+/// Return true when the first byte of the requested range of \p data_sp is
+/// an opening brace.
+bool ObjectFileJSON::MagicBytesMatch(DataBufferSP data_sp,
+                                     lldb::addr_t data_offset,
+                                     lldb::addr_t data_length) {
+  DataExtractor extractor;
+  extractor.SetData(data_sp, data_offset, data_length);
+
+  lldb::offset_t cursor = 0;
+  const uint8_t first_byte = extractor.GetU8(&cursor);
+  return first_byte == '{';
+}
+
+namespace lldb_private {
+
+/// Read the "triple" and "uuid" keys of \p value into \p header.
+bool fromJSON(const json::Value &value, ObjectFileJSON::Header &header,
+              json::Path path) {
+  json::ObjectMapper mapper(value, path);
+  if (!mapper)
+    return false;
+  if (!mapper.map("triple", header.triple))
+    return false;
+  return mapper.map("uuid", header.uuid);
+}
+
+/// Read the "symbols" key of \p value into \p body.
+bool fromJSON(const json::Value &value, ObjectFileJSON::Body &body,
+              json::Path path) {
+  json::ObjectMapper mapper(value, path);
+  if (!mapper)
+    return false;
+  return mapper.map("symbols", body.symbols);
+}
+
+} // namespace lldb_private

diff  --git a/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
new file mode 100644
index 0000000000000..cb3a7d2261800
--- /dev/null
+++ b/lldb/source/Plugins/ObjectFile/JSON/ObjectFileJSON.h
@@ -0,0 +1,119 @@
+//===-- ObjectFileJSON.h -------------------------------------- -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H
+#define LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H
+
+#include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Utility/ArchSpec.h"
+#include "llvm/Support/JSON.h"
+
+namespace lldb_private {
+
+class ObjectFileJSON : public ObjectFile { // Object file read from JSON text.
+public:
+  static void Initialize();
+  static void Terminate();
+
+  static llvm::StringRef GetPluginNameStatic() { return "JSON"; }
+
+  static const char *GetPluginDescriptionStatic() {
+    return "JSON object file reader.";
+  }
+
+  static ObjectFile *
+  CreateInstance(const lldb::ModuleSP &module_sp, lldb::DataBufferSP data_sp,
+                 lldb::offset_t data_offset, const FileSpec *file,
+                 lldb::offset_t file_offset, lldb::offset_t length);
+
+  static ObjectFile *CreateMemoryInstance(const lldb::ModuleSP &module_sp,
+                                          lldb::WritableDataBufferSP data_sp,
+                                          const lldb::ProcessSP &process_sp,
+                                          lldb::addr_t header_addr);
+
+  static size_t GetModuleSpecifications(const FileSpec &file,
+                                        lldb::DataBufferSP &data_sp,
+                                        lldb::offset_t data_offset,
+                                        lldb::offset_t file_offset,
+                                        lldb::offset_t length,
+                                        ModuleSpecList &specs);
+
+  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+  // LLVM RTTI support
+  static char ID;
+  bool isA(const void *ClassID) const override {
+    return ClassID == &ID || ObjectFile::isA(ClassID);
+  }
+  static bool classof(const ObjectFile *obj) { return obj->isA(&ID); }
+
+  bool ParseHeader() override;
+
+  lldb::ByteOrder GetByteOrder() const override {
+    return m_arch.GetByteOrder(); // Derived from the header's triple.
+  }
+
+  bool IsExecutable() const override { return false; }
+
+  uint32_t GetAddressByteSize() const override {
+    return m_arch.GetAddressByteSize(); // Derived from the header's triple.
+  }
+
+  AddressClass GetAddressClass(lldb::addr_t file_addr) override {
+    return AddressClass::eInvalid; // Same answer for every address.
+  }
+
+  void ParseSymtab(lldb_private::Symtab &symtab) override;
+
+  bool IsStripped() override { return false; }
+
+  void CreateSections(SectionList &unified_section_list) override;
+
+  void Dump(Stream *s) override {} // Writes nothing to `s`.
+
+  ArchSpec GetArchitecture() override { return m_arch; }
+
+  UUID GetUUID() override { return m_uuid; }
+
+  uint32_t GetDependentModules(FileSpecList &files) override { return 0; }
+
+  Type CalculateType() override { return eTypeDebugInfo; }
+
+  Strata CalculateStrata() override { return eStrataUser; }
+
+  static bool MagicBytesMatch(lldb::DataBufferSP data_sp, lldb::addr_t offset,
+                              lldb::addr_t length);
+
+  struct Header { // Top-level "triple" and "uuid" keys of the document.
+    std::string triple;
+    std::string uuid;
+  };
+
+  struct Body { // Top-level "symbols" array of the document.
+    std::vector<JSONSymbol> symbols;
+  };
+
+private:
+  ArchSpec m_arch; // Built from Header::triple.
+  UUID m_uuid; // Built from Header::uuid.
+  std::vector<JSONSymbol> m_symbols; // Converted to Symbols in ParseSymtab.
+
+  ObjectFileJSON(const lldb::ModuleSP &module_sp, lldb::DataBufferSP &data_sp,
+                 lldb::offset_t data_offset, const FileSpec *file,
+                 lldb::offset_t offset, lldb::offset_t length, ArchSpec arch,
+                 UUID uuid, std::vector<JSONSymbol> symbols); // Private: built by CreateInstance.
+};
+
+bool fromJSON(const llvm::json::Value &value, ObjectFileJSON::Header &header,
+              llvm::json::Path path); // Maps "triple" and "uuid".
+
+bool fromJSON(const llvm::json::Value &value, ObjectFileJSON::Body &body,
+              llvm::json::Path path); // Maps "symbols".
+
+} // namespace lldb_private
+#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_JSON_OBJECTFILEJSON_H

diff  --git a/lldb/source/Plugins/SymbolFile/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/CMakeLists.txt
index 5e18fb154823a..76b09172bd206 100644
--- a/lldb/source/Plugins/SymbolFile/CMakeLists.txt
+++ b/lldb/source/Plugins/SymbolFile/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_subdirectory(Breakpad)
 add_subdirectory(DWARF)
+add_subdirectory(JSON)
 add_subdirectory(NativePDB)
 add_subdirectory(PDB)
 add_subdirectory(Symtab)

diff  --git a/lldb/source/Plugins/SymbolFile/JSON/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/JSON/CMakeLists.txt
new file mode 100644
index 0000000000000..7de8fef443976
--- /dev/null
+++ b/lldb/source/Plugins/SymbolFile/JSON/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_lldb_library(lldbPluginSymbolFileJSON PLUGIN
+  SymbolFileJSON.cpp
+
+  LINK_LIBS
+    lldbCore
+    lldbSymbol
+  )

diff  --git a/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.cpp b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.cpp
new file mode 100644
index 0000000000000..f10ff59f3186d
--- /dev/null
+++ b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.cpp
@@ -0,0 +1,105 @@
+//===-- SymbolFileJSON.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolFileJSON.h"
+
+#include "Plugins/ObjectFile/JSON/ObjectFileJSON.h"
+#include "lldb/Core/Module.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/Function.h"
+#include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Symbol/Symbol.h"
+#include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Symbol/Symtab.h"
+#include "lldb/Symbol/TypeList.h"
+#include "lldb/Utility/LLDBLog.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/RegularExpression.h"
+#include "lldb/Utility/Timer.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+#include <memory>
+#include <optional>
+
+using namespace llvm;
+using namespace lldb;
+using namespace lldb_private;
+
+LLDB_PLUGIN_DEFINE(SymbolFileJSON)
+
+char SymbolFileJSON::ID;
+
+SymbolFileJSON::SymbolFileJSON(lldb::ObjectFileSP objfile_sp)
+    : SymbolFileCommon(std::move(objfile_sp)) {} // Forwards the object file.
+
+void SymbolFileJSON::Initialize() { // Registers this plugin's CreateInstance.
+  PluginManager::RegisterPlugin(GetPluginNameStatic(),
+                                GetPluginDescriptionStatic(), CreateInstance);
+}
+
+void SymbolFileJSON::Terminate() { // Unregisters using the same CreateInstance.
+  PluginManager::UnregisterPlugin(CreateInstance);
+}
+
+llvm::StringRef SymbolFileJSON::GetPluginDescriptionStatic() {
+  return "Reads debug symbols from a textual symbol table."; // Passed to RegisterPlugin.
+}
+
+SymbolFile *SymbolFileJSON::CreateInstance(ObjectFileSP objfile_sp) {
+  return new SymbolFileJSON(std::move(objfile_sp)); // Caller receives a raw pointer.
+}
+
+/// Report global-variable and function abilities, but only when the backing
+/// object file is an ObjectFileJSON; otherwise report none.
+uint32_t SymbolFileJSON::CalculateAbilities() {
+  const bool backed_by_json =
+      m_objfile_sp && llvm::isa<ObjectFileJSON>(*m_objfile_sp);
+  if (backed_by_json)
+    return GlobalVariables | Functions;
+  return 0;
+}
+
+/// Resolve \p so_addr to the symbol whose range contains its file address.
+///
+/// Only eSymbolContextSymbol is handled: when requested, \p sc.symbol is set
+/// to the result of the symtab lookup (possibly null).
+///
+/// \return eSymbolContextSymbol when a symbol was found, otherwise 0.
+uint32_t SymbolFileJSON::ResolveSymbolContext(const Address &so_addr,
+                                              SymbolContextItem resolve_scope,
+                                              SymbolContext &sc) {
+  std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
+
+  // Guard against a missing object file, as CalculateAbilities and
+  // AddSymbols already do.
+  if (!m_objfile_sp)
+    return 0;
+
+  // Fetch the symbol table once instead of once per use.
+  Symtab *symtab = m_objfile_sp->GetSymtab();
+  if (!symtab)
+    return 0;
+
+  uint32_t resolved_flags = 0;
+  if (resolve_scope & eSymbolContextSymbol) {
+    sc.symbol =
+        symtab->FindSymbolContainingFileAddress(so_addr.GetFileAddress());
+    if (sc.symbol)
+      resolved_flags |= eSymbolContextSymbol;
+  }
+  return resolved_flags;
+}
+
+CompUnitSP SymbolFileJSON::ParseCompileUnitAtIndex(uint32_t idx) { return {}; } // Always empty.
+
+void SymbolFileJSON::GetTypes(SymbolContextScope *sc_scope, TypeClass type_mask,
+                              lldb_private::TypeList &type_list) {} // Adds nothing to type_list.
+
+/// Copy every symbol of the object file's symbol table into \p symtab and
+/// finalize it. Does nothing when there is no source table or when \p symtab
+/// already is that table.
+void SymbolFileJSON::AddSymbols(Symtab &symtab) {
+  Symtab *source = m_objfile_sp ? m_objfile_sp->GetSymtab() : nullptr;
+  if (!source || source == &symtab)
+    return;
+
+  // The count is sampled once, before any symbol is added.
+  for (size_t idx = 0, count = source->GetNumSymbols(); idx < count; ++idx)
+    symtab.AddSymbol(*source->SymbolAtIndex(idx));
+
+  symtab.Finalize();
+}

diff  --git a/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h
new file mode 100644
index 0000000000000..1fc41c739c027
--- /dev/null
+++ b/lldb/source/Plugins/SymbolFile/JSON/SymbolFileJSON.h
@@ -0,0 +1,110 @@
+//===-- SymbolFileJSON.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_JSON_SYMBOLFILETEXT_H
+#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_JSON_SYMBOLFILETEXT_H
+
+#include <map>
+#include <optional>
+#include <vector>
+
+#include "lldb/Symbol/CompileUnit.h"
+#include "lldb/Symbol/SymbolFile.h"
+
+namespace lldb_private {
+
+/// Symbol file plugin that exposes the symbol table of an ObjectFileJSON.
+///
+/// It provides symbols only: there are no compile units, functions, line
+/// tables, types or variables, so every debug-info parsing hook reports
+/// "nothing".
+class SymbolFileJSON : public lldb_private::SymbolFileCommon {
+  /// LLVM RTTI support.
+  static char ID;
+
+public:
+  /// LLVM RTTI support.
+  /// \{
+  bool isA(const void *ClassID) const override {
+    return ClassID == &ID || SymbolFileCommon::isA(ClassID);
+  }
+  static bool classof(const SymbolFile *obj) { return obj->isA(&ID); }
+  /// \}
+
+  SymbolFileJSON(lldb::ObjectFileSP objfile_sp);
+
+  static void Initialize();
+
+  static void Terminate();
+
+  /// Name under which the plugin is registered; matches ObjectFileJSON.
+  static llvm::StringRef GetPluginNameStatic() { return "JSON"; }
+
+  static llvm::StringRef GetPluginDescriptionStatic();
+
+  static lldb_private::SymbolFile *
+  CreateInstance(lldb::ObjectFileSP objfile_sp);
+
+  llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+
+  uint32_t CalculateAbilities() override;
+
+  lldb::LanguageType ParseLanguage(CompileUnit &comp_unit) override {
+    return lldb::eLanguageTypeUnknown;
+  }
+
+  size_t ParseFunctions(CompileUnit &comp_unit) override { return 0; }
+
+  bool ParseLineTable(CompileUnit &comp_unit) override { return false; }
+
+  bool ParseDebugMacros(CompileUnit &comp_unit) override { return false; }
+
+  bool ParseSupportFiles(CompileUnit &comp_unit,
+                         FileSpecList &support_files) override {
+    return false;
+  }
+
+  size_t ParseTypes(CompileUnit &cu) override { return 0; }
+
+  bool ParseImportedModules(
+      const SymbolContext &sc,
+      std::vector<lldb_private::SourceModule> &imported_modules) override {
+    return false;
+  }
+
+  size_t ParseBlocksRecursive(Function &func) override { return 0; }
+
+  size_t ParseVariablesForContext(const SymbolContext &sc) override {
+    return 0;
+  }
+
+  uint32_t CalculateNumCompileUnits() override { return 0; }
+
+  lldb::CompUnitSP ParseCompileUnitAtIndex(uint32_t index) override;
+
+  Type *ResolveTypeUID(lldb::user_id_t type_uid) override { return nullptr; }
+  std::optional<ArrayInfo> GetDynamicArrayInfoForUID(
+      lldb::user_id_t type_uid,
+      const lldb_private::ExecutionContext *exe_ctx) override {
+    return std::nullopt;
+  }
+
+  bool CompleteType(CompilerType &compiler_type) override { return false; }
+
+  /// Resolves only the symbol part of a symbol context.
+  uint32_t ResolveSymbolContext(const lldb_private::Address &so_addr,
+                                lldb::SymbolContextItem resolve_scope,
+                                lldb_private::SymbolContext &sc) override;
+
+  void GetTypes(lldb_private::SymbolContextScope *sc_scope,
+                lldb::TypeClass type_mask,
+                lldb_private::TypeList &type_list) override;
+
+  /// Merges the object file's symbols into \p symtab.
+  void AddSymbols(Symtab &symtab) override;
+};
+} // namespace lldb_private
+
+#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_JSON_SYMBOLFILETEXT_H

diff  --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp
index 067b7b900d89d..ee42df9e4d469 100644
--- a/lldb/source/Symbol/Symbol.cpp
+++ b/lldb/source/Symbol/Symbol.cpp
@@ -19,6 +19,7 @@
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/DataEncoder.h"
 #include "lldb/Utility/Stream.h"
+#include "llvm/ADT/StringSwitch.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -95,6 +96,55 @@ const Symbol &Symbol::operator=(const Symbol &rhs) {
   return *this;
 }
 
+/// Build a Symbol from its JSON description.
+///
+/// \param symbol        Parsed description; exactly one of its address and
+///                      value must be set.
+/// \param section_list  Sections used to resolve a file address; must not be
+///                      null.
+/// \return The symbol, or an error when the section list is null, when the
+///         address/value combination is invalid, or when no section contains
+///         the address.
+llvm::Expected<Symbol> Symbol::FromJSON(const JSONSymbol &symbol,
+                                        SectionList *section_list) {
+  if (!section_list)
+    return llvm::make_error<llvm::StringError>("no section list provided",
+                                               llvm::inconvertibleErrorCode());
+
+  const bool has_value = symbol.value.has_value();
+  const bool has_address = symbol.address.has_value();
+
+  if (!has_value && !has_address)
+    return llvm::make_error<llvm::StringError>(
+        "symbol must contain either a value or an address",
+        llvm::inconvertibleErrorCode());
+
+  if (has_value && has_address)
+    return llvm::make_error<llvm::StringError>(
+        "symbol cannot contain both a value and an address",
+        llvm::inconvertibleErrorCode());
+
+  const uint64_t byte_size = symbol.size.value_or(0);
+
+  // Work out the address range first; the Symbol itself is built the same
+  // way for both kinds of symbol.
+  AddressRange range;
+  if (has_address) {
+    SectionSP section_sp =
+        section_list->FindSectionContainingFileAddress(*symbol.address);
+    if (!section_sp)
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("no section found for address: {0:x}",
+                        *symbol.address),
+          llvm::inconvertibleErrorCode());
+    const uint64_t section_offset =
+        *symbol.address - section_sp->GetFileAddress();
+    range = AddressRange(section_sp, section_offset, byte_size);
+  } else {
+    // Absolute symbols encode the integer value in the m_offset of the
+    // AddressRange object and the section is set to nothing.
+    range = AddressRange(SectionSP(), *symbol.value, byte_size);
+  }
+
+  return Symbol(symbol.id.value_or(0), Mangled(symbol.name),
+                symbol.type.value_or(eSymbolTypeAny), /*external=*/false,
+                /*is_debug=*/false, /*is_trampoline=*/false,
+                /*is_artificial=*/false, range,
+                /*size_is_valid=*/symbol.size.has_value(),
+                /*contains_linker_annotations=*/false, /*flags=*/0);
+}
+
 void Symbol::Clear() {
   m_uid = UINT32_MAX;
   m_mangled.Clear();
@@ -622,14 +672,14 @@ bool Symbol::Decode(const DataExtractor &data, lldb::offset_t *offset_ptr,
   const bool is_addr = data.GetU8(offset_ptr) != 0;
   const uint64_t value = data.GetU64(offset_ptr);
   if (is_addr) {
-    m_addr_range.GetBaseAddress().ResolveAddressUsingFileSections(
-        value, section_list);
+    m_addr_range.GetBaseAddress().ResolveAddressUsingFileSections(value,
+                                                                  section_list);
   } else {
     m_addr_range.GetBaseAddress().Clear();
     m_addr_range.GetBaseAddress().SetOffset(value);
   }
   m_addr_range.SetByteSize(data.GetU64(offset_ptr));
-  m_flags =  data.GetU32(offset_ptr);
+  m_flags = data.GetU32(offset_ptr);
   return true;
 }
 
@@ -723,3 +773,77 @@ bool Symbol::operator==(const Symbol &rhs) const {
     return false;
   return true;
 }
+
+namespace llvm {
+namespace json {
+
+/// Fill \p symbol from the JSON object \p value.
+///
+/// Fails when \p value is not an object, when any key fails to map, or when
+/// the object does not carry exactly one of "value" and "address".
+bool fromJSON(const llvm::json::Value &value, lldb_private::JSONSymbol &symbol,
+              llvm::json::Path path) {
+  llvm::json::ObjectMapper mapper(value, path);
+  if (!mapper)
+    return false;
+
+  // Keys are mapped in a fixed order and mapping stops at the first failure.
+  if (!mapper.map("value", symbol.value) ||
+      !mapper.map("address", symbol.address) ||
+      !mapper.map("size", symbol.size) || !mapper.map("id", symbol.id) ||
+      !mapper.map("type", symbol.type) || !mapper.map("name", symbol.name))
+    return false;
+
+  const bool has_value = symbol.value.has_value();
+  const bool has_address = symbol.address.has_value();
+
+  if (!has_value && !has_address) {
+    path.report("symbol must have either a value or an address");
+    return false;
+  }
+
+  if (has_value && has_address) {
+    path.report("symbol cannot have both a value and an address");
+    return false;
+  }
+
+  return true;
+}
+
+/// Parse a symbol type from its JSON string spelling.
+///
+/// The correct spellings "additional" and "reexported" are accepted, as are
+/// all-lowercase "variabletype" and "objcmetaclass". The historical spellings
+/// ("additional,", "reexporte", "variableType", "objcmetaClass") remain valid
+/// so existing files keep loading.
+///
+/// \return false, after reporting on \p path, when \p value is not a string
+///         or names no known symbol type; \p type is then left as
+///         eSymbolTypeInvalid in the latter case.
+bool fromJSON(const llvm::json::Value &value, lldb::SymbolType &type,
+              llvm::json::Path path) {
+  const auto str = value.getAsString();
+  if (!str) {
+    path.report("expected string");
+    return false;
+  }
+
+  type = llvm::StringSwitch<lldb::SymbolType>(*str)
+             .Case("absolute", eSymbolTypeAbsolute)
+             .Case("code", eSymbolTypeCode)
+             .Case("resolver", eSymbolTypeResolver)
+             .Case("data", eSymbolTypeData)
+             .Case("trampoline", eSymbolTypeTrampoline)
+             .Case("runtime", eSymbolTypeRuntime)
+             .Case("exception", eSymbolTypeException)
+             .Case("sourcefile", eSymbolTypeSourceFile)
+             .Case("headerfile", eSymbolTypeHeaderFile)
+             .Case("objectfile", eSymbolTypeObjectFile)
+             .Case("commonblock", eSymbolTypeCommonBlock)
+             .Case("block", eSymbolTypeBlock)
+             .Case("local", eSymbolTypeLocal)
+             .Case("param", eSymbolTypeParam)
+             .Case("variable", eSymbolTypeVariable)
+             .Cases("variabletype", "variableType", eSymbolTypeVariableType)
+             .Case("lineentry", eSymbolTypeLineEntry)
+             .Case("lineheader", eSymbolTypeLineHeader)
+             .Case("scopebegin", eSymbolTypeScopeBegin)
+             .Case("scopeend", eSymbolTypeScopeEnd)
+             .Cases("additional", "additional,", eSymbolTypeAdditional)
+             .Case("compiler", eSymbolTypeCompiler)
+             .Case("instrumentation", eSymbolTypeInstrumentation)
+             .Case("undefined", eSymbolTypeUndefined)
+             .Case("objcclass", eSymbolTypeObjCClass)
+             .Cases("objcmetaclass", "objcmetaClass", eSymbolTypeObjCMetaClass)
+             .Case("objcivar", eSymbolTypeObjCIVar)
+             .Cases("reexported", "reexporte", eSymbolTypeReExported)
+             .Default(eSymbolTypeInvalid);
+
+  if (type == eSymbolTypeInvalid) {
+    path.report("invalid symbol type");
+    return false;
+  }
+
+  return true;
+}
+} // namespace json
+} // namespace llvm

diff  --git a/lldb/test/API/macosx/symbols/Makefile b/lldb/test/API/macosx/symbols/Makefile
new file mode 100644
index 0000000000000..aff841c364299
--- /dev/null
+++ b/lldb/test/API/macosx/symbols/Makefile
@@ -0,0 +1,8 @@
+C_SOURCES := main.c
+
+all: stripped.out
+
+stripped.out : a.out
+	strip a.out -o stripped.out
+
+include Makefile.rules

diff  --git a/lldb/test/API/macosx/symbols/TestSymbolFileJSON.py b/lldb/test/API/macosx/symbols/TestSymbolFileJSON.py
new file mode 100644
index 0000000000000..31814ba5c85e2
--- /dev/null
+++ b/lldb/test/API/macosx/symbols/TestSymbolFileJSON.py
@@ -0,0 +1,103 @@
+""" Testing symbol loading via JSON file. """
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+
+class TargetSymbolsFileJSON(TestBase):
+
+    def setUp(self):
+        TestBase.setUp(self)
+        self.source = 'main.c'
+
+    @no_debug_info_test
+    def test_symbol_file_json_address(self):
+        """Test that 'target symbols add' can load the symbols from a JSON file using file addresses."""
+
+        self.build()
+        stripped = self.getBuildArtifact("stripped.out")
+        unstripped = self.getBuildArtifact("a.out")
+
+        # Create a JSON symbol file from the unstripped target.
+        unstripped_target = self.dbg.CreateTarget(unstripped)
+        self.assertTrue(unstripped_target, VALID_TARGET)
+
+        unstripped_module = unstripped_target.GetModuleAtIndex(0)
+        main_symbol = unstripped_module.FindSymbol("main")
+        foo_symbol = unstripped_module.FindSymbol("foo")
+
+        data = {
+            "triple": unstripped_module.GetTriple(),
+            "uuid": unstripped_module.GetUUIDString(),
+            "symbols": list()
+        }
+        data['symbols'].append({
+            "name": "main",
+            "type": "code",
+            "size": main_symbol.GetSize(),
+            "address": main_symbol.addr.GetFileAddress(),
+        })
+        data['symbols'].append({
+            "name": "foo",
+            "type": "code",
+            "size": foo_symbol.GetSize(),
+            "address": foo_symbol.addr.GetFileAddress(),
+        })
+        data['symbols'].append({
+            "name": "bar",
+            "type": "code",
+            "size": 0,
+            "value": 0xFF,
+        })
+
+        json_object = json.dumps(data, indent=4)
+        json_symbol_file = self.getBuildArtifact("a.json")
+        with open(json_symbol_file, "w") as outfile:
+            outfile.write(json_object)
+
+        # Create a stripped target.
+        stripped_target = self.dbg.CreateTarget(stripped)
+        self.assertTrue(stripped_target, VALID_TARGET)
+
+        # Ensure there's no symbol for main and foo.
+        stripped_module = stripped_target.GetModuleAtIndex(0)
+        self.assertFalse(stripped_module.FindSymbol("main").IsValid())
+        self.assertFalse(stripped_module.FindSymbol("foo").IsValid())
+        self.assertFalse(stripped_module.FindSymbol("bar").IsValid())
+
+        main_bp = stripped_target.BreakpointCreateByName(
+            "main", "stripped.out")
+        self.assertTrue(main_bp, VALID_BREAKPOINT)
+        self.assertEqual(main_bp.num_locations, 0)
+
+        # Load the JSON symbol file.
+        self.runCmd("target symbols add -s %s %s" %
+                    (stripped, self.getBuildArtifact("a.json")))
+
+        stripped_main_symbol = stripped_module.FindSymbol("main")
+        stripped_foo_symbol = stripped_module.FindSymbol("foo")
+        stripped_bar_symbol = stripped_module.FindSymbol("bar")
+
+        # Ensure main and foo are available now.
+        self.assertTrue(stripped_main_symbol.IsValid())
+        self.assertTrue(stripped_foo_symbol.IsValid())
+        self.assertTrue(stripped_bar_symbol.IsValid())
+        self.assertEqual(main_bp.num_locations, 1)
+
+        # Ensure the file address matches between the stripped and unstripped target.
+        self.assertEqual(stripped_main_symbol.addr.GetFileAddress(),
+                         main_symbol.addr.GetFileAddress())
+        self.assertEqual(stripped_main_symbol.addr.GetFileAddress(),
+                         main_symbol.addr.GetFileAddress())
+
+        # Ensure the size matches.
+        self.assertEqual(stripped_main_symbol.GetSize(), main_symbol.GetSize())
+        self.assertEqual(stripped_main_symbol.GetSize(), main_symbol.GetSize())
+
+        # Ensure the type matches.
+        self.assertEqual(stripped_main_symbol.GetType(), main_symbol.GetType())
+        self.assertEqual(stripped_main_symbol.GetType(), main_symbol.GetType())
+
+        # Ensure the bar symbol has a fixed value of 10.
+        self.assertEqual(stripped_bar_symbol.GetValue(), 0xFF);

diff  --git a/lldb/test/API/macosx/symbols/main.c b/lldb/test/API/macosx/symbols/main.c
new file mode 100644
index 0000000000000..88653f66d7153
--- /dev/null
+++ b/lldb/test/API/macosx/symbols/main.c
@@ -0,0 +1,2 @@
+int foo() { return 1; }
+int main() { return foo(); }

diff  --git a/lldb/unittests/Symbol/CMakeLists.txt b/lldb/unittests/Symbol/CMakeLists.txt
index 60c0a9bdcf467..e1d24357e33db 100644
--- a/lldb/unittests/Symbol/CMakeLists.txt
+++ b/lldb/unittests/Symbol/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_lldb_unittest(SymbolTests
+  JSONSymbolTest.cpp
   LocateSymbolFileTest.cpp
   MangledTest.cpp
   PostfixExpressionTest.cpp

diff  --git a/lldb/unittests/Symbol/JSONSymbolTest.cpp b/lldb/unittests/Symbol/JSONSymbolTest.cpp
new file mode 100644
index 0000000000000..76c34b89f902f
--- /dev/null
+++ b/lldb/unittests/Symbol/JSONSymbolTest.cpp
@@ -0,0 +1,192 @@
+//===-- JSONSymbolTest.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Core/Section.h"
+#include "lldb/Symbol/Symbol.h"
+#include "llvm/Testing/Support/Error.h"
+
+#include "gtest/gtest.h"
+
+using namespace lldb;
+using namespace llvm;
+using namespace lldb_private;
+
+static std::string g_error_no_section_list = "no section list provided";
+static std::string g_error_both_value_and_address =
+    "symbol cannot contain both a value and an address";
+static std::string g_error_neither_value_or_address =
+    "symbol must contain either a value or an address";
+
+// Parses a symbol described by a file address and converts it with
+// Symbol::FromJSON, using a section list whose only section starts at file
+// address 0x1000 (the address given in the JSON text).
+TEST(JSONSymbolTest, DeserializeCodeAddress) {
+  std::string text = R"(
+{
+  "name": "foo",
+  "type": "code",
+  "size": 32,
+  "address": 4096
+})";
+
+  Expected<json::Value> json = json::parse(text);
+  // Fatal on failure: *json is dereferenced right below. The matcher also
+  // consumes the error, which a plain bool check would leave unhandled.
+  ASSERT_THAT_EXPECTED(json, llvm::Succeeded());
+
+  json::Path::Root root;
+  JSONSymbol json_symbol;
+  ASSERT_TRUE(fromJSON(*json, json_symbol, root));
+
+  SectionSP sect_sp(new Section(
+      /*module_sp=*/ModuleSP(),
+      /*obj_file=*/nullptr,
+      /*sect_id=*/1,
+      /*name=*/ConstString(".text"),
+      /*sect_type=*/eSectionTypeCode,
+      /*file_vm_addr=*/0x1000,
+      /*vm_size=*/0x1000,
+      /*file_offset=*/0,
+      /*file_size=*/0,
+      /*log2align=*/5,
+      /*flags=*/0x10203040));
+  SectionList sect_list;
+  sect_list.AddSection(sect_sp);
+
+  Expected<Symbol> symbol = Symbol::FromJSON(json_symbol, &sect_list);
+  // Fatal on failure: every check that follows dereferences symbol.
+  ASSERT_THAT_EXPECTED(symbol, llvm::Succeeded());
+  EXPECT_EQ(symbol->GetName(), ConstString("foo"));
+  EXPECT_EQ(symbol->GetFileAddress(), static_cast<lldb::addr_t>(0x1000));
+  EXPECT_EQ(symbol->GetType(), eSymbolTypeCode);
+}
+
+// Parses a symbol described by a raw value instead of an address and converts
+// it with Symbol::FromJSON, using an empty section list.
+TEST(JSONSymbolTest, DeserializeCodeValue) {
+  std::string text = R"(
+{
+  "name": "foo",
+  "type": "code",
+  "size": 32,
+  "value": 4096
+})";
+
+  Expected<json::Value> json = json::parse(text);
+  // Fatal on failure: *json is dereferenced right below.
+  ASSERT_THAT_EXPECTED(json, llvm::Succeeded());
+
+  json::Path::Root root;
+  JSONSymbol json_symbol;
+  ASSERT_TRUE(fromJSON(*json, json_symbol, root));
+
+  SectionList sect_list;
+
+  Expected<Symbol> symbol = Symbol::FromJSON(json_symbol, &sect_list);
+  // Fatal on failure: every check that follows dereferences symbol.
+  ASSERT_THAT_EXPECTED(symbol, llvm::Succeeded());
+  EXPECT_EQ(symbol->GetName(), ConstString("foo"));
+  EXPECT_EQ(symbol->GetRawValue(), static_cast<lldb::addr_t>(0x1000));
+  EXPECT_EQ(symbol->GetType(), eSymbolTypeCode);
+}
+
+// fromJSON must reject a symbol object that carries both a "value" and an
+// "address" key.
+TEST(JSONSymbolTest, JSONInvalidValueAndAddress) {
+  std::string text = R"(
+{
+  "name": "foo",
+  "type": "code",
+  "size": 32,
+  "value": 4096,
+  "address": 4096
+})";
+
+  Expected<json::Value> json = json::parse(text);
+  // Fatal on failure: *json is dereferenced right below.
+  ASSERT_THAT_EXPECTED(json, llvm::Succeeded());
+
+  json::Path::Root root;
+  JSONSymbol json_symbol;
+  ASSERT_FALSE(fromJSON(*json, json_symbol, root));
+}
+
+// fromJSON must reject a symbol object that has neither a "value" nor an
+// "address" key.
+TEST(JSONSymbolTest, JSONInvalidNoValueOrAddress) {
+  std::string text = R"(
+{
+  "name": "foo",
+  "type": "code",
+  "size": 32
+})";
+
+  Expected<json::Value> json = json::parse(text);
+  // Fatal on failure: *json is dereferenced right below.
+  ASSERT_THAT_EXPECTED(json, llvm::Succeeded());
+
+  json::Path::Root root;
+  JSONSymbol json_symbol;
+  ASSERT_FALSE(fromJSON(*json, json_symbol, root));
+}
+
+// fromJSON must reject a symbol object whose "type" string ("bogus") is not
+// a known symbol type name.
+TEST(JSONSymbolTest, JSONInvalidType) {
+  std::string text = R"(
+{
+  "name": "foo",
+  "type": "bogus",
+  "value": 4096,
+  "size": 32
+})";
+
+  Expected<json::Value> json = json::parse(text);
+  // Fatal on failure: *json is dereferenced right below.
+  ASSERT_THAT_EXPECTED(json, llvm::Succeeded());
+
+  json::Path::Root root;
+  JSONSymbol json_symbol;
+  ASSERT_FALSE(fromJSON(*json, json_symbol, root));
+}
+
+// Symbol::FromJSON is expected to fail with the "no section list" message
+// when it is given a null section list.
+TEST(JSONSymbolTest, SymbolInvalidNoSectionList) {
+  JSONSymbol input;
+  input.value = 0x1;
+
+  EXPECT_THAT_EXPECTED(Symbol::FromJSON(input, nullptr),
+                       llvm::FailedWithMessage(g_error_no_section_list));
+}
+
+// Symbol::FromJSON is expected to fail when the JSONSymbol has both its value
+// and its address set.
+TEST(JSONSymbolTest, SymbolInvalidValueAndAddress) {
+  SectionList empty_sections;
+
+  JSONSymbol input;
+  input.value = 0x1;
+  input.address = 0x2;
+
+  EXPECT_THAT_EXPECTED(Symbol::FromJSON(input, &empty_sections),
+                       llvm::FailedWithMessage(g_error_both_value_and_address));
+}
+
+// Symbol::FromJSON is expected to fail when the JSONSymbol is left
+// default-constructed, with neither value nor address assigned.
+TEST(JSONSymbolTest, SymbolInvalidNoValueOrAddress) {
+  SectionList empty_sections;
+  JSONSymbol input;
+
+  EXPECT_THAT_EXPECTED(
+      Symbol::FromJSON(input, &empty_sections),
+      llvm::FailedWithMessage(g_error_neither_value_or_address));
+}
+
+// Symbol::FromJSON is expected to fail when the symbol's address (0xfff) lies
+// just below the only section in the list, which starts at 0x1000.
+TEST(JSONSymbolTest, SymbolInvalidAddressNotInSection) {
+  SectionList sections;
+  sections.AddSection(SectionSP(new Section(
+      /*module_sp=*/ModuleSP(),
+      /*obj_file=*/nullptr,
+      /*sect_id=*/1,
+      /*name=*/ConstString(".text"),
+      /*sect_type=*/eSectionTypeCode,
+      /*file_vm_addr=*/0x1000,
+      /*vm_size=*/0x1000,
+      /*file_offset=*/0,
+      /*file_size=*/0,
+      /*log2align=*/5,
+      /*flags=*/0x10203040)));
+
+  JSONSymbol input;
+  input.address = 0x0fff;
+
+  EXPECT_THAT_EXPECTED(
+      Symbol::FromJSON(input, &sections),
+      llvm::FailedWithMessage("no section found for address: 0xfff"));
+}


        


More information about the lldb-commits mailing list