[Lldb-commits] [lldb] d4a141e - Switch over to using the LLVM archive parser for BSD archives.

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Tue Sep 5 16:54:15 PDT 2023


Author: Greg Clayton
Date: 2023-09-05T16:54:05-07:00
New Revision: d4a141ef932a596df61581090b70d0b546de68b2

URL: https://github.com/llvm/llvm-project/commit/d4a141ef932a596df61581090b70d0b546de68b2
DIFF: https://github.com/llvm/llvm-project/commit/d4a141ef932a596df61581090b70d0b546de68b2.diff

LOG: Switch over to using the LLVM archive parser for BSD archives.

Our LLDB parser didn't correctly handle archives of all flavors on different systems, it currently only correctly handled BSD archives, normal and thin, on macOS, but I noticed that it was getting incorrect information when decoding a variety of archives on linux. There were subtle changes to how names were encoded that we didn't handle correctly and we also didn't set the result of GetObjectSize() correctly as there was some bad math. This didn't matter when exracting .o files from .a files for LLDB because the size was always way too big, but it was big enough to at least read enough bytes for each object within the archive.

This patch does the following:
- switch over to use LLVM's archive parser and avoids previous code duplication
- remove values from ObjectContainerBSDArchive::Object that we don't use like:
  - uid
  - gid
  - mode
- fix ths ObjectContainerBSDArchive::Object::file_size value to be correct
- adds tests to test that we get the correct module specifications

Differential Revision: https://reviews.llvm.org/D159408

Added: 
    

Modified: 
    lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
    lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
    lldb/test/API/functionalities/archives/TestBSDArchives.py
    lldb/test/API/functionalities/archives/b.c
    lldb/test/API/functionalities/archives/c.c

Removed: 
    


################################################################################
diff  --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
index 51b2535421a6a9d..7aa5b8d81890aee 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.cpp
@@ -10,7 +10,6 @@
 
 #if defined(_WIN32) || defined(__ANDROID__)
 // Defines from ar, missing on Windows
-#define ARMAG "!<arch>\n"
 #define SARMAG 8
 #define ARFMAG "`\n"
 
@@ -32,9 +31,11 @@ typedef struct ar_hdr {
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/LLDBLog.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/Timer.h"
 
+#include "llvm/Object/Archive.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 using namespace lldb;
@@ -49,158 +50,19 @@ ObjectContainerBSDArchive::Object::Object() : ar_name() {}
 void ObjectContainerBSDArchive::Object::Clear() {
   ar_name.Clear();
   modification_time = 0;
-  uid = 0;
-  gid = 0;
-  mode = 0;
   size = 0;
   file_offset = 0;
   file_size = 0;
 }
 
-lldb::offset_t ObjectContainerBSDArchive::Object::ExtractFromThin(
-    const DataExtractor &data, lldb::offset_t offset,
-    llvm::StringRef stringTable) {
-  size_t ar_name_len = 0;
-  std::string str;
-  char *err;
-
-  // File header
-  //
-  // The common format is as follows.
-  //
-  //  Offset  Length	Name            Format
-  //  0       16      File name       ASCII right padded with spaces (no spaces
-  //  allowed in file name)
-  //  16      12      File mod        Decimal as cstring right padded with
-  //  spaces
-  //  28      6       Owner ID        Decimal as cstring right padded with
-  //  spaces
-  //  34      6       Group ID        Decimal as cstring right padded with
-  //  spaces
-  //  40      8       File mode       Octal   as cstring right padded with
-  //  spaces
-  //  48      10      File byte size  Decimal as cstring right padded with
-  //  spaces
-  //  58      2       File magic      0x60 0x0A
-
-  // Make sure there is enough data for the file header and bail if not
-  if (!data.ValidOffsetForDataOfSize(offset, 60))
-    return LLDB_INVALID_OFFSET;
-
-  str.assign((const char *)data.GetData(&offset, 16), 16);
-  if (!(llvm::StringRef(str).startswith("//") || stringTable.empty())) {
-    // Strip off any trailing spaces.
-    const size_t last_pos = str.find_last_not_of(' ');
-    if (last_pos != std::string::npos) {
-      if (last_pos + 1 < 16)
-        str.erase(last_pos + 1);
-    }
-    int start = strtoul(str.c_str() + 1, &err, 10);
-    int end = stringTable.find('\n', start);
-    str.assign(stringTable.data() + start, end - start - 1);
-    ar_name.SetCString(str.c_str());
-  }
-
-  str.assign((const char *)data.GetData(&offset, 12), 12);
-  modification_time = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  uid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  gid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 8), 8);
-  mode = strtoul(str.c_str(), &err, 8);
-
-  str.assign((const char *)data.GetData(&offset, 10), 10);
-  size = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 2), 2);
-  if (str == ARFMAG) {
-    file_offset = offset;
-    file_size = size - ar_name_len;
-    return offset;
-  }
-  return LLDB_INVALID_OFFSET;
-}
-
-lldb::offset_t
-ObjectContainerBSDArchive::Object::Extract(const DataExtractor &data,
-                                           lldb::offset_t offset) {
-  size_t ar_name_len = 0;
-  std::string str;
-  char *err;
-
-  // File header
-  //
-  // The common format is as follows.
-  //
-  //  Offset  Length	Name            Format
-  //  0       16      File name       ASCII right padded with spaces (no spaces
-  //  allowed in file name)
-  //  16      12      File mod        Decimal as cstring right padded with
-  //  spaces
-  //  28      6       Owner ID        Decimal as cstring right padded with
-  //  spaces
-  //  34      6       Group ID        Decimal as cstring right padded with
-  //  spaces
-  //  40      8       File mode       Octal   as cstring right padded with
-  //  spaces
-  //  48      10      File byte size  Decimal as cstring right padded with
-  //  spaces
-  //  58      2       File magic      0x60 0x0A
-
-  // Make sure there is enough data for the file header and bail if not
-  if (!data.ValidOffsetForDataOfSize(offset, 60))
-    return LLDB_INVALID_OFFSET;
-
-  str.assign((const char *)data.GetData(&offset, 16), 16);
-  if (llvm::StringRef(str).startswith("#1/")) {
-    // If the name is longer than 16 bytes, or contains an embedded space then
-    // it will use this format where the length of the name is here and the
-    // name characters are after this header.
-    ar_name_len = strtoul(str.c_str() + 3, &err, 10);
-  } else {
-    // Strip off any trailing spaces.
-    const size_t last_pos = str.find_last_not_of(' ');
-    if (last_pos != std::string::npos) {
-      if (last_pos + 1 < 16)
-        str.erase(last_pos + 1);
-    }
-    ar_name.SetCString(str.c_str());
-  }
-
-  str.assign((const char *)data.GetData(&offset, 12), 12);
-  modification_time = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  uid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 6), 6);
-  gid = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 8), 8);
-  mode = strtoul(str.c_str(), &err, 8);
-
-  str.assign((const char *)data.GetData(&offset, 10), 10);
-  size = strtoul(str.c_str(), &err, 10);
-
-  str.assign((const char *)data.GetData(&offset, 2), 2);
-  if (str == ARFMAG) {
-    if (ar_name_len > 0) {
-      const void *ar_name_ptr = data.GetData(&offset, ar_name_len);
-      // Make sure there was enough data for the string value and bail if not
-      if (ar_name_ptr == nullptr)
-        return LLDB_INVALID_OFFSET;
-      str.assign((const char *)ar_name_ptr, ar_name_len);
-      ar_name.SetCString(str.c_str());
-    }
-    file_offset = offset;
-    file_size = size - ar_name_len;
-    return offset;
-  }
-  return LLDB_INVALID_OFFSET;
+void ObjectContainerBSDArchive::Object::Dump() const {
+  printf("name        = \"%s\"\n", ar_name.GetCString());
+  printf("mtime       = 0x%8.8" PRIx32 "\n", modification_time);
+  printf("size        = 0x%8.8" PRIx32 " (%" PRIu32 ")\n", size, size);
+  printf("file_offset = 0x%16.16" PRIx64 " (%" PRIu64 ")\n", file_offset,
+         file_offset);
+  printf("file_size   = 0x%16.16" PRIx64 " (%" PRIu64 ")\n\n", file_size,
+         file_size);
 }
 
 ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch,
@@ -211,72 +73,79 @@ ObjectContainerBSDArchive::Archive::Archive(const lldb_private::ArchSpec &arch,
     : m_arch(arch), m_modification_time(time), m_file_offset(file_offset),
       m_objects(), m_data(data), m_archive_type(archive_type) {}
 
+Log *l = GetLog(LLDBLog::Object);
 ObjectContainerBSDArchive::Archive::~Archive() = default;
 
 size_t ObjectContainerBSDArchive::Archive::ParseObjects() {
   DataExtractor &data = m_data;
-  std::string str;
-  lldb::offset_t offset = 0;
-  str.assign((const char *)data.GetData(&offset, SARMAG), SARMAG);
-  if (str == ARMAG) {
-    Object obj;
-    do {
-      offset = obj.Extract(data, offset);
-      if (offset == LLDB_INVALID_OFFSET)
-        break;
-      size_t obj_idx = m_objects.size();
-      m_objects.push_back(obj);
-      // Insert all of the C strings out of order for now...
-      m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-      offset += obj.file_size;
-      obj.Clear();
-    } while (data.ValidOffset(offset));
-
-    // Now sort all of the object name pointers
-    m_object_name_to_index_map.Sort();
-  } else if (str == ThinArchiveMagic) {
-    Object obj;
-    size_t obj_idx;
-
-    // Retrieve symbol table
-    offset = obj.ExtractFromThin(data, offset, "");
-    if (offset == LLDB_INVALID_OFFSET)
-      return m_objects.size();
-    obj_idx = m_objects.size();
-    m_objects.push_back(obj);
-    // Insert all of the C strings out of order for now...
-    m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-    offset += obj.file_size;
-    obj.Clear();
 
-    // Retrieve string table
-    offset = obj.ExtractFromThin(data, offset, "");
-    if (offset == LLDB_INVALID_OFFSET)
-      return m_objects.size();
-    obj_idx = m_objects.size();
-    m_objects.push_back(obj);
-    // Insert all of the C strings out of order for now...
-    m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-    // Extract string table
-    llvm::StringRef strtab((const char *)data.GetData(&offset, obj.size),
-                           obj.size);
+  std::unique_ptr<llvm::MemoryBuffer> mem_buffer =
+      llvm::MemoryBuffer::getMemBuffer(
+            llvm::StringRef((const char *)data.GetDataStart(),
+                            data.GetByteSize()),
+            llvm::StringRef(),
+            /*RequiresNullTerminator=*/false);
+
+  auto exp_ar = llvm::object::Archive::create(mem_buffer->getMemBufferRef());
+  if (!exp_ar) {
+    LLDB_LOG_ERROR(l, exp_ar.takeError(), "failed to create archive: {0}");
+    return 0;
+  }
+  auto llvm_archive = std::move(exp_ar.get());
+
+  llvm::Error iter_err = llvm::Error::success();
+  Object obj;
+  for (const auto &child: llvm_archive->children(iter_err)) {
     obj.Clear();
+    auto exp_name = child.getName();
+    if (exp_name) {
+      obj.ar_name = ConstString(exp_name.get());
+    } else {
+      LLDB_LOG_ERROR(l, exp_name.takeError(),
+                     "failed to get archive object name: {0}");
+      continue;
+    }
+
+    auto exp_mtime = child.getLastModified();
+    if (exp_mtime) {
+      obj.modification_time =
+          std::chrono::duration_cast<std::chrono::seconds>(
+              std::chrono::time_point_cast<std::chrono::seconds>(
+                    exp_mtime.get()).time_since_epoch()).count();
+    } else {
+      LLDB_LOG_ERROR(l, exp_mtime.takeError(),
+                     "failed to get archive object time: {0}");
+      continue;
+    }
 
-    // Retrieve object files
-    do {
-      offset = obj.ExtractFromThin(data, offset, strtab);
-      if (offset == LLDB_INVALID_OFFSET)
-        break;
-      obj_idx = m_objects.size();
-      m_objects.push_back(obj);
-      // Insert all of the C strings out of order for now...
-      m_object_name_to_index_map.Append(obj.ar_name, obj_idx);
-      obj.Clear();
-    } while (data.ValidOffset(offset));
-
-    // Now sort all of the object name pointers
-    m_object_name_to_index_map.Sort();
+    auto exp_size = child.getRawSize();
+    if (exp_size) {
+      obj.size = exp_size.get();
+    } else {
+      LLDB_LOG_ERROR(l, exp_size.takeError(),
+                     "failed to get archive object size: {0}");
+      continue;
+    }
+
+    obj.file_offset = child.getDataOffset();
+
+    auto exp_file_size = child.getSize();
+    if (exp_file_size) {
+      obj.file_size = exp_file_size.get();
+    } else {
+      LLDB_LOG_ERROR(l, exp_file_size.takeError(),
+                     "failed to get archive object file size: {0}");
+      continue;
+    }
+    m_object_name_to_index_map.Append(obj.ar_name, m_objects.size());
+    m_objects.push_back(obj);
+  }
+  if (iter_err) {
+    LLDB_LOG_ERROR(l, std::move(iter_err),
+                   "failed to iterate over archive objects: {0}");
   }
+  // Now sort all of the object name pointers
+  m_object_name_to_index_map.Sort();
   return m_objects.size();
 }
 
@@ -462,20 +331,21 @@ ObjectContainer *ObjectContainerBSDArchive::CreateInstance(
 ArchiveType
 ObjectContainerBSDArchive::MagicBytesMatch(const DataExtractor &data) {
   uint32_t offset = 0;
-  const char *armag = (const char *)data.PeekData(offset, sizeof(ar_hdr));
+  const char *armag = (const char *)data.PeekData(offset,
+                                                  sizeof(ar_hdr) + SARMAG);
   if (armag == nullptr)
     return ArchiveType::Invalid;
-  if (::strncmp(armag, ARMAG, SARMAG) == 0) {
-    armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
-    if (strncmp(armag, ARFMAG, 2) == 0)
-      return ArchiveType::Archive;
-  } else if (::strncmp(armag, ThinArchiveMagic, strlen(ThinArchiveMagic)) ==
-             0) {
-    armag += offsetof(struct ar_hdr, ar_fmag) + strlen(ThinArchiveMagic);
-    if (strncmp(armag, ARFMAG, 2) == 0) {
-      return ArchiveType::ThinArchive;
-    }
-  }
+  ArchiveType result = ArchiveType::Invalid;
+  if (strncmp(armag, ArchiveMagic, SARMAG) == 0)
+      result = ArchiveType::Archive;
+  else if (strncmp(armag, ThinArchiveMagic, SARMAG) == 0)
+      result = ArchiveType::ThinArchive;
+  else
+      return ArchiveType::Invalid;
+
+  armag += offsetof(struct ar_hdr, ar_fmag) + SARMAG;
+  if (strncmp(armag, ARFMAG, 2) == 0)
+      return result;
   return ArchiveType::Invalid;
 }
 
@@ -623,7 +493,7 @@ size_t ObjectContainerBSDArchive::GetModuleSpecifications(
                 std::chrono::seconds(object->modification_time));
             spec.GetObjectName() = object->ar_name;
             spec.SetObjectOffset(object_file_offset);
-            spec.SetObjectSize(file_size - object_file_offset);
+            spec.SetObjectSize(object->file_size);
             spec.GetObjectModificationTime() = object_mod_time;
           }
         }

diff  --git a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
index 1e2ffce3e5e2fe9..fbecd1d27063e7c 100644
--- a/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
+++ b/lldb/source/Plugins/ObjectContainer/BSD-Archive/ObjectContainerBSDArchive.h
@@ -93,15 +93,6 @@ class ObjectContainerBSDArchive : public lldb_private::ObjectContainer {
     /// Object modification time in the archive.
     uint32_t modification_time = 0;
 
-    /// Object user id in the archive.
-    uint16_t uid = 0;
-
-    /// Object group id in the archive.
-    uint16_t gid = 0;
-
-    /// Object octal file permissions in the archive.
-    uint16_t mode = 0;
-
     /// Object size in bytes in the archive.
     uint32_t size = 0;
 
@@ -110,6 +101,8 @@ class ObjectContainerBSDArchive : public lldb_private::ObjectContainer {
 
     /// Length of the object data.
     lldb::offset_t file_size = 0;
+
+    void Dump() const;
   };
 
   class Archive {

diff  --git a/lldb/test/API/functionalities/archives/TestBSDArchives.py b/lldb/test/API/functionalities/archives/TestBSDArchives.py
index bee5a37c728a29a..cefcb95cb9e0b6c 100644
--- a/lldb/test/API/functionalities/archives/TestBSDArchives.py
+++ b/lldb/test/API/functionalities/archives/TestBSDArchives.py
@@ -70,12 +70,6 @@ def test(self):
         )
         self.expect_var_path("__b_global", type="int", value="2")
 
-        # Test loading thin archives
-        archive_path = self.getBuildArtifact("libbar.a")
-        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(archive_path)
-        num_specs = module_specs.GetSize()
-        self.assertEqual(num_specs, 1)
-        self.assertEqual(module_specs.GetSpecAtIndex(0).GetObjectName(), "c.o")
 
     def check_frame_variable_errors(self, thread, error_strings):
         command_result = lldb.SBCommandReturnObject()
@@ -129,6 +123,53 @@ def test_frame_var_errors_when_archive_missing(self):
         ]
         self.check_frame_variable_errors(thread, error_strings)
 
+    @skipIfRemote
+    def test_archive_specifications(self):
+        """
+        Create archives and make sure the information we get when retrieving
+        the modules specifications is correct.
+        """
+        self.build()
+        libbar_path = self.getBuildArtifact("libbar.a")
+        libfoo_path = self.getBuildArtifact("libfoo.a")
+        libfoothin_path = self.getBuildArtifact("libfoo-thin.a")
+        objfile_a = self.getBuildArtifact("a.o")
+        objfile_b = self.getBuildArtifact("b.o")
+        objfile_c = self.getBuildArtifact("c.o")
+        size_a = os.path.getsize(objfile_a)
+        size_b = os.path.getsize(objfile_b)
+        size_c = os.path.getsize(objfile_c)
+
+        # Test loading normal archives
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoo_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 2)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "a.o")
+        self.assertEqual(spec.GetObjectSize(), size_a)
+        spec = module_specs.GetSpecAtIndex(1)
+        self.assertEqual(spec.GetObjectName(), "b.o")
+        self.assertEqual(spec.GetObjectSize(), size_b)
+
+        # Test loading thin archives
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libbar_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 1)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "c.o")
+        self.assertEqual(spec.GetObjectSize(), size_c)
+
+        module_specs = lldb.SBModuleSpecList.GetModuleSpecifications(libfoothin_path)
+        num_specs = module_specs.GetSize()
+        self.assertEqual(num_specs, 2)
+        spec = module_specs.GetSpecAtIndex(0)
+        self.assertEqual(spec.GetObjectName(), "a.o")
+        self.assertEqual(spec.GetObjectSize(), size_a)
+        spec = module_specs.GetSpecAtIndex(1)
+        self.assertEqual(spec.GetObjectName(), "b.o")
+        self.assertEqual(spec.GetObjectSize(), size_b, libfoothin_path)
+
+
     @skipIfRemote
     @skipUnlessDarwin
     def test_frame_var_errors_when_thin_archive_malformed(self):
@@ -142,6 +183,8 @@ def test_frame_var_errors_when_thin_archive_malformed(self):
         libfoo_path = self.getBuildArtifact("libfoo.a")
         libthin_path = self.getBuildArtifact("libfoo-thin.a")
         objfile_a = self.getBuildArtifact("a.o")
+        objfile_b = self.getBuildArtifact("b.o")
+        objfile_c = self.getBuildArtifact("c.o")
         # Replace the libfoo.a file with a thin archive containing the same
         # debug information (a.o, b.o). Then remove a.o from the file system
         # so we force an error when we set a breakpoint on a() function.

diff  --git a/lldb/test/API/functionalities/archives/b.c b/lldb/test/API/functionalities/archives/b.c
index 0352c85d0117543..33f09937f07eb23 100644
--- a/lldb/test/API/functionalities/archives/b.c
+++ b/lldb/test/API/functionalities/archives/b.c
@@ -1,4 +1,5 @@
 static int __b_global = 2;
+char __extra[4096]; // Make sure sizeof b.o 
diff ers from a.o and c.o
 
 int b(int arg) {
     int result = arg + __b_global;

diff  --git a/lldb/test/API/functionalities/archives/c.c b/lldb/test/API/functionalities/archives/c.c
index 08581fe4f37a86f..698e54475d44c6f 100644
--- a/lldb/test/API/functionalities/archives/c.c
+++ b/lldb/test/API/functionalities/archives/c.c
@@ -1,5 +1,6 @@
 static int __c_global = 3;
-
+char __extra[4096]; // Make sure sizeof b.o 
diff ers from a.o and c.o
+char __extra2[4096]; // Make sure sizeof b.o 
diff ers from a.o and c.o
 int c(int arg) {
   int result = arg + __c_global;
   return result;


        


More information about the lldb-commits mailing list