[Lldb-commits] [lldb] [llvm] [lldb][DWARF] Replace lldb's DWARFDebugArangeSet with llvm's (PR #110058)

Zequan Wu via lldb-commits lldb-commits at lists.llvm.org
Tue Oct 1 10:25:32 PDT 2024


https://github.com/ZequanWu updated https://github.com/llvm/llvm-project/pull/110058

>From 8e1c59729905fb89a8764ace3dfa0d04119d273e Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu at google.com>
Date: Wed, 25 Sep 2024 15:59:29 -0700
Subject: [PATCH 1/5] [lldb][DWARF] Replace lldb's DWARFDebugArangeSet with
 llvm's

---
 .../Plugins/SymbolFile/DWARF/CMakeLists.txt   |   1 -
 .../SymbolFile/DWARF/DWARFDebugArangeSet.cpp  | 176 ------------------
 .../SymbolFile/DWARF/DWARFDebugArangeSet.h    |  70 -------
 .../SymbolFile/DWARF/DWARFDebugAranges.cpp    |  47 ++---
 .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp |  56 +++---
 .../DebugInfo/DWARF/DWARFDebugArangeSet.h     |   6 +
 .../DebugInfo/DWARF/DWARFDebugArangeSet.cpp   |  12 +-
 7 files changed, 55 insertions(+), 313 deletions(-)
 delete mode 100644 lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
 delete mode 100644 lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt
index 0e4fd5b995d1ba..83c4f1a4cf892c 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt
+++ b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt
@@ -18,7 +18,6 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN
   DWARFContext.cpp
   DWARFDataExtractor.cpp
   DWARFDebugAranges.cpp
-  DWARFDebugArangeSet.cpp
   DWARFDebugInfo.cpp
   DWARFDebugInfoEntry.cpp
   DWARFDebugMacro.cpp
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
deleted file mode 100644
index 8461b94abca630..00000000000000
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-//===-- DWARFDebugArangeSet.cpp -------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "DWARFDebugArangeSet.h"
-#include "DWARFDataExtractor.h"
-#include "LogChannelDWARF.h"
-#include "llvm/Object/Error.h"
-#include <cassert>
-
-using namespace lldb_private;
-using namespace lldb_private::plugin::dwarf;
-
-DWARFDebugArangeSet::DWARFDebugArangeSet()
-    : m_offset(DW_INVALID_OFFSET), m_next_offset(DW_INVALID_OFFSET) {}
-
-void DWARFDebugArangeSet::Clear() {
-  m_offset = DW_INVALID_OFFSET;
-  m_next_offset = DW_INVALID_OFFSET;
-  m_header.length = 0;
-  m_header.version = 0;
-  m_header.cu_offset = 0;
-  m_header.addr_size = 0;
-  m_header.seg_size = 0;
-  m_arange_descriptors.clear();
-}
-
-llvm::Error DWARFDebugArangeSet::extract(const DWARFDataExtractor &data,
-                                         lldb::offset_t *offset_ptr) {
-  assert(data.ValidOffset(*offset_ptr));
-
-  m_arange_descriptors.clear();
-  m_offset = *offset_ptr;
-
-  // 7.20 Address Range Table
-  //
-  // Each set of entries in the table of address ranges contained in the
-  // .debug_aranges section begins with a header consisting of: a 4-byte
-  // length containing the length of the set of entries for this compilation
-  // unit, not including the length field itself; a 2-byte version identifier
-  // containing the value 2 for DWARF Version 2; a 4-byte offset into
-  // the.debug_infosection; a 1-byte unsigned integer containing the size in
-  // bytes of an address (or the offset portion of an address for segmented
-  // addressing) on the target system; and a 1-byte unsigned integer
-  // containing the size in bytes of a segment descriptor on the target
-  // system. This header is followed by a series of tuples. Each tuple
-  // consists of an address and a length, each in the size appropriate for an
-  // address on the target architecture.
-  m_header.length = data.GetDWARFInitialLength(offset_ptr);
-  // The length could be 4 bytes or 12 bytes, so use the current offset to
-  // determine the next offset correctly.
-  if (m_header.length > 0)
-    m_next_offset = *offset_ptr + m_header.length;
-  else
-    m_next_offset = DW_INVALID_OFFSET;
-  m_header.version = data.GetU16(offset_ptr);
-  m_header.cu_offset = data.GetDWARFOffset(offset_ptr);
-  m_header.addr_size = data.GetU8(offset_ptr);
-  m_header.seg_size = data.GetU8(offset_ptr);
-
-  // Try to avoid reading invalid arange sets by making sure:
-  // 1 - the version looks good
-  // 2 - the address byte size looks plausible
-  // 3 - the length seems to make sense
-  // 4 - size looks plausible
-  // 5 - the arange tuples do not contain a segment field
-  if (m_header.version < 2 || m_header.version > 5)
-    return llvm::make_error<llvm::object::GenericBinaryError>(
-        "Invalid arange header version");
-
-  if (m_header.addr_size != 4 && m_header.addr_size != 8)
-    return llvm::make_error<llvm::object::GenericBinaryError>(
-        "Invalid arange header address size");
-
-  if (m_header.length == 0)
-    return llvm::make_error<llvm::object::GenericBinaryError>(
-        "Invalid arange header length");
-
-  if (!data.ValidOffset(m_offset + sizeof(m_header.length) + m_header.length -
-                        1))
-    return llvm::make_error<llvm::object::GenericBinaryError>(
-        "Invalid arange header length");
-
-  if (m_header.seg_size)
-    return llvm::make_error<llvm::object::GenericBinaryError>(
-        "segmented arange entries are not supported");
-
-  // The first tuple following the header in each set begins at an offset
-  // that is a multiple of the size of a single tuple (that is, twice the
-  // size of an address). The header is padded, if necessary, to the
-  // appropriate boundary.
-  const uint32_t header_size = *offset_ptr - m_offset;
-  const uint32_t tuple_size = m_header.addr_size << 1;
-  uint32_t first_tuple_offset = 0;
-  while (first_tuple_offset < header_size)
-    first_tuple_offset += tuple_size;
-
-  *offset_ptr = m_offset + first_tuple_offset;
-
-  Descriptor arangeDescriptor;
-
-  static_assert(sizeof(arangeDescriptor.address) ==
-                    sizeof(arangeDescriptor.length),
-                "DWARFDebugArangeSet::Descriptor.address and "
-                "DWARFDebugArangeSet::Descriptor.length must have same size");
-
-  const lldb::offset_t next_offset = GetNextOffset();
-  assert(next_offset != DW_INVALID_OFFSET);
-  uint32_t num_terminators = 0;
-  bool last_was_terminator = false;
-  while (*offset_ptr < next_offset) {
-    arangeDescriptor.address = data.GetMaxU64(offset_ptr, m_header.addr_size);
-    arangeDescriptor.length = data.GetMaxU64(offset_ptr, m_header.addr_size);
-
-    // Each set of tuples is terminated by a 0 for the address and 0 for
-    // the length. Some linkers can emit .debug_aranges with multiple
-    // terminator pair entries that are still withing the length of the
-    // DWARFDebugArangeSet. We want to be sure to parse all entries for
-    // this DWARFDebugArangeSet so that we don't stop parsing early and end up
-    // treating addresses as a header of the next DWARFDebugArangeSet. We also
-    // need to make sure we parse all valid address pairs so we don't omit them
-    // from the aranges result, so we can't stop at the first terminator entry
-    // we find.
-    if (arangeDescriptor.address == 0 && arangeDescriptor.length == 0) {
-      ++num_terminators;
-      last_was_terminator = true;
-    } else {
-      last_was_terminator = false;
-      // Only add .debug_aranges address entries that have a non zero size.
-      // Some linkers will zero out the length field for some .debug_aranges
-      // entries if they were stripped. We also could watch out for multiple
-      // entries at address zero and remove those as well.
-      if (arangeDescriptor.length > 0)
-        m_arange_descriptors.push_back(arangeDescriptor);
-    }
-  }
-  if (num_terminators > 1) {
-    Log *log = GetLog(DWARFLog::DebugInfo);
-    LLDB_LOG(log,
-             "warning: DWARFDebugArangeSet at %#" PRIx64 " contains %u "
-             "terminator entries",
-             m_offset, num_terminators);
-  }
-  if (last_was_terminator)
-    return llvm::ErrorSuccess();
-
-  return llvm::make_error<llvm::object::GenericBinaryError>(
-      "arange descriptors not terminated by null entry");
-}
-
-class DescriptorContainsAddress {
-public:
-  DescriptorContainsAddress(dw_addr_t address) : m_address(address) {}
-  bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const {
-    return (m_address >= desc.address) &&
-           (m_address < (desc.address + desc.length));
-  }
-
-private:
-  const dw_addr_t m_address;
-};
-
-dw_offset_t DWARFDebugArangeSet::FindAddress(dw_addr_t address) const {
-  DescriptorConstIter end = m_arange_descriptors.end();
-  DescriptorConstIter pos =
-      std::find_if(m_arange_descriptors.begin(), end,   // Range
-                   DescriptorContainsAddress(address)); // Predicate
-  if (pos != end)
-    return m_header.cu_offset;
-
-  return DW_INVALID_OFFSET;
-}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h
deleted file mode 100644
index ecdbe953f58b05..00000000000000
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- DWARFDebugArangeSet.h -----------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H
-#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H
-
-#include "lldb/Core/dwarf.h"
-#include <cstdint>
-#include <vector>
-
-namespace lldb_private::plugin {
-namespace dwarf {
-class DWARFDebugArangeSet {
-public:
-  struct Header {
-    /// The total length of the entries for that set, not including the length
-    /// field itself.
-    uint32_t length = 0;
-    /// The DWARF version number.
-    uint16_t version = 0;
-    /// The offset from the beginning of the .debug_info section of the
-    /// compilation unit entry referenced by the table.
-    uint32_t cu_offset = 0;
-    /// The size in bytes of an address on the target architecture. For
-    /// segmented addressing, this is the size of the offset portion of the
-    /// address.
-    uint8_t addr_size = 0;
-    /// The size in bytes of a segment descriptor on the target architecture.
-    /// If the target system uses a flat address space, this value is 0.
-    uint8_t seg_size = 0;
-  };
-
-  struct Descriptor {
-    dw_addr_t address;
-    dw_addr_t length;
-    dw_addr_t end_address() const { return address + length; }
-  };
-
-  DWARFDebugArangeSet();
-  void Clear();
-  void SetOffset(uint32_t offset) { m_offset = offset; }
-  llvm::Error extract(const DWARFDataExtractor &data,
-                      lldb::offset_t *offset_ptr);
-  dw_offset_t FindAddress(dw_addr_t address) const;
-  size_t NumDescriptors() const { return m_arange_descriptors.size(); }
-  const Header &GetHeader() const { return m_header; }
-  dw_offset_t GetNextOffset() const { return m_next_offset; }
-  const Descriptor &GetDescriptorRef(uint32_t i) const {
-    return m_arange_descriptors[i];
-  }
-
-protected:
-  typedef std::vector<Descriptor> DescriptorColl;
-  typedef DescriptorColl::iterator DescriptorIter;
-  typedef DescriptorColl::const_iterator DescriptorConstIter;
-
-  dw_offset_t m_offset;
-  dw_offset_t m_next_offset;
-  Header m_header;
-  DescriptorColl m_arange_descriptors;
-};
-} // namespace dwarf
-} // namespace lldb_private::plugin
-
-#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFDEBUGARANGESET_H
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index f383261e8a5fc8..03ab45ac988d1d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DWARFDebugAranges.h"
-#include "DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
 #include "DWARFUnit.h"
 #include "LogChannelDWARF.h"
 #include "lldb/Utility/Log.h"
@@ -16,56 +16,35 @@
 using namespace lldb;
 using namespace lldb_private;
 using namespace lldb_private::plugin::dwarf;
+using llvm::DWARFDebugArangeSet;
 
 // Constructor
 DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {}
 
-// CountArangeDescriptors
-class CountArangeDescriptors {
-public:
-  CountArangeDescriptors(uint32_t &count_ref) : count(count_ref) {
-    //      printf("constructor CountArangeDescriptors()\n");
-  }
-  void operator()(const DWARFDebugArangeSet &set) {
-    count += set.NumDescriptors();
-  }
-  uint32_t &count;
-};
-
 // Extract
 void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
+  llvm::DWARFDataExtractor llvm_dwarf_data = debug_aranges_data.GetAsLLVMDWARF();
   lldb::offset_t offset = 0;
 
   DWARFDebugArangeSet set;
   Range range;
-  while (debug_aranges_data.ValidOffset(offset)) {
+  while (llvm_dwarf_data.isValidOffset(offset)) {
     const lldb::offset_t set_offset = offset;
-    if (llvm::Error error = set.extract(debug_aranges_data, &offset)) {
+    if (llvm::Error error = set.extract(llvm_dwarf_data, &offset, nullptr)) {
       Log *log = GetLog(DWARFLog::DebugInfo);
       LLDB_LOG_ERROR(log, std::move(error),
                      "DWARFDebugAranges::extract failed to extract "
                      ".debug_aranges set at offset {1:x}: {0}",
                      set_offset);
-    } else {
-      const uint32_t num_descriptors = set.NumDescriptors();
-      if (num_descriptors > 0) {
-        const dw_offset_t cu_offset = set.GetHeader().cu_offset;
-
-        for (uint32_t i = 0; i < num_descriptors; ++i) {
-          const DWARFDebugArangeSet::Descriptor &descriptor =
-              set.GetDescriptorRef(i);
-          m_aranges.Append(RangeToDIE::Entry(descriptor.address,
-                                             descriptor.length, cu_offset));
-        }
-      }
+      set.clear();
+      return;
+    }
+    uint64_t cu_offset = set.getCompileUnitDIEOffset();
+    for (const auto &desc : set.descriptors()) {
+      if (desc.Length != 0)
+        m_aranges.Append(
+            RangeToDIE::Entry(desc.Address, desc.Length, cu_offset));
     }
-    // Always use the previous DWARFDebugArangeSet's information to calculate
-    // the offset of the next DWARFDebugArangeSet in case we entouncter an
-    // error in the current DWARFDebugArangeSet and our offset position is
-    // still in the middle of the data. If we do this, we can parse all valid
-    // DWARFDebugArangeSet objects without returning invalid errors.
-    offset = set.GetNextOffset();
-    set.Clear();
   }
 }
 
diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index 3192efe8b339b1..254f3ac8aa618c 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -9,6 +9,7 @@
 #include "gtest/gtest.h"
 
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolData.h"
 #include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
 #include "llvm/Support/FileSystem.h"
@@ -16,7 +17,6 @@
 
 #include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h"
 #include "Plugins/SymbolFile/DWARF/DWARFDataExtractor.h"
-#include "Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h"
 #include "Plugins/SymbolFile/DWARF/DWARFDebugAranges.h"
 #include "Plugins/SymbolFile/DWARF/SymbolFileDWARF.h"
 #include "Plugins/SymbolFile/PDB/SymbolFilePDB.h"
@@ -39,6 +39,7 @@ using namespace lldb;
 using namespace lldb_private;
 using namespace lldb_private::dwarf;
 using namespace lldb_private::plugin::dwarf;
+using llvm::DWARFDebugArangeSet;
 
 class SymbolFileDWARFTests : public testing::Test {
   SubsystemRAII<FileSystem, HostInfo, ObjectFilePECOFF, SymbolFileDWARF,
@@ -91,14 +92,15 @@ TEST_F(SymbolFileDWARFTests, ParseArangesNonzeroSegmentSize) {
       // END TUPLES
       0, 0, 0, 0, 0, 0, 0, 0, 0 // terminator
   };
-  DWARFDataExtractor data;
-  data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
-               lldb::ByteOrder::eByteOrderBig);
+  llvm::DWARFDataExtractor data(llvm::ArrayRef(binary_data),
+                                /*isLittleEndian=*/false, /*AddrSize=*/4);
+
   DWARFDebugArangeSet debug_aranges;
   offset_t off = 0;
-  llvm::Error error = debug_aranges.extract(data, &off);
+  llvm::Error error = debug_aranges.extract(data, &off, nullptr);
   EXPECT_TRUE(bool(error));
-  EXPECT_EQ("segmented arange entries are not supported",
+  EXPECT_EQ("non-zero segment selector size in address range table at offset "
+            "0x0 is not supported",
             llvm::toString(std::move(error)));
   EXPECT_EQ(off, 12U); // Parser should read no further than the segment size
 }
@@ -132,21 +134,20 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
   // Set the big endian length correctly.
   const offset_t binary_data_size = sizeof(binary_data);
   binary_data[3] = (uint8_t)binary_data_size - 4;
-  DWARFDataExtractor data;
-  data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
-               lldb::ByteOrder::eByteOrderBig);
+  llvm::DWARFDataExtractor data(llvm::ArrayRef(binary_data),
+                                /*isLittleEndian=*/false, /*AddrSize=*/4);
   DWARFDebugArangeSet set;
   offset_t off = 0;
-  llvm::Error error = set.extract(data, &off);
+  llvm::Error error = set.extract(data, &off, nullptr);
   // Multiple terminators are not fatal as they do appear in binaries.
   EXPECT_FALSE(bool(error));
   // Parser should read all terminators to the end of the length specified.
   EXPECT_EQ(off, binary_data_size);
-  ASSERT_EQ(set.NumDescriptors(), 2U);
-  ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000);
-  ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100);
-  ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000);
-  ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10);
+  ASSERT_EQ(set.getDescriptorsSize(), 4U);
+  ASSERT_EQ(set.getDescriptiorRef(1).Address, (dw_addr_t)0x1000);
+  ASSERT_EQ(set.getDescriptiorRef(1).Length, (dw_addr_t)0x100);
+  ASSERT_EQ(set.getDescriptiorRef(3).Address, (dw_addr_t)0x2000);
+  ASSERT_EQ(set.getDescriptiorRef(3).Length, (dw_addr_t)0x10);
 }
 
 TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
@@ -156,7 +157,7 @@ TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
   unsigned char binary_data[] = {
       0, 0, 0, 0, // unit_length that will be set correctly after this
       0, 2,       // DWARF version number (uint16_t)
-      0, 0, 0, 0, // CU offset (ignored for the purposes of this test)
+      0, 0, 0, 255, // CU offset
       4,          // address size
       0,          // segment size
       0, 0, 0, 0, // alignment for the first tuple
@@ -174,22 +175,22 @@ TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
   DWARFDataExtractor data;
   data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
                lldb::ByteOrder::eByteOrderBig);
-  DWARFDebugArangeSet set;
-  offset_t off = 0;
-  llvm::Error error = set.extract(data, &off);
-  // Multiple terminators are not fatal as they do appear in binaries.
-  EXPECT_FALSE(bool(error));
+  DWARFDebugAranges debug_aranges;
+  debug_aranges.extract(data);
   // Parser should read all terminators to the end of the length specified.
   // Previously the DWARFDebugArangeSet would stop at the first terminator
   // entry and leave the offset in the middle of the current
   // DWARFDebugArangeSet data, and that would cause the next extracted
   // DWARFDebugArangeSet to fail.
-  EXPECT_EQ(off, binary_data_size);
-  ASSERT_EQ(set.NumDescriptors(), 2U);
-  ASSERT_EQ(set.GetDescriptorRef(0).address, (dw_addr_t)0x1000);
-  ASSERT_EQ(set.GetDescriptorRef(0).length, (dw_addr_t)0x100);
-  ASSERT_EQ(set.GetDescriptorRef(1).address, (dw_addr_t)0x2000);
-  ASSERT_EQ(set.GetDescriptorRef(1).length, (dw_addr_t)0x10);
+  ASSERT_EQ(debug_aranges.GetNumRanges(), 2U);
+  EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2000), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2010 - 1), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2010), DW_INVALID_OFFSET);
 }
 
 TEST_F(SymbolFileDWARFTests, ParseAranges) {
@@ -212,6 +213,7 @@ TEST_F(SymbolFileDWARFTests, ParseAranges) {
       0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Size    0x0100
       // Terminating tuple
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // Terminator
   };
   // Set the little endian length correctly.
   binary_data[0] = sizeof(binary_data) - 4;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 760d8826771c09..4afb65f4de59ac 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -73,6 +73,12 @@ class DWARFDebugArangeSet {
     return desc_iterator_range(ArangeDescriptors.begin(),
                                ArangeDescriptors.end());
   }
+
+  size_t getDescriptorsSize() const { return ArangeDescriptors.size(); }
+
+  const Descriptor &getDescriptiorRef(uint32_t I) const {
+    return ArangeDescriptors[I];
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index c60c9d9d722729..f64c71efd87e14 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -144,11 +144,13 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
     if (arangeDescriptor.Length == 0 && arangeDescriptor.Address == 0) {
       if (*offset_ptr == end_offset)
         return ErrorSuccess();
-      WarningHandler(createStringError(
-          errc::invalid_argument,
-          "address range table at offset 0x%" PRIx64
-          " has a premature terminator entry at offset 0x%" PRIx64,
-          Offset, EntryOffset));
+      if (WarningHandler) {
+        WarningHandler(createStringError(
+            errc::invalid_argument,
+            "address range table at offset 0x%" PRIx64
+            " has a premature terminator entry at offset 0x%" PRIx64,
+            Offset, EntryOffset));
+      }
     }
 
     ArangeDescriptors.push_back(arangeDescriptor);

>From efdcb8ca3df9b3b5f46ba929c14afda8ff4d02e4 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu at google.com>
Date: Wed, 25 Sep 2024 16:13:05 -0700
Subject: [PATCH 2/5] format

---
 .../Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp     |  5 +++--
 .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp          | 10 +++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index 03ab45ac988d1d..062fcc6dc5bb29 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "DWARFDebugAranges.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
 #include "DWARFUnit.h"
 #include "LogChannelDWARF.h"
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/Timer.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -23,7 +23,8 @@ DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {}
 
 // Extract
 void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
-  llvm::DWARFDataExtractor llvm_dwarf_data = debug_aranges_data.GetAsLLVMDWARF();
+  llvm::DWARFDataExtractor llvm_dwarf_data =
+      debug_aranges_data.GetAsLLVMDWARF();
   lldb::offset_t offset = 0;
 
   DWARFDebugArangeSet set;
diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index 254f3ac8aa618c..d77f0a50748b16 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -155,12 +155,12 @@ TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
   // and we ensure that these are ignored by our DWARFDebugArangeSet parser
   // and not included in the descriptors that are returned.
   unsigned char binary_data[] = {
-      0, 0, 0, 0, // unit_length that will be set correctly after this
-      0, 2,       // DWARF version number (uint16_t)
+      0, 0, 0, 0,   // unit_length that will be set correctly after this
+      0, 2,         // DWARF version number (uint16_t)
       0, 0, 0, 255, // CU offset
-      4,          // address size
-      0,          // segment size
-      0, 0, 0, 0, // alignment for the first tuple
+      4,            // address size
+      0,            // segment size
+      0, 0, 0, 0,   // alignment for the first tuple
       // BEGIN TUPLES
       0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100)
       0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, // [0x1100-0x1100)

>From 45012ef5dfc093547a203172b205ab4b1276f01f Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu at google.com>
Date: Thu, 26 Sep 2024 11:23:40 -0700
Subject: [PATCH 3/5] Address coments

---
 lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp | 4 ++--
 lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp   | 4 ++--
 llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h    | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index 062fcc6dc5bb29..26eeeb24401e8b 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -31,7 +31,7 @@ void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
   Range range;
   while (llvm_dwarf_data.isValidOffset(offset)) {
     const lldb::offset_t set_offset = offset;
-    if (llvm::Error error = set.extract(llvm_dwarf_data, &offset, nullptr)) {
+    if (llvm::Error error = set.extract(llvm_dwarf_data, &offset)) {
       Log *log = GetLog(DWARFLog::DebugInfo);
       LLDB_LOG_ERROR(log, std::move(error),
                      "DWARFDebugAranges::extract failed to extract "
@@ -40,7 +40,7 @@ void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
       set.clear();
       return;
     }
-    uint64_t cu_offset = set.getCompileUnitDIEOffset();
+    const uint64_t cu_offset = set.getCompileUnitDIEOffset();
     for (const auto &desc : set.descriptors()) {
       if (desc.Length != 0)
         m_aranges.Append(
diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index d77f0a50748b16..a9172eea8a29fb 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -97,7 +97,7 @@ TEST_F(SymbolFileDWARFTests, ParseArangesNonzeroSegmentSize) {
 
   DWARFDebugArangeSet debug_aranges;
   offset_t off = 0;
-  llvm::Error error = debug_aranges.extract(data, &off, nullptr);
+  llvm::Error error = debug_aranges.extract(data, &off);
   EXPECT_TRUE(bool(error));
   EXPECT_EQ("non-zero segment selector size in address range table at offset "
             "0x0 is not supported",
@@ -138,7 +138,7 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
                                 /*isLittleEndian=*/false, /*AddrSize=*/4);
   DWARFDebugArangeSet set;
   offset_t off = 0;
-  llvm::Error error = set.extract(data, &off, nullptr);
+  llvm::Error error = set.extract(data, &off);
   // Multiple terminators are not fatal as they do appear in binaries.
   EXPECT_FALSE(bool(error));
   // Parser should read all terminators to the end of the length specified.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 4afb65f4de59ac..abd5787d0b92b8 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -62,7 +62,7 @@ class DWARFDebugArangeSet {
 
   void clear();
   Error extract(DWARFDataExtractor data, uint64_t *offset_ptr,
-                function_ref<void(Error)> WarningHandler);
+                function_ref<void(Error)> WarningHandler = nullptr);
   void dump(raw_ostream &OS) const;
 
   uint64_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
@@ -77,6 +77,7 @@ class DWARFDebugArangeSet {
   size_t getDescriptorsSize() const { return ArangeDescriptors.size(); }
 
   const Descriptor &getDescriptiorRef(uint32_t I) const {
+    assert(I < ArangeDescriptors.size());
     return ArangeDescriptors[I];
   }
 };

>From 8d6e73e179c05264c1e1e93c83ef155b78a123e9 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu at google.com>
Date: Fri, 27 Sep 2024 14:37:37 -0700
Subject: [PATCH 4/5] address comments

---
 .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp | 29 ++++++++++---------
 .../DebugInfo/DWARF/DWARFDebugArangeSet.h     |  7 -----
 2 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index a9172eea8a29fb..9672750c3c3da0 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -119,7 +119,7 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
   unsigned char binary_data[] = {
       0, 0, 0, 0, // unit_length that will be set correctly after this
       0, 2,       // DWARF version number (uint16_t)
-      0, 0, 0, 0, // CU offset (ignored for the purposes of this test)
+      0, 0, 0, 255, // CU offset (ignored for the purposes of this test)
       4,          // address size
       0,          // segment size
       0, 0, 0, 0, // alignment for the first tuple
@@ -134,20 +134,21 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
   // Set the big endian length correctly.
   const offset_t binary_data_size = sizeof(binary_data);
   binary_data[3] = (uint8_t)binary_data_size - 4;
-  llvm::DWARFDataExtractor data(llvm::ArrayRef(binary_data),
-                                /*isLittleEndian=*/false, /*AddrSize=*/4);
-  DWARFDebugArangeSet set;
-  offset_t off = 0;
-  llvm::Error error = set.extract(data, &off);
-  // Multiple terminators are not fatal as they do appear in binaries.
-  EXPECT_FALSE(bool(error));
+  DWARFDataExtractor data;
+  data.SetData(static_cast<const void *>(binary_data), sizeof binary_data,
+               lldb::ByteOrder::eByteOrderBig);
+  DWARFDebugAranges debug_aranges;
+  debug_aranges.extract(data);
   // Parser should read all terminators to the end of the length specified.
-  EXPECT_EQ(off, binary_data_size);
-  ASSERT_EQ(set.getDescriptorsSize(), 4U);
-  ASSERT_EQ(set.getDescriptiorRef(1).Address, (dw_addr_t)0x1000);
-  ASSERT_EQ(set.getDescriptiorRef(1).Length, (dw_addr_t)0x100);
-  ASSERT_EQ(set.getDescriptiorRef(3).Address, (dw_addr_t)0x2000);
-  ASSERT_EQ(set.getDescriptiorRef(3).Length, (dw_addr_t)0x10);
+  ASSERT_EQ(debug_aranges.GetNumRanges(), 2U);
+  EXPECT_EQ(debug_aranges.FindAddress(0x0fff), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1000), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1100 - 1), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1100), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x1fff), DW_INVALID_OFFSET);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2000), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2010 - 1), 255u);
+  EXPECT_EQ(debug_aranges.FindAddress(0x2010), DW_INVALID_OFFSET);
 }
 
 TEST_F(SymbolFileDWARFTests, ParseArangesIgnoreEmpty) {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index abd5787d0b92b8..942de05f6bd84b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -73,13 +73,6 @@ class DWARFDebugArangeSet {
     return desc_iterator_range(ArangeDescriptors.begin(),
                                ArangeDescriptors.end());
   }
-
-  size_t getDescriptorsSize() const { return ArangeDescriptors.size(); }
-
-  const Descriptor &getDescriptiorRef(uint32_t I) const {
-    assert(I < ArangeDescriptors.size());
-    return ArangeDescriptors[I];
-  }
 };
 
 } // end namespace llvm

>From 0d2da20e77b2d83336f9410d39f4b071f027a246 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu at google.com>
Date: Tue, 1 Oct 2024 10:24:57 -0700
Subject: [PATCH 5/5] Address comments

---
 .../Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp   |  7 +++----
 .../SymbolFile/DWARF/SymbolFileDWARFTests.cpp        | 12 ++++++------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index 26eeeb24401e8b..a1a70ac1c9e858 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -23,15 +23,14 @@ DWARFDebugAranges::DWARFDebugAranges() : m_aranges() {}
 
 // Extract
 void DWARFDebugAranges::extract(const DWARFDataExtractor &debug_aranges_data) {
-  llvm::DWARFDataExtractor llvm_dwarf_data =
-      debug_aranges_data.GetAsLLVMDWARF();
+  llvm::DWARFDataExtractor dwarf_data = debug_aranges_data.GetAsLLVMDWARF();
   lldb::offset_t offset = 0;
 
   DWARFDebugArangeSet set;
   Range range;
-  while (llvm_dwarf_data.isValidOffset(offset)) {
+  while (dwarf_data.isValidOffset(offset)) {
     const lldb::offset_t set_offset = offset;
-    if (llvm::Error error = set.extract(llvm_dwarf_data, &offset)) {
+    if (llvm::Error error = set.extract(dwarf_data, &offset)) {
       Log *log = GetLog(DWARFLog::DebugInfo);
       LLDB_LOG_ERROR(log, std::move(error),
                      "DWARFDebugAranges::extract failed to extract "
diff --git a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
index 9672750c3c3da0..f9e869297bfb06 100644
--- a/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
+++ b/lldb/unittests/SymbolFile/DWARF/SymbolFileDWARFTests.cpp
@@ -117,12 +117,12 @@ TEST_F(SymbolFileDWARFTests, ParseArangesWithMultipleTerminators) {
   // DWARFDebugArangeSet header using the remaining segment + address pairs
   // from the remaining bytes.
   unsigned char binary_data[] = {
-      0, 0, 0, 0, // unit_length that will be set correctly after this
-      0, 2,       // DWARF version number (uint16_t)
-      0, 0, 0, 255, // CU offset (ignored for the purposes of this test)
-      4,          // address size
-      0,          // segment size
-      0, 0, 0, 0, // alignment for the first tuple
+      0, 0, 0, 0,   // unit_length that will be set correctly after this
+      0, 2,         // DWARF version number (uint16_t)
+      0, 0, 0, 255, // CU offset
+      4,            // address size
+      0,            // segment size
+      0, 0, 0, 0,   // alignment for the first tuple
       // BEGIN TUPLES
       0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // premature terminator
       0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, // [0x1000-0x1100)



More information about the lldb-commits mailing list