[lldb] [llvm] [Support] Remove address-extraction methods from DataExtractor (NFC) (PR #190519)

Sergei Barannikov via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 5 04:56:02 PDT 2026


https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/190519

>From 9c04458272503120f8a55127f86aad2cee882a34 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:21:38 +0300
Subject: [PATCH 1/3] [Support] Remove address-extraction methods from
 DataExtractor (NFC)

Most clients don't have a notion of an "address" and pass arbitrary values
(including `0` and `sizeof(void *)`) as the address size to `DataExtractor`
constructors. This makes the address-extraction methods dangerous to use:
they read however many bytes that value happens to specify.

Clients that do have a notion of an address can extract one with other
methods such as `getUnsigned()`, or, if extracting addresses is routine,
they can derive from `DataExtractor` and add convenience methods.
`DWARFDataExtractor` is an example of the latter: the removed methods
were moved there rather than deleted.

This does not remove the `AddressSize` argument of the `DataExtractor`
constructors yet; it makes the argument unused and adds constructor
overloads without it, in preparation for deprecating the overloads that
take it. I'll remove uses of the to-be-deprecated constructors in
follow-up patches.
---
 bolt/lib/Core/AddressMap.cpp                  | 10 +--
 bolt/lib/Rewrite/GNUPropertyRewriter.cpp      |  5 +-
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  6 +-
 .../DataFormatters/FormatterSection.cpp       |  6 +-
 .../DWARF/LowLevel/DWARFDataExtractorSimple.h | 50 ++++++++++++++-
 llvm/include/llvm/Support/DataExtractor.h     | 63 +++++--------------
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      |  6 +-
 .../DebugInfo/GSYM/MergedFunctionsInfo.cpp    |  2 +-
 .../Symbolize/SymbolizableObjectFile.cpp      |  8 +--
 llvm/lib/Object/ELF.cpp                       |  6 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         | 46 +++++++-------
 llvm/tools/obj2yaml/dwarf2yaml.cpp            |  5 +-
 llvm/tools/obj2yaml/elf2yaml.cpp              | 16 ++---
 llvm/unittests/Support/DataExtractorTest.cpp  |  8 +--
 14 files changed, 126 insertions(+), 111 deletions(-)

diff --git a/bolt/lib/Core/AddressMap.cpp b/bolt/lib/Core/AddressMap.cpp
index efa376d408db8..f061fea494394 100644
--- a/bolt/lib/Core/AddressMap.cpp
+++ b/bolt/lib/Core/AddressMap.cpp
@@ -70,20 +70,20 @@ std::optional<AddressMap> AddressMap::parse(BinaryContext &BC) {
 
   AddressMap Parsed;
 
-  const size_t EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
+  unsigned CodePointerSize = BC.AsmInfo->getCodePointerSize();
+  const size_t EntrySize = 2 * CodePointerSize;
   auto parseSection =
       [&](BinarySection &Section,
           function_ref<void(uint64_t, uint64_t)> InsertCallback) {
         StringRef Buffer = Section.getOutputContents();
         assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
 
-        DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
-                         BC.AsmInfo->getCodePointerSize());
+        DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian());
         DataExtractor::Cursor Cursor(0);
 
         while (Cursor && !DE.eof(Cursor)) {
-          const uint64_t Input = DE.getAddress(Cursor);
-          const uint64_t Output = DE.getAddress(Cursor);
+          const uint64_t Input = DE.getUnsigned(Cursor, CodePointerSize);
+          const uint64_t Output = DE.getUnsigned(Cursor, CodePointerSize);
           InsertCallback(Input, Output);
         }
 
diff --git a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
index 3db5c6ab95d14..f2c1531e7fc7c 100644
--- a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
+++ b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
@@ -93,10 +93,9 @@ Error GNUPropertyRewriter::sectionInitializer() {
 /// As there is no guarantee that the features are encoded in which element of
 /// the array, we have to read all, and OR together the result.
 Expected<uint32_t> GNUPropertyRewriter::decodeGNUPropertyNote(StringRef Desc) {
-  DataExtractor DE(Desc, BC.AsmInfo->isLittleEndian(),
-                   BC.AsmInfo->getCodePointerSize());
+  DataExtractor DE(Desc, BC.AsmInfo->isLittleEndian());
   DataExtractor::Cursor Cursor(0);
-  const uint32_t Align = DE.getAddressSize();
+  const uint32_t Align = BC.AsmInfo->getCodePointerSize();
 
   std::optional<uint32_t> Features = 0;
   while (Cursor && !DE.eof(Cursor)) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b4c08cf1f6153..f1c565c972942 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2788,10 +2788,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
         BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize());
 
     const BinaryContext &BC = BF.getBinaryContext();
-    DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian(),
-                     BC.AsmInfo->getCodePointerSize());
+    unsigned CodePointerSize = BC.AsmInfo->getCodePointerSize();
+    DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian());
     uint64_t Offset = 8;
-    TargetAddress = DE.getAddress(&Offset);
+    TargetAddress = DE.getUnsigned(&Offset, CodePointerSize);
 
     return true;
   }
diff --git a/lldb/source/DataFormatters/FormatterSection.cpp b/lldb/source/DataFormatters/FormatterSection.cpp
index 980e04125cb69..7ddfdc6ec41d0 100644
--- a/lldb/source/DataFormatters/FormatterSection.cpp
+++ b/lldb/source/DataFormatters/FormatterSection.cpp
@@ -66,7 +66,6 @@ static void ForEachFormatterInModule(
   auto section_size = section_sp->GetSectionData(lldb_extractor);
   llvm::DataExtractor section = lldb_extractor.GetAsLLVM();
   bool le = section.isLittleEndian();
-  uint8_t addr_size = section.getAddressSize();
   llvm::DataExtractor::Cursor cursor(0);
   while (cursor && cursor.tell() < section_size) {
     if (!skipPadding(section, cursor))
@@ -77,14 +76,13 @@ static void ForEachFormatterInModule(
     if (version == 1) {
       llvm::DataExtractor record(
           section.getData().drop_front(cursor.tell()).take_front(record_size),
-          le, addr_size);
+          le);
       llvm::DataExtractor::Cursor cursor(0);
       uint64_t type_size = record.getULEB128(cursor);
       llvm::StringRef type_name = record.getBytes(cursor, type_size);
       llvm::Error error = cursor.takeError();
       if (!error)
-        fn(llvm::DataExtractor(record.getData().drop_front(cursor.tell()), le,
-                               addr_size),
+        fn(llvm::DataExtractor(record.getData().drop_front(cursor.tell()), le),
            type_name);
       else
         LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), std::move(error),
diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
index ffe0b50b036ac..d21c983e8123e 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
@@ -22,22 +22,68 @@ namespace llvm {
 
 template <typename Relocator>
 class DWARFDataExtractorBase : public DataExtractor {
+  unsigned AddressSize;
 
 public:
   DWARFDataExtractorBase(StringRef Data, bool IsLittleEndian,
                          uint8_t AddressSize)
-      : DataExtractor(Data, IsLittleEndian, AddressSize) {}
+      : DataExtractor(Data, IsLittleEndian), AddressSize(AddressSize) {}
+
   DWARFDataExtractorBase(ArrayRef<uint8_t> Data, bool IsLittleEndian,
                          uint8_t AddressSize)
       : DataExtractor(
             StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()),
-            IsLittleEndian, AddressSize) {}
+            IsLittleEndian),
+        AddressSize(AddressSize) {}
 
   /// Truncating constructor
   DWARFDataExtractorBase(const DWARFDataExtractorBase &Other, size_t Length)
       : DataExtractor(Other.getData().substr(0, Length), Other.isLittleEndian(),
                       Other.getAddressSize()) {}
 
+  /// Get the address size for this extractor.
+  uint8_t getAddressSize() const { return AddressSize; }
+
+  /// Set the address size for this extractor.
+  void setAddressSize(uint8_t Size) { AddressSize = Size; }
+
+  //------------------------------------------------------------------
+  /// Extract an pointer from \a *offset_ptr.
+  ///
+  /// Extract a single pointer from the data and update the offset
+  /// pointed to by \a offset_ptr. The size of the extracted pointer
+  /// is \a getAddressSize(), so the address size has to be
+  /// set correctly prior to extracting any pointer values.
+  ///
+  /// @param[in,out] offset_ptr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by the appropriate number of bytes if the value is extracted
+  ///     correctly. If the offset is out of bounds or there are not
+  ///     enough bytes to extract this value, the offset will be left
+  ///     unmodified.
+  ///
+  /// @return
+  ///     The extracted pointer value as a 64-bit integer.
+  uint64_t getAddress(uint64_t *offset_ptr) const {
+    return getUnsigned(offset_ptr, AddressSize);
+  }
+
+  /// Extract a pointer-sized unsigned integer from the location given by the
+  /// cursor. In case of an extraction error, or if the cursor is already in
+  /// an error state, zero is returned.
+  uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
+
+  /// Test the availability of enough bytes of data for a pointer from
+  /// \a offset. The size of a pointer is \a getAddressSize().
+  ///
+  /// @return
+  ///     \b true if \a offset is a valid offset and there are enough
+  ///     bytes for a pointer available at that offset, \b false
+  ///     otherwise.
+  bool isValidOffsetForAddress(uint64_t offset) const {
+    return isValidOffsetForDataOfSize(offset, AddressSize);
+  }
+
   /// Extracts a value and returns it as adjusted by the Relocator
   uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
                              uint64_t *SectionIndex = nullptr,
diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index f1710b918ce78..807d9b3f2950e 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -38,7 +38,7 @@ inline uint24_t getSwappedBytes(uint24_t C) {
 class DataExtractor {
   StringRef Data;
   uint8_t IsLittleEndian;
-  uint8_t AddressSize;
+
 public:
   /// A class representing a position in a DataExtractor, as well as any error
   /// encountered during extraction. It enables one to extract a sequence of
@@ -80,22 +80,28 @@ class DataExtractor {
   /// This constructor allows us to use data that is owned by the
   /// caller. The data must stay around as long as this object is
   /// valid.
-  DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
-    : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
-  DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian,
-                uint8_t AddressSize)
+  DataExtractor(StringRef Data, bool IsLittleEndian)
+      : Data(Data), IsLittleEndian(IsLittleEndian) {}
+
+  DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian)
       : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
                        Data.size())),
-        IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
+        IsLittleEndian(IsLittleEndian) {}
+
+  // TODO: Deprecate.
+  DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t)
+      : Data(Data), IsLittleEndian(IsLittleEndian) {}
+
+  // TODO: Deprecate.
+  DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian, uint8_t)
+      : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
+                       Data.size())),
+        IsLittleEndian(IsLittleEndian) {}
 
   /// Get the data pointed to by this extractor.
   StringRef getData() const { return Data; }
   /// Get the endianness for this extractor.
   bool isLittleEndian() const { return IsLittleEndian; }
-  /// Get the address size for this extractor.
-  uint8_t getAddressSize() const { return AddressSize; }
-  /// Set the address size for this extractor.
-  void setAddressSize(uint8_t Size) { AddressSize = Size; }
 
   /// Extract a C string from \a *offset_ptr.
   ///
@@ -304,32 +310,6 @@ class DataExtractor {
   ///     or zero on failure.
   LLVM_ABI int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
 
-  //------------------------------------------------------------------
-  /// Extract an pointer from \a *offset_ptr.
-  ///
-  /// Extract a single pointer from the data and update the offset
-  /// pointed to by \a offset_ptr. The size of the extracted pointer
-  /// is \a getAddressSize(), so the address size has to be
-  /// set correctly prior to extracting any pointer values.
-  ///
-  /// @param[in,out] offset_ptr
-  ///     A pointer to an offset within the data that will be advanced
-  ///     by the appropriate number of bytes if the value is extracted
-  ///     correctly. If the offset is out of bounds or there are not
-  ///     enough bytes to extract this value, the offset will be left
-  ///     unmodified.
-  ///
-  /// @return
-  ///     The extracted pointer value as a 64 integer.
-  uint64_t getAddress(uint64_t *offset_ptr) const {
-    return getUnsigned(offset_ptr, AddressSize);
-  }
-
-  /// Extract a pointer-sized unsigned integer from the location given by the
-  /// cursor. In case of an extraction error, or if the cursor is already in
-  /// an error state, zero is returned.
-  uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
-
   /// Extract a uint8_t value from \a *offset_ptr.
   ///
   /// Extract a single uint8_t from the binary data at the offset
@@ -725,17 +705,6 @@ class DataExtractor {
     return offset + length >= offset && isValidOffset(offset + length - 1);
   }
 
-  /// Test the availability of enough bytes of data for a pointer from
-  /// \a offset. The size of a pointer is \a getAddressSize().
-  ///
-  /// @return
-  ///     \b true if \a offset is a valid offset and there are enough
-  ///     bytes for a pointer available at that offset, \b false
-  ///     otherwise.
-  bool isValidOffsetForAddress(uint64_t offset) const {
-    return isValidOffsetForDataOfSize(offset, AddressSize);
-  }
-
   /// Return the number of bytes in the underlying buffer.
   size_t size() const { return Data.size(); }
 
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index b6dcaeb323f59..2eb716ef383d2 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -69,8 +69,7 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
           "0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
           Offset, IT);
     DataExtractor InfoData(Data.getData().substr(Offset, InfoLength),
-                           Data.isLittleEndian(),
-                           Data.getAddressSize());
+                           Data.isLittleEndian());
     switch (IT) {
       case InfoType::EndOfList:
         Done = true;
@@ -275,8 +274,7 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
     if (InfoLength != InfoBytes.size())
       return createStringError(std::errc::io_error,
                                "FunctionInfo data is truncated");
-    DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
-                           Data.getAddressSize());
+    DataExtractor InfoData(InfoBytes, Data.isLittleEndian());
     switch (IT) {
       case InfoType::EndOfList:
         Done = true;
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index d2c28f38799d3..94c8780d57985 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -83,7 +83,7 @@ MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
 
     // Extract the function data.
     Results.emplace_back(Data.getData().substr(Offset, FnSize),
-                         Data.isLittleEndian(), Data.getAddressSize());
+                         Data.isLittleEndian());
 
     Offset += FnSize;
   }
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 29fd4d9fda7ad..c916371741abb 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -47,8 +47,7 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
         Expected<StringRef> E = Section->getContents();
         if (!E)
           return E.takeError();
-        OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
-                                             Obj->getBytesInAddress()));
+        OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian()));
         OpdAddress = Section->getAddress();
         break;
       }
@@ -205,8 +204,9 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
     // For the purposes of symbolization, pretend the symbol's address is that
     // of the function's code, not the descriptor.
     uint64_t OpdOffset = SymbolAddress - OpdAddress;
-    if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
-      SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
+    unsigned AddressSize = Obj.getBytesInAddress();
+    if (OpdExtractor->isValidOffsetForDataOfSize(OpdOffset, AddressSize))
+      SymbolAddress = OpdExtractor->getUnsigned(&OpdOffset, AddressSize);
   }
   // Mach-O symbol table names have leading underscore, skip it.
   if (Module->isMachO())
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 660331d5da96d..0780ca875f621 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -819,8 +819,8 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
     Content = DecompressedContentRef;
   }
 
-  DataExtractor Data(Content, EF.isLE(),
-                     sizeof(typename ELFFile<ELFT>::uintX_t));
+  constexpr unsigned AddressSize = sizeof(typename ELFFile<ELFT>::uintX_t);
+  DataExtractor Data(Content, EF.isLE());
   std::vector<BBAddrMap> FunctionEntries;
 
   DataExtractor::Cursor Cur(0);
@@ -830,7 +830,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
   // Helper lambda to extract the (possibly relocatable) address stored at Cur.
   auto ExtractAddress = [&]() -> Expected<uint64_t> {
     uint64_t RelocationOffsetInSection = Cur.tell();
-    uint64_t Address = Data.getAddress(Cur);
+    uint64_t Address = Data.getUnsigned(Cur, AddressSize);
     if (!Cur)
       return Cur.takeError();
     if (!IsRelocatable)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index bcb580119fb85..f817ef5b29d74 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -6068,7 +6068,8 @@ struct CoreNote {
   std::vector<CoreFileMapping> Mappings;
 };
 
-static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
+static Expected<CoreNote> readCoreNote(DataExtractor Desc,
+                                       unsigned AddressSize) {
   // Expected format of the NT_FILE note description:
   // 1. # of file mappings (call it N)
   // 2. Page size
@@ -6077,9 +6078,9 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
   // Each field is an Elf_Addr, except for filenames which are char* strings.
 
   CoreNote Ret;
-  const int Bytes = Desc.getAddressSize();
+  const int Bytes = AddressSize;
 
-  if (!Desc.isValidOffsetForAddress(2))
+  if (!Desc.isValidOffsetForDataOfSize(2, AddressSize))
     return createError("the note of size 0x" + Twine::utohexstr(Desc.size()) +
                        " is too short, expected at least 0x" +
                        Twine::utohexstr(Bytes * 2));
@@ -6087,10 +6088,10 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
     return createError("the note is not NUL terminated");
 
   uint64_t DescOffset = 0;
-  uint64_t FileCount = Desc.getAddress(&DescOffset);
-  Ret.PageSize = Desc.getAddress(&DescOffset);
+  uint64_t FileCount = Desc.getUnsigned(&DescOffset, AddressSize);
+  Ret.PageSize = Desc.getUnsigned(&DescOffset, AddressSize);
 
-  if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
+  if (!Desc.isValidOffsetForDataOfSize(3 * FileCount * Bytes, AddressSize))
     return createError("unable to read file mappings (found " +
                        Twine(FileCount) + "): the note of size 0x" +
                        Twine::utohexstr(Desc.size()) + " is too short");
@@ -6098,7 +6099,7 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
   uint64_t FilenamesOffset = 0;
   DataExtractor Filenames(
       Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes),
-      Desc.isLittleEndian(), Desc.getAddressSize());
+      Desc.isLittleEndian());
 
   Ret.Mappings.resize(FileCount);
   size_t I = 0;
@@ -6109,9 +6110,9 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
           "unable to read the file name for the mapping with index " +
           Twine(I) + ": the note of size 0x" + Twine::utohexstr(Desc.size()) +
           " is truncated");
-    Mapping.Start = Desc.getAddress(&DescOffset);
-    Mapping.End = Desc.getAddress(&DescOffset);
-    Mapping.Offset = Desc.getAddress(&DescOffset);
+    Mapping.Start = Desc.getUnsigned(&DescOffset, AddressSize);
+    Mapping.End = Desc.getUnsigned(&DescOffset, AddressSize);
+    Mapping.Offset = Desc.getUnsigned(&DescOffset, AddressSize);
     Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset);
   }
 
@@ -6483,10 +6484,10 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
         return Error::success();
     } else if (Name == "CORE") {
       if (Type == ELF::NT_FILE) {
-        DataExtractor DescExtractor(
-            Descriptor, ELFT::Endianness == llvm::endianness::little,
-            sizeof(Elf_Addr));
-        if (Expected<CoreNote> NoteOrErr = readCoreNote(DescExtractor)) {
+        DataExtractor DescExtractor(Descriptor, ELFT::Endianness ==
+                                                    llvm::endianness::little);
+        if (Expected<CoreNote> NoteOrErr =
+                readCoreNote(DescExtractor, sizeof(Elf_Addr))) {
           printCoreNote<ELFT>(OS, *NoteOrErr);
           return Error::success();
         } else {
@@ -7169,7 +7170,8 @@ void ELFDumper<ELFT>::printStackSize(const Relocation<ELFT> &R,
   }
 
   uint64_t SymValue = Resolver(R.Type, Offset, RelocSymValue,
-                               Data.getAddress(&Offset), R.Addend.value_or(0));
+                               Data.getUnsigned(&Offset, sizeof(Elf_Addr)),
+                               R.Addend.value_or(0));
   this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data,
                                &Offset);
 }
@@ -7185,7 +7187,7 @@ void ELFDumper<ELFT>::printNonRelocatableStackSizes(
     PrintHeader();
     ArrayRef<uint8_t> Contents =
         unwrapOrError(this->FileName, Obj.getSectionContents(Sec));
-    DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+    DataExtractor Data(Contents, Obj.isLE());
     uint64_t Offset = 0;
     while (Offset < Contents.size()) {
       // The function address is followed by a ULEB representing the stack
@@ -7196,7 +7198,7 @@ void ELFDumper<ELFT>::printNonRelocatableStackSizes(
             " ended while trying to extract a stack size entry");
         break;
       }
-      uint64_t SymValue = Data.getAddress(&Offset);
+      uint64_t SymValue = Data.getUnsigned(&Offset, sizeof(Elf_Addr));
       if (!printFunctionStackSize(SymValue, /*FunctionSec=*/std::nullopt, Sec,
                                   Data, &Offset))
         break;
@@ -7262,7 +7264,7 @@ void ELFDumper<ELFT>::printRelocatableStackSizes(
     std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF);
     ArrayRef<uint8_t> Contents =
         unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec));
-    DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+    DataExtractor Data(Contents, Obj.isLE());
 
     forEachRelocationDo(
         *RelocSec, [&](const Relocation<ELFT> &R, unsigned Ndx,
@@ -8660,10 +8662,10 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
         return Error::success();
     } else if (Name == "CORE") {
       if (Type == ELF::NT_FILE) {
-        DataExtractor DescExtractor(
-            Descriptor, ELFT::Endianness == llvm::endianness::little,
-            sizeof(Elf_Addr));
-        if (Expected<CoreNote> N = readCoreNote(DescExtractor)) {
+        DataExtractor DescExtractor(Descriptor, ELFT::Endianness ==
+                                                    llvm::endianness::little);
+        if (Expected<CoreNote> N =
+                readCoreNote(DescExtractor, sizeof(Elf_Addr))) {
           printCoreNoteLLVMStyle(*N, W);
           return Error::success();
         } else {
diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp
index 3c41fa2323921..60cc1037f9b49 100644
--- a/llvm/tools/obj2yaml/dwarf2yaml.cpp
+++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp
@@ -376,8 +376,9 @@ void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
     if (auto StmtOffset =
             dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
       DWARFYAML::LineTable DebugLines;
+      unsigned AddressSize = CU->getAddressByteSize();
       DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
-                             DCtx.isLittleEndian(), CU->getAddressByteSize());
+                             DCtx.isLittleEndian());
       uint64_t Offset = *StmtOffset;
       uint64_t LengthOrDWARF64Prefix = LineData.getU32(&Offset);
       if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
@@ -438,7 +439,7 @@ void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
           switch (NewOp.SubOpcode) {
           case dwarf::DW_LNE_set_address:
           case dwarf::DW_LNE_set_discriminator:
-            NewOp.Data = LineData.getAddress(&Offset);
+            NewOp.Data = LineData.getUnsigned(&Offset, AddressSize);
             break;
           case dwarf::DW_LNE_define_file:
             dumpFileEntry(LineData, Offset, NewOp.FileEntry);
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 83d828dfb81c8..a69fd1b2d0695 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -852,12 +852,13 @@ ELFDumper<ELFT>::dumpStackSizesSection(const Elf_Shdr *Shdr) {
     return ContentOrErr.takeError();
 
   ArrayRef<uint8_t> Content = *ContentOrErr;
-  DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4);
+  unsigned AddressSize = ELFT::Is64Bits ? 8 : 4;
+  DataExtractor Data(Content, Obj.isLE());
 
   std::vector<ELFYAML::StackSizeEntry> Entries;
   DataExtractor::Cursor Cur(0);
   while (Cur && Cur.tell() < Content.size()) {
-    uint64_t Address = Data.getAddress(Cur);
+    uint64_t Address = Data.getUnsigned(Cur, AddressSize);
     uint64_t Size = Data.getULEB128(Cur);
     Entries.push_back({Address, Size});
   }
@@ -888,7 +889,8 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
   if (Content.empty())
     return S.release();
 
-  DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4);
+  unsigned AddressSize = ELFT::Is64Bits ? 8 : 4;
+  DataExtractor Data(Content, Obj.isLE());
 
   std::vector<ELFYAML::BBAddrMapEntry> Entries;
   bool HasAnyPGOAnalysisMapEntry = false;
@@ -916,14 +918,14 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
     if (FeatureOrErr->MultiBBRange) {
       NumBBRanges = Data.getULEB128(Cur);
     } else {
-      Address = Data.getAddress(Cur);
+      Address = Data.getUnsigned(Cur, AddressSize);
       NumBlocks = Data.getULEB128(Cur);
     }
     std::vector<ELFYAML::BBAddrMapEntry::BBRangeEntry> BBRanges;
     uint64_t BaseAddress = 0;
     for (uint64_t BBRangeN = 0; Cur && BBRangeN != NumBBRanges; ++BBRangeN) {
       if (FeatureOrErr->MultiBBRange) {
-        BaseAddress = Data.getAddress(Cur);
+        BaseAddress = Data.getUnsigned(Cur, AddressSize);
         NumBlocks = Data.getULEB128(Cur);
       } else {
         BaseAddress = Address;
@@ -1391,7 +1393,7 @@ ELFDumper<ELFT>::dumpGnuHashSection(const Elf_Shdr *Shdr) {
 
   unsigned AddrSize = ELFT::Is64Bits ? 8 : 4;
   ArrayRef<uint8_t> Content = *ContentOrErr;
-  DataExtractor Data(Content, Obj.isLE(), AddrSize);
+  DataExtractor Data(Content, Obj.isLE());
 
   ELFYAML::GnuHashHeader Header;
   DataExtractor::Cursor Cur(0);
@@ -1414,7 +1416,7 @@ ELFDumper<ELFT>::dumpGnuHashSection(const Elf_Shdr *Shdr) {
 
   S->BloomFilter.emplace(MaskWords);
   for (llvm::yaml::Hex64 &Val : *S->BloomFilter)
-    Val = Data.getAddress(Cur);
+    Val = Data.getUnsigned(Cur, AddrSize);
 
   S->HashBuckets.emplace(NBuckets);
   for (llvm::yaml::Hex32 &Val : *S->HashBuckets)
diff --git a/llvm/unittests/Support/DataExtractorTest.cpp b/llvm/unittests/Support/DataExtractorTest.cpp
index 029af6b1903c5..e1add2c0220bc 100644
--- a/llvm/unittests/Support/DataExtractorTest.cpp
+++ b/llvm/unittests/Support/DataExtractorTest.cpp
@@ -25,7 +25,7 @@ TEST(DataExtractorTest, OffsetOverflow) {
 }
 
 TEST(DataExtractorTest, UnsignedNumbers) {
-  DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8);
+  DataExtractor DE(StringRef(numberData, sizeof(numberData) - 1), false);
   uint64_t offset = 0;
 
   EXPECT_EQ(0x80U, DE.getU8(&offset));
@@ -40,7 +40,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
   EXPECT_EQ(0x8090FFFF80000000ULL, DE.getU64(&offset));
   EXPECT_EQ(8U, offset);
   offset = 0;
-  EXPECT_EQ(0x8090FFFF80000000ULL, DE.getAddress(&offset));
+  EXPECT_EQ(0x8090FFFF80000000ULL, DE.getUnsigned(&offset, 8));
   EXPECT_EQ(8U, offset);
   offset = 0;
 
@@ -52,7 +52,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
   offset = 0;
 
   // Now for little endian.
-  DE = DataExtractor(StringRef(numberData, sizeof(numberData)-1), true, 4);
+  DE = DataExtractor(StringRef(numberData, sizeof(numberData) - 1), true);
   EXPECT_EQ(0x9080U, DE.getU16(&offset));
   EXPECT_EQ(2U, offset);
   offset = 0;
@@ -62,7 +62,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
   EXPECT_EQ(0x80FFFF9080ULL, DE.getU64(&offset));
   EXPECT_EQ(8U, offset);
   offset = 0;
-  EXPECT_EQ(0xFFFF9080U, DE.getAddress(&offset));
+  EXPECT_EQ(0xFFFF9080U, DE.getUnsigned(&offset, 4));
   EXPECT_EQ(4U, offset);
   offset = 0;
 

>From df95791a72ea200daf6a2c0c9dcc5de425c56520 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:45:29 +0300
Subject: [PATCH 2/3] Use `unsigned` for `AddressSize`

`uint8_t` doesn't save space, and a word-sized integer type is potentially
faster.
---
 .../DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
index d21c983e8123e..7a76ead6a93cf 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
@@ -26,11 +26,11 @@ class DWARFDataExtractorBase : public DataExtractor {
 
 public:
   DWARFDataExtractorBase(StringRef Data, bool IsLittleEndian,
-                         uint8_t AddressSize)
+                         unsigned AddressSize)
       : DataExtractor(Data, IsLittleEndian), AddressSize(AddressSize) {}
 
   DWARFDataExtractorBase(ArrayRef<uint8_t> Data, bool IsLittleEndian,
-                         uint8_t AddressSize)
+                         unsigned AddressSize)
       : DataExtractor(
             StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()),
             IsLittleEndian),
@@ -42,10 +42,10 @@ class DWARFDataExtractorBase : public DataExtractor {
                       Other.getAddressSize()) {}
 
   /// Get the address size for this extractor.
-  uint8_t getAddressSize() const { return AddressSize; }
+  unsigned getAddressSize() const { return AddressSize; }
 
   /// Set the address size for this extractor.
-  void setAddressSize(uint8_t Size) { AddressSize = Size; }
+  void setAddressSize(unsigned Size) { AddressSize = Size; }
 
   //------------------------------------------------------------------
   /// Extract an pointer from \a *offset_ptr.

>From 920b908115d279cb49162c7186e7b1df3d9e70a5 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:55:50 +0300
Subject: [PATCH 3/3] Use delegating constructors

---
 llvm/include/llvm/Support/DataExtractor.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index 807d9b3f2950e..dd738abd25473 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -90,13 +90,11 @@ class DataExtractor {
 
   // TODO: Deprecate.
   DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t)
-      : Data(Data), IsLittleEndian(IsLittleEndian) {}
+      : DataExtractor(Data, IsLittleEndian) {}
 
   // TODO: Deprecate.
   DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian, uint8_t)
-      : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
-                       Data.size())),
-        IsLittleEndian(IsLittleEndian) {}
+      : DataExtractor(Data, IsLittleEndian) {}
 
   /// Get the data pointed to by this extractor.
   StringRef getData() const { return Data; }



More information about the llvm-commits mailing list