[lldb] [llvm] [Support] Remove address-extraction methods from DataExtractor (NFC) (PR #190519)
Sergei Barannikov via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 5 04:56:02 PDT 2026
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/190519
>From 9c04458272503120f8a55127f86aad2cee882a34 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:21:38 +0300
Subject: [PATCH 1/3] [Support] Remove address-extraction methods from
DataExtractor (NFC)
Most clients don't have a notion of "address" and pass arbitrary values
(including `0` and `sizeof(void *)`) as the address size to `DataExtractor`
constructors. This makes the address-extraction methods dangerous to use.
Those clients that do have a notion of address can use other methods
like `getUnsigned()` to extract an address, or they can derive from
`DataExtractor` and add convenience methods if extracting an address
is routine. `DWARFDataExtractor` is an example, where the removed
methods were actually moved.
This does not remove the `AddressSize` argument of the `DataExtractor`
constructors yet, but makes it unused and adds constructor overloads
without it, in preparation for deprecating the old ones. I'll be removing
uses of the to-be-deprecated constructors in follow-up patches.
---
bolt/lib/Core/AddressMap.cpp | 10 +--
bolt/lib/Rewrite/GNUPropertyRewriter.cpp | 5 +-
.../Target/AArch64/AArch64MCPlusBuilder.cpp | 6 +-
.../DataFormatters/FormatterSection.cpp | 6 +-
.../DWARF/LowLevel/DWARFDataExtractorSimple.h | 50 ++++++++++++++-
llvm/include/llvm/Support/DataExtractor.h | 63 +++++--------------
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 6 +-
.../DebugInfo/GSYM/MergedFunctionsInfo.cpp | 2 +-
.../Symbolize/SymbolizableObjectFile.cpp | 8 +--
llvm/lib/Object/ELF.cpp | 6 +-
llvm/tools/llvm-readobj/ELFDumper.cpp | 46 +++++++-------
llvm/tools/obj2yaml/dwarf2yaml.cpp | 5 +-
llvm/tools/obj2yaml/elf2yaml.cpp | 16 ++---
llvm/unittests/Support/DataExtractorTest.cpp | 8 +--
14 files changed, 126 insertions(+), 111 deletions(-)
diff --git a/bolt/lib/Core/AddressMap.cpp b/bolt/lib/Core/AddressMap.cpp
index efa376d408db8..f061fea494394 100644
--- a/bolt/lib/Core/AddressMap.cpp
+++ b/bolt/lib/Core/AddressMap.cpp
@@ -70,20 +70,20 @@ std::optional<AddressMap> AddressMap::parse(BinaryContext &BC) {
AddressMap Parsed;
- const size_t EntrySize = 2 * BC.AsmInfo->getCodePointerSize();
+ unsigned CodePointerSize = BC.AsmInfo->getCodePointerSize();
+ const size_t EntrySize = 2 * CodePointerSize;
auto parseSection =
[&](BinarySection &Section,
function_ref<void(uint64_t, uint64_t)> InsertCallback) {
StringRef Buffer = Section.getOutputContents();
assert(Buffer.size() % EntrySize == 0 && "Unexpected address map size");
- DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian(),
- BC.AsmInfo->getCodePointerSize());
+ DataExtractor DE(Buffer, BC.AsmInfo->isLittleEndian());
DataExtractor::Cursor Cursor(0);
while (Cursor && !DE.eof(Cursor)) {
- const uint64_t Input = DE.getAddress(Cursor);
- const uint64_t Output = DE.getAddress(Cursor);
+ const uint64_t Input = DE.getUnsigned(Cursor, CodePointerSize);
+ const uint64_t Output = DE.getUnsigned(Cursor, CodePointerSize);
InsertCallback(Input, Output);
}
diff --git a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
index 3db5c6ab95d14..f2c1531e7fc7c 100644
--- a/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
+++ b/bolt/lib/Rewrite/GNUPropertyRewriter.cpp
@@ -93,10 +93,9 @@ Error GNUPropertyRewriter::sectionInitializer() {
/// As there is no guarantee that the features are encoded in which element of
/// the array, we have to read all, and OR together the result.
Expected<uint32_t> GNUPropertyRewriter::decodeGNUPropertyNote(StringRef Desc) {
- DataExtractor DE(Desc, BC.AsmInfo->isLittleEndian(),
- BC.AsmInfo->getCodePointerSize());
+ DataExtractor DE(Desc, BC.AsmInfo->isLittleEndian());
DataExtractor::Cursor Cursor(0);
- const uint32_t Align = DE.getAddressSize();
+ const uint32_t Align = BC.AsmInfo->getCodePointerSize();
std::optional<uint32_t> Features = 0;
while (Cursor && !DE.eof(Cursor)) {
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index b4c08cf1f6153..f1c565c972942 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -2788,10 +2788,10 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize());
const BinaryContext &BC = BF.getBinaryContext();
- DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian(),
- BC.AsmInfo->getCodePointerSize());
+ unsigned CodePointerSize = BC.AsmInfo->getCodePointerSize();
+ DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian());
uint64_t Offset = 8;
- TargetAddress = DE.getAddress(&Offset);
+ TargetAddress = DE.getUnsigned(&Offset, CodePointerSize);
return true;
}
diff --git a/lldb/source/DataFormatters/FormatterSection.cpp b/lldb/source/DataFormatters/FormatterSection.cpp
index 980e04125cb69..7ddfdc6ec41d0 100644
--- a/lldb/source/DataFormatters/FormatterSection.cpp
+++ b/lldb/source/DataFormatters/FormatterSection.cpp
@@ -66,7 +66,6 @@ static void ForEachFormatterInModule(
auto section_size = section_sp->GetSectionData(lldb_extractor);
llvm::DataExtractor section = lldb_extractor.GetAsLLVM();
bool le = section.isLittleEndian();
- uint8_t addr_size = section.getAddressSize();
llvm::DataExtractor::Cursor cursor(0);
while (cursor && cursor.tell() < section_size) {
if (!skipPadding(section, cursor))
@@ -77,14 +76,13 @@ static void ForEachFormatterInModule(
if (version == 1) {
llvm::DataExtractor record(
section.getData().drop_front(cursor.tell()).take_front(record_size),
- le, addr_size);
+ le);
llvm::DataExtractor::Cursor cursor(0);
uint64_t type_size = record.getULEB128(cursor);
llvm::StringRef type_name = record.getBytes(cursor, type_size);
llvm::Error error = cursor.takeError();
if (!error)
- fn(llvm::DataExtractor(record.getData().drop_front(cursor.tell()), le,
- addr_size),
+ fn(llvm::DataExtractor(record.getData().drop_front(cursor.tell()), le),
type_name);
else
LLDB_LOG_ERROR(GetLog(LLDBLog::DataFormatters), std::move(error),
diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
index ffe0b50b036ac..d21c983e8123e 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
@@ -22,22 +22,68 @@ namespace llvm {
template <typename Relocator>
class DWARFDataExtractorBase : public DataExtractor {
+ unsigned AddressSize;
public:
DWARFDataExtractorBase(StringRef Data, bool IsLittleEndian,
uint8_t AddressSize)
- : DataExtractor(Data, IsLittleEndian, AddressSize) {}
+ : DataExtractor(Data, IsLittleEndian), AddressSize(AddressSize) {}
+
DWARFDataExtractorBase(ArrayRef<uint8_t> Data, bool IsLittleEndian,
uint8_t AddressSize)
: DataExtractor(
StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()),
- IsLittleEndian, AddressSize) {}
+ IsLittleEndian),
+ AddressSize(AddressSize) {}
/// Truncating constructor
DWARFDataExtractorBase(const DWARFDataExtractorBase &Other, size_t Length)
: DataExtractor(Other.getData().substr(0, Length), Other.isLittleEndian(),
Other.getAddressSize()) {}
+ /// Get the address size for this extractor.
+ uint8_t getAddressSize() const { return AddressSize; }
+
+ /// Set the address size for this extractor.
+ void setAddressSize(uint8_t Size) { AddressSize = Size; }
+
+ //------------------------------------------------------------------
+ /// Extract an pointer from \a *offset_ptr.
+ ///
+ /// Extract a single pointer from the data and update the offset
+ /// pointed to by \a offset_ptr. The size of the extracted pointer
+ /// is \a getAddressSize(), so the address size has to be
+ /// set correctly prior to extracting any pointer values.
+ ///
+ /// @param[in,out] offset_ptr
+ /// A pointer to an offset within the data that will be advanced
+ /// by the appropriate number of bytes if the value is extracted
+ /// correctly. If the offset is out of bounds or there are not
+ /// enough bytes to extract this value, the offset will be left
+ /// unmodified.
+ ///
+ /// @return
+ /// The extracted pointer value as a 64 integer.
+ uint64_t getAddress(uint64_t *offset_ptr) const {
+ return getUnsigned(offset_ptr, AddressSize);
+ }
+
+ /// Extract a pointer-sized unsigned integer from the location given by the
+ /// cursor. In case of an extraction error, or if the cursor is already in
+ /// an error state, zero is returned.
+ uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
+
+ /// Test the availability of enough bytes of data for a pointer from
+ /// \a offset. The size of a pointer is \a getAddressSize().
+ ///
+ /// @return
+ /// \b true if \a offset is a valid offset and there are enough
+ /// bytes for a pointer available at that offset, \b false
+ /// otherwise.
+ bool isValidOffsetForAddress(uint64_t offset) const {
+ return isValidOffsetForDataOfSize(offset, AddressSize);
+ }
+
/// Extracts a value and returns it as adjusted by the Relocator
uint64_t getRelocatedValue(uint32_t Size, uint64_t *Off,
uint64_t *SectionIndex = nullptr,
diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index f1710b918ce78..807d9b3f2950e 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -38,7 +38,7 @@ inline uint24_t getSwappedBytes(uint24_t C) {
class DataExtractor {
StringRef Data;
uint8_t IsLittleEndian;
- uint8_t AddressSize;
+
public:
/// A class representing a position in a DataExtractor, as well as any error
/// encountered during extraction. It enables one to extract a sequence of
@@ -80,22 +80,28 @@ class DataExtractor {
/// This constructor allows us to use data that is owned by the
/// caller. The data must stay around as long as this object is
/// valid.
- DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
- : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
- DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian,
- uint8_t AddressSize)
+ DataExtractor(StringRef Data, bool IsLittleEndian)
+ : Data(Data), IsLittleEndian(IsLittleEndian) {}
+
+ DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian)
: Data(StringRef(reinterpret_cast<const char *>(Data.data()),
Data.size())),
- IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
+ IsLittleEndian(IsLittleEndian) {}
+
+ // TODO: Deprecate.
+ DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t)
+ : Data(Data), IsLittleEndian(IsLittleEndian) {}
+
+ // TODO: Deprecate.
+ DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian, uint8_t)
+ : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
+ Data.size())),
+ IsLittleEndian(IsLittleEndian) {}
/// Get the data pointed to by this extractor.
StringRef getData() const { return Data; }
/// Get the endianness for this extractor.
bool isLittleEndian() const { return IsLittleEndian; }
- /// Get the address size for this extractor.
- uint8_t getAddressSize() const { return AddressSize; }
- /// Set the address size for this extractor.
- void setAddressSize(uint8_t Size) { AddressSize = Size; }
/// Extract a C string from \a *offset_ptr.
///
@@ -304,32 +310,6 @@ class DataExtractor {
/// or zero on failure.
LLVM_ABI int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const;
- //------------------------------------------------------------------
- /// Extract an pointer from \a *offset_ptr.
- ///
- /// Extract a single pointer from the data and update the offset
- /// pointed to by \a offset_ptr. The size of the extracted pointer
- /// is \a getAddressSize(), so the address size has to be
- /// set correctly prior to extracting any pointer values.
- ///
- /// @param[in,out] offset_ptr
- /// A pointer to an offset within the data that will be advanced
- /// by the appropriate number of bytes if the value is extracted
- /// correctly. If the offset is out of bounds or there are not
- /// enough bytes to extract this value, the offset will be left
- /// unmodified.
- ///
- /// @return
- /// The extracted pointer value as a 64 integer.
- uint64_t getAddress(uint64_t *offset_ptr) const {
- return getUnsigned(offset_ptr, AddressSize);
- }
-
- /// Extract a pointer-sized unsigned integer from the location given by the
- /// cursor. In case of an extraction error, or if the cursor is already in
- /// an error state, zero is returned.
- uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); }
-
/// Extract a uint8_t value from \a *offset_ptr.
///
/// Extract a single uint8_t from the binary data at the offset
@@ -725,17 +705,6 @@ class DataExtractor {
return offset + length >= offset && isValidOffset(offset + length - 1);
}
- /// Test the availability of enough bytes of data for a pointer from
- /// \a offset. The size of a pointer is \a getAddressSize().
- ///
- /// @return
- /// \b true if \a offset is a valid offset and there are enough
- /// bytes for a pointer available at that offset, \b false
- /// otherwise.
- bool isValidOffsetForAddress(uint64_t offset) const {
- return isValidOffsetForDataOfSize(offset, AddressSize);
- }
-
/// Return the number of bytes in the underlying buffer.
size_t size() const { return Data.size(); }
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index b6dcaeb323f59..2eb716ef383d2 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -69,8 +69,7 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
"0x%8.8" PRIx64 ": missing FunctionInfo data for InfoType %u",
Offset, IT);
DataExtractor InfoData(Data.getData().substr(Offset, InfoLength),
- Data.isLittleEndian(),
- Data.getAddressSize());
+ Data.isLittleEndian());
switch (IT) {
case InfoType::EndOfList:
Done = true;
@@ -275,8 +274,7 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
if (InfoLength != InfoBytes.size())
return createStringError(std::errc::io_error,
"FunctionInfo data is truncated");
- DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
- Data.getAddressSize());
+ DataExtractor InfoData(InfoBytes, Data.isLittleEndian());
switch (IT) {
case InfoType::EndOfList:
Done = true;
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index d2c28f38799d3..94c8780d57985 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -83,7 +83,7 @@ MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
// Extract the function data.
Results.emplace_back(Data.getData().substr(Offset, FnSize),
- Data.isLittleEndian(), Data.getAddressSize());
+ Data.isLittleEndian());
Offset += FnSize;
}
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 29fd4d9fda7ad..c916371741abb 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -47,8 +47,7 @@ SymbolizableObjectFile::create(const object::ObjectFile *Obj,
Expected<StringRef> E = Section->getContents();
if (!E)
return E.takeError();
- OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
- Obj->getBytesInAddress()));
+ OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian()));
OpdAddress = Section->getAddress();
break;
}
@@ -205,8 +204,9 @@ Error SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
// For the purposes of symbolization, pretend the symbol's address is that
// of the function's code, not the descriptor.
uint64_t OpdOffset = SymbolAddress - OpdAddress;
- if (OpdExtractor->isValidOffsetForAddress(OpdOffset))
- SymbolAddress = OpdExtractor->getAddress(&OpdOffset);
+ unsigned AddressSize = Obj.getBytesInAddress();
+ if (OpdExtractor->isValidOffsetForDataOfSize(OpdOffset, AddressSize))
+ SymbolAddress = OpdExtractor->getUnsigned(&OpdOffset, AddressSize);
}
// Mach-O symbol table names have leading underscore, skip it.
if (Module->isMachO())
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 660331d5da96d..0780ca875f621 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -819,8 +819,8 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
Content = DecompressedContentRef;
}
- DataExtractor Data(Content, EF.isLE(),
- sizeof(typename ELFFile<ELFT>::uintX_t));
+ constexpr unsigned AddressSize = sizeof(typename ELFFile<ELFT>::uintX_t);
+ DataExtractor Data(Content, EF.isLE());
std::vector<BBAddrMap> FunctionEntries;
DataExtractor::Cursor Cur(0);
@@ -830,7 +830,7 @@ decodeBBAddrMapImpl(const ELFFile<ELFT> &EF,
// Helper lambda to extract the (possibly relocatable) address stored at Cur.
auto ExtractAddress = [&]() -> Expected<uint64_t> {
uint64_t RelocationOffsetInSection = Cur.tell();
- uint64_t Address = Data.getAddress(Cur);
+ uint64_t Address = Data.getUnsigned(Cur, AddressSize);
if (!Cur)
return Cur.takeError();
if (!IsRelocatable)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index bcb580119fb85..f817ef5b29d74 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -6068,7 +6068,8 @@ struct CoreNote {
std::vector<CoreFileMapping> Mappings;
};
-static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
+static Expected<CoreNote> readCoreNote(DataExtractor Desc,
+ unsigned AddressSize) {
// Expected format of the NT_FILE note description:
// 1. # of file mappings (call it N)
// 2. Page size
@@ -6077,9 +6078,9 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
// Each field is an Elf_Addr, except for filenames which are char* strings.
CoreNote Ret;
- const int Bytes = Desc.getAddressSize();
+ const int Bytes = AddressSize;
- if (!Desc.isValidOffsetForAddress(2))
+ if (!Desc.isValidOffsetForDataOfSize(2, AddressSize))
return createError("the note of size 0x" + Twine::utohexstr(Desc.size()) +
" is too short, expected at least 0x" +
Twine::utohexstr(Bytes * 2));
@@ -6087,10 +6088,10 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
return createError("the note is not NUL terminated");
uint64_t DescOffset = 0;
- uint64_t FileCount = Desc.getAddress(&DescOffset);
- Ret.PageSize = Desc.getAddress(&DescOffset);
+ uint64_t FileCount = Desc.getUnsigned(&DescOffset, AddressSize);
+ Ret.PageSize = Desc.getUnsigned(&DescOffset, AddressSize);
- if (!Desc.isValidOffsetForAddress(3 * FileCount * Bytes))
+ if (!Desc.isValidOffsetForDataOfSize(3 * FileCount * Bytes, AddressSize))
return createError("unable to read file mappings (found " +
Twine(FileCount) + "): the note of size 0x" +
Twine::utohexstr(Desc.size()) + " is too short");
@@ -6098,7 +6099,7 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
uint64_t FilenamesOffset = 0;
DataExtractor Filenames(
Desc.getData().drop_front(DescOffset + 3 * FileCount * Bytes),
- Desc.isLittleEndian(), Desc.getAddressSize());
+ Desc.isLittleEndian());
Ret.Mappings.resize(FileCount);
size_t I = 0;
@@ -6109,9 +6110,9 @@ static Expected<CoreNote> readCoreNote(DataExtractor Desc) {
"unable to read the file name for the mapping with index " +
Twine(I) + ": the note of size 0x" + Twine::utohexstr(Desc.size()) +
" is truncated");
- Mapping.Start = Desc.getAddress(&DescOffset);
- Mapping.End = Desc.getAddress(&DescOffset);
- Mapping.Offset = Desc.getAddress(&DescOffset);
+ Mapping.Start = Desc.getUnsigned(&DescOffset, AddressSize);
+ Mapping.End = Desc.getUnsigned(&DescOffset, AddressSize);
+ Mapping.Offset = Desc.getUnsigned(&DescOffset, AddressSize);
Mapping.Filename = Filenames.getCStrRef(&FilenamesOffset);
}
@@ -6483,10 +6484,10 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
- DataExtractor DescExtractor(
- Descriptor, ELFT::Endianness == llvm::endianness::little,
- sizeof(Elf_Addr));
- if (Expected<CoreNote> NoteOrErr = readCoreNote(DescExtractor)) {
+ DataExtractor DescExtractor(Descriptor, ELFT::Endianness ==
+ llvm::endianness::little);
+ if (Expected<CoreNote> NoteOrErr =
+ readCoreNote(DescExtractor, sizeof(Elf_Addr))) {
printCoreNote<ELFT>(OS, *NoteOrErr);
return Error::success();
} else {
@@ -7169,7 +7170,8 @@ void ELFDumper<ELFT>::printStackSize(const Relocation<ELFT> &R,
}
uint64_t SymValue = Resolver(R.Type, Offset, RelocSymValue,
- Data.getAddress(&Offset), R.Addend.value_or(0));
+ Data.getUnsigned(&Offset, sizeof(Elf_Addr)),
+ R.Addend.value_or(0));
this->printFunctionStackSize(SymValue, FunctionSec, StackSizeSec, Data,
&Offset);
}
@@ -7185,7 +7187,7 @@ void ELFDumper<ELFT>::printNonRelocatableStackSizes(
PrintHeader();
ArrayRef<uint8_t> Contents =
unwrapOrError(this->FileName, Obj.getSectionContents(Sec));
- DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+ DataExtractor Data(Contents, Obj.isLE());
uint64_t Offset = 0;
while (Offset < Contents.size()) {
// The function address is followed by a ULEB representing the stack
@@ -7196,7 +7198,7 @@ void ELFDumper<ELFT>::printNonRelocatableStackSizes(
" ended while trying to extract a stack size entry");
break;
}
- uint64_t SymValue = Data.getAddress(&Offset);
+ uint64_t SymValue = Data.getUnsigned(&Offset, sizeof(Elf_Addr));
if (!printFunctionStackSize(SymValue, /*FunctionSec=*/std::nullopt, Sec,
Data, &Offset))
break;
@@ -7262,7 +7264,7 @@ void ELFDumper<ELFT>::printRelocatableStackSizes(
std::tie(IsSupportedFn, Resolver) = getRelocationResolver(this->ObjF);
ArrayRef<uint8_t> Contents =
unwrapOrError(this->FileName, Obj.getSectionContents(*StackSizesELFSec));
- DataExtractor Data(Contents, Obj.isLE(), sizeof(Elf_Addr));
+ DataExtractor Data(Contents, Obj.isLE());
forEachRelocationDo(
*RelocSec, [&](const Relocation<ELFT> &R, unsigned Ndx,
@@ -8660,10 +8662,10 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
- DataExtractor DescExtractor(
- Descriptor, ELFT::Endianness == llvm::endianness::little,
- sizeof(Elf_Addr));
- if (Expected<CoreNote> N = readCoreNote(DescExtractor)) {
+ DataExtractor DescExtractor(Descriptor, ELFT::Endianness ==
+ llvm::endianness::little);
+ if (Expected<CoreNote> N =
+ readCoreNote(DescExtractor, sizeof(Elf_Addr))) {
printCoreNoteLLVMStyle(*N, W);
return Error::success();
} else {
diff --git a/llvm/tools/obj2yaml/dwarf2yaml.cpp b/llvm/tools/obj2yaml/dwarf2yaml.cpp
index 3c41fa2323921..60cc1037f9b49 100644
--- a/llvm/tools/obj2yaml/dwarf2yaml.cpp
+++ b/llvm/tools/obj2yaml/dwarf2yaml.cpp
@@ -376,8 +376,9 @@ void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
if (auto StmtOffset =
dwarf::toSectionOffset(CUDIE.find(dwarf::DW_AT_stmt_list))) {
DWARFYAML::LineTable DebugLines;
+ unsigned AddressSize = CU->getAddressByteSize();
DataExtractor LineData(DCtx.getDWARFObj().getLineSection().Data,
- DCtx.isLittleEndian(), CU->getAddressByteSize());
+ DCtx.isLittleEndian());
uint64_t Offset = *StmtOffset;
uint64_t LengthOrDWARF64Prefix = LineData.getU32(&Offset);
if (LengthOrDWARF64Prefix == dwarf::DW_LENGTH_DWARF64) {
@@ -438,7 +439,7 @@ void dumpDebugLines(DWARFContext &DCtx, DWARFYAML::Data &Y) {
switch (NewOp.SubOpcode) {
case dwarf::DW_LNE_set_address:
case dwarf::DW_LNE_set_discriminator:
- NewOp.Data = LineData.getAddress(&Offset);
+ NewOp.Data = LineData.getUnsigned(&Offset, AddressSize);
break;
case dwarf::DW_LNE_define_file:
dumpFileEntry(LineData, Offset, NewOp.FileEntry);
diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp b/llvm/tools/obj2yaml/elf2yaml.cpp
index 83d828dfb81c8..a69fd1b2d0695 100644
--- a/llvm/tools/obj2yaml/elf2yaml.cpp
+++ b/llvm/tools/obj2yaml/elf2yaml.cpp
@@ -852,12 +852,13 @@ ELFDumper<ELFT>::dumpStackSizesSection(const Elf_Shdr *Shdr) {
return ContentOrErr.takeError();
ArrayRef<uint8_t> Content = *ContentOrErr;
- DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4);
+ unsigned AddressSize = ELFT::Is64Bits ? 8 : 4;
+ DataExtractor Data(Content, Obj.isLE());
std::vector<ELFYAML::StackSizeEntry> Entries;
DataExtractor::Cursor Cur(0);
while (Cur && Cur.tell() < Content.size()) {
- uint64_t Address = Data.getAddress(Cur);
+ uint64_t Address = Data.getUnsigned(Cur, AddressSize);
uint64_t Size = Data.getULEB128(Cur);
Entries.push_back({Address, Size});
}
@@ -888,7 +889,8 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
if (Content.empty())
return S.release();
- DataExtractor Data(Content, Obj.isLE(), ELFT::Is64Bits ? 8 : 4);
+ unsigned AddressSize = ELFT::Is64Bits ? 8 : 4;
+ DataExtractor Data(Content, Obj.isLE());
std::vector<ELFYAML::BBAddrMapEntry> Entries;
bool HasAnyPGOAnalysisMapEntry = false;
@@ -916,14 +918,14 @@ ELFDumper<ELFT>::dumpBBAddrMapSection(const Elf_Shdr *Shdr) {
if (FeatureOrErr->MultiBBRange) {
NumBBRanges = Data.getULEB128(Cur);
} else {
- Address = Data.getAddress(Cur);
+ Address = Data.getUnsigned(Cur, AddressSize);
NumBlocks = Data.getULEB128(Cur);
}
std::vector<ELFYAML::BBAddrMapEntry::BBRangeEntry> BBRanges;
uint64_t BaseAddress = 0;
for (uint64_t BBRangeN = 0; Cur && BBRangeN != NumBBRanges; ++BBRangeN) {
if (FeatureOrErr->MultiBBRange) {
- BaseAddress = Data.getAddress(Cur);
+ BaseAddress = Data.getUnsigned(Cur, AddressSize);
NumBlocks = Data.getULEB128(Cur);
} else {
BaseAddress = Address;
@@ -1391,7 +1393,7 @@ ELFDumper<ELFT>::dumpGnuHashSection(const Elf_Shdr *Shdr) {
unsigned AddrSize = ELFT::Is64Bits ? 8 : 4;
ArrayRef<uint8_t> Content = *ContentOrErr;
- DataExtractor Data(Content, Obj.isLE(), AddrSize);
+ DataExtractor Data(Content, Obj.isLE());
ELFYAML::GnuHashHeader Header;
DataExtractor::Cursor Cur(0);
@@ -1414,7 +1416,7 @@ ELFDumper<ELFT>::dumpGnuHashSection(const Elf_Shdr *Shdr) {
S->BloomFilter.emplace(MaskWords);
for (llvm::yaml::Hex64 &Val : *S->BloomFilter)
- Val = Data.getAddress(Cur);
+ Val = Data.getUnsigned(Cur, AddrSize);
S->HashBuckets.emplace(NBuckets);
for (llvm::yaml::Hex32 &Val : *S->HashBuckets)
diff --git a/llvm/unittests/Support/DataExtractorTest.cpp b/llvm/unittests/Support/DataExtractorTest.cpp
index 029af6b1903c5..e1add2c0220bc 100644
--- a/llvm/unittests/Support/DataExtractorTest.cpp
+++ b/llvm/unittests/Support/DataExtractorTest.cpp
@@ -25,7 +25,7 @@ TEST(DataExtractorTest, OffsetOverflow) {
}
TEST(DataExtractorTest, UnsignedNumbers) {
- DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8);
+ DataExtractor DE(StringRef(numberData, sizeof(numberData) - 1), false);
uint64_t offset = 0;
EXPECT_EQ(0x80U, DE.getU8(&offset));
@@ -40,7 +40,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
EXPECT_EQ(0x8090FFFF80000000ULL, DE.getU64(&offset));
EXPECT_EQ(8U, offset);
offset = 0;
- EXPECT_EQ(0x8090FFFF80000000ULL, DE.getAddress(&offset));
+ EXPECT_EQ(0x8090FFFF80000000ULL, DE.getUnsigned(&offset, 8));
EXPECT_EQ(8U, offset);
offset = 0;
@@ -52,7 +52,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
offset = 0;
// Now for little endian.
- DE = DataExtractor(StringRef(numberData, sizeof(numberData)-1), true, 4);
+ DE = DataExtractor(StringRef(numberData, sizeof(numberData) - 1), true);
EXPECT_EQ(0x9080U, DE.getU16(&offset));
EXPECT_EQ(2U, offset);
offset = 0;
@@ -62,7 +62,7 @@ TEST(DataExtractorTest, UnsignedNumbers) {
EXPECT_EQ(0x80FFFF9080ULL, DE.getU64(&offset));
EXPECT_EQ(8U, offset);
offset = 0;
- EXPECT_EQ(0xFFFF9080U, DE.getAddress(&offset));
+ EXPECT_EQ(0xFFFF9080U, DE.getUnsigned(&offset, 4));
EXPECT_EQ(4U, offset);
offset = 0;
>From df95791a72ea200daf6a2c0c9dcc5de425c56520 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:45:29 +0300
Subject: [PATCH 2/3] Use `unsigned` for `AddressSize`
`uint8_t` doesn't save space, and a word-sized integer type is potentially
faster.
---
.../DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
index d21c983e8123e..7a76ead6a93cf 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/LowLevel/DWARFDataExtractorSimple.h
@@ -26,11 +26,11 @@ class DWARFDataExtractorBase : public DataExtractor {
public:
DWARFDataExtractorBase(StringRef Data, bool IsLittleEndian,
- uint8_t AddressSize)
+ unsigned AddressSize)
: DataExtractor(Data, IsLittleEndian), AddressSize(AddressSize) {}
DWARFDataExtractorBase(ArrayRef<uint8_t> Data, bool IsLittleEndian,
- uint8_t AddressSize)
+ unsigned AddressSize)
: DataExtractor(
StringRef(reinterpret_cast<const char *>(Data.data()), Data.size()),
IsLittleEndian),
@@ -42,10 +42,10 @@ class DWARFDataExtractorBase : public DataExtractor {
Other.getAddressSize()) {}
/// Get the address size for this extractor.
- uint8_t getAddressSize() const { return AddressSize; }
+ unsigned getAddressSize() const { return AddressSize; }
/// Set the address size for this extractor.
- void setAddressSize(uint8_t Size) { AddressSize = Size; }
+ void setAddressSize(unsigned Size) { AddressSize = Size; }
//------------------------------------------------------------------
/// Extract an pointer from \a *offset_ptr.
>From 920b908115d279cb49162c7186e7b1df3d9e70a5 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88 at gmail.com>
Date: Sun, 5 Apr 2026 14:55:50 +0300
Subject: [PATCH 3/3] Use delegating constructors
---
llvm/include/llvm/Support/DataExtractor.h | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index 807d9b3f2950e..dd738abd25473 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -90,13 +90,11 @@ class DataExtractor {
// TODO: Deprecate.
DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t)
- : Data(Data), IsLittleEndian(IsLittleEndian) {}
+ : DataExtractor(Data, IsLittleEndian) {}
// TODO: Deprecate.
DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian, uint8_t)
- : Data(StringRef(reinterpret_cast<const char *>(Data.data()),
- Data.size())),
- IsLittleEndian(IsLittleEndian) {}
+ : DataExtractor(Data, IsLittleEndian) {}
/// Get the data pointed to by this extractor.
StringRef getData() const { return Data; }
More information about the llvm-commits
mailing list