[llvm] [llvm-objcopy][ELF] Add an option to remove notes (PR #118739)
Igor Kudrin via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 4 20:23:36 PST 2024
https://github.com/igorkudrin created https://github.com/llvm/llvm-project/pull/118739
This adds an option `--remove-note` to selectively delete notes in an ELF file. For now, it is expected to be useful for editing core dump files; in particular, it searches for the notes in `PT_NOTE` segments and does not handle nested segments. The implementation can be extended later as needed.
>From 24308b26df6af7c9e027db9ef637a8da75fdf93f Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin at accesssoftek.com>
Date: Wed, 27 Nov 2024 22:31:05 -0800
Subject: [PATCH] [llvm-objcopy][ELF] Add an option to remove notes
This adds an option `--remove-note` to selectively delete notes in an
ELF file. For now, it is expected to be useful for editing core dump
files; in particular, it searches for the notes in `PT_NOTE` segments
and does not handle nested segments. The implementation can be extended
later as needed.
---
llvm/include/llvm/ObjCopy/ELF/ELFConfig.h | 9 ++
llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 96 ++++++++++++++++++
llvm/lib/ObjCopy/ELF/ELFObject.cpp | 21 ++++
llvm/lib/ObjCopy/ELF/ELFObject.h | 5 +
.../tools/llvm-objcopy/ELF/remove-note.test | 98 +++++++++++++++++++
llvm/tools/llvm-objcopy/ObjcopyOptions.cpp | 39 ++++++++
llvm/tools/llvm-objcopy/ObjcopyOpts.td | 4 +
7 files changed, 272 insertions(+)
create mode 100644 llvm/test/tools/llvm-objcopy/ELF/remove-note.test
diff --git a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
index 59960b65307430..01a8762cfb9c37 100644
--- a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
@@ -15,6 +15,12 @@
namespace llvm {
namespace objcopy {
+// Describes one note-removal request given with the --remove-note option.
+// A note is removed when its type equals TypeId and, if Name is not empty,
+// its name equals Name.
+struct RemoveNoteInfo {
+ // Note name to match; an empty name matches notes with any name.
+ StringRef Name;
+ // Note type to match.
+ uint32_t TypeId;
+};
+
// ELF specific configuration for copying/stripping a single file.
struct ELFConfig {
uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
@@ -31,6 +37,9 @@ struct ELFConfig {
bool KeepFileSymbols = false;
bool LocalizeHidden = false;
bool VerifyNoteSections = true;
+
+ // Notes specified by --remove-note option.
+ SmallVector<RemoveNoteInfo, 0> NotesToRemove;
};
} // namespace objcopy
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 4793651f1d4e0b..1df64b0b7ce886 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -609,6 +609,97 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}
+// Removes every note that matches an entry of NotesToRemove from the
+// top-level PT_NOTE segments of Obj. A note matches when its type equals
+// TypeId and the requested name is either empty or equal to the note's name.
+// The data of each affected segment is rebuilt without the removed notes, and
+// the offsets and sizes of the sections inside that segment are remapped to
+// the new layout. Always returns success.
+template <class ELFT>
+static Error removeNoteImpl(Object &Obj,
+ ArrayRef<RemoveNoteInfo> NotesToRemove) {
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFT);
+ for (Segment &Seg : Obj.segments()) {
+ // TODO: Support nested segments
+ if (Seg.Type != PT_NOTE || Seg.ParentSegment)
+ continue;
+
+ // Find chunks of the segment data to remove
+ // OldFrom/OldTo delimit a removed note as the half-open byte range
+ // [OldFrom, OldTo) of the old segment data. NewOffset is filled in later
+ // with the position in the new data where the bytes that followed the
+ // removed range end up.
+ struct DeletedRange {
+ uint64_t OldFrom;
+ uint64_t OldTo;
+ uint64_t NewOffset;
+ };
+ std::vector<DeletedRange> DataToRemove;
+ ArrayRef<uint8_t> OldData = Seg.getContents();
+ // Note sizes are computed with at least 4-byte alignment.
+ size_t Align = std::max<size_t>(4, Seg.Align);
+ uint64_t Offset = 0;
+ // Walk the notes back to back. Stop when the remaining bytes cannot hold a
+ // note header or when a note would extend past the end of the data.
+ while (Offset + sizeof(Elf_Nhdr) <= OldData.size()) {
+ auto Nhdr = reinterpret_cast<const Elf_Nhdr *>(OldData.data() + Offset);
+ size_t FullSize = Nhdr->getSize(Align);
+ if (Offset + FullSize > OldData.size())
+ break;
+ Elf_Note Note(*Nhdr);
+ if (llvm::any_of(NotesToRemove, [&](const RemoveNoteInfo &NoteInfo) {
+ return NoteInfo.TypeId == Note.getType() &&
+ (NoteInfo.Name.empty() || NoteInfo.Name == Note.getName());
+ }))
+ DataToRemove.push_back({Offset, Offset + FullSize, 0});
+ Offset += FullSize;
+ }
+ // Nothing matched in this segment; leave it untouched.
+ if (DataToRemove.empty())
+ continue;
+
+ // Prepare the new segment data
+ // Copy the bytes between consecutive removed ranges, recording for each
+ // range where the data that follows it lands in NewData.
+ std::vector<uint8_t> NewData;
+ NewData.reserve(OldData.size());
+ Offset = 0;
+ for (auto &RemRange : DataToRemove) {
+ if (Offset < RemRange.OldFrom) {
+ auto Slice = OldData.slice(Offset, RemRange.OldFrom - Offset);
+ NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+ }
+ RemRange.NewOffset = NewData.size();
+ Offset = RemRange.OldTo;
+ }
+ // Copy whatever follows the last removed range.
+ if (Offset < OldData.size()) {
+ auto Slice = OldData.slice(Offset);
+ NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+ }
+
+ // Translates a file offset inside the old segment into the matching file
+ // offset in the rebuilt segment. An offset that falls inside a removed
+ // range collapses to the position recorded for that range.
+ auto CalculateNewOffset = [&](uint64_t SecOffset) {
+ uint64_t Offset = SecOffset - Seg.Offset;
+ // Locate the last removed range that starts at or before Offset, if any.
+ auto It =
+ llvm::upper_bound(DataToRemove, Offset,
+ [](const uint64_t &Off, const DeletedRange &Range) {
+ return Off < Range.OldFrom;
+ });
+ if (It != DataToRemove.begin()) {
+ --It;
+ Offset = (Offset > It->OldTo) ? (Offset - It->OldTo + It->NewOffset)
+ : It->NewOffset;
+ }
+ return Offset + Seg.Offset;
+ };
+
+ // Remap the segment's sections
+ // Each section maps to its (new file offset, new size) pair; the new size
+ // is the distance between the remapped start and the remapped end.
+ DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>> Mapping;
+ for (const SectionBase *Sec : Seg.Sections) {
+ uint64_t NewOffset = CalculateNewOffset(Sec->Offset);
+ uint64_t NewSize =
+ CalculateNewOffset(Sec->Offset + Sec->Size) - NewOffset;
+ Mapping.try_emplace(Sec, NewOffset, NewSize);
+ }
+
+ Obj.updateSegmentData(Seg, std::move(NewData), Mapping);
+ }
+ return Error::success();
+}
+
+// Removes the notes described by NotesToRemove from Obj, reading the note
+// headers with the given endianness. Returns the result of the
+// endianness-specific implementation.
+static Error removeNote(Object &Obj, endianness Endianness,
+                        ArrayRef<RemoveNoteInfo> NotesToRemove) {
+  // Note: notes for both 32-bit and 64-bit ELF files use 4-byte words in the
+  // header, so the parsers are the same.
+  if (Endianness == endianness::little)
+    return removeNoteImpl<ELF64LE>(Obj, NotesToRemove);
+  return removeNoteImpl<ELF64BE>(Obj, NotesToRemove);
+}
+
static Error
handleUserSection(const NewSectionInfo &NewSection,
function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
@@ -799,6 +890,11 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
? endianness::little
: endianness::big;
+ if (!ELFConfig.NotesToRemove.empty()) {
+ if (Error Err = removeNote(Obj, E, ELFConfig.NotesToRemove))
+ return Err;
+ }
+
for (const NewSectionInfo &AddedSection : Config.AddSection) {
auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) -> Error {
OwnedDataSection &NewSection =
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 01c2f24629077a..9f460f685706c8 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2308,6 +2308,27 @@ Error Object::addNewSymbolTable() {
return Error::success();
}
+// Replaces the data of segment S with NewSegmentData and repositions its
+// sections. SectionMapping maps every section of S to its new
+// (file offset, size) pair.
+void Object::updateSegmentData(
+    Segment &S, std::vector<uint8_t> NewSegmentData,
+    const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+        &SectionMapping) {
+  // Store the new bytes in UpdatedSegments and point the segment's contents
+  // at the stored copy.
+  std::vector<uint8_t> &Stored =
+      UpdatedSegments.insert_or_assign(&S, std::move(NewSegmentData))
+          .first->second;
+  S.Contents = Stored;
+  S.FileSize = S.Contents.size();
+  // MemSize only follows the new file size when it was non-zero before.
+  if (S.MemSize)
+    S.MemSize = S.FileSize;
+
+  assert(SectionMapping.size() == S.Sections.size());
+  for (const auto &Entry : SectionMapping) {
+    const SectionBase *Sec = Entry.first;
+    const uint64_t NewOffset = Entry.second.first;
+    const uint64_t NewSize = Entry.second.second;
+    // Every mapped section must belong to S and stay within its new bounds.
+    assert(Sec->ParentSegment == &S && S.Sections.count(Sec));
+    assert(NewOffset >= S.Offset);
+    assert((NewOffset + NewSize) <= (S.Offset + S.FileSize));
+    SectionBase *MutSec = const_cast<SectionBase *>(Sec);
+    MutSec->Offset = NewOffset;
+    MutSec->Size = NewSize;
+  }
+}
+
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
llvm::stable_sort(Segments, compareSegmentsByOffset);
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 6ccf85387131e4..5e16d4c0c1885a 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -1159,6 +1159,7 @@ class Object {
std::vector<SegPtr> Segments;
std::vector<SecPtr> RemovedSections;
DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
+ DenseMap<Segment *, std::vector<uint8_t>> UpdatedSegments;
static bool sectionIsAlloc(const SectionBase &Sec) {
return Sec.Flags & ELF::SHF_ALLOC;
@@ -1234,6 +1235,10 @@ class Object {
Segments.emplace_back(std::make_unique<Segment>(Data));
return *Segments.back();
}
+ // Replaces the contents of segment S with NewSegmentData and applies
+ // SectionMapping, which maps each section of S to its new
+ // (file offset, size) pair.
+ void updateSegmentData(
+ Segment &S, std::vector<uint8_t> NewSegmentData,
+ const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+ &SectionMapping);
bool isRelocatable() const {
return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
}
diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-note.test b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
new file mode 100644
index 00000000000000..b24538e26d019e
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
@@ -0,0 +1,98 @@
+## Check the diagnostics reported for malformed --remove-note values.
+# RUN: not llvm-objcopy --remove-note= - 2>&1 | FileCheck %s --check-prefix=ERR-NOTYPEID
+# RUN: not llvm-objcopy --remove-note=/1 - 2>&1 | FileCheck %s --check-prefix=ERR-EMPTYNAME
+# RUN: not llvm-objcopy --remove-note=CORE/1/2 - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM1
+# RUN: not llvm-objcopy --remove-note=Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+# RUN: not llvm-objcopy --remove-note=CORE/Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+
+# ERR-NOTYPEID: error: bad format for --remove-note, missing type_id
+# ERR-EMPTYNAME: error: bad format for --remove-note, note name is empty
+# ERR-INVNUM1: error: bad note type_id for --remove-note: '1/2'
+# ERR-INVNUM2: error: bad note type_id for --remove-note: 'Notanumber'
+
+## Remove the notes of type 1 regardless of their name (CORE and LINUX) and the
+## CORE note of type 3. DUMMY/0x02 matches no note in the input, so the CORE
+## note of type 2 is expected to stay. The same scenario is run with 8-byte and
+## 4-byte note alignment; only the expected size differs.
+# RUN: yaml2obj -D ALIGN=8 %s -o - \
+# RUN: | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN: | llvm-readobj --segments --sections --notes - \
+# RUN: | FileCheck %s -D#SIZE=64
+
+# RUN: yaml2obj -D ALIGN=4 %s -o - \
+# RUN: | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN: | llvm-readobj --segments --sections --notes - \
+# RUN: | FileCheck %s -D#SIZE=48
+
+# CHECK: Sections [
+# CHECK: Section {
+# CHECK: Name: .note
+# CHECK-NEXT: Type: SHT_NOTE
+# CHECK-NEXT: Flags [
+# CHECK-NEXT: ]
+# CHECK-NEXT: Address:
+# CHECK-NEXT: Offset: [[OFFSET:0x.+]]
+# CHECK-NEXT: Size: [[#%d,SIZE]]
+
+# CHECK: ProgramHeaders [
+# CHECK-NEXT: ProgramHeader {
+# CHECK-NEXT: Type: PT_NOTE
+# CHECK-NEXT: Offset: [[OFFSET]]
+# CHECK-NEXT: VirtualAddress: 0x0
+# CHECK-NEXT: PhysicalAddress: 0x0
+# CHECK-NEXT: FileSize: [[#%d,SIZE]]
+# CHECK-NEXT: MemSize: 0
+
+# CHECK: NoteSections [
+# CHECK-NEXT: NoteSection {
+# CHECK-NEXT: Name:
+# CHECK-NEXT: Offset: [[OFFSET]]
+# CHECK-NEXT: Size: 0x[[#%x,SIZE]]
+# CHECK-NEXT: Notes [
+# CHECK-NEXT: {
+# CHECK-NEXT: Owner: CORE
+# CHECK-NEXT: Data size: 0x2
+# CHECK-NEXT: Type: NT_FPREGSET
+# CHECK-NEXT: Description data (
+# CHECK-NEXT: 0000: 0202
+# CHECK-NEXT: )
+# CHECK-NEXT: }
+# CHECK-NEXT: {
+# CHECK-NEXT: Owner: CORE
+# CHECK-NEXT: Data size: 0x2
+# CHECK-NEXT: Type: NT_TASKSTRUCT
+# CHECK-NEXT: Description data (
+# CHECK-NEXT: 0000: 0404
+# CHECK-NEXT: )
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+# CHECK-NEXT: }
+# CHECK-NEXT: ]
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_CORE
+ Machine: EM_X86_64
+ProgramHeaders:
+ - Type: PT_NOTE
+ MemSize: 0
+ FirstSec: .note
+ LastSec: .note
+Sections:
+ - Name: .note
+ Type: SHT_NOTE
+ AddressAlign: [[ALIGN]]
+ Notes:
+ - Name: CORE
+ Type: 0x01
+ Desc: 0101
+ - Name: CORE
+ Type: 0x02
+ Desc: 0202
+ - Name: CORE
+ Type: 0x03
+ Desc: 0303
+ - Name: CORE
+ Type: 0x04
+ Desc: 0404
+ - Name: LINUX
+ Type: 0x01
+ Desc: 0505
+...
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 104d802b1e1eeb..5e348d65adca18 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -527,6 +527,37 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
return SI;
}
+// Parses the value of a --remove-note option, which has the form
+//
+//   [name/]type_id
+//
+// <name> is optional. When a '/' is present, everything before the first '/'
+// is the note name and must not be empty; without a name, notes of the given
+// type are matched regardless of their name. <type_id> is the numeric note
+// type; its radix is auto-detected (e.g. plain decimal, or hexadecimal with a
+// 0x prefix).
+static Expected<RemoveNoteInfo> parseRemoveNoteInfo(StringRef FlagValue) {
+  if (FlagValue.empty())
+    return createStringError(errc::invalid_argument,
+                             "bad format for --remove-note, missing type_id");
+
+  RemoveNoteInfo Result;
+  // Without a '/', the whole value is the type id.
+  StringRef TypeIdText = FlagValue;
+  const size_t SlashPos = FlagValue.find('/');
+  if (SlashPos != StringRef::npos) {
+    StringRef NoteName = FlagValue.substr(0, SlashPos);
+    if (NoteName.empty())
+      return createStringError(
+          errc::invalid_argument,
+          "bad format for --remove-note, note name is empty");
+    Result.Name = NoteName;
+    // Only the first '/' separates the fields; any later '/' stays in the
+    // type id text and makes the numeric parse below fail.
+    TypeIdText = FlagValue.substr(SlashPos + 1);
+  }
+
+  if (TypeIdText.getAsInteger(0, Result.TypeId))
+    return createStringError(errc::invalid_argument,
+                             "bad note type_id for --remove-note: '%s'",
+                             TypeIdText.str().c_str());
+  return Result;
+}
+
// Parse input option \p ArgValue and load section data. This function
// extracts section name and name of the file keeping section data from
// ArgValue, loads data from the file, and stores section name and data
@@ -1210,6 +1241,14 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
};
}
+ for (auto *Arg : InputArgs.filtered(OBJCOPY_remove_note)) {
+ Expected<RemoveNoteInfo> NoteInfo = parseRemoveNoteInfo(Arg->getValue());
+ if (!NoteInfo)
+ return NoteInfo.takeError();
+
+ ELFConfig.NotesToRemove.push_back(*NoteInfo);
+ }
+
if (Config.DecompressDebugSections &&
Config.CompressionType != DebugCompressionType::None) {
return createStringError(
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 434b5ff92324eb..fbc6a59d9461e7 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -297,3 +297,7 @@ defm pad_to
"of zero or the value specified by the --gap-fill option. "
"This option is only supported for ELF input and binary output">,
MetaVarName<"address">;
+
+// --remove-note=[name/]type_id: remove notes with the given type id and,
+// optionally, the given name.
+defm remove_note
+ : Eq<"remove-note", "Remove note(s) with <type_id> and optional <name>">,
+ MetaVarName<"[name/]type_id">;
More information about the llvm-commits
mailing list