[llvm] [llvm-objcopy][ELF] Add an option to remove notes (PR #118739)

Igor Kudrin via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 4 20:23:36 PST 2024


https://github.com/igorkudrin created https://github.com/llvm/llvm-project/pull/118739

This adds an option `--remove-note` to selectively delete notes in an ELF file. For now, it is expected to be useful for editing core dump files; in particular, it searches for the notes in `PT_NOTE` segments and does not handle nested segments. The implementation can be extended later as needed.

>From 24308b26df6af7c9e027db9ef637a8da75fdf93f Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin at accesssoftek.com>
Date: Wed, 27 Nov 2024 22:31:05 -0800
Subject: [PATCH] [llvm-objcopy][ELF] Add an option to remove notes

This adds an option `--remove-note` to selectively delete notes in an
ELF file. For now, it is expected to be useful for editing core dump
files; in particular, it searches for the notes in `PT_NOTE` segments
and does not handle nested segments. The implementation can be extended
later as needed.
---
 llvm/include/llvm/ObjCopy/ELF/ELFConfig.h     |  9 ++
 llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp           | 96 ++++++++++++++++++
 llvm/lib/ObjCopy/ELF/ELFObject.cpp            | 21 ++++
 llvm/lib/ObjCopy/ELF/ELFObject.h              |  5 +
 .../tools/llvm-objcopy/ELF/remove-note.test   | 98 +++++++++++++++++++
 llvm/tools/llvm-objcopy/ObjcopyOptions.cpp    | 39 ++++++++
 llvm/tools/llvm-objcopy/ObjcopyOpts.td        |  4 +
 7 files changed, 272 insertions(+)
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/remove-note.test

diff --git a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
index 59960b65307430..01a8762cfb9c37 100644
--- a/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
+++ b/llvm/include/llvm/ObjCopy/ELF/ELFConfig.h
@@ -15,6 +15,12 @@
 namespace llvm {
 namespace objcopy {
 
+// Describes a note to remove, as specified by the --remove-note option.
+struct RemoveNoteInfo {
+  StringRef Name;
+  uint32_t TypeId;
+};
+
 // ELF specific configuration for copying/stripping a single file.
 struct ELFConfig {
   uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
@@ -31,6 +37,9 @@ struct ELFConfig {
   bool KeepFileSymbols = false;
   bool LocalizeHidden = false;
   bool VerifyNoteSections = true;
+
+  // Notes to remove, as specified by the --remove-note option.
+  SmallVector<RemoveNoteInfo, 0> NotesToRemove;
 };
 
 } // namespace objcopy
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 4793651f1d4e0b..1df64b0b7ce886 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -609,6 +609,97 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
       Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
 }
 
+template <class ELFT>
+static Error removeNoteImpl(Object &Obj,
+                            ArrayRef<RemoveNoteInfo> NotesToRemove) {
+  LLVM_ELF_IMPORT_TYPES_ELFT(ELFT);
+  for (Segment &Seg : Obj.segments()) {
+    // TODO: Support nested segments
+    if (Seg.Type != PT_NOTE || Seg.ParentSegment)
+      continue;
+
+    // Find chunks of the segment data to remove
+    struct DeletedRange {
+      uint64_t OldFrom;
+      uint64_t OldTo;
+      uint64_t NewOffset;
+    };
+    std::vector<DeletedRange> DataToRemove;
+    ArrayRef<uint8_t> OldData = Seg.getContents();
+    size_t Align = std::max<size_t>(4, Seg.Align);
+    uint64_t Offset = 0;
+    while (Offset + sizeof(Elf_Nhdr) <= OldData.size()) {
+      auto Nhdr = reinterpret_cast<const Elf_Nhdr *>(OldData.data() + Offset);
+      size_t FullSize = Nhdr->getSize(Align);
+      if (Offset + FullSize > OldData.size())
+        break;
+      Elf_Note Note(*Nhdr);
+      if (llvm::any_of(NotesToRemove, [&](const RemoveNoteInfo &NoteInfo) {
+            return NoteInfo.TypeId == Note.getType() &&
+                   (NoteInfo.Name.empty() || NoteInfo.Name == Note.getName());
+          }))
+        DataToRemove.push_back({Offset, Offset + FullSize, 0});
+      Offset += FullSize;
+    }
+    if (DataToRemove.empty())
+      continue;
+
+    // Prepare the new segment data
+    std::vector<uint8_t> NewData;
+    NewData.reserve(OldData.size());
+    Offset = 0;
+    for (auto &RemRange : DataToRemove) {
+      if (Offset < RemRange.OldFrom) {
+        auto Slice = OldData.slice(Offset, RemRange.OldFrom - Offset);
+        NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+      }
+      RemRange.NewOffset = NewData.size();
+      Offset = RemRange.OldTo;
+    }
+    if (Offset < OldData.size()) {
+      auto Slice = OldData.slice(Offset);
+      NewData.insert(NewData.end(), Slice.begin(), Slice.end());
+    }
+
+    auto CalculateNewOffset = [&](uint64_t SecOffset) {
+      uint64_t Offset = SecOffset - Seg.Offset;
+      auto It =
+          llvm::upper_bound(DataToRemove, Offset,
+                            [](const uint64_t &Off, const DeletedRange &Range) {
+                              return Off < Range.OldFrom;
+                            });
+      if (It != DataToRemove.begin()) {
+        --It;
+        Offset = (Offset > It->OldTo) ? (Offset - It->OldTo + It->NewOffset)
+                                      : It->NewOffset;
+      }
+      return Offset + Seg.Offset;
+    };
+
+    // Remap the segment's sections
+    DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>> Mapping;
+    for (const SectionBase *Sec : Seg.Sections) {
+      uint64_t NewOffset = CalculateNewOffset(Sec->Offset);
+      uint64_t NewSize =
+          CalculateNewOffset(Sec->Offset + Sec->Size) - NewOffset;
+      Mapping.try_emplace(Sec, NewOffset, NewSize);
+    }
+
+    Obj.updateSegmentData(Seg, std::move(NewData), Mapping);
+  }
+  return Error::success();
+}
+
+static Error removeNote(Object &Obj, endianness Endianness,
+                        ArrayRef<RemoveNoteInfo> NotesToRemove) {
+  // Note: notes for both 32-bit and 64-bit ELF files use 4-byte words in the
+  // header, so the parsers are the same.
+  if (Endianness == endianness::little)
+    return removeNoteImpl<ELF64LE>(Obj, NotesToRemove);
+  else
+    return removeNoteImpl<ELF64BE>(Obj, NotesToRemove);
+}
+
 static Error
 handleUserSection(const NewSectionInfo &NewSection,
                   function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
@@ -799,6 +890,11 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
                      ? endianness::little
                      : endianness::big;
 
+  if (!ELFConfig.NotesToRemove.empty()) {
+    if (Error Err = removeNote(Obj, E, ELFConfig.NotesToRemove))
+      return Err;
+  }
+
   for (const NewSectionInfo &AddedSection : Config.AddSection) {
     auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) -> Error {
       OwnedDataSection &NewSection =
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 01c2f24629077a..9f460f685706c8 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2308,6 +2308,27 @@ Error Object::addNewSymbolTable() {
   return Error::success();
 }
 
+void Object::updateSegmentData(
+    Segment &S, std::vector<uint8_t> NewSegmentData,
+    const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+        &SectionMapping) {
+  auto It =
+      UpdatedSegments.insert_or_assign(&S, std::move(NewSegmentData)).first;
+  S.Contents = It->second;
+  S.FileSize = S.Contents.size();
+  if (S.MemSize)
+    S.MemSize = S.FileSize;
+  assert(SectionMapping.size() == S.Sections.size());
+  for (const auto &SM : SectionMapping) {
+    assert(SM.first->ParentSegment == &S && S.Sections.count(SM.first));
+    assert(SM.second.first >= S.Offset);
+    assert((SM.second.first + SM.second.second) <= (S.Offset + S.FileSize));
+    SectionBase *MutSec = const_cast<SectionBase *>(SM.first);
+    MutSec->Offset = SM.second.first;
+    MutSec->Size = SM.second.second;
+  }
+}
+
 // Orders segments such that if x = y->ParentSegment then y comes before x.
 static void orderSegments(std::vector<Segment *> &Segments) {
   llvm::stable_sort(Segments, compareSegmentsByOffset);
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 6ccf85387131e4..5e16d4c0c1885a 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -1159,6 +1159,7 @@ class Object {
   std::vector<SegPtr> Segments;
   std::vector<SecPtr> RemovedSections;
   DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
+  DenseMap<Segment *, std::vector<uint8_t>> UpdatedSegments;
 
   static bool sectionIsAlloc(const SectionBase &Sec) {
     return Sec.Flags & ELF::SHF_ALLOC;
@@ -1234,6 +1235,10 @@ class Object {
     Segments.emplace_back(std::make_unique<Segment>(Data));
     return *Segments.back();
   }
+  void updateSegmentData(
+      Segment &S, std::vector<uint8_t> NewSegmentData,
+      const DenseMap<const SectionBase *, std::pair<uint64_t, uint64_t>>
+          &SectionMapping);
   bool isRelocatable() const {
     return (Type != ELF::ET_DYN && Type != ELF::ET_EXEC) || MustBeRelocatable;
   }
diff --git a/llvm/test/tools/llvm-objcopy/ELF/remove-note.test b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
new file mode 100644
index 00000000000000..b24538e26d019e
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/remove-note.test
@@ -0,0 +1,98 @@
+# RUN: not llvm-objcopy --remove-note= - 2>&1 | FileCheck %s --check-prefix=ERR-NOTYPEID
+# RUN: not llvm-objcopy --remove-note=/1 - 2>&1 | FileCheck %s --check-prefix=ERR-EMPTYNAME
+# RUN: not llvm-objcopy --remove-note=CORE/1/2 - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM1
+# RUN: not llvm-objcopy --remove-note=Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+# RUN: not llvm-objcopy --remove-note=CORE/Notanumber - 2>&1 | FileCheck %s --check-prefix=ERR-INVNUM2
+
+# ERR-NOTYPEID: error: bad format for --remove-note, missing type_id
+# ERR-EMPTYNAME: error: bad format for --remove-note, note name is empty
+# ERR-INVNUM1: error: bad note type_id for --remove-note: '1/2'
+# ERR-INVNUM2: error: bad note type_id for --remove-note: 'Notanumber'
+
+# RUN: yaml2obj -D ALIGN=8 %s -o - \
+# RUN:   | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN:   | llvm-readobj --segments --sections --notes - \
+# RUN:   | FileCheck %s -D#SIZE=64
+
+# RUN: yaml2obj -D ALIGN=4 %s -o - \
+# RUN:   | llvm-objcopy --remove-note=0x01 --remove-note=DUMMY/0x02 --remove-note=CORE/0x03 - - \
+# RUN:   | llvm-readobj --segments --sections --notes - \
+# RUN:   | FileCheck %s -D#SIZE=48
+
+# CHECK:      Sections [
+# CHECK:        Section {
+# CHECK:          Name: .note
+# CHECK-NEXT:     Type: SHT_NOTE
+# CHECK-NEXT:     Flags [
+# CHECK-NEXT:     ]
+# CHECK-NEXT:     Address:
+# CHECK-NEXT:     Offset: [[OFFSET:0x.+]]
+# CHECK-NEXT:     Size: [[#%d,SIZE]]
+
+# CHECK:      ProgramHeaders [
+# CHECK-NEXT:   ProgramHeader {
+# CHECK-NEXT:     Type: PT_NOTE
+# CHECK-NEXT:     Offset: [[OFFSET]]
+# CHECK-NEXT:     VirtualAddress: 0x0
+# CHECK-NEXT:     PhysicalAddress: 0x0
+# CHECK-NEXT:     FileSize: [[#%d,SIZE]]
+# CHECK-NEXT:     MemSize: 0
+
+# CHECK:      NoteSections [
+# CHECK-NEXT:   NoteSection {
+# CHECK-NEXT:     Name:
+# CHECK-NEXT:     Offset: [[OFFSET]]
+# CHECK-NEXT:     Size: 0x[[#%x,SIZE]]
+# CHECK-NEXT:     Notes [
+# CHECK-NEXT:       {
+# CHECK-NEXT:         Owner: CORE
+# CHECK-NEXT:         Data size: 0x2
+# CHECK-NEXT:         Type: NT_FPREGSET
+# CHECK-NEXT:         Description data (
+# CHECK-NEXT:           0000: 0202
+# CHECK-NEXT:         )
+# CHECK-NEXT:       }
+# CHECK-NEXT:       {
+# CHECK-NEXT:         Owner: CORE
+# CHECK-NEXT:         Data size: 0x2
+# CHECK-NEXT:         Type: NT_TASKSTRUCT
+# CHECK-NEXT:         Description data (
+# CHECK-NEXT:           0000: 0404
+# CHECK-NEXT:         )
+# CHECK-NEXT:       }
+# CHECK-NEXT:     ]
+# CHECK-NEXT:   }
+# CHECK-NEXT: ]
+
+--- !ELF
+FileHeader:
+  Class:          ELFCLASS64
+  Data:           ELFDATA2LSB
+  Type:           ET_CORE
+  Machine:        EM_X86_64
+ProgramHeaders:
+  - Type:         PT_NOTE
+    MemSize:      0
+    FirstSec:     .note
+    LastSec:      .note
+Sections:
+  - Name:         .note
+    Type:         SHT_NOTE
+    AddressAlign: [[ALIGN]]
+    Notes:
+      - Name:   CORE
+        Type:   0x01
+        Desc:   0101
+      - Name:   CORE
+        Type:   0x02
+        Desc:   0202
+      - Name:   CORE
+        Type:   0x03
+        Desc:   0303
+      - Name:   CORE
+        Type:   0x04
+        Desc:   0404
+      - Name:   LINUX
+        Type:   0x01
+        Desc:   0505
+...
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 104d802b1e1eeb..5e348d65adca18 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -527,6 +527,37 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
   return SI;
 }
 
+static Expected<RemoveNoteInfo> parseRemoveNoteInfo(StringRef FlagValue) {
+  // Parse value given with --remove-note option. The format is:
+  //
+  // [name/]type_id
+  //
+  // where:
+  // <name>    - optional note name. If not given, all notes with the specified
+  //             <type_id> are removed.
+  // <type_id> - note type value; either a decimal number or a hexadecimal
+  //             number prefixed with 0x.
+  RemoveNoteInfo NI;
+  if (FlagValue.empty())
+    return createStringError(errc::invalid_argument,
+                             "bad format for --remove-note, missing type_id");
+  SmallVector<StringRef, 2> Tokens;
+  FlagValue.split(Tokens, '/', /*MaxSplit=*/1);
+  assert(!Tokens.empty() && Tokens.size() <= 2);
+  if (Tokens.size() == 2) {
+    if (Tokens[0].empty())
+      return createStringError(
+          errc::invalid_argument,
+          "bad format for --remove-note, note name is empty");
+    NI.Name = Tokens[0];
+  }
+  if (Tokens.back().getAsInteger(0, NI.TypeId))
+    return createStringError(errc::invalid_argument,
+                             "bad note type_id for --remove-note: '%s'",
+                             Tokens.back().str().c_str());
+  return NI;
+}
+
 // Parse input option \p ArgValue and load section data. This function
 // extracts section name and name of the file keeping section data from
 // ArgValue, loads data from the file, and stores section name and data
@@ -1210,6 +1241,14 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> ArgsArr,
       };
     }
 
+  for (auto *Arg : InputArgs.filtered(OBJCOPY_remove_note)) {
+    Expected<RemoveNoteInfo> NoteInfo = parseRemoveNoteInfo(Arg->getValue());
+    if (!NoteInfo)
+      return NoteInfo.takeError();
+
+    ELFConfig.NotesToRemove.push_back(*NoteInfo);
+  }
+
   if (Config.DecompressDebugSections &&
       Config.CompressionType != DebugCompressionType::None) {
     return createStringError(
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 434b5ff92324eb..fbc6a59d9461e7 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -297,3 +297,7 @@ defm pad_to
                    "of zero or the value specified by the --gap-fill option. "
                    "This option is only supported for ELF input and binary output">,
       MetaVarName<"address">;
+
+defm remove_note
+    : Eq<"remove-note", "Remove note(s) with <type_id> and optional <name>">,
+      MetaVarName<"[name/]type_id">;



More information about the llvm-commits mailing list