[llvm] [ELF][Objcopy] Dont corrupt symbol table when `--update-section` is called for ELF files (PR #170462)

Dmitry Nechitaev via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 19 04:43:35 PST 2025


https://github.com/Nechda updated https://github.com/llvm/llvm-project/pull/170462

>From 9b7bd87d5defc88f1cfb3f8f7db8273ecbb101ca Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Wed, 3 Dec 2025 13:59:07 +0300
Subject: [PATCH 1/6] Update-section dont corrupt symbols

---
 llvm/lib/ObjCopy/ELF/ELFObject.cpp     |  9 ++++++++-
 llvm/unittests/ObjCopy/ObjCopyTest.cpp | 26 ++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 641966a867102..4aa509b161173 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2171,7 +2171,14 @@ Error Object::updateSectionData(SecPtr &Sec, ArrayRef<uint8_t> Data) {
                              Data.size(), Sec->Name.c_str(), Sec->Size);
 
   if (!Sec->ParentSegment) {
-    Sec = std::make_unique<OwnedDataSection>(*Sec, Data);
+    // Be careful: the "Sec" refers to an item in a std::vector,
+    // so any changes to the std::vector could invalidate this reference.
+    // To avoid possible memory-related issues, save raw pointers.
+    auto Replaced = Sec.get();
+    auto Modified = &addSection<OwnedDataSection>(*Sec, Data);
+    DenseMap<SectionBase *, SectionBase *> replacements{{Replaced, Modified}};
+    if (auto err = replaceSections(replacements))
+      return err;
   } else {
     // The segment writer will be in charge of updating these contents.
     Sec->Size = Data.size();
diff --git a/llvm/unittests/ObjCopy/ObjCopyTest.cpp b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
index 4382c73e889e9..629c67ec310e1 100644
--- a/llvm/unittests/ObjCopy/ObjCopyTest.cpp
+++ b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
@@ -98,6 +98,24 @@ const char *SimpleFileWasmYAML = R"(
 ...
 )";
 
+const char *SimpleFileELFWithSymbolYAML = R"(
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_REL
+Sections:
+  - Name: .foo
+    Type: SHT_PROGBITS
+    Size: 0x10
+Symbols:
+  - Name: foo
+    Type: STT_FUNC
+    Value: 0x1
+    Section: .foo
+...
+)";
+
 // Create ObjectFile from \p YamlCreationString and do validation using \p
 // IsValidFormat checker. \p Storage is a storage for data. \returns created
 // ObjectFile.
@@ -321,6 +339,14 @@ TEST(UpdateSection, ELF) {
       ".text", "1234", UpdateSection);
 }
 
+TEST(UpdateSectionKeepSymbols, ELF) {
+  SCOPED_TRACE("updateSectionToFileELFKeepSymbols");
+
+  addOrUpdateSectionToFileImpl(
+      SimpleFileELFWithSymbolYAML, [](const Binary &File) { return File.isELF(); },
+      ".foo", "1234", UpdateSection);
+}
+
 TEST(UpdateSection, MachO) {
   SCOPED_TRACE("updateSectionToFileMachO");
 

>From 29b57f3c90dc16e77e8024275a2e9fc7be0e8b0e Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Wed, 3 Dec 2025 14:04:41 +0300
Subject: [PATCH 2/6] Add release notes

---
 llvm/docs/ReleaseNotes.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 503cf641a221f..cdf6d0993bedf 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -249,6 +249,8 @@ Changes to the LLVM tools
 * `llvm-objdump` now supports using `--mcpu=help` and `--mattr=help` with the `--triple` option
   without requiring an input file or the `-d` (disassemble) flag.
 
+* `llvm-objcopy` no longer corrupts the symbol table when `--update-section` is used on ELF files.
+
 Changes to LLDB
 ---------------------------------
 

>From 0297669d6b083f397130e1ee0c426daf084d971e Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Wed, 3 Dec 2025 14:11:12 +0300
Subject: [PATCH 3/6] Fix style

---
 llvm/unittests/ObjCopy/ObjCopyTest.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/unittests/ObjCopy/ObjCopyTest.cpp b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
index 629c67ec310e1..4194189c581bb 100644
--- a/llvm/unittests/ObjCopy/ObjCopyTest.cpp
+++ b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
@@ -343,8 +343,9 @@ TEST(UpdateSectionKeepSymbols, ELF) {
   SCOPED_TRACE("updateSectionToFileELFKeepSymbols");
 
   addOrUpdateSectionToFileImpl(
-      SimpleFileELFWithSymbolYAML, [](const Binary &File) { return File.isELF(); },
-      ".foo", "1234", UpdateSection);
+      SimpleFileELFWithSymbolYAML,
+      [](const Binary &File) { return File.isELF(); }, ".foo", "1234",
+      UpdateSection);
 }
 
 TEST(UpdateSection, MachO) {

>From cb0cb865417e56e98ed0ee1efa3f8867ef6aa454 Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Fri, 19 Dec 2025 13:09:19 +0300
Subject: [PATCH 4/6] Fix after review

---
 llvm/lib/ObjCopy/ELF/ELFObject.cpp | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index 4aa509b161173..0c47bdcdbc35d 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -2171,13 +2171,16 @@ Error Object::updateSectionData(SecPtr &Sec, ArrayRef<uint8_t> Data) {
                              Data.size(), Sec->Name.c_str(), Sec->Size);
 
   if (!Sec->ParentSegment) {
-    // Be careful: the "Sec" refers to an item in a std::vector,
-    // so any changes to the std::vector could invalidate this reference.
-    // To avoid possible memory-related issues, save raw pointers.
-    auto Replaced = Sec.get();
-    auto Modified = &addSection<OwnedDataSection>(*Sec, Data);
-    DenseMap<SectionBase *, SectionBase *> replacements{{Replaced, Modified}};
-    if (auto err = replaceSections(replacements))
+    // Be careful: "Sec" refers to an element of a std::vector.
+    // addSection appends a new element to that std::vector, which may
+    // reallocate it and leave "Sec" dangling. Save the raw pointer first
+    // so that the old section can still be named after the call.
+    SectionBase *Replaced = Sec.get();
+    SectionBase *Modified = &addSection<OwnedDataSection>(*Sec, Data);
+    // We also don't need to additionally remove the "Replaced" section,
+    // as this removal will be handled during the replaceSections call.
+    DenseMap<SectionBase *, SectionBase *> Replacements{{Replaced, Modified}};
+    if (auto err = replaceSections(Replacements))
       return err;
   } else {
     // The segment writer will be in charge of updating these contents.

>From 311edbaa1c2fcfc46c23106531c8a5421de3c783 Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Fri, 19 Dec 2025 15:42:30 +0300
Subject: [PATCH 5/6] Remove gtest

---
 llvm/unittests/ObjCopy/ObjCopyTest.cpp | 27 --------------------------
 1 file changed, 27 deletions(-)

diff --git a/llvm/unittests/ObjCopy/ObjCopyTest.cpp b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
index 4194189c581bb..4382c73e889e9 100644
--- a/llvm/unittests/ObjCopy/ObjCopyTest.cpp
+++ b/llvm/unittests/ObjCopy/ObjCopyTest.cpp
@@ -98,24 +98,6 @@ const char *SimpleFileWasmYAML = R"(
 ...
 )";
 
-const char *SimpleFileELFWithSymbolYAML = R"(
---- !ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data:  ELFDATA2LSB
-  Type:  ET_REL
-Sections:
-  - Name: .foo
-    Type: SHT_PROGBITS
-    Size: 0x10
-Symbols:
-  - Name: foo
-    Type: STT_FUNC
-    Value: 0x1
-    Section: .foo
-...
-)";
-
 // Create ObjectFile from \p YamlCreationString and do validation using \p
 // IsValidFormat checker. \p Storage is a storage for data. \returns created
 // ObjectFile.
@@ -339,15 +321,6 @@ TEST(UpdateSection, ELF) {
       ".text", "1234", UpdateSection);
 }
 
-TEST(UpdateSectionKeepSymbols, ELF) {
-  SCOPED_TRACE("updateSectionToFileELFKeepSymbols");
-
-  addOrUpdateSectionToFileImpl(
-      SimpleFileELFWithSymbolYAML,
-      [](const Binary &File) { return File.isELF(); }, ".foo", "1234",
-      UpdateSection);
-}
-
 TEST(UpdateSection, MachO) {
   SCOPED_TRACE("updateSectionToFileMachO");
 

>From 92aa84b13ef698f3cd1a53e2e743c8a70b86a356 Mon Sep 17 00:00:00 2001
From: Dmitry Nechitaev <nechda6 at gmail.com>
Date: Fri, 19 Dec 2025 15:42:53 +0300
Subject: [PATCH 6/6] Add lit-test

---
 .../update-section-dont-corrupt-symtab.test   | 120 ++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 llvm/test/tools/llvm-objcopy/ELF/update-section-dont-corrupt-symtab.test

diff --git a/llvm/test/tools/llvm-objcopy/ELF/update-section-dont-corrupt-symtab.test b/llvm/test/tools/llvm-objcopy/ELF/update-section-dont-corrupt-symtab.test
new file mode 100644
index 0000000000000..3e84a1550b741
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/ELF/update-section-dont-corrupt-symtab.test
@@ -0,0 +1,120 @@
+# REQUIRES: x86-registered-target
+
+# RUN: echo -n 12345678 > %t.data
+
+# RUN: echo "" > %t.log
+
+# This test is attempting to trigger symbol table corruption
+# when calling the --update-section flag. We will sequentially
+# generate object files with section_0, section_1, ..., and section_N.
+#
+# Next, we will use llvm-objcopy to update the section data with
+# the --update-section flag, specifying the new data for each section.
+#
+# Finally, we will run llvm-readobj on the resulting object file
+# to check for any errors or warnings. If the symbol table has been corrupted,
+# llvm-readobj should emit a warning message indicating that
+# the extended symbol index is past the end of the SHT_SYMTAB_SHNDX section,
+# which has a size of 0.
+#
+# Our goal is to ensure that llvm-readobj does not emit this warning, which
+# validates that --update-section has not damaged the symbol table. stderr is
+# redirected after stdout so that warnings are captured in the log file too.
+# Many objcopy invocations are needed because, if you don't run this test
+# under sanitizers, smaller test cases may not emit any error.
+
+# RUN: %python %s 01 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 02 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 03 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 04 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 05 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 06 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 07 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 08 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 09 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 10 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 11 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 12 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 13 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 14 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 15 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+# RUN: %python %s 16 %t.yaml %t.objcopy_cmds %t.data; yaml2obj %t.yaml -o %t.o
+# RUN: llvm-objcopy %t.o @%t.objcopy_cmds
+# RUN: llvm-readobj -S -s %t.o >> %t.log 2>&1
+
+# RUN: not grep -E 'warning.*extended symbol index' %t.log
+
+
+import sys
+
+templ="""
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_REL
+Sections:{}
+Symbols:{}
+""".strip()
+
+templ_sec="""
+  - Name: .section_{0}
+    Type: SHT_PROGBITS
+    Size: 0x100
+""".rstrip()
+
+sym_templ="""
+  - Name: sym{0}
+    Type: STT_FUNC
+    Value: 0x1
+    Section: .section_{0}
+""".rstrip()
+
+total_sections = int(sys.argv[1])
+output_yaml_file = sys.argv[2]
+output_objcopy_commands_file = sys.argv[3]
+file_with_data = sys.argv[4]
+
+sections=""
+symbols=""
+for i in range(total_sections):
+    suffix = str(i)
+    sections += templ_sec.format(suffix)
+    symbols += sym_templ.format(suffix)
+
+with open(output_yaml_file, 'wt') as f:
+    print(templ.format(sections, symbols), file=f)
+
+with open(output_objcopy_commands_file, 'wt') as f:
+    cmds = ' '.join([f'--update-section=.section_{i}={file_with_data}' for i in range(total_sections)])
+    f.write(cmds)



More information about the llvm-commits mailing list