[llvm] [BOLT] Adjust section sizes based on file offsets (PR #80226)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 23:21:43 PST 2024


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/80226

>From 76a36466ebb699cd39737fd39ca734d5c6682974 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 31 Jan 2024 15:54:33 -0800
Subject: [PATCH 1/2] [BOLT] Adjust section sizes based on file offsets

When we adjust section sizes while rewriting a binary, we should be
using section offsets and not addresses to determine whether sections overlap.
NFC for existing binaries.
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 30 ++++++++++++++--------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index dee1bf125f0a7..3b488dc5e9f54 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -4153,10 +4153,10 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
 
   // Keep track of section header entries attached to the corresponding section.
   std::vector<std::pair<BinarySection *, ELFShdrTy>> OutputSections;
-  auto addSection = [&](const ELFShdrTy &Section, BinarySection *BinSec) {
+  auto addSection = [&](const ELFShdrTy &Section, BinarySection &BinSec) {
     ELFShdrTy NewSection = Section;
-    NewSection.sh_name = SHStrTab.getOffset(BinSec->getOutputName());
-    OutputSections.emplace_back(BinSec, std::move(NewSection));
+    NewSection.sh_name = SHStrTab.getOffset(BinSec.getOutputName());
+    OutputSections.emplace_back(&BinSec, std::move(NewSection));
   };
 
   // Copy over entries for original allocatable sections using modified name.
@@ -4174,7 +4174,7 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
     BinarySection *BinSec = BC->getSectionForSectionRef(SecRef);
     assert(BinSec && "Matching BinarySection should exist.");
 
-    addSection(Section, BinSec);
+    addSection(Section, *BinSec);
   }
 
   for (BinarySection &Section : BC->allocatableSections()) {
@@ -4201,7 +4201,7 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
     NewSection.sh_link = 0;
     NewSection.sh_info = 0;
     NewSection.sh_addralign = Section.getAlignment();
-    addSection(NewSection, &Section);
+    addSection(NewSection, Section);
   }
 
   // Sort all allocatable sections by their offset.
@@ -4215,19 +4215,19 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
   for (auto &SectionKV : OutputSections) {
     ELFShdrTy &Section = SectionKV.second;
 
-    // TBSS section does not take file or memory space. Ignore it for layout
-    // purposes.
-    if (Section.sh_type == ELF::SHT_NOBITS && (Section.sh_flags & ELF::SHF_TLS))
+    // Ignore NOBITS sections as they don't take any space in the file.
+    if (Section.sh_type == ELF::SHT_NOBITS)
       continue;
 
+    // Note that address continuity is not guaranteed as sections could be
+    // placed in different loadable segments.
     if (PrevSection &&
-        PrevSection->sh_addr + PrevSection->sh_size > Section.sh_addr) {
-      if (opts::Verbosity > 1)
+        PrevSection->sh_offset + PrevSection->sh_size > Section.sh_offset) {
+      if (opts::Verbosity > 1) {
         outs() << "BOLT-INFO: adjusting size for section "
                << PrevBinSec->getOutputName() << '\n';
-      PrevSection->sh_size = Section.sh_addr > PrevSection->sh_addr
-                                 ? Section.sh_addr - PrevSection->sh_addr
-                                 : 0;
+      }
+      PrevSection->sh_size = Section.sh_offset - PrevSection->sh_offset;
     }
 
     PrevSection = &Section;
@@ -4261,7 +4261,7 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
     if (NewSection.sh_type == ELF::SHT_SYMTAB)
       NewSection.sh_info = NumLocalSymbols;
 
-    addSection(NewSection, BinSec);
+    addSection(NewSection, *BinSec);
 
     LastFileOffset = BinSec->getOutputFileOffset();
   }
@@ -4286,7 +4286,7 @@ RewriteInstance::getOutputSections(ELFObjectFile<ELFT> *File,
     NewSection.sh_info = 0;
     NewSection.sh_addralign = Section.getAlignment();
 
-    addSection(NewSection, &Section);
+    addSection(NewSection, Section);
   }
 
   // Assign indices to sections.

>From fabf62818dd357bf69a16ef89629959ba54bcb5b Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Wed, 31 Jan 2024 23:21:11 -0800
Subject: [PATCH 2/2] fixup! [BOLT] Adjust section sizes based on file offsets

---
 bolt/test/X86/phdr-out-of-order.test | 52 ++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 bolt/test/X86/phdr-out-of-order.test

diff --git a/bolt/test/X86/phdr-out-of-order.test b/bolt/test/X86/phdr-out-of-order.test
new file mode 100644
index 0000000000000..1e2fc2a5a4cd8
--- /dev/null
+++ b/bolt/test/X86/phdr-out-of-order.test
@@ -0,0 +1,52 @@
+## Check that llvm-bolt correctly processes a binary with program headers and
+## corresponding sections specified in non-ascending address order.
+
+RUN: split-file %s %t
+RUN: yaml2obj %t/yaml -o %t.exe --max-size=0
+RUN: llvm-bolt %t.exe -o %t.bolt --allow-stripped
+RUN: llvm-readelf -WS %t.bolt | FileCheck %s
+
+CHECK:      .a PROGBITS 0000000000400000 [[#%.6x, OFFSET:]] 000001
+CHECK-NEXT: .b PROGBITS 0000000000000000 [[#%.6x, OFFSET+1]] 000001
+CHECK-NEXT: .c PROGBITS 0000000000600000 [[#%.6x, OFFSET+2]] 000001
+
+#--- yaml
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+  Machine: EM_X86_64
+ProgramHeaders:
+  - Type: PT_LOAD
+    FirstSec: .a
+    LastSec: .a
+    VAddr: 0x400000
+  - Type: PT_LOAD
+    FirstSec: .b
+    LastSec: .b
+    VAddr: 0x0
+  - Type: PT_LOAD
+    FirstSec: .c
+    LastSec: .c
+    VAddr: 0x600000
+Sections:
+  - Name: .a
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Content: 00
+    AddressAlign: 0x1
+    Address: 0x400000
+  - Name: .b
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Content: 00
+    AddressAlign: 0x1
+    Address: 0x0
+  - Name: .c
+    Type: SHT_PROGBITS
+    Flags: [ SHF_ALLOC ]
+    Content: 00
+    AddressAlign: 0x1
+    Address: 0x600000
+...



More information about the llvm-commits mailing list