[llvm] main (PR #100801)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 26 12:05:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Vladislav Khmelevsky (yota9)

<details>
<summary>Changes</summary>

- **[BOLT][NFC] Fix build**
- **[BOLT] Abort on out-of-section symbols in GOT**


---
Full diff: https://github.com/llvm/llvm-project/pull/100801.diff


5 Files Affected:

- (modified) bolt/include/bolt/Core/BinaryContext.h (+2-1) 
- (modified) bolt/lib/Core/BinaryContext.cpp (+16-6) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+33-8) 
- (added) bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script (+6) 
- (added) bolt/test/AArch64/got_end_of_section_symbol.s (+28) 


``````````diff
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 73932c4ca2fb3..42ab171706827 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -890,7 +890,8 @@ class BinaryContext {
   /// of \p Flags.
   MCSymbol *registerNameAtAddress(StringRef Name, uint64_t Address,
                                   uint64_t Size, uint16_t Alignment,
-                                  unsigned Flags = 0);
+                                  unsigned Flags = 0,
+                                  BinarySection *Section = NULL);
 
   /// Return BinaryData registered at a given \p Address or nullptr if no
   /// global symbol was registered at the location.
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 0a1f1bb9e0d20..648c74a4ed048 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1054,18 +1054,28 @@ void BinaryContext::adjustCodePadding() {
 MCSymbol *BinaryContext::registerNameAtAddress(StringRef Name, uint64_t Address,
                                                uint64_t Size,
                                                uint16_t Alignment,
-                                               unsigned Flags) {
+                                               unsigned Flags,
+                                               BinarySection *Section) {
   // Register the name with MCContext.
   MCSymbol *Symbol = Ctx->getOrCreateSymbol(Name);
+  BinaryData *BD;
+
+  // Register out of section symbols only in GlobalSymbols map
+  if (Section && Section->getEndAddress() == Address) {
+    BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
+                        *Section, Flags);
+    GlobalSymbols[Name] = BD;
+    return Symbol;
+  }
 
   auto GAI = BinaryDataMap.find(Address);
-  BinaryData *BD;
   if (GAI == BinaryDataMap.end()) {
     ErrorOr<BinarySection &> SectionOrErr = getSectionForAddress(Address);
-    BinarySection &Section =
-        SectionOrErr ? SectionOrErr.get() : absoluteSection();
+    BinarySection &SectionRef = Section        ? *Section
+                                : SectionOrErr ? SectionOrErr.get()
+                                               : absoluteSection();
     BD = new BinaryData(*Symbol, Address, Size, Alignment ? Alignment : 1,
-                        Section, Flags);
+                        SectionRef, Flags);
     GAI = BinaryDataMap.emplace(Address, BD).first;
     GlobalSymbols[Name] = BD;
     updateObjectNesting(GAI);
@@ -1399,7 +1409,7 @@ void BinaryContext::postProcessSymbolTable() {
     if ((BD->getName().starts_with("SYMBOLat") ||
          BD->getName().starts_with("DATAat")) &&
         !BD->getParent() && !BD->getSize() && !BD->isAbsolute() &&
-        BD->getSection()) {
+        BD->getSection().getSize()) {
       this->errs() << "BOLT-WARNING: zero-sized top level symbol: " << *BD
                    << "\n";
       Valid = false;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 32562ccb6b345..e41abe489c875 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -954,13 +954,13 @@ void RewriteInstance::discoverFileObjects() {
     uint64_t SymbolSize = ELFSymbolRef(Symbol).getSize();
     uint64_t SymbolAlignment = Symbol.getAlignment();
 
-    auto registerName = [&](uint64_t FinalSize) {
+    auto registerName = [&](uint64_t FinalSize, BinarySection *Section = NULL) {
       // Register names even if it's not a function, e.g. for an entry point.
       BC->registerNameAtAddress(UniqueName, SymbolAddress, FinalSize,
-                                SymbolAlignment, SymbolFlags);
+                                SymbolAlignment, SymbolFlags, Section);
       if (!AlternativeName.empty())
         BC->registerNameAtAddress(AlternativeName, SymbolAddress, FinalSize,
-                                  SymbolAlignment, SymbolFlags);
+                                  SymbolAlignment, SymbolFlags, Section);
     };
 
     section_iterator Section =
@@ -985,12 +985,25 @@ void RewriteInstance::discoverFileObjects() {
                       << " for function\n");
 
     if (SymbolAddress == Section->getAddress() + Section->getSize()) {
+      ErrorOr<BinarySection &> SectionOrError =
+          BC->getSectionForAddress(Section->getAddress());
+
+      // Skip symbols from invalid sections
+      if (!SectionOrError) {
+        BC->errs() << "BOLT-WARNING: " << UniqueName << " (0x"
+                   << Twine::utohexstr(SymbolAddress)
+                   << ") does not have any section\n";
+        continue;
+      }
+
       assert(SymbolSize == 0 &&
              "unexpect non-zero sized symbol at end of section");
-      LLVM_DEBUG(
-          dbgs()
-          << "BOLT-DEBUG: rejecting as symbol points to end of its section\n");
-      registerName(SymbolSize);
+      LLVM_DEBUG({
+        dbgs() << "BOLT-DEBUG: rejecting as symbol " << UniqueName
+               << " points to end of " << SectionOrError->getName()
+               << " section\n";
+      });
+      registerName(SymbolSize, &SectionOrError.get());
       continue;
     }
 
@@ -1432,7 +1445,9 @@ void RewriteInstance::registerFragments() {
   // of the last local symbol.
   ELFSymbolRef LocalSymEnd = ELF64LEFile->toSymbolRef(SymTab, SymTab->sh_info);
 
-  for (auto &[ParentName, BF] : AmbiguousFragments) {
+  for (auto &Fragment : AmbiguousFragments) {
+    const StringRef &ParentName = Fragment.first;
+    BinaryFunction *BF = Fragment.second;
     const uint64_t Address = BF->getAddress();
 
     // Get fragment's own symbol
@@ -2557,6 +2572,16 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
     return;
   }
 
+  if (Relocation::isGOT(RType) && !Relocation::isTLS(RType)) {
+    BinaryData *BD = BC->getBinaryDataByName(SymbolName);
+    if (BD && BD->getAddress() == BD->getSection().getEndAddress()) {
+      BC->errs() << "BOLT-ERROR: GOT table contains currently unsupported "
+                    "section end symbol "
+                 << BD->getName() << "\n";
+      exit(1);
+    }
+  }
+
   const uint64_t Address = SymbolAddress + Addend;
 
   LLVM_DEBUG({
diff --git a/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
new file mode 100644
index 0000000000000..2ad4169bbcc60
--- /dev/null
+++ b/bolt/test/AArch64/Inputs/got_end_of_section_symbol.lld_script
@@ -0,0 +1,6 @@
+SECTIONS {
+  PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS;
+  .data : { *(.data)  *(.array) }
+  .text : { *(.text) }
+  .got  : { *(.got) *(.igot) }
+}
diff --git a/bolt/test/AArch64/got_end_of_section_symbol.s b/bolt/test/AArch64/got_end_of_section_symbol.s
new file mode 100644
index 0000000000000..c203214fe3fbe
--- /dev/null
+++ b/bolt/test/AArch64/got_end_of_section_symbol.s
@@ -0,0 +1,28 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags  -nostartfiles -nodefaultlibs -static -Wl,--no-relax \
+# RUN:   -Wl,-q -Wl,-T %S/Inputs/got_end_of_section_symbol.lld_script  \
+# RUN:   %t.o -o %t.exe
+# RUN: not llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s
+
+# CHECK: BOLT-ERROR: GOT table contains currently unsupported section end
+# CHECK-SAME: symbol array_end
+
+.section .array, "a", @progbits
+.globl array_start
+.globl array_end
+array_start:
+  .word 0
+array_end:
+
+.section .text
+.globl _start
+.type exitOk, %function
+_start:
+  adrp x1, #:got:array_start
+  ldr x1, [x1, #:got_lo12:array_start]
+  adrp x0, #:got:array_end
+  ldr x0, [x0, #:got_lo12:array_end]
+  adrp x2, #:got:_start
+  ldr x2, [x2, #:got_lo12:_start]
+  ret

``````````

</details>


https://github.com/llvm/llvm-project/pull/100801


More information about the llvm-commits mailing list