[llvm] [BOLT] Support input binaries that use R_X86_GOTPC64 (PR #68036)

Rafael Auler via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 2 13:44:47 PDT 2023


https://github.com/rafaelauler created https://github.com/llvm/llvm-project/pull/68036

In the large code model, the address of the GOT is calculated by the static linker via an R_X86_64_GOTPC64 reloc applied against a MOVABSQ instruction. In the final binary, it disassembles as a regular immediate, but because that immediate is the result of PC-relative pointer arithmetic, we need to parse this relocation and update this calculation whenever we move code; otherwise we break the code trying to read the GOT.

A test case showing how GOT is accessed was provided.

Differential Revision: https://reviews.llvm.org/D158911

>From b6b3149cbc4cdd962b7ffcb0131d6283929bb597 Mon Sep 17 00:00:00 2001
From: Rafael Auler <rafaelauler at fb.com>
Date: Thu, 17 Aug 2023 18:14:53 -0700
Subject: [PATCH] [BOLT] Support input binaries that use R_X86_GOTPC64

In the large code model, the address of the GOT is calculated by the
static linker via an R_X86_64_GOTPC64 reloc applied against a MOVABSQ
instruction. In the final binary, it disassembles as a regular
immediate, but because that immediate is the result of PC-relative
pointer arithmetic, we need to parse this relocation and update this
calculation whenever we move code; otherwise we break the code trying
to read the GOT.

A test case showing how GOT is accessed was provided.

Differential Revision: https://reviews.llvm.org/D158911
---
 bolt/include/bolt/Core/BinaryContext.h        |  9 +++
 bolt/include/bolt/Core/Relocation.h           |  1 +
 bolt/lib/Core/BinaryContext.cpp               | 25 ++++++++
 bolt/lib/Core/Relocation.cpp                  |  9 +++
 bolt/lib/Rewrite/JITLinkLinker.cpp            | 14 +++++
 bolt/lib/Rewrite/RewriteInstance.cpp          |  6 +-
 bolt/lib/Target/X86/X86MCPlusBuilder.cpp      |  1 +
 bolt/lib/Target/X86/X86MCSymbolizer.cpp       | 35 ++++++++++++
 bolt/lib/Target/X86/X86MCSymbolizer.h         |  3 +
 .../runtime/X86/gotoff-large-code-model-2.s   | 57 +++++++++++++++++++
 .../runtime/X86/gotoff-large-code-model.s     | 55 ++++++++++++++++++
 11 files changed, 214 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/runtime/X86/gotoff-large-code-model-2.s
 create mode 100644 bolt/test/runtime/X86/gotoff-large-code-model.s

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index ef57ff3541dc8c9..cc3eda332e42d74 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -871,6 +871,15 @@ class BinaryContext {
     return nullptr;
   }
 
+  /// Retrieves a reference to ELF's _GLOBAL_OFFSET_TABLE_ symbol, which points
+  /// at GOT, or null if it is not present in the input binary symtab.
+  BinaryData *getGlobalOffsetTableSym();
+
+  /// Return true if \p SymName is exactly "_GLOBAL_OFFSET_TABLE_".
+  bool isGlobalOffsetTableSym(StringRef SymName) const {
+    return SymName == "_GLOBAL_OFFSET_TABLE_";
+  }
+
   /// Return true if \p SymbolName was generated internally and was not present
   /// in the input binary.
   bool isInternalSymbolName(const StringRef Name) {
diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h
index 5ae288a91986e52..6fc105773828580 100644
--- a/bolt/include/bolt/Core/Relocation.h
+++ b/bolt/include/bolt/Core/Relocation.h
@@ -84,6 +84,7 @@ struct Relocation {
 
   /// Special relocation type that allows the linker to modify the instruction.
   static bool isX86GOTPCRELX(uint64_t Type);
+  static bool isX86GOTPC64(uint64_t Type);
 
   /// Return true if relocation type is NONE
   static bool isNone(uint64_t Type);
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 8132e5c213af449..d0e7ab2e2919187 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -1026,6 +1026,31 @@ BinaryContext::getBinaryDataContainingAddressImpl(uint64_t Address) const {
   return nullptr;
 }
 
+BinaryData *BinaryContext::getGlobalOffsetTableSym() {
+  // First tries to find a global symbol with that name
+  BinaryData *GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_");
+  if (GOTSymBD)
+    return GOTSymBD;
+
+  // This symbol might be hidden from run-time link, so fetch the local
+  // definition if available.
+  GOTSymBD = getBinaryDataByName("_GLOBAL_OFFSET_TABLE_/1");
+  if (!GOTSymBD)
+    return nullptr;
+
+  // If the local symbol is not unique, fail. Every "/N" duplicate must share
+  // the address of "/1". Each name is built from scratch with Twine::str():
+  // Twine::toStringRef appends to its buffer, so reusing one buffer across
+  // iterations would corrupt every name after the first.
+  unsigned Index = 2;
+  while (const BinaryData *BD = getBinaryDataByName(
+             (Twine("_GLOBAL_OFFSET_TABLE_/") + Twine(Index++)).str()))
+    if (BD->getAddress() != GOTSymBD->getAddress())
+      return nullptr;
+
+  return GOTSymBD;
+}
+
 bool BinaryContext::setBinaryDataSize(uint64_t Address, uint64_t Size) {
   auto NI = BinaryDataMap.find(Address);
   assert(NI != BinaryDataMap.end());
diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp
index a73545905c545f4..6a4e7089bf24898 100644
--- a/bolt/lib/Core/Relocation.cpp
+++ b/bolt/lib/Core/Relocation.cpp
@@ -35,6 +35,7 @@ static bool isSupportedX86(uint64_t Type) {
   case ELF::R_X86_64_PC32:
   case ELF::R_X86_64_PC64:
   case ELF::R_X86_64_PLT32:
+  case ELF::R_X86_64_GOTPC64:
   case ELF::R_X86_64_GOTPCREL:
   case ELF::R_X86_64_GOTTPOFF:
   case ELF::R_X86_64_TPOFF32:
@@ -136,6 +137,7 @@ static size_t getSizeForTypeX86(uint64_t Type) {
     return 4;
   case ELF::R_X86_64_PC64:
   case ELF::R_X86_64_64:
+  case ELF::R_X86_64_GOTPC64:
     return 8;
   }
 }
@@ -655,6 +657,7 @@ static bool isPCRelativeX86(uint64_t Type) {
   case ELF::R_X86_64_PLT32:
   case ELF::R_X86_64_GOTOFF64:
   case ELF::R_X86_64_GOTPC32:
+  case ELF::R_X86_64_GOTPC64:
   case ELF::R_X86_64_GOTTPOFF:
   case ELF::R_X86_64_GOTPCRELX:
   case ELF::R_X86_64_REX_GOTPCRELX:
@@ -797,6 +800,12 @@ bool Relocation::isX86GOTPCRELX(uint64_t Type) {
   return Type == ELF::R_X86_64_GOTPCRELX || Type == ELF::R_X86_64_REX_GOTPCRELX;
 }
 
+/// Return true only when the current architecture is x86-64 and \p Type is
+/// the R_X86_64_GOTPC64 relocation.
+bool Relocation::isX86GOTPC64(uint64_t Type) {
+  return Arch == Triple::x86_64 && Type == ELF::R_X86_64_GOTPC64;
+}
+
 bool Relocation::isNone(uint64_t Type) { return Type == getNone(); }
 
 bool Relocation::isRelative(uint64_t Type) {
diff --git a/bolt/lib/Rewrite/JITLinkLinker.cpp b/bolt/lib/Rewrite/JITLinkLinker.cpp
index 1df086a665789dd..8d4c9b16ca279ac 100644
--- a/bolt/lib/Rewrite/JITLinkLinker.cpp
+++ b/bolt/lib/Rewrite/JITLinkLinker.cpp
@@ -141,6 +141,20 @@ struct JITLinkLinker::Context : jitlink::JITLinkContext {
             orc::ExecutorAddr(Address), JITSymbolFlags());
         continue;
       }
+
+      if (Linker.BC.isGlobalOffsetTableSym(SymName)) {
+        if (const BinaryData *I = Linker.BC.getGlobalOffsetTableSym()) {
+          uint64_t Address = I->isMoved() && !I->isJumpTable()
+                                 ? I->getOutputAddress()
+                                 : I->getAddress();
+          LLVM_DEBUG(dbgs() << "Resolved to address 0x"
+                            << Twine::utohexstr(Address) << "\n");
+          AllResults[Symbol.first] = orc::ExecutorSymbolDef(
+              orc::ExecutorAddr(Address), JITSymbolFlags());
+          continue;
+        }
+      }
+
       LLVM_DEBUG(dbgs() << "Resolved to address 0x0\n");
       AllResults[Symbol.first] =
           orc::ExecutorSymbolDef(orc::ExecutorAddr(0), JITSymbolFlags());
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 03cbe0461b21127..3c89385b1300911 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2394,9 +2394,13 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
   }
 
   MCSymbol *ReferencedSymbol = nullptr;
-  if (!IsSectionRelocation)
+  if (!IsSectionRelocation) {
     if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
       ReferencedSymbol = BD->getSymbol();
+    else if (BC->isGlobalOffsetTableSym(SymbolName))
+      if (BinaryData *BD = BC->getGlobalOffsetTableSym())
+        ReferencedSymbol = BD->getSymbol();
+  }
 
   ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
   symbol_iterator SymbolIter = Rel.getSymbol();
diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
index 4cb9d61710d1da7..99b08a91d969604 100644
--- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
+++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp
@@ -403,6 +403,7 @@ class X86MCPlusBuilder : public MCPlusBuilder {
     case ELF::R_X86_64_PC8:
     case ELF::R_X86_64_PC32:
     case ELF::R_X86_64_PC64:
+    case ELF::R_X86_64_GOTPC64:
     case ELF::R_X86_64_GOTPCRELX:
     case ELF::R_X86_64_REX_GOTPCRELX:
       return true;
diff --git a/bolt/lib/Target/X86/X86MCSymbolizer.cpp b/bolt/lib/Target/X86/X86MCSymbolizer.cpp
index 5eeb18467f84a67..8e0a6a1a073e2e6 100644
--- a/bolt/lib/Target/X86/X86MCSymbolizer.cpp
+++ b/bolt/lib/Target/X86/X86MCSymbolizer.cpp
@@ -130,6 +130,15 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
   if (!Relocation)
     return processPCRelOperandNoRel();
 
+  // GOTPC64 is special because the X86 Assembler doesn't know how to emit
+  // a PC-relative 8-byte fixup, which is what we need to cover this. The
+  // only way to do this is to use the symbol name _GLOBAL_OFFSET_TABLE_.
+  if (Relocation::isX86GOTPC64(Relocation->Type)) {
+    auto [Sym, Addend] = handleGOTPC64(*Relocation, InstAddress);
+    addOperand(Sym, Addend);
+    return true;
+  }
+
   uint64_t SymbolValue = Relocation->Value - Relocation->Addend;
   if (Relocation->isPCRelative())
     SymbolValue += InstAddress + ImmOffset;
@@ -149,6 +158,32 @@ bool X86MCSymbolizer::tryAddingSymbolicOperand(
   return true;
 }
 
+std::pair<MCSymbol *, uint64_t>
+X86MCSymbolizer::handleGOTPC64(const Relocation &R, uint64_t InstrAddr) {
+  BinaryContext &BC = Function.getBinaryContext();
+  const BinaryData *GOTSymBD = BC.getGlobalOffsetTableSym();
+  if (!GOTSymBD || !GOTSymBD->getAddress()) {
+    errs() << "BOLT-ERROR: R_X86_64_GOTPC64 relocation is present but we did "
+              "not detect a valid _GLOBAL_OFFSET_TABLE_ in symbol table.\n";
+    exit(1);
+  }
+  // R_X86_64_GOTPC64 is relative neither to the relocation offset nor to the
+  // end of the instruction, but to the start of the MOVABSQ instruction. So
+  // the target address is whatever is encoded in the original operand when
+  // we disassembled the binary (here, R.Value) plus the MOVABSQ address
+  // (InstrAddr). Subtracting the real GOT address leaves the addend, kept
+  // unsigned to match the return type (wraparound encodes negative values).
+  const uint64_t Addend = R.Value + InstrAddr - GOTSymBD->getAddress();
+  for (MCSymbol *GOTSym : GOTSymBD->symbols())
+    if (BC.isGlobalOffsetTableSym(GOTSym->getName()))
+      return std::make_pair(GOTSym, Addend);
+
+  // If we can't find a global _GLOBAL_OFFSET_TABLE_, it got renamed because
+  // it is local/hidden. Just create our own to please the assembler.
+  return std::make_pair(BC.Ctx->getOrCreateSymbol("_GLOBAL_OFFSET_TABLE_"),
+                        Addend);
+}
+
 void X86MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
                                                       int64_t Value,
                                                       uint64_t Address) {}
diff --git a/bolt/lib/Target/X86/X86MCSymbolizer.h b/bolt/lib/Target/X86/X86MCSymbolizer.h
index 83a039bd70310b3..9ed18b69c74ce40 100644
--- a/bolt/lib/Target/X86/X86MCSymbolizer.h
+++ b/bolt/lib/Target/X86/X86MCSymbolizer.h
@@ -20,6 +20,9 @@ class X86MCSymbolizer : public MCSymbolizer {
   BinaryFunction &Function;
   bool CreateNewSymbols{true};
 
+  std::pair<MCSymbol *, uint64_t> handleGOTPC64(const Relocation &R,
+                                                uint64_t InstrAddr);
+
 public:
   X86MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
       : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
diff --git a/bolt/test/runtime/X86/gotoff-large-code-model-2.s b/bolt/test/runtime/X86/gotoff-large-code-model-2.s
new file mode 100644
index 000000000000000..eae34cc24594b02
--- /dev/null
+++ b/bolt/test/runtime/X86/gotoff-large-code-model-2.s
@@ -0,0 +1,57 @@
+# A variation of gotoff-large-code-model.s that accesses GOT value
+# with a slightly different code sequence.
+
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
+
+# RUN: llvm-bolt %t.exe --funcs init_impls -lite \
+# RUN:   -o %t.bolted
+# RUN: %t.bolted | FileCheck %s
+
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.LC2:
+	.string	"Hello, world\n"
+	.text
+	.p2align 4
+	.globl	init_impls
+	.type	init_impls, @function
+init_impls:
+	.cfi_startproc
+  push   %rbp
+  mov    %rsp,%rbp
+  push   %r15
+  push   %rbx
+  sub    $0x8,%rsp
+  lea    1f(%rip),%rbx
+  #  R_X86_64_GOTPC64  _GLOBAL_OFFSET_TABLE_+0x2
+1: movabsq $_GLOBAL_OFFSET_TABLE_, %r11
+  add    %r11,%rbx
+  #  R_X86_64_GOTOFF64 .LC2
+  movabs $.LC2@gotoff,%rax
+  lea    (%rbx,%rax,1),%rax
+  mov    %rax,%rdi
+  mov    %rbx,%r15
+  #  R_X86_64_PLTOFF64 puts
+  movabs $puts@pltoff,%rax
+  add    %rbx,%rax
+  call   *%rax
+  add    $0x8,%rsp
+  pop    %rbx
+  pop    %r15
+  pop    %rbp
+  retq
+  .cfi_endproc
+  .size init_impls, .-init_impls
+
+  .globl main
+  .type main, @function
+  .p2align 4
+main:
+  callq init_impls
+  xorq  %rax, %rax
+  ret
+
+# CHECK: Hello, world
diff --git a/bolt/test/runtime/X86/gotoff-large-code-model.s b/bolt/test/runtime/X86/gotoff-large-code-model.s
new file mode 100644
index 000000000000000..2b09a1f41b51d04
--- /dev/null
+++ b/bolt/test/runtime/X86/gotoff-large-code-model.s
@@ -0,0 +1,55 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+# RUN:   %s -o %t.o
+# RUN: %clang %cflags -no-pie %t.o -o %t.exe -Wl,-q
+
+# RUN: llvm-bolt %t.exe --funcs init_impls -lite \
+# RUN:   -o %t.bolted
+# RUN: %t.bolted | FileCheck %s
+
+	.section	.rodata.str1.1,"aMS",@progbits,1
+.LC2:
+	.string	"Hello, world\n"
+	.text
+	.p2align 4
+	.globl	init_impls
+	.type	init_impls, @function
+init_impls:
+	.cfi_startproc
+  push   %rbp
+  mov    %rsp,%rbp
+  push   %r15
+  push   %rbx
+  sub    $0x8,%rsp
+1:
+  lea    1b(%rip),%rbx
+  #  R_X86_64_GOTPC64  _GLOBAL_OFFSET_TABLE_+0x9
+  movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r11
+  add    %r11,%rbx
+  #  R_X86_64_GOTOFF64 .LC2
+  movabs $.LC2@gotoff,%rax
+  lea    (%rbx,%rax,1),%rax
+  mov    %rax,%rdi
+  mov    %rbx,%r15
+  #  R_X86_64_PLTOFF64 puts
+  movabs $puts@pltoff,%rax
+  add    %rbx,%rax
+  call   *%rax
+  add    $0x8,%rsp
+  pop    %rbx
+  pop    %r15
+  pop    %rbp
+  retq
+  .cfi_endproc
+  .size init_impls, .-init_impls
+
+  .globl main
+  .type main, @function
+  .p2align 4
+main:
+  callq init_impls
+  xorq  %rax, %rax
+  ret
+
+# CHECK: Hello, world



More information about the llvm-commits mailing list