[llvm] [BOLT][AArch64] Fix symbolization of unoptimized TLS access (PR #134332)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 4 11:41:55 PDT 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/134332

>From 1ad8fab7a8c2b03487401abd1663c9258a7d7145 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Thu, 3 Apr 2025 18:18:34 -0700
Subject: [PATCH] [BOLT][AArch64] Fix symbolization of unoptimized TLS access

TLS relocations may not have a valid BOLT symbol associated with them.
While symbolizing the operand, we were checking for the symbol value,
and since there was no symbol the check resulted in a crash.

Handle TLS case while performing operand symbolization on AArch64.
---
 bolt/lib/Rewrite/RewriteInstance.cpp          |  7 ++--
 .../Target/AArch64/AArch64MCSymbolizer.cpp    | 35 +++++++++++++------
 bolt/test/AArch64/tls.c                       |  8 +++++
 3 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 3217dd4324bc7..23faa92642d01 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2827,9 +2827,10 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
     if (SymbolAddress == 0)
       ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
 
-    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
-                      << ReferencedSymbol->getName() << " with addend "
-                      << Addend << '\n');
+    LLVM_DEBUG(
+        dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
+               << (ReferencedSymbol ? ReferencedSymbol->getName() : "<none>")
+               << " with addend " << Addend << '\n');
   } else if (ReferencedBF) {
     ReferencedSymbol = ReferencedBF->getSymbol();
     uint64_t RefFunctionOffset = 0;
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
index 772328f84c97a..28b4fc0705eaf 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -119,26 +119,39 @@ AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
     // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
     // second instruction and have to use the relocation type for ADD.
     AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
-  } else {
-    // For instructions that reference GOT, ignore the referenced symbol and
-    // use value at the relocation site. FixRelaxationPass will look at
-    // instruction pairs and will perform necessary adjustments.
+    return AdjustedRel;
+  }
+
+  // ADRP is a special case since the linker can leave the instruction opcode
+  // intact and modify only the operand. We are doing our best to detect when
+  // such conversion has happened without looking at the next instruction.
+  //
+  // If we detect that a page referenced by the ADRP cannot belong to GOT, and
+  // that it matches the symbol from the relocation, then we can be certain
+  // that the linker converted the GOT reference into the local one. Otherwise,
+  // we leave the disambiguation resolution to FixRelaxationPass.
+  //
+  // Note that ADRP relaxation described above cannot happen for TLS relocation.
+  // Since TLS relocations may not even have a valid symbol (not supported by
+  // BOLT), we explicitly exclude them from the check.
+  if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 && !Relocation::isTLS(Rel.Type)) {
     ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
     assert(SymbolValue && "Symbol value should be set");
     const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
 
-    // Check if defined symbol and GOT are on the same page. If they are not,
-    // disambiguate the operand.
-    if (BC.MIB->isADRP(Inst) && Rel.Addend == 0 &&
-        SymbolPageAddr == Rel.Value &&
+    if (SymbolPageAddr == Rel.Value &&
         !isPageAddressValidForGOT(SymbolPageAddr)) {
       AdjustedRel.Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
-    } else {
-      AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
-      AdjustedRel.Addend = Rel.Value;
+      return AdjustedRel;
     }
   }
 
+  // For instructions that reference GOT, ignore the referenced symbol and
+  // use value at the relocation site. FixRelaxationPass will look at
+  // instruction pairs and will perform necessary adjustments.
+  AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+  AdjustedRel.Addend = Rel.Value;
+
   return AdjustedRel;
 }
 
diff --git a/bolt/test/AArch64/tls.c b/bolt/test/AArch64/tls.c
index 3aa33777114ad..b531811f679ff 100644
--- a/bolt/test/AArch64/tls.c
+++ b/bolt/test/AArch64/tls.c
@@ -34,3 +34,11 @@ int main() {
 // RUN:   -target aarch64-linux -fuse-ld=lld \
 // RUN:   -nostdlib
 // RUN: llvm-bolt %t_pie.exe -o %t.bolt
+
+// RUN: %clang %cflags -fPIC -shared %s -o %t.so -Wl,-q -fuse-ld=lld
+// RUN: llvm-objdump -d -r --disassemble-symbols=main %t.so | FileCheck %s
+// RUN: llvm-bolt %t.so -o %t.bolt.so
+
+// Verify that unoptimized TLS access was generated for shared object.
+// CHECK:      adrp    x0
+// CHECK-NEXT: R_AARCH64_TLSDESC_ADR_PAGE21     tbssstruct



More information about the llvm-commits mailing list