[llvm] [BOLT] Pass unfiltered relocations to disassembler. NFCI (PR #131202)

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 14 13:14:41 PDT 2025


https://github.com/maksfb updated https://github.com/llvm/llvm-project/pull/131202

>From 21fdd030770610956fe69f06a3fc72596b5d7be0 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks at fb.com>
Date: Thu, 13 Mar 2025 12:43:39 -0700
Subject: [PATCH] [BOLT] Pass unfiltered relocations to disassembler. NFCI

Instead of filtering and modifying relocations in readRelocations(),
preserve the relocation info and use it in the symbolizing disassembler.
This change mostly affects AArch64, where we need to look at original
linker relocations in order to properly symbolize instruction operands.
---
 bolt/include/bolt/Core/MCPlusBuilder.h        | 10 +++
 bolt/include/bolt/Core/Relocation.h           |  5 --
 bolt/lib/Core/BinaryFunction.cpp              | 13 ++-
 bolt/lib/Core/Relocation.cpp                  | 85 -------------------
 bolt/lib/Rewrite/RewriteInstance.cpp          | 43 +++++-----
 .../Target/AArch64/AArch64MCPlusBuilder.cpp   |  4 +-
 .../Target/AArch64/AArch64MCSymbolizer.cpp    | 78 +++++++++++++----
 bolt/lib/Target/AArch64/AArch64MCSymbolizer.h |  8 ++
 8 files changed, 113 insertions(+), 133 deletions(-)

diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 0927b01cd421d..b285138b77fe7 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -637,6 +637,16 @@ class MCPlusBuilder {
     return false;
   }
 
+  virtual bool isAddXri(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isMOVW(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
 
   virtual bool mayLoad(const MCInst &Inst) const {
diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h
index 7b468566a1d30..1f18dad008ca2 100644
--- a/bolt/include/bolt/Core/Relocation.h
+++ b/bolt/include/bolt/Core/Relocation.h
@@ -64,11 +64,6 @@ struct Relocation {
   /// Skip relocations that we don't want to handle in BOLT
   static bool skipRelocationType(uint32_t Type);
 
-  /// Handle special cases when relocation should not be processed by BOLT or
-  /// change relocation \p Type to proper one before continuing if \p Contents
-  /// and \P Type mismatch occurred.
-  static bool skipRelocationProcess(uint32_t &Type, uint64_t Contents);
-
   /// Adjust value depending on relocation type (make it PC relative or not).
   static uint64_t encodeValue(uint32_t Type, uint64_t Value, uint64_t PC);
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index a5dc188c98601..acf04cee769c1 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
           }
         }
 
+        uint64_t Addend = Relocation.Addend;
+
+        // For GOT relocations, create a reference against GOT entry ignoring
+        // the relocation symbol.
+        if (Relocation::isGOT(Relocation.Type)) {
+          assert(Relocation::isPCRelative(Relocation.Type) &&
+                 "GOT relocation must be PC-relative on RISC-V");
+          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+          Addend = Relocation.Value + Relocation.Offset + getAddress();
+        }
         int64_t Value = Relocation.Value;
         const bool Result = BC.MIB->replaceImmWithSymbolRef(
-            Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
-            Relocation.Type);
+            Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
         (void)Result;
         assert(Result && "cannot replace immediate with relocation");
       }
diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp
index e91b6702c8d6c..1a142c7d9716c 100644
--- a/bolt/lib/Core/Relocation.cpp
+++ b/bolt/lib/Core/Relocation.cpp
@@ -257,78 +257,6 @@ static bool skipRelocationTypeRISCV(uint32_t Type) {
   }
 }
 
-static bool skipRelocationProcessX86(uint32_t &Type, uint64_t Contents) {
-  return false;
-}
-
-static bool skipRelocationProcessAArch64(uint32_t &Type, uint64_t Contents) {
-  auto IsMov = [](uint64_t Contents) -> bool {
-    // The bits 28-23 are 0b100101
-    return (Contents & 0x1f800000) == 0x12800000;
-  };
-
-  auto IsB = [](uint64_t Contents) -> bool {
-    // The bits 31-26 are 0b000101
-    return (Contents & 0xfc000000) == 0x14000000;
-  };
-
-  auto IsAddImm = [](uint64_t Contents) -> bool {
-    // The bits 30-23 are 0b00100010
-    return (Contents & 0x7F800000) == 0x11000000;
-  };
-
-  // The linker might relax ADRP+LDR instruction sequence for loading symbol
-  // address from GOT table to ADRP+ADD sequence that would point to the
-  // binary-local symbol. Change relocation type in order to process it right.
-  if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
-    Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
-    return false;
-  }
-
-  // The linker might perform TLS relocations relaxations, such as
-  // changed TLS access model (e.g. changed global dynamic model
-  // to initial exec), thus changing the instructions. The static
-  // relocations might be invalid at this point and we might no
-  // need to process these relocations anymore.
-  // More information could be found by searching
-  // elfNN_aarch64_tls_relax in bfd
-  switch (Type) {
-  default:
-    break;
-  case ELF::R_AARCH64_TLSDESC_LD64_LO12:
-  case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
-  case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
-  case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
-    if (IsMov(Contents))
-      return true;
-  }
-  }
-
-  // The linker might replace load/store instruction with jump and
-  // veneer due to errata 843419
-  // https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
-  // Thus load/store relocations for these instructions must be ignored
-  // NOTE: We only process GOT and TLS relocations this way since the
-  // addend used in load/store instructions won't change after bolt
-  // (it is important since the instruction in veneer won't have relocation)
-  switch (Type) {
-  default:
-    break;
-  case ELF::R_AARCH64_LD64_GOT_LO12_NC:
-  case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
-  case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
-    if (IsB(Contents))
-      return true;
-  }
-  }
-
-  return false;
-}
-
-static bool skipRelocationProcessRISCV(uint32_t &Type, uint64_t Contents) {
-  return false;
-}
-
 static uint64_t encodeValueX86(uint32_t Type, uint64_t Value, uint64_t PC) {
   switch (Type) {
   default:
@@ -798,19 +726,6 @@ bool Relocation::skipRelocationType(uint32_t Type) {
   }
 }
 
-bool Relocation::skipRelocationProcess(uint32_t &Type, uint64_t Contents) {
-  switch (Arch) {
-  default:
-    llvm_unreachable("Unsupported architecture");
-  case Triple::aarch64:
-    return skipRelocationProcessAArch64(Type, Contents);
-  case Triple::riscv64:
-    return skipRelocationProcessRISCV(Type, Contents);
-  case Triple::x86_64:
-    return skipRelocationProcessX86(Type, Contents);
-  }
-}
-
 uint64_t Relocation::encodeValue(uint32_t Type, uint64_t Value, uint64_t PC) {
   switch (Arch) {
   default:
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c50850e9090ca..560b30c6c676c 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
   ErrorOr<uint64_t> Value =
       BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
   assert(Value && "failed to extract relocated value");
-  if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
-    return true;
 
   ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
   Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
     }
   }
 
-  // If no symbol has been found or if it is a relocation requiring the
-  // creation of a GOT entry, do not link against the symbol but against
-  // whatever address was extracted from the instruction itself. We are
-  // not creating a GOT entry as this was already processed by the linker.
-  // For GOT relocs, do not subtract addend as the addend does not refer
-  // to this instruction's target, but it refers to the target in the GOT
-  // entry.
-  if (Relocation::isGOT(RType)) {
-    Addend = 0;
-    SymbolAddress = ExtractedValue + PCRelOffset;
-  } else if (Relocation::isTLS(RType)) {
+  // GOT relocation can cause the underlying instruction to be modified by the
+  // linker, resulting in the extracted value being different from the actual
+  // symbol. It's also possible to have a GOT entry for a symbol defined in the
+  // binary. In the latter case, the instruction can be using the GOT version
+  // causing the extracted value mismatch. Similar cases can happen for TLS.
+  // Pass the relocation information as is to the disassembler and let it decide
+  // how to use it for the operand symbolization.
+  if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
     SkipVerification = true;
   } else if (!SymbolAddress) {
     assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
 
   MCSymbol *ReferencedSymbol = nullptr;
   if (!IsSectionRelocation) {
-    if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
+    if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
       ReferencedSymbol = BD->getSymbol();
-    else if (BC->isGOTSymbol(SymbolName))
+    } else if (BC->isGOTSymbol(SymbolName)) {
       if (BinaryData *BD = BC->getGOTSymbol())
         ReferencedSymbol = BD->getSymbol();
+    } else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
+      ReferencedSymbol = BD->getSymbol();
+    }
   }
 
   ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
     }
   }
 
-  if (ForceRelocation) {
-    std::string Name =
-        Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
-    ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
-    SymbolAddress = 0;
-    if (Relocation::isGOT(RType))
-      Addend = Address;
+  if (ForceRelocation && !ReferencedBF) {
+    // Create the relocation symbol if it's not defined in the binary.
+    if (SymbolAddress == 0)
+      ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
+
     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
-                      << SymbolName << " with addend " << Addend << '\n');
+                      << ReferencedSymbol->getName() << " with addend "
+                      << Addend << '\n');
   } else if (ReferencedBF) {
     ReferencedSymbol = ReferencedBF->getSymbol();
     uint64_t RefFunctionOffset = 0;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 73f27b00213d0..c03b4aedbec65 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return Inst.getOpcode() == AArch64::ADR;
   }
 
-  bool isAddXri(const MCInst &Inst) const {
+  bool isAddXri(const MCInst &Inst) const override {
     return Inst.getOpcode() == AArch64::ADDXri;
   }
 
@@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
             Inst.getOpcode() == AArch64::CBZX);
   }
 
-  bool isMOVW(const MCInst &Inst) const {
+  bool isMOVW(const MCInst &Inst) const override {
     return (Inst.getOpcode() == AArch64::MOVKWi ||
             Inst.getOpcode() == AArch64::MOVKXi ||
             Inst.getOpcode() == AArch64::MOVNWi ||
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
index d08bca6e0fc3e..231582b5aabe4 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
         BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
   };
 
-  // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
-  // NOP+ADR. After the conversion, the linker might keep the relocations and
-  // if we try to symbolize ADR's operand using outdated relocations, we might
-  // get unexpected results. Hence, we check for the conversion/relaxation, and
-  // ignore the relocation. The symbolization is done based on the PC-relative
-  // value of the operand instead.
-  if (Relocation && BC.MIB->isADR(Inst)) {
-    if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
-        Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
-      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
-                        << Twine::utohexstr(InstAddress) << '\n');
-      Relocation = nullptr;
+  if (Relocation) {
+    auto AdjustedRel = adjustRelocation(*Relocation, Inst);
+    if (AdjustedRel) {
+      addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
+      return true;
     }
-  }
 
-  if (Relocation) {
-    addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
-    return true;
+    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
+                      << Twine::utohexstr(InstAddress) << '\n');
   }
 
   if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,61 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
   return true;
 }
 
+std::optional<Relocation>
+AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
+                                      const MCInst &Inst) const {
+  BinaryContext &BC = Function.getBinaryContext();
+
+  // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
+  // NOP+ADR. After the conversion, the linker might keep the relocations and
+  // if we try to symbolize ADR's operand using outdated relocations, we might
+  // get unexpected results. Hence, we check for the conversion/relaxation, and
+  // ignore the relocation. The symbolization is done based on the PC-relative
+  // value of the operand instead.
+  if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
+                              Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
+    return std::nullopt;
+
+  // The linker might perform TLS relocations relaxations, such as changed TLS
+  // access model (e.g. changed global dynamic model to initial exec), thus
+  // changing the instructions. The static relocations might be invalid at this
+  // point and we don't have to process these relocations anymore. More
+  // information could be found by searching elfNN_aarch64_tls_relax in bfd.
+  if (BC.MIB->isMOVW(Inst)) {
+    switch (Rel.Type) {
+    default:
+      break;
+    case ELF::R_AARCH64_TLSDESC_LD64_LO12:
+    case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
+    case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
+    case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
+      return std::nullopt;
+    }
+  }
+
+  if (!Relocation::isGOT(Rel.Type))
+    return Rel;
+
+  Relocation AdjustedRel = Rel;
+  if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
+    // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
+    // second instruction and have to use the relocation type for ADD.
+    AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+  } else {
+    // For instructions that reference GOT, ignore the referenced symbol and
+    // use value at the relocation site. FixRelaxationPass will look at
+    // instruction pairs and will perform necessary adjustments.
+    ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
+    assert(SymbolValue && "Symbol value should be set");
+    const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
+
+    AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+    AdjustedRel.Addend = Rel.Value;
+  }
+
+  return AdjustedRel;
+}
+
 void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
                                                           int64_t Value,
                                                           uint64_t Address) {}
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
index 56ba4fbcaf275..7f06a4df1eb1d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
@@ -11,6 +11,7 @@
 
 #include "bolt/Core/BinaryFunction.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include <optional>
 
 namespace llvm {
 namespace bolt {
@@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
   BinaryFunction &Function;
   bool CreateNewSymbols{true};
 
+  /// Modify relocation \p Rel based on type of the relocation and the
+  /// instruction it was applied to. Return the new relocation info, or
+  /// std::nullopt if the relocation should be ignored, e.g. in the case the
+  /// instruction was modified by the linker.
+  std::optional<Relocation> adjustRelocation(const Relocation &Rel,
+                                             const MCInst &Inst) const;
+
 public:
   AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
       : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),



More information about the llvm-commits mailing list