[llvm] [BOLT] Pass unfiltered relocations to disassembler. NFCI (PR #131202)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 13 12:58:31 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-bolt

Author: Maksim Panchenko (maksfb)

<details>
<summary>Changes</summary>

Instead of filtering and modifying relocations in readRelocations(), preserve the relocation info and use it in the symbolizing disassembler. This change mostly affects AArch64, where we need to look at original linker relocations in order to properly symbolize instruction operands.

---
Full diff: https://github.com/llvm/llvm-project/pull/131202.diff


8 Files Affected:

- (modified) bolt/include/bolt/Core/MCPlusBuilder.h (+10) 
- (modified) bolt/include/bolt/Core/Relocation.h (-5) 
- (modified) bolt/lib/Core/BinaryFunction.cpp (+11-2) 
- (modified) bolt/lib/Core/Relocation.cpp (-85) 
- (modified) bolt/lib/Rewrite/RewriteInstance.cpp (+20-23) 
- (modified) bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp (+2-2) 
- (modified) bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp (+64-16) 
- (modified) bolt/lib/Target/AArch64/AArch64MCSymbolizer.h (+8) 


``````````diff
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index 1d45a314a17b6..1677f3300c61b 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -637,6 +637,16 @@ class MCPlusBuilder {
     return false;
   }
 
+  virtual bool isAddXri(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
+  virtual bool isMOVW(const MCInst &Inst) const {
+    llvm_unreachable("not implemented");
+    return false;
+  }
+
   virtual bool isMoveMem2Reg(const MCInst &Inst) const { return false; }
 
   virtual bool mayLoad(const MCInst &Inst) const {
diff --git a/bolt/include/bolt/Core/Relocation.h b/bolt/include/bolt/Core/Relocation.h
index 933f62a31f8fd..00b4c603efee1 100644
--- a/bolt/include/bolt/Core/Relocation.h
+++ b/bolt/include/bolt/Core/Relocation.h
@@ -59,11 +59,6 @@ struct Relocation {
   /// Skip relocations that we don't want to handle in BOLT
   static bool skipRelocationType(uint64_t Type);
 
-  /// Handle special cases when relocation should not be processed by BOLT or
-  /// change relocation \p Type to proper one before continuing if \p Contents
-  /// and \P Type mismatch occurred.
-  static bool skipRelocationProcess(uint64_t &Type, uint64_t Contents);
-
   // Adjust value depending on relocation type (make it PC relative or not)
   static uint64_t encodeValue(uint64_t Type, uint64_t Value, uint64_t PC);
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 35617a92c1b2a..6681cb6a06271 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1473,10 +1473,19 @@ Error BinaryFunction::disassemble() {
           }
         }
 
+        uint64_t Addend = Relocation.Addend;
+
+        // For GOT relocations, create a reference against GOT entry ignoring
+        // the relocation symbol.
+        if (Relocation::isGOT(Relocation.Type)) {
+          assert(Relocation::isPCRelative(Relocation.Type) &&
+                 "GOT relocation must be PC-relative on RISC-V");
+          Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+          Addend = Relocation.Value + Relocation.Offset + getAddress();
+        }
         int64_t Value = Relocation.Value;
         const bool Result = BC.MIB->replaceImmWithSymbolRef(
-            Instruction, Symbol, Relocation.Addend, Ctx.get(), Value,
-            Relocation.Type);
+            Instruction, Symbol, Addend, Ctx.get(), Value, Relocation.Type);
         (void)Result;
         assert(Result && "cannot replace immediate with relocation");
       }
diff --git a/bolt/lib/Core/Relocation.cpp b/bolt/lib/Core/Relocation.cpp
index 523ab8480cc90..a5e21f7d883e1 100644
--- a/bolt/lib/Core/Relocation.cpp
+++ b/bolt/lib/Core/Relocation.cpp
@@ -256,78 +256,6 @@ static bool skipRelocationTypeRISCV(uint64_t Type) {
   }
 }
 
-static bool skipRelocationProcessX86(uint64_t &Type, uint64_t Contents) {
-  return false;
-}
-
-static bool skipRelocationProcessAArch64(uint64_t &Type, uint64_t Contents) {
-  auto IsMov = [](uint64_t Contents) -> bool {
-    // The bits 28-23 are 0b100101
-    return (Contents & 0x1f800000) == 0x12800000;
-  };
-
-  auto IsB = [](uint64_t Contents) -> bool {
-    // The bits 31-26 are 0b000101
-    return (Contents & 0xfc000000) == 0x14000000;
-  };
-
-  auto IsAddImm = [](uint64_t Contents) -> bool {
-    // The bits 30-23 are 0b00100010
-    return (Contents & 0x7F800000) == 0x11000000;
-  };
-
-  // The linker might relax ADRP+LDR instruction sequence for loading symbol
-  // address from GOT table to ADRP+ADD sequence that would point to the
-  // binary-local symbol. Change relocation type in order to process it right.
-  if (Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && IsAddImm(Contents)) {
-    Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
-    return false;
-  }
-
-  // The linker might perform TLS relocations relaxations, such as
-  // changed TLS access model (e.g. changed global dynamic model
-  // to initial exec), thus changing the instructions. The static
-  // relocations might be invalid at this point and we might no
-  // need to process these relocations anymore.
-  // More information could be found by searching
-  // elfNN_aarch64_tls_relax in bfd
-  switch (Type) {
-  default:
-    break;
-  case ELF::R_AARCH64_TLSDESC_LD64_LO12:
-  case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
-  case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
-  case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: {
-    if (IsMov(Contents))
-      return true;
-  }
-  }
-
-  // The linker might replace load/store instruction with jump and
-  // veneer due to errata 843419
-  // https://documentation-service.arm.com/static/5fa29fddb209f547eebd361d
-  // Thus load/store relocations for these instructions must be ignored
-  // NOTE: We only process GOT and TLS relocations this way since the
-  // addend used in load/store instructions won't change after bolt
-  // (it is important since the instruction in veneer won't have relocation)
-  switch (Type) {
-  default:
-    break;
-  case ELF::R_AARCH64_LD64_GOT_LO12_NC:
-  case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
-  case ELF::R_AARCH64_TLSDESC_LD64_LO12: {
-    if (IsB(Contents))
-      return true;
-  }
-  }
-
-  return false;
-}
-
-static bool skipRelocationProcessRISCV(uint64_t &Type, uint64_t Contents) {
-  return false;
-}
-
 static uint64_t encodeValueX86(uint64_t Type, uint64_t Value, uint64_t PC) {
   switch (Type) {
   default:
@@ -797,19 +725,6 @@ bool Relocation::skipRelocationType(uint64_t Type) {
   }
 }
 
-bool Relocation::skipRelocationProcess(uint64_t &Type, uint64_t Contents) {
-  switch (Arch) {
-  default:
-    llvm_unreachable("Unsupported architecture");
-  case Triple::aarch64:
-    return skipRelocationProcessAArch64(Type, Contents);
-  case Triple::riscv64:
-    return skipRelocationProcessRISCV(Type, Contents);
-  case Triple::x86_64:
-    return skipRelocationProcessX86(Type, Contents);
-  }
-}
-
 uint64_t Relocation::encodeValue(uint64_t Type, uint64_t Value, uint64_t PC) {
   switch (Arch) {
   default:
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 9a2d228718283..e3b136378d51d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -2229,8 +2229,6 @@ bool RewriteInstance::analyzeRelocation(
   ErrorOr<uint64_t> Value =
       BC->getUnsignedValueAtAddress(Rel.getOffset(), RelSize);
   assert(Value && "failed to extract relocated value");
-  if ((Skip = Relocation::skipRelocationProcess(RType, *Value)))
-    return true;
 
   ExtractedValue = Relocation::extractValue(RType, *Value, Rel.getOffset());
   Addend = getRelocationAddend(InputFile, Rel);
@@ -2283,17 +2281,14 @@ bool RewriteInstance::analyzeRelocation(
     }
   }
 
-  // If no symbol has been found or if it is a relocation requiring the
-  // creation of a GOT entry, do not link against the symbol but against
-  // whatever address was extracted from the instruction itself. We are
-  // not creating a GOT entry as this was already processed by the linker.
-  // For GOT relocs, do not subtract addend as the addend does not refer
-  // to this instruction's target, but it refers to the target in the GOT
-  // entry.
-  if (Relocation::isGOT(RType)) {
-    Addend = 0;
-    SymbolAddress = ExtractedValue + PCRelOffset;
-  } else if (Relocation::isTLS(RType)) {
+  // GOT relocation can cause the underlying instruction to be modified by the
+  // linker, resulting in the extracted value being different from the actual
+  // symbol. It's also possible to have a GOT entry for a symbol defined in the
+  // binary. In the latter case, the instruction can be using the GOT version
+  // causing the extracted value mismatch. Similar cases can happen for TLS.
+  // Pass the relocation information as is to the disassembler and let it decide
+  // how to use it for the operand symbolization.
+  if (Relocation::isGOT(RType) || Relocation::isTLS(RType)) {
     SkipVerification = true;
   } else if (!SymbolAddress) {
     assert(!IsSectionRelocation);
@@ -2666,11 +2661,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
 
   MCSymbol *ReferencedSymbol = nullptr;
   if (!IsSectionRelocation) {
-    if (BinaryData *BD = BC->getBinaryDataByName(SymbolName))
+    if (BinaryData *BD = BC->getBinaryDataByName(SymbolName)) {
       ReferencedSymbol = BD->getSymbol();
-    else if (BC->isGOTSymbol(SymbolName))
+    } else if (BC->isGOTSymbol(SymbolName)) {
       if (BinaryData *BD = BC->getGOTSymbol())
         ReferencedSymbol = BD->getSymbol();
+    } else if (BinaryData *BD = BC->getBinaryDataAtAddress(SymbolAddress)) {
+      ReferencedSymbol = BD->getSymbol();
+    }
   }
 
   ErrorOr<BinarySection &> ReferencedSection{std::errc::bad_address};
@@ -2798,15 +2796,14 @@ void RewriteInstance::handleRelocation(const SectionRef &RelocatedSection,
     }
   }
 
-  if (ForceRelocation) {
-    std::string Name =
-        Relocation::isGOT(RType) ? "__BOLT_got_zero" : SymbolName;
-    ReferencedSymbol = BC->registerNameAtAddress(Name, 0, 0, 0);
-    SymbolAddress = 0;
-    if (Relocation::isGOT(RType))
-      Addend = Address;
+  if (ForceRelocation && !ReferencedBF) {
+    // Create the relocation symbol if it's not defined in the binary.
+    if (SymbolAddress == 0)
+      ReferencedSymbol = BC->registerNameAtAddress(SymbolName, 0, 0, 0);
+
     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: forcing relocation against symbol "
-                      << SymbolName << " with addend " << Addend << '\n');
+                      << ReferencedSymbol->getName() << " with addend "
+                      << Addend << '\n');
   } else if (ReferencedBF) {
     ReferencedSymbol = ReferencedBF->getSymbol();
     uint64_t RefFunctionOffset = 0;
diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 685b2279e5afb..c002e03dd5766 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -281,7 +281,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
     return Inst.getOpcode() == AArch64::ADR;
   }
 
-  bool isAddXri(const MCInst &Inst) const {
+  bool isAddXri(const MCInst &Inst) const override {
     return Inst.getOpcode() == AArch64::ADDXri;
   }
 
@@ -318,7 +318,7 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
             Inst.getOpcode() == AArch64::CBZX);
   }
 
-  bool isMOVW(const MCInst &Inst) const {
+  bool isMOVW(const MCInst &Inst) const override {
     return (Inst.getOpcode() == AArch64::MOVKWi ||
             Inst.getOpcode() == AArch64::MOVKXi ||
             Inst.getOpcode() == AArch64::MOVNWi ||
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
index d08bca6e0fc3e..2bebe8048da1f 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -45,24 +45,15 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
         BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
   };
 
-  // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
-  // NOP+ADR. After the conversion, the linker might keep the relocations and
-  // if we try to symbolize ADR's operand using outdated relocations, we might
-  // get unexpected results. Hence, we check for the conversion/relaxation, and
-  // ignore the relocation. The symbolization is done based on the PC-relative
-  // value of the operand instead.
-  if (Relocation && BC.MIB->isADR(Inst)) {
-    if (Relocation->Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
-        Relocation->Type == ELF::R_AARCH64_LD64_GOT_LO12_NC) {
-      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
-                        << Twine::utohexstr(InstAddress) << '\n');
-      Relocation = nullptr;
+  if (Relocation) {
+    auto AdjustedRel = adjustRelocation(*Relocation, Inst);
+    if (AdjustedRel) {
+      addOperand(AdjustedRel->Symbol, AdjustedRel->Addend, AdjustedRel->Type);
+      return true;
     }
-  }
 
-  if (Relocation) {
-    addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
-    return true;
+    LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ignoring relocation at 0x"
+                      << Twine::utohexstr(InstAddress) << '\n');
   }
 
   if (!BC.MIB->hasPCRelOperand(Inst))
@@ -88,6 +79,63 @@ bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
   return true;
 }
 
+std::optional<Relocation>
+AArch64MCSymbolizer::adjustRelocation(const Relocation &Rel,
+                                      const MCInst &Inst) const {
+  BinaryContext &BC = Function.getBinaryContext();
+
+  // The linker can convert ADRP+ADD and ADRP+LDR instruction sequences into
+  // NOP+ADR. After the conversion, the linker might keep the relocations and
+  // if we try to symbolize ADR's operand using outdated relocations, we might
+  // get unexpected results. Hence, we check for the conversion/relaxation, and
+  // ignore the relocation. The symbolization is done based on the PC-relative
+  // value of the operand instead.
+  if (BC.MIB->isADR(Inst) && (Rel.Type == ELF::R_AARCH64_ADD_ABS_LO12_NC ||
+                              Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC))
+    return std::nullopt;
+
+  // The linker might perform TLS relocations relaxations, such as
+  // changed TLS access model (e.g. changed global dynamic model
+  // to initial exec), thus changing the instructions. The static
+  // relocations might be invalid at this point and we might no
+  // need to process these relocations anymore.
+  // More information could be found by searching
+  // elfNN_aarch64_tls_relax in bfd
+  if (BC.MIB->isMOVW(Inst)) {
+    switch (Rel.Type) {
+    default:
+      break;
+    case ELF::R_AARCH64_TLSDESC_LD64_LO12:
+    case ELF::R_AARCH64_TLSDESC_ADR_PAGE21:
+    case ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
+    case ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
+      return std::nullopt;
+    }
+  }
+
+  if (!Relocation::isGOT(Rel.Type))
+    return Rel;
+
+  Relocation AdjustedRel = Rel;
+  if (Rel.Type == ELF::R_AARCH64_LD64_GOT_LO12_NC && BC.MIB->isAddXri(Inst)) {
+    // The ADRP+LDR sequence was converted into ADRP+ADD. We are looking at the
+    // second instruction and have to use the relocation type for ADD.
+    AdjustedRel.Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
+  } else {
+    // For instructions that reference GOT, ignore the referenced symbol and
+    // use value at the relocation site. FixRelaxationPass will look at
+    // instruction pairs and will perform necessary adjustments.
+    ErrorOr<uint64_t> SymbolValue = BC.getSymbolValue(*Rel.Symbol);
+    assert(SymbolValue && "Symbol value should be set");
+    const uint64_t SymbolPageAddr = *SymbolValue & ~0xfffULL;
+
+    AdjustedRel.Symbol = BC.registerNameAtAddress("__BOLT_got_zero", 0, 0, 0);
+    AdjustedRel.Addend = Rel.Value;
+  }
+
+  return AdjustedRel;
+}
+
 void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
                                                           int64_t Value,
                                                           uint64_t Address) {}
diff --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
index 56ba4fbcaf275..7f06a4df1eb1d 100644
--- a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
@@ -11,6 +11,7 @@
 
 #include "bolt/Core/BinaryFunction.h"
 #include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include <optional>
 
 namespace llvm {
 namespace bolt {
@@ -20,6 +21,13 @@ class AArch64MCSymbolizer : public MCSymbolizer {
   BinaryFunction &Function;
   bool CreateNewSymbols{true};
 
+  /// Modify relocation \p Rel based on type of the relocation and the
+  /// instruction it was applied to. Return the new relocation info, or
+  /// std::nullopt if the relocation should be ignored, e.g. in the case the
+  /// instruction was modified by the linker.
+  std::optional<Relocation> adjustRelocation(const Relocation &Rel,
+                                             const MCInst &Inst) const;
+
 public:
   AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
       : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),

``````````

</details>


https://github.com/llvm/llvm-project/pull/131202


More information about the llvm-commits mailing list