[llvm] b971d4d - [BOLT][AArch64] Add symbolizer for AArch64 disassembler. NFCI (#127969)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 3 12:44:32 PST 2025


Author: Maksim Panchenko
Date: 2025-03-03T12:44:28-08:00
New Revision: b971d4d7c80821d648015281b7926ee6f93dccc9

URL: https://github.com/llvm/llvm-project/commit/b971d4d7c80821d648015281b7926ee6f93dccc9
DIFF: https://github.com/llvm/llvm-project/commit/b971d4d7c80821d648015281b7926ee6f93dccc9.diff

LOG: [BOLT][AArch64] Add symbolizer for AArch64 disassembler. NFCI (#127969)

Add AArch64MCSymbolizer that symbolizes `MCInst` operands during
disassembly. The symbolization was previously done in
`BinaryFunction::disassemble()`, but it is also required by
`scanExternalRefs()` for "lite" mode functionality. Hence, similar to
x86, I've implemented the symbolizer interface that uses
`BinaryFunction` relocations to properly create instruction operands. I
expect the result of the disassembly to be identical after the change.

AArch64 disassembler was not calling `tryAddingSymbolicOperand()` for
`MOV` instructions. Fix that. Additionally, the disassembler marks `ldr`
instructions as branches by setting `IsBranch` parameter to true. Ignore
the parameter and rely on `MCPlusBuilder` interface instead.

I've modified `--check-encoding` flag to check symolization of operands
of instructions that have relocations against them.

Added: 
    bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
    bolt/lib/Target/AArch64/AArch64MCSymbolizer.h

Modified: 
    bolt/lib/Core/BinaryFunction.cpp
    bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
    bolt/lib/Target/AArch64/CMakeLists.txt
    llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp

Removed: 
    


################################################################################
diff  --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 5a1ff58b72f75..b46ba1a0d1a85 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -1333,6 +1333,22 @@ Error BinaryFunction::disassemble() {
         BC.printInstruction(BC.errs(), Instruction, AbsoluteInstrAddr);
         BC.errs() << '\n';
       }
+
+      // Verify that we've symbolized an operand if the instruction has a
+      // relocation against it.
+      if (getRelocationInRange(Offset, Offset + Size)) {
+        bool HasSymbolicOp = false;
+        for (MCOperand &Op : Instruction) {
+          if (Op.isExpr()) {
+            HasSymbolicOp = true;
+            break;
+          }
+        }
+        if (!HasSymbolicOp)
+          return createFatalBOLTError(
+              "expected symbolized operand for instruction at 0x" +
+              Twine::utohexstr(AbsoluteInstrAddr));
+      }
     }
 
     // Special handling for AVX-512 instructions.
@@ -1440,9 +1456,8 @@ Error BinaryFunction::disassemble() {
         if (BC.isAArch64())
           handleAArch64IndirectCall(Instruction, Offset);
       }
-    } else if (BC.isAArch64() || BC.isRISCV()) {
+    } else if (BC.isRISCV()) {
       // Check if there's a relocation associated with this instruction.
-      bool UsedReloc = false;
       for (auto Itr = Relocations.lower_bound(Offset),
                 ItrE = Relocations.lower_bound(Offset + Size);
            Itr != ItrE; ++Itr) {
@@ -1467,24 +1482,6 @@ Error BinaryFunction::disassemble() {
             Relocation.Type);
         (void)Result;
         assert(Result && "cannot replace immediate with relocation");
-
-        // For aarch64, if we replaced an immediate with a symbol from a
-        // relocation, we mark it so we do not try to further process a
-        // pc-relative operand. All we need is the symbol.
-        UsedReloc = true;
-      }
-
-      if (!BC.isRISCV() && MIB->hasPCRelOperand(Instruction) && !UsedReloc) {
-        if (auto NewE = handleErrors(
-                handlePCRelOperand(Instruction, AbsoluteInstrAddr, Size),
-                [&](const BOLTError &E) -> Error {
-                  if (E.isFatal())
-                    return Error(std::make_unique<BOLTError>(std::move(E)));
-                  if (!E.getMessage().empty())
-                    E.log(BC.errs());
-                  return Error::success();
-                }))
-          return Error(std::move(NewE));
       }
     }
 
@@ -1580,8 +1577,9 @@ bool BinaryFunction::scanExternalRefs() {
   assert(FunctionData.size() == getMaxSize() &&
          "function size does not match raw data size");
 
-  BC.SymbolicDisAsm->setSymbolizer(
-      BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
+  if (BC.isX86())
+    BC.SymbolicDisAsm->setSymbolizer(
+        BC.MIB->createTargetSymbolizer(*this, /*CreateSymbols*/ false));
 
   // Disassemble contents of the function. Detect code entry points and create
   // relocations for references to code that will be moved.

diff  --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
index 6bcea7cde4b1c..685b2279e5afb 100644
--- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
+++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "AArch64MCSymbolizer.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64FixupKinds.h"
 #include "MCTargetDesc/AArch64MCExpr.h"
@@ -134,6 +135,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
 public:
   using MCPlusBuilder::MCPlusBuilder;
 
+  std::unique_ptr<MCSymbolizer>
+  createTargetSymbolizer(BinaryFunction &Function,
+                         bool CreateNewSymbols) const override {
+    return std::make_unique<AArch64MCSymbolizer>(Function, CreateNewSymbols);
+  }
+
   MCPhysReg getStackPointer() const override { return AArch64::SP; }
   MCPhysReg getFramePointer() const override { return AArch64::FP; }
 

diff  --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
new file mode 100644
index 0000000000000..7145e77a1edbb
--- /dev/null
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.cpp
@@ -0,0 +1,85 @@
+//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MCSymbolizer.h"
+#include "bolt/Core/BinaryContext.h"
+#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/MCPlusBuilder.h"
+#include "bolt/Core/Relocation.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "bolt-symbolizer"
+
+namespace llvm {
+namespace bolt {
+
+AArch64MCSymbolizer::~AArch64MCSymbolizer() {}
+
+bool AArch64MCSymbolizer::tryAddingSymbolicOperand(
+    MCInst &Inst, raw_ostream &CStream, int64_t Value, uint64_t InstAddress,
+    bool IsBranch, uint64_t ImmOffset, uint64_t ImmSize, uint64_t InstSize) {
+  BinaryContext &BC = Function.getBinaryContext();
+  MCContext *Ctx = BC.Ctx.get();
+
+  // NOTE: the callee may incorrectly set IsBranch.
+  if (BC.MIB->isBranch(Inst) || BC.MIB->isCall(Inst))
+    return false;
+
+  // TODO: add handling for linker "relaxation". At the moment, relocations
+  // corresponding to "relaxed" instructions are excluded from BinaryFunction
+  // relocation list.
+
+  const uint64_t InstOffset = InstAddress - Function.getAddress();
+  const Relocation *Relocation = Function.getRelocationAt(InstOffset);
+
+  /// Add symbolic operand to the instruction with an optional addend.
+  auto addOperand = [&](const MCSymbol *Symbol, uint64_t Addend,
+                        uint64_t RelType) {
+    const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, *Ctx);
+    if (Addend)
+      Expr = MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Addend, *Ctx),
+                                     *Ctx);
+    Inst.addOperand(MCOperand::createExpr(
+        BC.MIB->getTargetExprFor(Inst, Expr, *Ctx, RelType)));
+  };
+
+  if (Relocation) {
+    addOperand(Relocation->Symbol, Relocation->Addend, Relocation->Type);
+    return true;
+  }
+
+  if (!BC.MIB->hasPCRelOperand(Inst))
+    return false;
+
+  Value += InstAddress;
+  const MCSymbol *TargetSymbol;
+  uint64_t TargetOffset;
+  if (!CreateNewSymbols) {
+    if (BinaryData *BD = BC.getBinaryDataContainingAddress(Value)) {
+      TargetSymbol = BD->getSymbol();
+      TargetOffset = Value - BD->getAddress();
+    } else {
+      return false;
+    }
+  } else {
+    std::tie(TargetSymbol, TargetOffset) =
+        BC.handleAddressRef(Value, Function, /*IsPCRel*/ true);
+  }
+
+  addOperand(TargetSymbol, TargetOffset, 0);
+
+  return true;
+}
+
+void AArch64MCSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &CStream,
+                                                          int64_t Value,
+                                                          uint64_t Address) {}
+
+} // namespace bolt
+} // namespace llvm

diff  --git a/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
new file mode 100644
index 0000000000000..56ba4fbcaf275
--- /dev/null
+++ b/bolt/lib/Target/AArch64/AArch64MCSymbolizer.h
@@ -0,0 +1,44 @@
+//===- bolt/Target/AArch64/AArch64MCSymbolizer.cpp --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
+#define BOLT_TARGET_AARCH64_AARCH64MCSYMBOLIZER_H
+
+#include "bolt/Core/BinaryFunction.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+
+namespace llvm {
+namespace bolt {
+
+class AArch64MCSymbolizer : public MCSymbolizer {
+protected:
+  BinaryFunction &Function;
+  bool CreateNewSymbols{true};
+
+public:
+  AArch64MCSymbolizer(BinaryFunction &Function, bool CreateNewSymbols = true)
+      : MCSymbolizer(*Function.getBinaryContext().Ctx.get(), nullptr),
+        Function(Function), CreateNewSymbols(CreateNewSymbols) {}
+
+  AArch64MCSymbolizer(const AArch64MCSymbolizer &) = delete;
+  AArch64MCSymbolizer &operator=(const AArch64MCSymbolizer &) = delete;
+  virtual ~AArch64MCSymbolizer();
+
+  bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t OpSize,
+                                uint64_t InstSize) override;
+
+  void tryAddingPcLoadReferenceComment(raw_ostream &CStream, int64_t Value,
+                                       uint64_t Address) override;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif

diff  --git a/bolt/lib/Target/AArch64/CMakeLists.txt b/bolt/lib/Target/AArch64/CMakeLists.txt
index 8435ea7245e7e..cb38117de659e 100644
--- a/bolt/lib/Target/AArch64/CMakeLists.txt
+++ b/bolt/lib/Target/AArch64/CMakeLists.txt
@@ -1,5 +1,6 @@
 set(LLVM_LINK_COMPONENTS
   MC
+  MCDisassembler
   Support
   AArch64Desc
   )
@@ -18,6 +19,7 @@ endif()
 
 add_llvm_library(LLVMBOLTTargetAArch64
   AArch64MCPlusBuilder.cpp
+  AArch64MCSymbolizer.cpp
 
   NO_EXPORT
   DISABLE_LLVM_LINK_LLVM_DYLIB

diff  --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 8b1c16d319b2c..f78e6926f2c7d 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -744,7 +744,10 @@ static DecodeStatus DecodeMoveImmInstruction(MCInst &Inst, uint32_t insn,
       Inst.getOpcode() == AArch64::MOVKXi)
     Inst.addOperand(Inst.getOperand(0));
 
-  Inst.addOperand(MCOperand::createImm(imm));
+  if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, /*IsBranch*/ false, 0,
+                                         0, 4))
+    Inst.addOperand(MCOperand::createImm(imm));
+
   Inst.addOperand(MCOperand::createImm(shift));
   return Success;
 }


        


More information about the llvm-commits mailing list