[llvm] 0c2b43a - [X86] Fix MCSymbolizer interface for X86Disassembler

Maksim Panchenko via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 7 10:28:06 PST 2022


Author: Maksim Panchenko
Date: 2022-03-07T10:27:28-08:00
New Revision: 0c2b43ab8cb1067dd1c7899094b824890803a7d2

URL: https://github.com/llvm/llvm-project/commit/0c2b43ab8cb1067dd1c7899094b824890803a7d2
DIFF: https://github.com/llvm/llvm-project/commit/0c2b43ab8cb1067dd1c7899094b824890803a7d2.diff

LOG: [X86] Fix MCSymbolizer interface for X86Disassembler

Fix a number of issues with MCSymbolizer::tryAddingSymbolicOperand()
in X86Disassembler:

  * Pass instruction size instead of immediate size.
  * Correctly adjust the value of PC-relative operands.
  * Set operand offset to zero when the operand is specified
    implicitly.

Reviewed By: Amir, skan

Differential Revision: https://reviews.llvm.org/D121065

Added: 
    llvm/unittests/MC/X86/CMakeLists.txt
    llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp

Modified: 
    llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 908eb6d1fab13..f2f79003b255e 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1809,11 +1809,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
 /// @param isBranch   - If the instruction is a branch instruction
 /// @param Address    - The starting address of the instruction
 /// @param Offset     - The byte offset to this immediate in the instruction
-/// @param Width      - The byte width of this immediate in the instruction
+/// @param InstSize   - Size of the instruction in bytes
 ///
 /// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
 /// called then that function is called to get any symbolic information for the
-/// immediate in the instruction using the Address, Offset and Width.  If that
+/// immediate in the instruction using the Address, Offset and InstSize. If that
 /// returns non-zero then the symbolic information it returns is used to create
 /// an MCExpr and that is added as an operand to the MCInst.  If getOpInfo()
 /// returns zero and isBranch is true then a symbol look up for immediate Value
@@ -1822,10 +1822,10 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
 /// if it adds an operand to the MCInst and false otherwise.
 static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
                                      uint64_t Address, uint64_t Offset,
-                                     uint64_t Width, MCInst &MI,
+                                     uint64_t InstSize, MCInst &MI,
                                      const MCDisassembler *Dis) {
-  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
-                                       Offset, Width);
+  return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+                                       InstSize);
 }
 
 /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
@@ -1914,8 +1914,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
   uint64_t pcrel = 0;
   if (type == TYPE_REL) {
     isBranch = true;
-    pcrel = insn.startLocation +
-            insn.immediateOffset + insn.immediateSize;
+    pcrel = insn.startLocation + insn.length;
     switch (operand.encoding) {
     default:
       break;
@@ -1990,9 +1989,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
     break;
   }
 
-  if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
-                               insn.immediateOffset, insn.immediateSize,
-                               mcInst, Dis))
+  if (!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+                                insn.immediateOffset, insn.length, mcInst, Dis))
     mcInst.addOperand(MCOperand::createImm(immediate));
 
   if (type == TYPE_MOFFS) {
@@ -2129,8 +2127,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
         return true;
       }
       if (insn.mode == MODE_64BIT){
-        pcrel = insn.startLocation +
-                insn.displacementOffset + insn.displacementSize;
+        pcrel = insn.startLocation + insn.length;
         tryAddingPcLoadReferenceComment(insn.startLocation +
                                         insn.displacementOffset,
                                         insn.displacement + pcrel, Dis);
@@ -2193,9 +2190,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
   mcInst.addOperand(baseReg);
   mcInst.addOperand(scaleAmount);
   mcInst.addOperand(indexReg);
-  if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
-                               insn.startLocation, insn.displacementOffset,
-                               insn.displacementSize, mcInst, Dis))
+
+  const uint8_t dispOffset =
+      (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementOffset;
+
+  if (!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+                                insn.startLocation, dispOffset, insn.length,
+                                mcInst, Dis))
     mcInst.addOperand(displacement);
   mcInst.addOperand(segmentReg);
   return false;

diff  --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt
new file mode 100644
index 0000000000000..a68aac868fa20
--- /dev/null
+++ b/llvm/unittests/MC/X86/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories(
+  ${CMAKE_SOURCE_DIR}/lib/Target/X86
+  ${CMAKE_BINARY_DIR}/lib/Target/X86
+  )
+
+set(LLVM_LINK_COMPONENTS
+  MC
+  MCDisassembler
+  Target
+  X86Desc
+  X86Disassembler
+  )
+
+add_llvm_unittest(X86MCTests
+  X86MCDisassemblerTest.cpp
+  )

diff  --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
new file mode 100644
index 0000000000000..8d677638ebda4
--- /dev/null
+++ b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
@@ -0,0 +1,137 @@
+//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+struct Context {
+  const char *TripleName = "x86_64-pc-linux";
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<MCSubtargetInfo> STI;
+  std::unique_ptr<MCDisassembler> DisAsm;
+
+  Context() {
+    LLVMInitializeX86TargetInfo();
+    LLVMInitializeX86TargetMC();
+    LLVMInitializeX86Disassembler();
+
+    // If we didn't build x86, do not run the test.
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+    if (!TheTarget)
+      return;
+
+    MRI.reset(TheTarget->createMCRegInfo(TripleName));
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+    STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+    Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+                                      STI.get());
+
+    DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+  }
+
+  operator bool() { return Ctx.get(); }
+  operator MCContext &() { return *Ctx; };
+};
+
+Context &getContext() {
+  static Context Ctxt;
+  return Ctxt;
+}
+} // namespace
+
+class X86MCSymbolizerTest : public MCSymbolizer {
+public:
+  X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
+  ~X86MCSymbolizerTest() {}
+
+  struct OpInfo {
+    int64_t Value = 0;
+    uint64_t Offset = 0;
+  };
+  std::vector<OpInfo> Operands;
+  uint64_t InstructionSize = 0;
+
+  void reset() {
+    Operands.clear();
+    InstructionSize = 0;
+  }
+
+  bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+                                int64_t Value, uint64_t Address, bool IsBranch,
+                                uint64_t Offset, uint64_t InstSize) override {
+    Operands.push_back({Value, Offset});
+    InstructionSize = InstSize;
+    return false;
+  }
+
+  void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                       uint64_t Address) override {}
+};
+
+TEST(X86Disassembler, X86MCSymbolizerTest) {
+  if (!getContext())
+    return;
+
+  X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
+  getContext().DisAsm->setSymbolizer(
+      std::unique_ptr<MCSymbolizer>(TestSymbolizer));
+
+  MCDisassembler::DecodeStatus Status;
+  MCInst Inst;
+  uint64_t InstSize;
+
+  auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
+    TestSymbolizer->reset();
+    Status =
+        getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
+    ASSERT_TRUE(Status == MCDisassembler::Success);
+    EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
+  };
+
+  auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset) {
+    ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
+    EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
+  };
+
+  // movq   $0xffffffffffffefe8,-0x1(%rip)
+  // Test that the value of the rip-relative operand is set correctly.
+  // The instruction address is 0 and the size is 12 bytes.
+  checkBytes(
+      {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
+  checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3);
+  checkOperand(1, 0xffffffffffffefe8, 7);
+
+  // movq   $0xfffffffffffffef5,(%r12)
+  // Test that the displacement operand has an offset of 0, since it is not
+  // explicitly specified in the instruction.
+  checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
+  checkOperand(0, 0, 0);
+  checkOperand(1, 0xfffffffffffffef5, 4);
+
+  // movq    $0x80000, 0x80000
+  checkBytes(
+      {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
+  checkOperand(0, 0x80000, 4);
+  checkOperand(1, 0x80000, 8);
+}


        


More information about the llvm-commits mailing list