[llvm] 0c2b43a - [X86] Fix MCSymbolizer interface for X86Disassembler
Maksim Panchenko via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 7 10:28:06 PST 2022
Author: Maksim Panchenko
Date: 2022-03-07T10:27:28-08:00
New Revision: 0c2b43ab8cb1067dd1c7899094b824890803a7d2
URL: https://github.com/llvm/llvm-project/commit/0c2b43ab8cb1067dd1c7899094b824890803a7d2
DIFF: https://github.com/llvm/llvm-project/commit/0c2b43ab8cb1067dd1c7899094b824890803a7d2.diff
LOG: [X86] Fix MCSymbolizer interface for X86Disassembler
Fix a number of issues with MCSymbolizer::tryAddingSymbolicOperand()
in X86Disassembler:
* Pass instruction size instead of immediate size.
* Correctly adjust the value of PC-relative operands.
* Set operand offset to zero when the operand is specified
implicitly.
Reviewed By: Amir, skan
Differential Revision: https://reviews.llvm.org/D121065
Added:
llvm/unittests/MC/X86/CMakeLists.txt
llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
Modified:
llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 908eb6d1fab13..f2f79003b255e 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1809,11 +1809,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param isBranch - If the instruction is a branch instruction
/// @param Address - The starting address of the instruction
/// @param Offset - The byte offset to this immediate in the instruction
-/// @param Width - The byte width of this immediate in the instruction
+/// @param InstSize - Size of the instruction in bytes
///
/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
/// called then that function is called to get any symbolic information for the
-/// immediate in the instruction using the Address, Offset and Width. If that
+/// immediate in the instruction using the Address, Offset and InstSize. If that
/// returns non-zero then the symbolic information it returns is used to create
/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is true then a symbol look up for immediate Value
@@ -1822,10 +1822,10 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// if it adds an operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
- uint64_t Width, MCInst &MI,
+ uint64_t InstSize, MCInst &MI,
const MCDisassembler *Dis) {
- return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
- Offset, Width);
+ return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
+ InstSize);
}
/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
@@ -1914,8 +1914,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
uint64_t pcrel = 0;
if (type == TYPE_REL) {
isBranch = true;
- pcrel = insn.startLocation +
- insn.immediateOffset + insn.immediateSize;
+ pcrel = insn.startLocation + insn.length;
switch (operand.encoding) {
default:
break;
@@ -1990,9 +1989,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
- if(!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
- insn.immediateOffset, insn.immediateSize,
- mcInst, Dis))
+ if (!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+ insn.immediateOffset, insn.length, mcInst, Dis))
mcInst.addOperand(MCOperand::createImm(immediate));
if (type == TYPE_MOFFS) {
@@ -2129,8 +2127,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
return true;
}
if (insn.mode == MODE_64BIT){
- pcrel = insn.startLocation +
- insn.displacementOffset + insn.displacementSize;
+ pcrel = insn.startLocation + insn.length;
tryAddingPcLoadReferenceComment(insn.startLocation +
insn.displacementOffset,
insn.displacement + pcrel, Dis);
@@ -2193,9 +2190,13 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
- if(!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
- insn.startLocation, insn.displacementOffset,
- insn.displacementSize, mcInst, Dis))
+
+ const uint8_t dispOffset =
+ (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementOffset;
+
+ if (!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+ insn.startLocation, dispOffset, insn.length,
+ mcInst, Dis))
mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
diff --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt
new file mode 100644
index 0000000000000..a68aac868fa20
--- /dev/null
+++ b/llvm/unittests/MC/X86/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories(
+ ${CMAKE_SOURCE_DIR}/lib/Target/X86
+ ${CMAKE_BINARY_DIR}/lib/Target/X86
+ )
+
+set(LLVM_LINK_COMPONENTS
+ MC
+ MCDisassembler
+ Target
+ X86Desc
+ X86Disassembler
+ )
+
+add_llvm_unittest(X86MCTests
+ X86MCDisassemblerTest.cpp
+ )
diff --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
new file mode 100644
index 0000000000000..8d677638ebda4
--- /dev/null
+++ b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp
@@ -0,0 +1,137 @@
+//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+struct Context {
+ const char *TripleName = "x86_64-pc-linux";
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+ std::unique_ptr<MCContext> Ctx;
+ std::unique_ptr<MCSubtargetInfo> STI;
+ std::unique_ptr<MCDisassembler> DisAsm;
+
+ Context() {
+ LLVMInitializeX86TargetInfo();
+ LLVMInitializeX86TargetMC();
+ LLVMInitializeX86Disassembler();
+
+ // If we didn't build x86, do not run the test.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ if (!TheTarget)
+ return;
+
+ MRI.reset(TheTarget->createMCRegInfo(TripleName));
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+ STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+ Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+ STI.get());
+
+ DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+ }
+
+ operator bool() { return Ctx.get(); }
+ operator MCContext &() { return *Ctx; };
+};
+
+Context &getContext() {
+ static Context Ctxt;
+ return Ctxt;
+}
+} // namespace
+
+class X86MCSymbolizerTest : public MCSymbolizer {
+public:
+ X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
+ ~X86MCSymbolizerTest() {}
+
+ struct OpInfo {
+ int64_t Value = 0;
+ uint64_t Offset = 0;
+ };
+ std::vector<OpInfo> Operands;
+ uint64_t InstructionSize = 0;
+
+ void reset() {
+ Operands.clear();
+ InstructionSize = 0;
+ }
+
+ bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t InstSize) override {
+ Operands.push_back({Value, Offset});
+ InstructionSize = InstSize;
+ return false;
+ }
+
+ void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+ uint64_t Address) override {}
+};
+
+TEST(X86Disassembler, X86MCSymbolizerTest) {
+ if (!getContext())
+ return;
+
+ X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
+ getContext().DisAsm->setSymbolizer(
+ std::unique_ptr<MCSymbolizer>(TestSymbolizer));
+
+ MCDisassembler::DecodeStatus Status;
+ MCInst Inst;
+ uint64_t InstSize;
+
+ auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
+ TestSymbolizer->reset();
+ Status =
+ getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
+ ASSERT_TRUE(Status == MCDisassembler::Success);
+ EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
+ };
+
+ auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset) {
+ ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
+ EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
+ EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
+ };
+
+ // movq $0xffffffffffffefe8,-0x1(%rip)
+ // Test that the value of the rip-relative operand is set correctly.
+ // The instruction address is 0 and the size is 12 bytes.
+ checkBytes(
+ {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
+ checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3);
+ checkOperand(1, 0xffffffffffffefe8, 7);
+
+ // movq $0xfffffffffffffef5,(%r12)
+ // Test that the displacement operand has an offset of 0, since it is not
+ // explicitly specified in the instruction.
+ checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
+ checkOperand(0, 0, 0);
+ checkOperand(1, 0xfffffffffffffef5, 4);
+
+ // movq $0x80000, 0x80000
+ checkBytes(
+ {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
+ checkOperand(0, 0x80000, 4);
+ checkOperand(1, 0x80000, 8);
+}
More information about the llvm-commits
mailing list