[Lldb-commits] [lldb] 0538e54 - Move GetControlFlowKind's logic to DisassemblerLLVMC.cpp
Walter Erquinigo via lldb-commits
lldb-commits at lists.llvm.org
Tue Jul 26 12:05:37 PDT 2022
Author: Walter Erquinigo
Date: 2022-07-26T12:05:23-07:00
New Revision: 0538e5431afdb1fa05bdcedf70ee502ccfcd112a
URL: https://github.com/llvm/llvm-project/commit/0538e5431afdb1fa05bdcedf70ee502ccfcd112a
DIFF: https://github.com/llvm/llvm-project/commit/0538e5431afdb1fa05bdcedf70ee502ccfcd112a.diff
LOG: Move GetControlFlowKind's logic to DisassemblerLLVMC.cpp
This diff moves the logic of `GetControlFlowKind()` from Disassembler.cpp to DisassemblerLLVMC.cpp.
Here are the details:
- The actual logic of `GetControlFlowKind()` moves to `DisassemblerLLVMC.cpp`, where we can check the underlying architecture using `DisassemblerScope`.
- With this change, passing 'triple' to `GetControlFlowKind()` is no longer required.
Reviewed By: wallace
Differential Revision: https://reviews.llvm.org/D130320
Added:
Modified:
lldb/include/lldb/Core/Disassembler.h
lldb/source/Core/Disassembler.cpp
lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
Removed:
################################################################################
diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h
index 2cb983c40d19..99d1b45ea599 100644
--- a/lldb/include/lldb/Core/Disassembler.h
+++ b/lldb/include/lldb/Core/Disassembler.h
@@ -83,7 +83,10 @@ class Instruction {
/// The control flow kind of this instruction, or
/// eInstructionControlFlowKindUnknown if the instruction
/// can't be classified.
- lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch);
+ virtual lldb::InstructionControlFlowKind
+ GetControlFlowKind(const ExecutionContext *exe_ctx) {
+ return lldb::eInstructionControlFlowKindUnknown;
+ }
virtual void
CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
index 7a9e214748a7..81a0e1e74642 100644
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -571,334 +571,6 @@ Instruction::Instruction(const Address &address, AddressClass addr_class)
Instruction::~Instruction() = default;
-namespace x86 {
-
-/// These are the three values deciding instruction control flow kind.
-/// InstructionLengthDecode function decodes an instruction and get this struct.
-///
-/// primary_opcode
-/// Primary opcode of the instruction.
-/// For one-byte opcode instruction, it's the first byte after prefix.
-/// For two- and three-byte opcodes, it's the second byte.
-///
-/// opcode_len
-/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
-///
-/// modrm
-/// ModR/M byte of the instruction.
-/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
-/// may contain a register or specify an addressing mode, depending on MOD.
-struct InstructionOpcodeAndModrm {
- uint8_t primary_opcode;
- uint8_t opcode_len;
- uint8_t modrm;
-};
-
-/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
-/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
-/// instruction set.
-///
-/// \param[in] opcode_and_modrm
-/// Contains primary_opcode byte, its length, and ModR/M byte.
-/// Refer to the struct InstructionOpcodeAndModrm for details.
-///
-/// \return
-/// The control flow kind of the instruction or
-/// eInstructionControlFlowKindOther if the instruction doesn't affect
-/// the control flow of the program.
-lldb::InstructionControlFlowKind
-MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
- uint8_t opcode = opcode_and_modrm.primary_opcode;
- uint8_t opcode_len = opcode_and_modrm.opcode_len;
- uint8_t modrm = opcode_and_modrm.modrm;
-
- if (opcode_len > 2)
- return lldb::eInstructionControlFlowKindOther;
-
- if (opcode >= 0x70 && opcode <= 0x7F) {
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindCondJump;
- else
- return lldb::eInstructionControlFlowKindOther;
- }
-
- if (opcode >= 0x80 && opcode <= 0x8F) {
- if (opcode_len == 2)
- return lldb::eInstructionControlFlowKindCondJump;
- else
- return lldb::eInstructionControlFlowKindOther;
- }
-
- switch (opcode) {
- case 0x9A:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindFarCall;
- break;
- case 0xFF:
- if (opcode_len == 1) {
- uint8_t modrm_reg = (modrm >> 3) & 7;
- if (modrm_reg == 2)
- return lldb::eInstructionControlFlowKindCall;
- else if (modrm_reg == 3)
- return lldb::eInstructionControlFlowKindFarCall;
- else if (modrm_reg == 4)
- return lldb::eInstructionControlFlowKindJump;
- else if (modrm_reg == 5)
- return lldb::eInstructionControlFlowKindFarJump;
- }
- break;
- case 0xE8:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindCall;
- break;
- case 0xCD:
- case 0xCC:
- case 0xCE:
- case 0xF1:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindFarCall;
- break;
- case 0xCF:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindFarReturn;
- break;
- case 0xE9:
- case 0xEB:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindJump;
- break;
- case 0xEA:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindFarJump;
- break;
- case 0xE3:
- case 0xE0:
- case 0xE1:
- case 0xE2:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindCondJump;
- break;
- case 0xC3:
- case 0xC2:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindReturn;
- break;
- case 0xCB:
- case 0xCA:
- if (opcode_len == 1)
- return lldb::eInstructionControlFlowKindFarReturn;
- break;
- case 0x05:
- case 0x34:
- if (opcode_len == 2)
- return lldb::eInstructionControlFlowKindFarCall;
- break;
- case 0x35:
- case 0x07:
- if (opcode_len == 2)
- return lldb::eInstructionControlFlowKindFarReturn;
- break;
- case 0x01:
- if (opcode_len == 2) {
- switch (modrm) {
- case 0xc1:
- return lldb::eInstructionControlFlowKindFarCall;
- case 0xc2:
- case 0xc3:
- return lldb::eInstructionControlFlowKindFarReturn;
- default:
- break;
- }
- }
- break;
- default:
- break;
- }
-
- return lldb::eInstructionControlFlowKindOther;
-}
-
-/// Decode an instruction into opcode, modrm and opcode_len.
-/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
-/// Opcodes in x86 are generally the first byte of instruction, though two-byte
-/// instructions and prefixes exist. ModR/M is the byte following the opcode
-/// and adds additional information for how the instruction is executed.
-///
-/// \param[in] inst_bytes
-/// Raw bytes of the instruction
-///
-///
-/// \param[in] bytes_len
-/// The length of the inst_bytes array.
-///
-/// \param[in] is_exec_mode_64b
-/// If true, the execution mode is 64 bit.
-///
-/// \return
-/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
-/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
-/// for more details.
-/// Otherwise if the given instruction is invalid, returns None.
-llvm::Optional<InstructionOpcodeAndModrm>
-InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
- bool is_exec_mode_64b) {
- int op_idx = 0;
- bool prefix_done = false;
- InstructionOpcodeAndModrm ret = {0, 0, 0};
-
- // In most cases, the primary_opcode is the first byte of the instruction
- // but some instructions have a prefix to be skipped for these calculations.
- // The following mapping is inspired from libipt's instruction decoding logic
- // in `src/pt_ild.c`
- while (!prefix_done) {
- if (op_idx >= bytes_len)
- return llvm::None;
-
- ret.primary_opcode = inst_bytes[op_idx];
- switch (ret.primary_opcode) {
- // prefix_ignore
- case 0x26:
- case 0x2e:
- case 0x36:
- case 0x3e:
- case 0x64:
- case 0x65:
- // prefix_osz, prefix_asz
- case 0x66:
- case 0x67:
- // prefix_lock, prefix_f2, prefix_f3
- case 0xf0:
- case 0xf2:
- case 0xf3:
- op_idx++;
- break;
-
- // prefix_rex
- case 0x40:
- case 0x41:
- case 0x42:
- case 0x43:
- case 0x44:
- case 0x45:
- case 0x46:
- case 0x47:
- case 0x48:
- case 0x49:
- case 0x4a:
- case 0x4b:
- case 0x4c:
- case 0x4d:
- case 0x4e:
- case 0x4f:
- if (is_exec_mode_64b)
- op_idx++;
- else
- prefix_done = true;
- break;
-
- // prefix_vex_c4, c5
- case 0xc5:
- if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
- prefix_done = true;
- break;
- }
-
- ret.opcode_len = 2;
- ret.primary_opcode = inst_bytes[op_idx + 2];
- ret.modrm = inst_bytes[op_idx + 3];
- return ret;
-
- case 0xc4:
- if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
- prefix_done = true;
- break;
- }
- ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
- ret.primary_opcode = inst_bytes[op_idx + 3];
- ret.modrm = inst_bytes[op_idx + 4];
- return ret;
-
- // prefix_evex
- case 0x62:
- if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
- prefix_done = true;
- break;
- }
- ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
- ret.primary_opcode = inst_bytes[op_idx + 4];
- ret.modrm = inst_bytes[op_idx + 5];
- return ret;
-
- default:
- prefix_done = true;
- break;
- }
- } // prefix done
-
- ret.primary_opcode = inst_bytes[op_idx];
- ret.modrm = inst_bytes[op_idx + 1];
- ret.opcode_len = 1;
-
- // If the first opcode is 0F, it's two- or three- byte opcodes.
- if (ret.primary_opcode == 0x0F) {
- ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
-
- if (ret.primary_opcode == 0x38) {
- ret.opcode_len = 3;
- ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
- ret.modrm = inst_bytes[op_idx + 1];
- } else if (ret.primary_opcode == 0x3A) {
- ret.opcode_len = 3;
- ret.primary_opcode = inst_bytes[++op_idx];
- ret.modrm = inst_bytes[op_idx + 1];
- } else if ((ret.primary_opcode & 0xf8) == 0x38) {
- ret.opcode_len = 0;
- ret.primary_opcode = inst_bytes[++op_idx];
- ret.modrm = inst_bytes[op_idx + 1];
- } else if (ret.primary_opcode == 0x0F) {
- ret.opcode_len = 3;
- // opcode is 0x0F, no needs to update
- ret.modrm = inst_bytes[op_idx + 1];
- } else {
- ret.opcode_len = 2;
- ret.modrm = inst_bytes[op_idx + 1];
- }
- }
-
- return ret;
-}
-
-lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
- Opcode m_opcode) {
- llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
-
- if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
- // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
- return lldb::eInstructionControlFlowKindUnknown;
- }
-
- // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
- // These are the three values deciding instruction control flow kind.
- ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
- m_opcode.GetByteSize(), is_exec_mode_64b);
- if (!ret)
- return lldb::eInstructionControlFlowKindUnknown;
- else
- return MapOpcodeIntoControlFlowKind(ret.value());
-}
-
-} // namespace x86
-
-lldb::InstructionControlFlowKind
-Instruction::GetControlFlowKind(const ArchSpec &arch) {
- if (arch.GetTriple().getArch() == llvm::Triple::x86)
- return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
- else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
- return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
- else
- return eInstructionControlFlowKindUnknown; // not implemented
-}
-
AddressClass Instruction::GetAddressClass() {
if (m_address_class == AddressClass::eInvalid)
m_address_class = m_address.GetAddressClass();
@@ -946,7 +618,7 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
}
if (show_control_flow_kind) {
- switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) {
+ switch (GetControlFlowKind(exe_ctx)) {
case eInstructionControlFlowKindUnknown:
ss.Printf("%-12s", "unknown");
break;
diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
index fb404e985f80..973884283f46 100644
--- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -85,6 +85,324 @@ class DisassemblerLLVMC::MCDisasmInstance {
std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
};
+namespace x86 {
+
+/// These are the three values deciding instruction control flow kind.
+/// InstructionLengthDecode function decodes an instruction and get this struct.
+///
+/// primary_opcode
+/// Primary opcode of the instruction.
+/// For one-byte opcode instruction, it's the first byte after prefix.
+/// For two- and three-byte opcodes, it's the second byte.
+///
+/// opcode_len
+/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
+///
+/// modrm
+/// ModR/M byte of the instruction.
+/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
+/// may contain a register or specify an addressing mode, depending on MOD.
+struct InstructionOpcodeAndModrm {
+ uint8_t primary_opcode;
+ uint8_t opcode_len;
+ uint8_t modrm;
+};
+
+/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
+/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
+/// instruction set.
+///
+/// \param[in] opcode_and_modrm
+/// Contains primary_opcode byte, its length, and ModR/M byte.
+/// Refer to the struct InstructionOpcodeAndModrm for details.
+///
+/// \return
+/// The control flow kind of the instruction or
+/// eInstructionControlFlowKindOther if the instruction doesn't affect
+/// the control flow of the program.
+lldb::InstructionControlFlowKind
+MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
+ uint8_t opcode = opcode_and_modrm.primary_opcode;
+ uint8_t opcode_len = opcode_and_modrm.opcode_len;
+ uint8_t modrm = opcode_and_modrm.modrm;
+
+ if (opcode_len > 2)
+ return lldb::eInstructionControlFlowKindOther;
+
+ if (opcode >= 0x70 && opcode <= 0x7F) {
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindCondJump;
+ else
+ return lldb::eInstructionControlFlowKindOther;
+ }
+
+ if (opcode >= 0x80 && opcode <= 0x8F) {
+ if (opcode_len == 2)
+ return lldb::eInstructionControlFlowKindCondJump;
+ else
+ return lldb::eInstructionControlFlowKindOther;
+ }
+
+ switch (opcode) {
+ case 0x9A:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindFarCall;
+ break;
+ case 0xFF:
+ if (opcode_len == 1) {
+ uint8_t modrm_reg = (modrm >> 3) & 7;
+ if (modrm_reg == 2)
+ return lldb::eInstructionControlFlowKindCall;
+ else if (modrm_reg == 3)
+ return lldb::eInstructionControlFlowKindFarCall;
+ else if (modrm_reg == 4)
+ return lldb::eInstructionControlFlowKindJump;
+ else if (modrm_reg == 5)
+ return lldb::eInstructionControlFlowKindFarJump;
+ }
+ break;
+ case 0xE8:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindCall;
+ break;
+ case 0xCD:
+ case 0xCC:
+ case 0xCE:
+ case 0xF1:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindFarCall;
+ break;
+ case 0xCF:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindFarReturn;
+ break;
+ case 0xE9:
+ case 0xEB:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindJump;
+ break;
+ case 0xEA:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindFarJump;
+ break;
+ case 0xE3:
+ case 0xE0:
+ case 0xE1:
+ case 0xE2:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindCondJump;
+ break;
+ case 0xC3:
+ case 0xC2:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindReturn;
+ break;
+ case 0xCB:
+ case 0xCA:
+ if (opcode_len == 1)
+ return lldb::eInstructionControlFlowKindFarReturn;
+ break;
+ case 0x05:
+ case 0x34:
+ if (opcode_len == 2)
+ return lldb::eInstructionControlFlowKindFarCall;
+ break;
+ case 0x35:
+ case 0x07:
+ if (opcode_len == 2)
+ return lldb::eInstructionControlFlowKindFarReturn;
+ break;
+ case 0x01:
+ if (opcode_len == 2) {
+ switch (modrm) {
+ case 0xc1:
+ return lldb::eInstructionControlFlowKindFarCall;
+ case 0xc2:
+ case 0xc3:
+ return lldb::eInstructionControlFlowKindFarReturn;
+ default:
+ break;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ return lldb::eInstructionControlFlowKindOther;
+}
+
+/// Decode an instruction into opcode, modrm and opcode_len.
+/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
+/// Opcodes in x86 are generally the first byte of instruction, though two-byte
+/// instructions and prefixes exist. ModR/M is the byte following the opcode
+/// and adds additional information for how the instruction is executed.
+///
+/// \param[in] inst_bytes
+/// Raw bytes of the instruction
+///
+///
+/// \param[in] bytes_len
+/// The length of the inst_bytes array.
+///
+/// \param[in] is_exec_mode_64b
+/// If true, the execution mode is 64 bit.
+///
+/// \return
+/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
+/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
+/// for more details.
+/// Otherwise if the given instruction is invalid, returns None.
+llvm::Optional<InstructionOpcodeAndModrm>
+InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
+ bool is_exec_mode_64b) {
+ int op_idx = 0;
+ bool prefix_done = false;
+ InstructionOpcodeAndModrm ret = {0, 0, 0};
+
+ // In most cases, the primary_opcode is the first byte of the instruction
+ // but some instructions have a prefix to be skipped for these calculations.
+ // The following mapping is inspired from libipt's instruction decoding logic
+ // in `src/pt_ild.c`
+ while (!prefix_done) {
+ if (op_idx >= bytes_len)
+ return llvm::None;
+
+ ret.primary_opcode = inst_bytes[op_idx];
+ switch (ret.primary_opcode) {
+ // prefix_ignore
+ case 0x26:
+ case 0x2e:
+ case 0x36:
+ case 0x3e:
+ case 0x64:
+ case 0x65:
+ // prefix_osz, prefix_asz
+ case 0x66:
+ case 0x67:
+ // prefix_lock, prefix_f2, prefix_f3
+ case 0xf0:
+ case 0xf2:
+ case 0xf3:
+ op_idx++;
+ break;
+
+ // prefix_rex
+ case 0x40:
+ case 0x41:
+ case 0x42:
+ case 0x43:
+ case 0x44:
+ case 0x45:
+ case 0x46:
+ case 0x47:
+ case 0x48:
+ case 0x49:
+ case 0x4a:
+ case 0x4b:
+ case 0x4c:
+ case 0x4d:
+ case 0x4e:
+ case 0x4f:
+ if (is_exec_mode_64b)
+ op_idx++;
+ else
+ prefix_done = true;
+ break;
+
+ // prefix_vex_c4, c5
+ case 0xc5:
+ if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+ prefix_done = true;
+ break;
+ }
+
+ ret.opcode_len = 2;
+ ret.primary_opcode = inst_bytes[op_idx + 2];
+ ret.modrm = inst_bytes[op_idx + 3];
+ return ret;
+
+ case 0xc4:
+ if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+ prefix_done = true;
+ break;
+ }
+ ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
+ ret.primary_opcode = inst_bytes[op_idx + 3];
+ ret.modrm = inst_bytes[op_idx + 4];
+ return ret;
+
+ // prefix_evex
+ case 0x62:
+ if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+ prefix_done = true;
+ break;
+ }
+ ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
+ ret.primary_opcode = inst_bytes[op_idx + 4];
+ ret.modrm = inst_bytes[op_idx + 5];
+ return ret;
+
+ default:
+ prefix_done = true;
+ break;
+ }
+ } // prefix done
+
+ ret.primary_opcode = inst_bytes[op_idx];
+ ret.modrm = inst_bytes[op_idx + 1];
+ ret.opcode_len = 1;
+
+ // If the first opcode is 0F, it's two- or three- byte opcodes.
+ if (ret.primary_opcode == 0x0F) {
+ ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+
+ if (ret.primary_opcode == 0x38) {
+ ret.opcode_len = 3;
+ ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+ ret.modrm = inst_bytes[op_idx + 1];
+ } else if (ret.primary_opcode == 0x3A) {
+ ret.opcode_len = 3;
+ ret.primary_opcode = inst_bytes[++op_idx];
+ ret.modrm = inst_bytes[op_idx + 1];
+ } else if ((ret.primary_opcode & 0xf8) == 0x38) {
+ ret.opcode_len = 0;
+ ret.primary_opcode = inst_bytes[++op_idx];
+ ret.modrm = inst_bytes[op_idx + 1];
+ } else if (ret.primary_opcode == 0x0F) {
+ ret.opcode_len = 3;
+ // opcode is 0x0F, no needs to update
+ ret.modrm = inst_bytes[op_idx + 1];
+ } else {
+ ret.opcode_len = 2;
+ ret.modrm = inst_bytes[op_idx + 1];
+ }
+ }
+
+ return ret;
+}
+
+lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
+ Opcode m_opcode) {
+ llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
+
+ if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
+ // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
+ return lldb::eInstructionControlFlowKindUnknown;
+ }
+
+ // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
+ // These are the three values deciding instruction control flow kind.
+ ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
+ m_opcode.GetByteSize(), is_exec_mode_64b);
+ if (!ret)
+ return lldb::eInstructionControlFlowKindUnknown;
+ else
+ return MapOpcodeIntoControlFlowKind(ret.value());
+}
+
+} // namespace x86
+
class InstructionLLVMC : public lldb_private::Instruction {
public:
InstructionLLVMC(DisassemblerLLVMC &disasm,
@@ -223,6 +541,19 @@ class InstructionLLVMC : public lldb_private::Instruction {
}
}
+ lldb::InstructionControlFlowKind
+ GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
+ DisassemblerScope disasm(*this, exe_ctx);
+ if (disasm){
+ if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
+ return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
+ else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
+ return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
+ }
+
+ return eInstructionControlFlowKindUnknown;
+ }
+
void CalculateMnemonicOperandsAndComment(
const lldb_private::ExecutionContext *exe_ctx) override {
DataExtractor data;
diff --git a/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
index b867c0ae27a6..9cce3e265546 100644
--- a/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
+++ b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
@@ -137,7 +137,8 @@ TEST_F(TestGetControlFlowKindx86, TestX86_64Instruction) {
for (size_t i = 0; i < num_of_instructions; ++i) {
InstructionSP inst_sp;
inst_sp = inst_list.GetInstructionAtIndex(i);
- InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(arch);
+ ExecutionContext exe_ctx (nullptr, nullptr, nullptr);
+ InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(&exe_ctx);
EXPECT_EQ(kind, result[i]);
}
}
More information about the lldb-commits
mailing list