[Lldb-commits] [lldb] 0538e54 - Move GetControlFlowKind's logic to DisassemblerLLVMC.cpp

Walter Erquinigo via lldb-commits lldb-commits at lists.llvm.org
Tue Jul 26 12:05:37 PDT 2022


Author: Walter Erquinigo
Date: 2022-07-26T12:05:23-07:00
New Revision: 0538e5431afdb1fa05bdcedf70ee502ccfcd112a

URL: https://github.com/llvm/llvm-project/commit/0538e5431afdb1fa05bdcedf70ee502ccfcd112a
DIFF: https://github.com/llvm/llvm-project/commit/0538e5431afdb1fa05bdcedf70ee502ccfcd112a.diff

LOG: Move GetControlFlowKind's logic to DisassemblerLLVMC.cpp

This diff moves the logic of `GetControlFlowKind()` from Disassembler.cpp to DisassemblerLLVMC.cpp.
Details:
- The actual logic of `GetControlFlowKind()` moves to `DisassemblerLLVMC.cpp`, where the underlying architecture can be checked using `DisassemblerScope`.
- With this change, passing 'triple' to `GetControlFlowKind()` is no longer required.

Reviewed By: wallace

Differential Revision: https://reviews.llvm.org/D130320

Added: 
    

Modified: 
    lldb/include/lldb/Core/Disassembler.h
    lldb/source/Core/Disassembler.cpp
    lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
    lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h
index 2cb983c40d19..99d1b45ea599 100644
--- a/lldb/include/lldb/Core/Disassembler.h
+++ b/lldb/include/lldb/Core/Disassembler.h
@@ -83,7 +83,10 @@ class Instruction {
   ///    The control flow kind of this instruction, or
   ///    eInstructionControlFlowKindUnknown if the instruction
   ///    can't be classified.
-  lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch);
+  virtual lldb::InstructionControlFlowKind
+  GetControlFlowKind(const ExecutionContext *exe_ctx) {
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
 
   virtual void
   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;

diff  --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
index 7a9e214748a7..81a0e1e74642 100644
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -571,334 +571,6 @@ Instruction::Instruction(const Address &address, AddressClass addr_class)
 
 Instruction::~Instruction() = default;
 
-namespace x86 {
-
-/// These are the three values deciding instruction control flow kind.
-/// InstructionLengthDecode function decodes an instruction and get this struct.
-///
-/// primary_opcode
-///    Primary opcode of the instruction.
-///    For one-byte opcode instruction, it's the first byte after prefix.
-///    For two- and three-byte opcodes, it's the second byte.
-///
-/// opcode_len
-///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
-///
-/// modrm
-///    ModR/M byte of the instruction.
-///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
-///    may contain a register or specify an addressing mode, depending on MOD.
-struct InstructionOpcodeAndModrm {
-  uint8_t primary_opcode;
-  uint8_t opcode_len;
-  uint8_t modrm;
-};
-
-/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
-/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
-/// instruction set.
-///
-/// \param[in] opcode_and_modrm
-///    Contains primary_opcode byte, its length, and ModR/M byte.
-///    Refer to the struct InstructionOpcodeAndModrm for details.
-///
-/// \return
-///   The control flow kind of the instruction or
-///   eInstructionControlFlowKindOther if the instruction doesn't affect
-///   the control flow of the program.
-lldb::InstructionControlFlowKind
-MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
-  uint8_t opcode = opcode_and_modrm.primary_opcode;
-  uint8_t opcode_len = opcode_and_modrm.opcode_len;
-  uint8_t modrm = opcode_and_modrm.modrm;
-
-  if (opcode_len > 2)
-    return lldb::eInstructionControlFlowKindOther;
-
-  if (opcode >= 0x70 && opcode <= 0x7F) {
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCondJump;
-    else
-      return lldb::eInstructionControlFlowKindOther;
-  }
-
-  if (opcode >= 0x80 && opcode <= 0x8F) {
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindCondJump;
-    else
-      return lldb::eInstructionControlFlowKindOther;
-  }
-
-  switch (opcode) {
-  case 0x9A:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0xFF:
-    if (opcode_len == 1) {
-      uint8_t modrm_reg = (modrm >> 3) & 7;
-      if (modrm_reg == 2)
-        return lldb::eInstructionControlFlowKindCall;
-      else if (modrm_reg == 3)
-        return lldb::eInstructionControlFlowKindFarCall;
-      else if (modrm_reg == 4)
-        return lldb::eInstructionControlFlowKindJump;
-      else if (modrm_reg == 5)
-        return lldb::eInstructionControlFlowKindFarJump;
-    }
-    break;
-  case 0xE8:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCall;
-    break;
-  case 0xCD:
-  case 0xCC:
-  case 0xCE:
-  case 0xF1:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0xCF:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0xE9:
-  case 0xEB:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindJump;
-    break;
-  case 0xEA:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarJump;
-    break;
-  case 0xE3:
-  case 0xE0:
-  case 0xE1:
-  case 0xE2:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCondJump;
-    break;
-  case 0xC3:
-  case 0xC2:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindReturn;
-    break;
-  case 0xCB:
-  case 0xCA:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0x05:
-  case 0x34:
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0x35:
-  case 0x07:
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0x01:
-    if (opcode_len == 2) {
-      switch (modrm) {
-      case 0xc1:
-        return lldb::eInstructionControlFlowKindFarCall;
-      case 0xc2:
-      case 0xc3:
-        return lldb::eInstructionControlFlowKindFarReturn;
-      default:
-        break;
-      }
-    }
-    break;
-  default:
-    break;
-  }
-
-  return lldb::eInstructionControlFlowKindOther;
-}
-
-/// Decode an instruction into opcode, modrm and opcode_len.
-/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
-/// Opcodes in x86 are generally the first byte of instruction, though two-byte
-/// instructions and prefixes exist. ModR/M is the byte following the opcode
-/// and adds additional information for how the instruction is executed.
-///
-/// \param[in] inst_bytes
-///    Raw bytes of the instruction
-///
-///
-/// \param[in] bytes_len
-///    The length of the inst_bytes array.
-///
-/// \param[in] is_exec_mode_64b
-///    If true, the execution mode is 64 bit.
-///
-/// \return
-///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
-///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
-///    for more details.
-///    Otherwise if the given instruction is invalid, returns None.
-llvm::Optional<InstructionOpcodeAndModrm>
-InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
-                        bool is_exec_mode_64b) {
-  int op_idx = 0;
-  bool prefix_done = false;
-  InstructionOpcodeAndModrm ret = {0, 0, 0};
-
-  // In most cases, the primary_opcode is the first byte of the instruction
-  // but some instructions have a prefix to be skipped for these calculations.
-  // The following mapping is inspired from libipt's instruction decoding logic
-  // in `src/pt_ild.c`
-  while (!prefix_done) {
-    if (op_idx >= bytes_len)
-      return llvm::None;
-
-    ret.primary_opcode = inst_bytes[op_idx];
-    switch (ret.primary_opcode) {
-    // prefix_ignore
-    case 0x26:
-    case 0x2e:
-    case 0x36:
-    case 0x3e:
-    case 0x64:
-    case 0x65:
-    // prefix_osz, prefix_asz
-    case 0x66:
-    case 0x67:
-    // prefix_lock, prefix_f2, prefix_f3
-    case 0xf0:
-    case 0xf2:
-    case 0xf3:
-      op_idx++;
-      break;
-
-    // prefix_rex
-    case 0x40:
-    case 0x41:
-    case 0x42:
-    case 0x43:
-    case 0x44:
-    case 0x45:
-    case 0x46:
-    case 0x47:
-    case 0x48:
-    case 0x49:
-    case 0x4a:
-    case 0x4b:
-    case 0x4c:
-    case 0x4d:
-    case 0x4e:
-    case 0x4f:
-      if (is_exec_mode_64b)
-        op_idx++;
-      else
-        prefix_done = true;
-      break;
-
-    // prefix_vex_c4, c5
-    case 0xc5:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-
-      ret.opcode_len = 2;
-      ret.primary_opcode = inst_bytes[op_idx + 2];
-      ret.modrm = inst_bytes[op_idx + 3];
-      return ret;
-
-    case 0xc4:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-      ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
-      ret.primary_opcode = inst_bytes[op_idx + 3];
-      ret.modrm = inst_bytes[op_idx + 4];
-      return ret;
-
-    // prefix_evex
-    case 0x62:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-      ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
-      ret.primary_opcode = inst_bytes[op_idx + 4];
-      ret.modrm = inst_bytes[op_idx + 5];
-      return ret;
-
-    default:
-      prefix_done = true;
-      break;
-    }
-  } // prefix done
-
-  ret.primary_opcode = inst_bytes[op_idx];
-  ret.modrm = inst_bytes[op_idx + 1];
-  ret.opcode_len = 1;
-
-  // If the first opcode is 0F, it's two- or three- byte opcodes.
-  if (ret.primary_opcode == 0x0F) {
-    ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
-
-    if (ret.primary_opcode == 0x38) {
-      ret.opcode_len = 3;
-      ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if (ret.primary_opcode == 0x3A) {
-      ret.opcode_len = 3;
-      ret.primary_opcode = inst_bytes[++op_idx];
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if ((ret.primary_opcode & 0xf8) == 0x38) {
-      ret.opcode_len = 0;
-      ret.primary_opcode = inst_bytes[++op_idx];
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if (ret.primary_opcode == 0x0F) {
-      ret.opcode_len = 3;
-      // opcode is 0x0F, no needs to update
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else {
-      ret.opcode_len = 2;
-      ret.modrm = inst_bytes[op_idx + 1];
-    }
-  }
-
-  return ret;
-}
-
-lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
-                                                    Opcode m_opcode) {
-  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
-
-  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
-    // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
-    return lldb::eInstructionControlFlowKindUnknown;
-  }
-
-  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
-  // These are the three values deciding instruction control flow kind.
-  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
-                                m_opcode.GetByteSize(), is_exec_mode_64b);
-  if (!ret)
-    return lldb::eInstructionControlFlowKindUnknown;
-  else
-    return MapOpcodeIntoControlFlowKind(ret.value());
-}
-
-} // namespace x86
-
-lldb::InstructionControlFlowKind
-Instruction::GetControlFlowKind(const ArchSpec &arch) {
-  if (arch.GetTriple().getArch() == llvm::Triple::x86)
-    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
-  else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
-    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
-  else
-    return eInstructionControlFlowKindUnknown; // not implemented
-}
-
 AddressClass Instruction::GetAddressClass() {
   if (m_address_class == AddressClass::eInvalid)
     m_address_class = m_address.GetAddressClass();
@@ -946,7 +618,7 @@ void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
   }
 
   if (show_control_flow_kind) {
-    switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) {
+    switch (GetControlFlowKind(exe_ctx)) {
     case eInstructionControlFlowKindUnknown:
       ss.Printf("%-12s", "unknown");
       break;

diff  --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
index fb404e985f80..973884283f46 100644
--- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -85,6 +85,324 @@ class DisassemblerLLVMC::MCDisasmInstance {
   std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
 };
 
+namespace x86 {
+
+/// These are the three values deciding instruction control flow kind.
+/// InstructionLengthDecode function decodes an instruction and get this struct.
+///
+/// primary_opcode
+///    Primary opcode of the instruction.
+///    For one-byte opcode instruction, it's the first byte after prefix.
+///    For two- and three-byte opcodes, it's the second byte.
+///
+/// opcode_len
+///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
+///
+/// modrm
+///    ModR/M byte of the instruction.
+///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
+///    may contain a register or specify an addressing mode, depending on MOD.
+struct InstructionOpcodeAndModrm {
+  uint8_t primary_opcode;
+  uint8_t opcode_len;
+  uint8_t modrm;
+};
+
+/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
+/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
+/// instruction set.
+///
+/// \param[in] opcode_and_modrm
+///    Contains primary_opcode byte, its length, and ModR/M byte.
+///    Refer to the struct InstructionOpcodeAndModrm for details.
+///
+/// \return
+///   The control flow kind of the instruction or
+///   eInstructionControlFlowKindOther if the instruction doesn't affect
+///   the control flow of the program.
+lldb::InstructionControlFlowKind
+MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
+  uint8_t opcode = opcode_and_modrm.primary_opcode;
+  uint8_t opcode_len = opcode_and_modrm.opcode_len;
+  uint8_t modrm = opcode_and_modrm.modrm;
+
+  if (opcode_len > 2)
+    return lldb::eInstructionControlFlowKindOther;
+
+  if (opcode >= 0x70 && opcode <= 0x7F) {
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  if (opcode >= 0x80 && opcode <= 0x8F) {
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  switch (opcode) {
+  case 0x9A:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xFF:
+    if (opcode_len == 1) {
+      uint8_t modrm_reg = (modrm >> 3) & 7;
+      if (modrm_reg == 2)
+        return lldb::eInstructionControlFlowKindCall;
+      else if (modrm_reg == 3)
+        return lldb::eInstructionControlFlowKindFarCall;
+      else if (modrm_reg == 4)
+        return lldb::eInstructionControlFlowKindJump;
+      else if (modrm_reg == 5)
+        return lldb::eInstructionControlFlowKindFarJump;
+    }
+    break;
+  case 0xE8:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCall;
+    break;
+  case 0xCD:
+  case 0xCC:
+  case 0xCE:
+  case 0xF1:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xCF:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0xE9:
+  case 0xEB:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindJump;
+    break;
+  case 0xEA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarJump;
+    break;
+  case 0xE3:
+  case 0xE0:
+  case 0xE1:
+  case 0xE2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    break;
+  case 0xC3:
+  case 0xC2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindReturn;
+    break;
+  case 0xCB:
+  case 0xCA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x05:
+  case 0x34:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0x35:
+  case 0x07:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x01:
+    if (opcode_len == 2) {
+      switch (modrm) {
+      case 0xc1:
+        return lldb::eInstructionControlFlowKindFarCall;
+      case 0xc2:
+      case 0xc3:
+        return lldb::eInstructionControlFlowKindFarReturn;
+      default:
+        break;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  return lldb::eInstructionControlFlowKindOther;
+}
+
+/// Decode an instruction into opcode, modrm and opcode_len.
+/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
+/// Opcodes in x86 are generally the first byte of instruction, though two-byte
+/// instructions and prefixes exist. ModR/M is the byte following the opcode
+/// and adds additional information for how the instruction is executed.
+///
+/// \param[in] inst_bytes
+///    Raw bytes of the instruction
+///
+///
+/// \param[in] bytes_len
+///    The length of the inst_bytes array.
+///
+/// \param[in] is_exec_mode_64b
+///    If true, the execution mode is 64 bit.
+///
+/// \return
+///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
+///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
+///    for more details.
+///    Otherwise if the given instruction is invalid, returns None.
+llvm::Optional<InstructionOpcodeAndModrm>
+InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
+                        bool is_exec_mode_64b) {
+  int op_idx = 0;
+  bool prefix_done = false;
+  InstructionOpcodeAndModrm ret = {0, 0, 0};
+
+  // In most cases, the primary_opcode is the first byte of the instruction
+  // but some instructions have a prefix to be skipped for these calculations.
+  // The following mapping is inspired from libipt's instruction decoding logic
+  // in `src/pt_ild.c`
+  while (!prefix_done) {
+    if (op_idx >= bytes_len)
+      return llvm::None;
+
+    ret.primary_opcode = inst_bytes[op_idx];
+    switch (ret.primary_opcode) {
+    // prefix_ignore
+    case 0x26:
+    case 0x2e:
+    case 0x36:
+    case 0x3e:
+    case 0x64:
+    case 0x65:
+    // prefix_osz, prefix_asz
+    case 0x66:
+    case 0x67:
+    // prefix_lock, prefix_f2, prefix_f3
+    case 0xf0:
+    case 0xf2:
+    case 0xf3:
+      op_idx++;
+      break;
+
+    // prefix_rex
+    case 0x40:
+    case 0x41:
+    case 0x42:
+    case 0x43:
+    case 0x44:
+    case 0x45:
+    case 0x46:
+    case 0x47:
+    case 0x48:
+    case 0x49:
+    case 0x4a:
+    case 0x4b:
+    case 0x4c:
+    case 0x4d:
+    case 0x4e:
+    case 0x4f:
+      if (is_exec_mode_64b)
+        op_idx++;
+      else
+        prefix_done = true;
+      break;
+
+    // prefix_vex_c4, c5
+    case 0xc5:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+
+      ret.opcode_len = 2;
+      ret.primary_opcode = inst_bytes[op_idx + 2];
+      ret.modrm = inst_bytes[op_idx + 3];
+      return ret;
+
+    case 0xc4:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
+      ret.primary_opcode = inst_bytes[op_idx + 3];
+      ret.modrm = inst_bytes[op_idx + 4];
+      return ret;
+
+    // prefix_evex
+    case 0x62:
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
+      ret.primary_opcode = inst_bytes[op_idx + 4];
+      ret.modrm = inst_bytes[op_idx + 5];
+      return ret;
+
+    default:
+      prefix_done = true;
+      break;
+    }
+  } // prefix done
+
+  ret.primary_opcode = inst_bytes[op_idx];
+  ret.modrm = inst_bytes[op_idx + 1];
+  ret.opcode_len = 1;
+
+  // If the first opcode is 0F, it's two- or three- byte opcodes.
+  if (ret.primary_opcode == 0x0F) {
+    ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+
+    if (ret.primary_opcode == 0x38) {
+      ret.opcode_len = 3;
+      ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+      ret.modrm = inst_bytes[op_idx + 1];
+    } else if (ret.primary_opcode == 0x3A) {
+      ret.opcode_len = 3;
+      ret.primary_opcode = inst_bytes[++op_idx];
+      ret.modrm = inst_bytes[op_idx + 1];
+    } else if ((ret.primary_opcode & 0xf8) == 0x38) {
+      ret.opcode_len = 0;
+      ret.primary_opcode = inst_bytes[++op_idx];
+      ret.modrm = inst_bytes[op_idx + 1];
+    } else if (ret.primary_opcode == 0x0F) {
+      ret.opcode_len = 3;
+      // opcode is 0x0F, no needs to update
+      ret.modrm = inst_bytes[op_idx + 1];
+    } else {
+      ret.opcode_len = 2;
+      ret.modrm = inst_bytes[op_idx + 1];
+    }
+  }
+
+  return ret;
+}
+
+lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
+                                                    Opcode m_opcode) {
+  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
+
+  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
+    // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
+
+  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
+  // These are the three values deciding instruction control flow kind.
+  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
+                                m_opcode.GetByteSize(), is_exec_mode_64b);
+  if (!ret)
+    return lldb::eInstructionControlFlowKindUnknown;
+  else
+    return MapOpcodeIntoControlFlowKind(ret.value());
+}
+
+} // namespace x86
+
 class InstructionLLVMC : public lldb_private::Instruction {
 public:
   InstructionLLVMC(DisassemblerLLVMC &disasm,
@@ -223,6 +541,19 @@ class InstructionLLVMC : public lldb_private::Instruction {
     }
   }
 
+  lldb::InstructionControlFlowKind
+  GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
+    DisassemblerScope disasm(*this, exe_ctx);
+    if (disasm){
+      if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
+        return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
+      else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
+        return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
+    }
+
+    return eInstructionControlFlowKindUnknown;
+  }
+
   void CalculateMnemonicOperandsAndComment(
       const lldb_private::ExecutionContext *exe_ctx) override {
     DataExtractor data;

diff  --git a/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
index b867c0ae27a6..9cce3e265546 100644
--- a/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
+++ b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
@@ -137,7 +137,8 @@ TEST_F(TestGetControlFlowKindx86, TestX86_64Instruction) {
     for (size_t i = 0; i < num_of_instructions; ++i) {
       InstructionSP inst_sp;
       inst_sp = inst_list.GetInstructionAtIndex(i);
-      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(arch);
+      ExecutionContext exe_ctx (nullptr, nullptr, nullptr);
+      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(&exe_ctx);
       EXPECT_EQ(kind, result[i]);
     }
   }


        


More information about the lldb-commits mailing list