[LLVMdev] Some bugs in x86 disasm (llvm-mc)

David Woodhouse dwmw2 at infradead.org
Thu Jan 16 13:36:38 PST 2014


On Thu, 2014-01-16 at 07:30 -0800, Craig Topper wrote:
> Did we have IC_OPSIZE_ADSIZE defined but not used before this?

This removes it and fixes the same disasm bugs... 

From 01e472ac675363a71d27013bcf9d7f846093177d Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse at intel.com>
Date: Thu, 16 Jan 2014 14:44:20 +0000
Subject: [PATCH] [x86] Fix disassembly of MOV16ao16 et al.

The addition of IC_OPSIZE_ADSIZE in r198759 wasn't quite complete. It
also turns out to have been unnecessary. The disassembler handles the
AdSize prefix for itself, and doesn't care about the difference between
(e.g.) MOV8ao8 and MOB8ao8_16 definitions. So just let them coexist and
don't worry about it.
---
 .../Disassembler/X86DisassemblerDecoderCommon.h    |  2 -
 test/MC/Disassembler/X86/moffs.txt                 | 86 ++++++++++++++++++++++
 utils/TableGen/X86DisassemblerTables.cpp           | 14 +++-
 utils/TableGen/X86RecognizableInstr.cpp            |  2 -
 4 files changed, 96 insertions(+), 8 deletions(-)
 create mode 100644 test/MC/Disassembler/X86/moffs.txt

diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 954a3d7..1acaef1 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -92,8 +92,6 @@ enum attributeBits {
                                         "operands change width")               \
   ENUM_ENTRY(IC_ADSIZE,             3,  "requires an ADSIZE prefix, so "       \
                                         "operands change width")               \
-  ENUM_ENTRY(IC_OPSIZE_ADSIZE,      3,  "requires both OPSIZE and ADSIZE "     \
-                                        "prefixes")                            \
   ENUM_ENTRY(IC_XD,                 2,  "may say something about the opcode "  \
                                         "but not the operands")                \
   ENUM_ENTRY(IC_XS,                 2,  "may say something about the opcode "  \
diff --git a/test/MC/Disassembler/X86/moffs.txt b/test/MC/Disassembler/X86/moffs.txt
new file mode 100644
index 0000000..67d64e8
--- /dev/null
+++ b/test/MC/Disassembler/X86/moffs.txt
@@ -0,0 +1,86 @@
+# RUN: llvm-mc --hdis %s -triple=i686-linux-gnu-code16 | FileCheck --check-prefix=16 %s
+# RUN: llvm-mc --hdis %s -triple=i686-linux-gnu | FileCheck --check-prefix=32 %s
+# RUN: llvm-mc --hdis %s -triple=x86_64-linux-gnu | FileCheck --check-prefix=64 %s
+
+# 16: movb 0x5a5a, %al
+# 32: movb 0x5a5a5a5a, %al
+# 64: movabsb 0x5a5a5a5a5a5a5a5a, %al
+0xa0 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movb 0x5a5a5a5a, %al
+# 32: movb 0x5a5a, %al
+# 64: movabsb 0x5a5a5a5a, %al
+0x67 0xa0 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movw 0x5a5a, %ax
+# 32: movl 0x5a5a5a5a, %eax
+# 64: movabsl 0x5a5a5a5a5a5a5a5a, %eax
+0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movw 0x5a5a5a5a, %ax
+# 32: movl 0x5a5a, %eax
+# 64: movabsl 0x5a5a5a5a, %eax
+0x67 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl 0x5a5a, %eax
+# 32: movw 0x5a5a5a5a, %ax
+# 64: movabsw 0x5a5a5a5a5a5a5a5a, %ax
+0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl 0x5a5a5a5a, %eax
+# 32: movw 0x5a5a, %ax
+# 64: movabsw 0x5a5a5a5a, %ax
+0x66 0x67 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl 0x5a5a5a5a, %eax
+# 32: movw 0x5a5a, %ax
+# 64: movabsw 0x5a5a5a5a, %ax
+0x67 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl %es:0x5a5a5a5a, %eax
+# 32: movw %es:0x5a5a, %ax
+# 64: movabsw %es:0x5a5a5a5a, %ax
+0x67 0x26 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+
+
+# 16: movb %al, 0x5a5a
+# 32: movb %al, 0x5a5a5a5a
+# 64: movabsb %al, 0x5a5a5a5a5a5a5a5a
+0xa2 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movb %al, 0x5a5a5a5a
+# 32: movb %al, 0x5a5a
+# 64: movabsb %al, 0x5a5a5a5a
+0x67 0xa2 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movw %ax, 0x5a5a
+# 32: movl %eax, 0x5a5a5a5a
+# 64: movabsl %eax, 0x5a5a5a5a5a5a5a5a
+0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movw %ax, %gs:0x5a5a5a5a
+# 32: movl %eax, %gs:0x5a5a
+# 64: movabsl %eax, %gs:0x5a5a5a5a
+0x65 0x67 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl %eax, 0x5a5a
+# 32: movw %ax, 0x5a5a5a5a
+# 64: movabsw %ax, 0x5a5a5a5a5a5a5a5a
+0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl %eax, 0x5a5a5a5a
+# 32: movw %ax, 0x5a5a
+# 64: movabsw %ax, 0x5a5a5a5a
+0x66 0x67 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl %eax, 0x5a5a5a5a
+# 32: movw %ax, 0x5a5a
+# 64: movabsw %ax, 0x5a5a5a5a
+0x67 0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# 16: movl %eax, %es:0x5a5a5a5a
+# 32: movw %ax, %es:0x5a5a
+# 64: movabsw %ax, %es:0x5a5a5a5a
+0x67 0x26 0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 0ccfac7..f9c3e1a 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -94,11 +94,8 @@ static inline bool inheritsFrom(InstructionContext child,
            inheritsFrom(child, IC_64BIT_XD)     ||
            inheritsFrom(child, IC_64BIT_XS));
   case IC_OPSIZE:
-    return (inheritsFrom(child, IC_64BIT_OPSIZE) ||
-            inheritsFrom(child, IC_OPSIZE_ADSIZE));
+    return inheritsFrom(child, IC_64BIT_OPSIZE);
   case IC_ADSIZE:
-    return inheritsFrom(child, IC_OPSIZE_ADSIZE);
-  case IC_OPSIZE_ADSIZE:
   case IC_64BIT_ADSIZE:
     return false;
   case IC_XD:
@@ -803,6 +800,15 @@ void DisassemblerTables::setTableFields(ModRMDecision     &decision,
         if(newInfo.filtered)
           continue; // filtered instructions get lowest priority
 
+        // Instructions such as MOV8ao8 and MOV8ao8_16 differ only in the
+        // presence of the AdSize prefix. However, the disassembler doesn't
+        // care about that difference in the instruction definition; it
+        // handles 16-bit vs. 32-bit addressing for itself based purely
+        // on the 0x67 prefix and the CPU mode. So there's no need to
+        // disambiguate between them; just let them conflict/coexist.
+        if (previousInfo.name + "_16" == newInfo.name)
+          continue;
+
         if(previousInfo.name == "NOOP" && (newInfo.name == "XCHG16ar" ||
                                            newInfo.name == "XCHG32ar" ||
                                            newInfo.name == "XCHG32ar64" ||
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index 019610e..c09abef 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -468,8 +468,6 @@ InstructionContext RecognizableInstr::insnContext() const {
     else if (HasOpSizePrefix &&
              (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
       insnContext = IC_XS_OPSIZE;
-    else if (HasOpSizePrefix && HasAdSizePrefix)
-      insnContext = IC_OPSIZE_ADSIZE;
     else if (HasOpSizePrefix || Prefix == X86Local::PD ||
              Prefix == X86Local::T8PD || Prefix == X86Local::TAPD)
       insnContext = IC_OPSIZE;
-- 
1.8.3.1



-- 
dwmw2

-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/x-pkcs7-signature
Size: 5745 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20140116/88a7b9e5/attachment.bin>


More information about the llvm-dev mailing list