[llvm] r311882 - The current version of LLVM X86 disassembler incorrectly interprets some possible sets of x86 prefixes. This patch is the first step to close PR7709 and PR17697. There will be next patch(es) to close relative PRs.

Andrew V. Tischenko via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 28 03:43:14 PDT 2017


Author: avt77
Date: Mon Aug 28 03:43:14 2017
New Revision: 311882

URL: http://llvm.org/viewvc/llvm-project?rev=311882&view=rev
Log:
The current version of the LLVM X86 disassembler incorrectly interprets some possible sets of x86 prefixes. This patch is the first step toward closing PR7709 and PR17697. Follow-up patch(es) will close the related PRs.
Differential Revision: https://reviews.llvm.org/D36788

M    lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
M    lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
A    test/MC/Disassembler/X86/prefixes-i386.s
A    test/MC/Disassembler/X86/prefixes-x86_64.s
M    test/MC/Disassembler/X86/prefixes.txt

Added:
    llvm/trunk/test/MC/Disassembler/X86/prefixes-i386.s
    llvm/trunk/test/MC/Disassembler/X86/prefixes-x86_64.s
Modified:
    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
    llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
    llvm/trunk/test/MC/Disassembler/X86/prefixes.txt

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp?rev=311882&r1=311881&r2=311882&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp Mon Aug 28 03:43:14 2017
@@ -277,6 +277,12 @@ static void dbgprintf(struct InternalIns
   insn->dlog(insn->dlogArg, buffer);
 }
 
+static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
+  if (insn->mode == MODE_64BIT) // only 64-bit mode is checked for REX
+    return prefix >= 0x40 && prefix <= 0x4f; // REX byte range 0x40..0x4f
+  return false; // any other mode: the byte is never reported as REX
+}
+
 /*
  * setPrefixPresent - Marks that a particular prefix is present at a particular
  *   location.
@@ -290,6 +296,38 @@ static void setPrefixPresent(struct Inte
                                     uint8_t prefix,
                                     uint64_t location)
 {
+  uint8_t nextByte;
+  switch (prefix) {
+  case 0xf2:
+  case 0xf3:
+    if (lookAtByte(insn, &nextByte))
+      break;
+    // TODO:
+    //  1. There could be several 0x66
+    //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
+    //      it's not mandatory prefix
+    //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
+    //     0x0f exactly after it to be mandatory prefix
+    if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) {
+      // The last of 0xf2/0xf3 is the mandatory prefix
+      insn->mandatory_prefix = prefix;
+      insn->necessaryPrefixLocation = location;
+      break;
+    }
+    break;
+  case 0x66:
+    if (lookAtByte(insn, &nextByte))
+      break;
+    // 0x66 can't overwrite existing mandatory prefix and should be ignored
+    if (!insn->mandatory_prefix &&
+        (nextByte == 0x0f || isREX(insn, nextByte))) {
+      insn->mandatory_prefix = prefix;
+      insn->necessaryPrefixLocation = location;
+    }
+    break;
+  }
+  if (!insn->mandatory_prefix)
+    insn->necessaryPrefixLocation = location;
   insn->prefixPresent[prefix] = 1;
   insn->prefixLocations[prefix] = location;
 }
@@ -343,13 +381,10 @@ static int readPrefixes(struct InternalI
      * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
      * break and let it be disassembled as a normal "instruction".
      */
-    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
       break;
 
-    if (insn->readerCursor - 1 == insn->startLocation
-        && (byte == 0xf2 || byte == 0xf3)
-        && !lookAtByte(insn, &nextByte))
-    {
+    if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) {
       /*
        * If the byte is 0xf2 or 0xf3, and any of the following conditions are
        * met:
@@ -357,9 +392,8 @@ static int readPrefixes(struct InternalI
        * - it is followed by an xchg instruction
        * then it should be disassembled as a xacquire/xrelease not repne/rep.
        */
-      if ((byte == 0xf2 || byte == 0xf3) &&
-          ((nextByte == 0xf0) ||
-          ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
+      if (((nextByte == 0xf0) ||
+           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
         insn->xAcquireRelease = true;
       /*
        * Also if the byte is 0xf3, and the following condition is met:
@@ -378,7 +412,13 @@ static int readPrefixes(struct InternalI
           return -1;
         unconsumeByte(insn);
       }
-      if (nextByte != 0x0f && nextByte != 0x90)
+      // If the current byte can't be a mandatory prefix, it's a plain repeat
+      // prefix and should be disassembled as a separate instruction
+      if (nextByte != 0x0f &&
+          // We can have f2 f3 f2 66 f2 0f (in any order) as a valid set
+          // of prefixes with the last one as a mandatory prefix (SSE/SSE2...)
+          nextByte != 0xf2 && nextByte != 0xf3 && nextByte != 0x66 &&
+          nextByte != 0x90 && !isREX(insn, nextByte))
         break;
     }
 
@@ -426,11 +466,13 @@ static int readPrefixes(struct InternalI
       setPrefixPresent(insn, byte, prefixLocation);
       break;
     case 0x66:  /* Operand-size override */
-      if (prefixGroups[2])
-        dbgprintf(insn, "Redundant Group 3 prefix");
-      prefixGroups[2] = true;
-      hasOpSize = true;
       setPrefixPresent(insn, byte, prefixLocation);
+      if (!insn->mandatory_prefix) {
+        if (prefixGroups[2])
+          dbgprintf(insn, "Redundant Group 3 prefix");
+        prefixGroups[2] = true;
+      }
+      hasOpSize = true;
       break;
     case 0x67:  /* Address-size override */
       if (prefixGroups[3])
@@ -624,10 +666,8 @@ static int readPrefixes(struct InternalI
         insn->necessaryPrefixLocation = insn->readerCursor - 2;
 
         dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
-      } else {
+      } else
         unconsumeByte(insn);
-        insn->necessaryPrefixLocation = insn->readerCursor - 1;
-      }
     } else {
       unconsumeByte(insn);
       insn->necessaryPrefixLocation = insn->readerCursor - 1;
@@ -950,8 +990,19 @@ static int getID(struct InternalInstruct
     } else {
       return -1;
     }
+  } else if (insn->mode != MODE_16BIT && !insn->mandatory_prefix) {
+    // If we don't have mandatory prefix we should use "standard" prefixes here
+    if (insn->prefixPresent[0x66])
+      attrMask |= ATTR_OPSIZE;
+    if (insn->prefixPresent[0x67])
+      attrMask |= ATTR_ADSIZE;
+    if (insn->prefixPresent[0xf2])
+      attrMask |= ATTR_XD;
+    if (insn->prefixPresent[0xf3])
+      attrMask |= ATTR_XS;
   } else {
-    if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+    if (insn->mode != MODE_16BIT &&
+        isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
       attrMask |= ATTR_OPSIZE;
     else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
       attrMask |= ATTR_ADSIZE;

Modified: llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h?rev=311882&r1=311881&r2=311882&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h (original)
+++ llvm/trunk/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h Mon Aug 28 03:43:14 2017
@@ -546,10 +546,13 @@ struct InternalInstruction {
 
   // Prefix state
 
+  // TODO: we're able to get rid of the following 2 arrays
   // 1 if the prefix byte corresponding to the entry is present; 0 if not
   uint8_t prefixPresent[0x100];
   // contains the location (for use with the reader) of the prefix byte
   uint64_t prefixLocations[0x100];
+  // The possible mandatory prefix
+  uint8_t mandatory_prefix;
   // The value of the vector extension prefix(EVEX/VEX/XOP), if present
   uint8_t vectorExtensionPrefix[4];
   // The type of the vector extension prefix

Added: llvm/trunk/test/MC/Disassembler/X86/prefixes-i386.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/prefixes-i386.s?rev=311882&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/X86/prefixes-i386.s (added)
+++ llvm/trunk/test/MC/Disassembler/X86/prefixes-i386.s Mon Aug 28 03:43:14 2017
@@ -0,0 +1,68 @@
+// RUN: llvm-mc -disassemble %s -triple=i386-apple-darwin9
+
+// CHECK:  movl    %fs:24, %eax
+0x64 0xa1 0x18 0x00 0x00 0x00 # mov eax, dword ptr fs:[18h]
+
+# CHECK: rep
+# CHECK-NEXT: insb    %dx, %es:(%rdi)
+0xf3 0x6c #rep ins
+# CHECK: rep
+# CHECK-NEXT: insl    %dx, %es:(%rdi)
+0xf3 0x6d #rep ins
+# CHECK: rep
+# CHECK-NEXT: movsb   (%rsi), %es:(%rdi)
+0xf3 0xa4 #rep movs
+# CHECK: rep
+# CHECK-NEXT: movsl   (%rsi), %es:(%rdi)
+0xf3 0xa5 #rep movs
+# CHECK: rep
+# CHECK-NEXT: outsb   (%rsi), %dx
+0xf3 0x6e #rep outs
+# CHECK: rep
+# CHECK-NEXT: outsl   (%rsi), %dx
+0xf3 0x6f #rep outs
+# CHECK: rep
+# CHECK-NEXT: lodsb   (%rsi), %al
+0xf3 0xac #rep lods
+# CHECK: rep
+# CHECK-NEXT: lodsl   (%rsi), %eax
+0xf3 0xad #rep lods
+# CHECK: rep
+# CHECK-NEXT: stosb   %al, %es:(%rdi)
+0xf3 0xaa #rep stos
+# CHECK: rep
+# CHECK-NEXT: stosl   %eax, %es:(%rdi)
+0xf3 0xab #rep stos
+# CHECK: rep
+# CHECK-NEXT: cmpsb   %es:(%rdi), (%rsi)
+0xf3 0xa6 #rep cmps
+# CHECK: rep
+# CHECK-NEXT: cmpsl   %es:(%rdi), (%rsi)
+0xf3 0xa7 #repe cmps
+# CHECK: rep
+# CHECK-NEXT: scasb   %es:(%rdi), %al
+0xf3 0xae #repe scas
+# CHECK: rep
+# CHECK-NEXT: scasl   %es:(%rdi), %eax
+0xf3 0xaf #repe scas
+# CHECK: repne
+# CHECK-NEXT: cmpsb   %es:(%rdi), (%rsi)
+0xf2 0xa6 #repne cmps
+# CHECK: repne
+# CHECK-NEXT: cmpsl   %es:(%rdi), (%rsi)
+0xf2 0xa7 #repne cmps
+# CHECK: repne
+# CHECK-NEXT: scasb   %es:(%rdi), %al
+0xf2 0xae #repne scas
+# CHECK: repne
+# CHECK-NEXT: scasl   %es:(%rdi), %eax
+0xf2 0xaf #repne scas
+
+// CHECK: mulsd   %xmm7, %xmm7
+0x66 0xF3 0xF2 0x0F 0x59 0xFF
+// CHECK: mulss   %xmm7, %xmm7
+0x66 0xF2 0xF3 0x0F 0x59 0xFF
+// CHECK: mulpd   %xmm7, %xmm7
+0x66 0x0F 0x59 0xFF
+// CHECK: mulsd   %xmm7, %xmm7
+0xf2 0x66 0x0f 0x59 0xff

Added: llvm/trunk/test/MC/Disassembler/X86/prefixes-x86_64.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/prefixes-x86_64.s?rev=311882&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/X86/prefixes-x86_64.s (added)
+++ llvm/trunk/test/MC/Disassembler/X86/prefixes-x86_64.s Mon Aug 28 03:43:14 2017
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -disassemble %s -triple=x86_64-apple-darwin9
+
+// CHECK: mulsd   %xmm7, %xmm7
+0x66 0xF3 0xF2 0x0F 0x59 0xFF
+// CHECK: mulss   %xmm7, %xmm7
+0x66 0xF2 0xF3 0x0F 0x59 0xFF
+// CHECK: mulpd   %xmm7, %xmm7
+0x66 0x0F 0x59 0xFF
+// CHECK: mulsd   %xmm7, %xmm7
+0xf2 0x66 0x0f 0x59 0xff

Modified: llvm/trunk/test/MC/Disassembler/X86/prefixes.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/prefixes.txt?rev=311882&r1=311881&r2=311882&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/X86/prefixes.txt (original)
+++ llvm/trunk/test/MC/Disassembler/X86/prefixes.txt Mon Aug 28 03:43:14 2017
@@ -1,5 +1,60 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
 
+# CHECK: rep
+# CHECK-NEXT: insb    %dx, %es:(%rdi)
+0xf3 0x6c #rep ins
+# CHECK: rep
+# CHECK-NEXT: insl    %dx, %es:(%rdi)
+0xf3 0x6d #rep ins
+# CHECK: rep
+# CHECK-NEXT: movsb   (%rsi), %es:(%rdi)
+0xf3 0xa4 #rep movs
+# CHECK: rep
+# CHECK-NEXT: movsl   (%rsi), %es:(%rdi)
+0xf3 0xa5 #rep movs
+# CHECK: rep
+# CHECK-NEXT: outsb   (%rsi), %dx
+0xf3 0x6e #rep outs
+# CHECK: rep
+# CHECK-NEXT: outsl   (%rsi), %dx
+0xf3 0x6f #rep outs
+# CHECK: rep
+# CHECK-NEXT: lodsb   (%rsi), %al
+0xf3 0xac #rep lods
+# CHECK: rep
+# CHECK-NEXT: lodsl   (%rsi), %eax
+0xf3 0xad #rep lods
+# CHECK: rep
+# CHECK-NEXT: stosb   %al, %es:(%rdi)
+0xf3 0xaa #rep stos
+# CHECK: rep
+# CHECK-NEXT: stosl   %eax, %es:(%rdi)
+0xf3 0xab #rep stos
+# CHECK: rep
+# CHECK-NEXT: cmpsb   %es:(%rdi), (%rsi)
+0xf3 0xa6 #rep cmps
+# CHECK: rep
+# CHECK-NEXT: cmpsl   %es:(%rdi), (%rsi)
+0xf3 0xa7 #repe cmps
+# CHECK: rep
+# CHECK-NEXT: scasb   %es:(%rdi), %al
+0xf3 0xae #repe scas
+# CHECK: rep
+# CHECK-NEXT: scasl   %es:(%rdi), %eax
+0xf3 0xaf #repe scas
+# CHECK: repne
+# CHECK-NEXT: cmpsb   %es:(%rdi), (%rsi)
+0xf2 0xa6 #repne cmps
+# CHECK: repne
+# CHECK-NEXT: cmpsl   %es:(%rdi), (%rsi)
+0xf2 0xa7 #repne cmps
+# CHECK: repne
+# CHECK-NEXT: scasb   %es:(%rdi), %al
+0xf2 0xae #repne scas
+# CHECK: repne
+# CHECK-NEXT: scasl   %es:(%rdi), %eax
+0xf2 0xaf #repne scas
+
 # CHECK: lock
 # CHECK-NEXT:	orl	$16, %fs:776
 0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10
@@ -50,7 +105,6 @@
 
 # Test that multiple redundant prefixes work (redundant, but valid x86).
 # CHECK: rep
-# CHECK-NEXT: rep
 # CHECK-NEXT: stosq
 0xf3 0xf3 0x48 0xab
 




More information about the llvm-commits mailing list