[llvm] a9f9ceb - [X86] Use correct padding when in 16-bit mode

Thu Feb 25 20:05:54 PST 2021

Author: Bill Wendling
Date: 2021-02-25T20:05:45-08:00
New Revision: a9f9ceb35f8117ca2c969dfa6614412e00797d45

URL: https://github.com/llvm/llvm-project/commit/a9f9ceb35f8117ca2c969dfa6614412e00797d45
DIFF: https://github.com/llvm/llvm-project/commit/a9f9ceb35f8117ca2c969dfa6614412e00797d45.diff

LOG: [X86] Use correct padding when in 16-bit mode

In 16-bit mode, some of the nop patterns used in 32-bit mode can end up
mangling other instructions. For instance, an aligned "movz" instruction
may have the 0x66 and 0x67 prefixes omitted, because the nop that's used
messes things up.

       xorl    %ebx, %ebx
       .p2align 4, 0x90
       movzbl  (%esi,%ebx), %ecx

Use instead nop patterns we know 16-bit mode can handle.

Differential Revision: https://reviews.llvm.org/D97268

Added: 
    llvm/test/MC/X86/code16gcc-align.s

Modified: 
    llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 95012a148d83..73a77b8e99e7 100644

--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -1073,6 +1073,8 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
 }
 
 unsigned X86AsmBackend::getMaximumNopSize() const {
+  if (STI.hasFeature(X86::Mode16Bit))
+    return 4;
   if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
     return 1;
   if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
@@ -1091,29 +1093,44 @@ unsigned X86AsmBackend::getMaximumNopSize() const {
 /// bytes.
 /// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
-  static const char Nops[10][11] = {
-    // nop
-    "\x90",
-    // xchg %ax,%ax
-    "\x66\x90",
-    // nopl (%[re]ax)
-    "\x0f\x1f\x00",
-    // nopl 0(%[re]ax)
-    "\x0f\x1f\x40\x00",
-    // nopl 0(%[re]ax,%[re]ax,1)
-    "\x0f\x1f\x44\x00\x00",
-    // nopw 0(%[re]ax,%[re]ax,1)
-    "\x66\x0f\x1f\x44\x00\x00",
-    // nopl 0L(%[re]ax)
-    "\x0f\x1f\x80\x00\x00\x00\x00",
-    // nopl 0L(%[re]ax,%[re]ax,1)
-    "\x0f\x1f\x84\x00\x00\x00\x00\x00",
-    // nopw 0L(%[re]ax,%[re]ax,1)
-    "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
-    // nopw %cs:0L(%[re]ax,%[re]ax,1)
-    "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
+  static const char Nops32Bit[10][11] = {
+      // nop
+      "\x90",
+      // xchg %ax,%ax
+      "\x66\x90",
+      // nopl (%[re]ax)
+      "\x0f\x1f\x00",
+      // nopl 0(%[re]ax)
+      "\x0f\x1f\x40\x00",
+      // nopl 0(%[re]ax,%[re]ax,1)
+      "\x0f\x1f\x44\x00\x00",
+      // nopw 0(%[re]ax,%[re]ax,1)
+      "\x66\x0f\x1f\x44\x00\x00",
+      // nopl 0L(%[re]ax)
+      "\x0f\x1f\x80\x00\x00\x00\x00",
+      // nopl 0L(%[re]ax,%[re]ax,1)
+      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
+      // nopw 0L(%[re]ax,%[re]ax,1)
+      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
+      // nopw %cs:0L(%[re]ax,%[re]ax,1)
+      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
+  };
+
+  // 16-bit mode uses different nop patterns than 32-bit.
+  static const char Nops16Bit[4][11] = {
+      // nop
+      "\x90",
+      // xchg %eax,%eax
+      "\x66\x90",
+      // lea 0(%si),%si
+      "\x8d\x74\x00",
+      // lea 0w(%si),%si
+      "\x8d\xb4\x00\x00",
   };
 
+  const char(*Nops)[11] =
+      STI.getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
+
   uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
 
   // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining

diff  --git a/llvm/test/MC/X86/code16gcc-align.s b/llvm/test/MC/X86/code16gcc-align.s
new file mode 100644
index 000000000000..657364a60427
--- /dev/null
+++ b/llvm/test/MC/X86/code16gcc-align.s
@@ -0,0 +1,40 @@
+# RUN: llvm-mc -filetype=obj -triple=i386-unknown-unknown-code16 %s | llvm-objdump --triple=i386-unknown-unknown-code16 -d - | FileCheck %s
+
+# Ensure that the "movzbl" is aligned such that the prefixes 0x67 0x66 are
+# properly included in the "movz" instruction.
+
+# CHECK-LABEL: <test>:
+# CHECK:           1c: 8d b4 00 00                  	leaw	(%si), %si
+# CHECK-NEXT:      20: 66 90                        	nop
+# CHECK-NEXT:      22: 66 89 c7                     	movl	%eax, %edi
+# CHECK-NEXT:      25: 66 31 db                     	xorl	%ebx, %ebx
+# CHECK-NEXT:      28: 8d b4 00 00                  	leaw	(%si), %si
+# CHECK-NEXT:      2c: 8d b4 00 00                      leaw	(%si), %si
+# CHECK-NEXT:      30: 67 66 0f b6 0c 1e            	movzbl	(%esi,%ebx), %ecx
+# CHECK-NEXT:      36: 66 e8 14 00 00 00            	calll	0x50 <called>
+# CHECK-NEXT:      3c: 8d 74 00                     	leaw	(%si), %si
+
+# CHECK-LABEL: <called>:
+# CHECK-NEXT:      50: 90                           	nop
+# CHECK-NEXT:      51: 66 c3                        	retl
+
+	.text
+	.code16gcc
+	.globl	test
+	.p2align	4, 0x90
+	.type	test,@function
+test:
+	.nops	34
+	movl	%eax, %edi
+	xorl	%ebx, %ebx
+	.p2align	4, 0x90
+	movzbl	(%esi,%ebx), %ecx
+	calll	called
+	.nops	3
+	retl
+
+	.p2align	4, 0x90
+	.type	called,@function
+called:
+	.nops	1
+	retl