[PATCH v3 01/11] [x86] Fix ModR/M byte output for 16-bit addressing modes

Fri Dec 20 06:57:58 PST 2013

From: David Woodhouse <David.Woodhouse at intel.com>

http://llvm.org/bugs/show_bug.cgi?id=18220

Add some tests to validate correct register selection, including a fix
to an existing test which was requiring the *wrong* output.
---
 lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 60 ++++++++++++++++++++++++
 test/MC/X86/address-size.s                       |  8 +++-
 2 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 7952607..0e48a35 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -402,6 +402,66 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
 
   unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
 
+  // 16-bit addressing forms of the ModR/M byte have a different encoding for
+  // the R/M field and are far more limited in which registers can be used.
+  if (Is16BitMemOperand(MI, Op)) {
+    if (BaseReg) {
+      // For 32-bit addressing, the row and column values in Table 2-2 are
+      // basically the same. It's AX/CX/DX/BX/SP/BP/SI/DI in that order, with
+      // some special cases. And GetX86RegNum reflects that numbering.
+      // For 16-bit addressing it's more fun, as shown in the SDM Vol 2A,
+      // Table 2-1 "16-Bit Addressing Forms with the ModR/M byte". We can only
+      // use SI/DI/BP/BX, which have "row" values 4-7 in no particular order,
+      // while values 0-3 indicate the allowed combinations (base+index) of
+      // those: 0 for BX+SI, 1 for BX+DI, 2 for BP+SI, 3 for BP+DI.
+      //
+      // R16Table[] is a lookup from the normal RegNo, to the row values from
+      // Table 2-1 for 16-bit addressing modes. Where zero means disallowed.
+      static const int R16Table[] = { 0, 0, 0, 7, 0, 6, 4, 5 };
+      unsigned RMfield = R16Table[BaseRegNo];
+
+      assert(RMfield && "invalid 16-bit base register");
+
+      if (IndexReg.getReg()) {
+        unsigned IndexReg16 = R16Table[GetX86RegNum(IndexReg)];
+
+        assert(IndexReg16 && "invalid 16-bit index register");
+        // We must have one of SI/DI (4,5), and one of BP/BX (6,7).
+        assert(((IndexReg16 ^ RMfield) & 2) &&
+               "invalid 16-bit base/index register combination");
+        assert(Scale.getImm() == 1 &&
+               "invalid scale for 16-bit memory reference");
+
+        // Allow base/index to appear in either order (although GAS doesn't).
+        if (IndexReg16 & 2)
+          RMfield = (RMfield & 1) | ((7 - IndexReg16) << 1);
+        else
+          RMfield = (IndexReg16 & 1) | ((7 - RMfield) << 1);
+      }
+
+      if (Disp.isImm() && isDisp8(Disp.getImm())) {
+        // Use [REG]+disp8 form if we can, and for [BP] which cannot be encoded.
+        if (BaseRegNo == N86::EBP || Disp.getImm() != 0) {
+          EmitByte(ModRMByte(1, RegOpcodeField, RMfield), CurByte, OS);
+          EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
+          return;
+        } else {
+          // There is no displacement; just the register.
+          EmitByte(ModRMByte(0, RegOpcodeField, RMfield), CurByte, OS);
+          return;
+        }
+      }
+      EmitByte(ModRMByte(2, RegOpcodeField, RMfield), CurByte, OS);
+    } else {
+      // There is no BaseReg; this is the plain [disp16] case.
+      EmitByte(ModRMByte(0, RegOpcodeField, 6), CurByte, OS);
+    }
+
+    // Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
+    EmitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, CurByte, OS, Fixups);
+    return;
+  }
+
   // Determine whether a SIB byte is needed.
   // If no BaseReg, issue a RIP relative instruction only if the MCE can
   // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
diff --git a/test/MC/X86/address-size.s b/test/MC/X86/address-size.s
index b105b40..936cd57 100644
--- a/test/MC/X86/address-size.s
+++ b/test/MC/X86/address-size.s
@@ -8,6 +8,12 @@
 
 	.code32
 	movb	$0x0, (%si)
-// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+// CHECK: encoding: [0x67,0xc6,0x04,0x00]
 	movb	$0x0, (%esi)
 // CHECK: encoding: [0xc6,0x06,0x00]
+	movw	$0x1234, (%si)
+// CHECK: encoding: [0x67,0x66,0xc7,0x04,0x34,0x12]
+	movl	$0x12345678, (%bx,%si,1)
+// CHECK: encoding: [0x67,0xc7,0x00,0x78,0x56,0x34,0x12]
+	movw	$0x1234, 0x5678(%bp)
+// CHECK: encoding: [0x67,0x66,0xc7,0x86,0x78,0x56,0x34,0x12]
-- 
1.8.3.1