[llvm] r177014 - Fix a bug in the calculation of the VEX.B bit for FMA4 rr with the VEX.W bit set. The VEX.B was being calculated from the wrong operand. Fixes at least some portion of PR14185.

Thu Mar 14 00:40:52 PDT 2013

Author: ctopper
Date: Thu Mar 14 02:40:52 2013
New Revision: 177014

URL: http://llvm.org/viewvc/llvm-project?rev=177014&view=rev
Log:
Fix a bug in the calculation of the VEX.B bit for FMA4 rr with the VEX.W bit set. The VEX.B was being calculated from the wrong operand. Fixes at least some portion of PR14185.

Modified:
    llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
    llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp
    llvm/trunk/test/MC/X86/x86_64-fma4-encoding.s

Modified: llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp?rev=177014&r1=177013&r2=177014&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Thu Mar 14 02:40:52 2013
@@ -446,6 +446,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePref
                                            raw_ostream &OS) const {
   bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
   bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
 
   // VEX_R: opcode externsion equivalent to REX.R in
   // 1's complement (inverted) form
@@ -650,12 +651,19 @@ void X86MCCodeEmitter::EmitVEXOpcodePref
     //  dst(ModR/M), src1(ModR/M)
     //  dst(ModR/M), src1(ModR/M), imm8
     //
+    //  FMA4:
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+    //  dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
     if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_R = 0x0;
     CurOp++;
 
     if (HasVEX_4V)
       VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+    if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+      CurOp++;
+
     if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_B = 0x0;
     CurOp++;

Modified: llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp?rev=177014&r1=177013&r2=177014&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86CodeEmitter.cpp Thu Mar 14 02:40:52 2013
@@ -816,6 +816,7 @@ void Emitter<CodeEmitter>::emitVEXOpcode
                                                const MCInstrDesc *Desc) const {
   bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
   bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+  bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
 
   // VEX_R: opcode externsion equivalent to REX.R in
   // 1's complement (inverted) form
@@ -1032,6 +1033,10 @@ void Emitter<CodeEmitter>::emitVEXOpcode
 
       if (HasVEX_4V)
         VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
+
+      if (MemOp4) // Skip second register source (encoded in I8IMM)
+        CurOp++;
+
       if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
         VEX_B = 0x0;
       CurOp++;

Modified: llvm/trunk/test/MC/X86/x86_64-fma4-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86_64-fma4-encoding.s?rev=177014&r1=177013&r2=177014&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86_64-fma4-encoding.s (original)
+++ llvm/trunk/test/MC/X86/x86_64-fma4-encoding.s Thu Mar 14 02:40:52 2013
@@ -25,6 +25,10 @@
 // CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
           vfmaddsd   %xmm2, %xmm1, %xmm0, %xmm0
 
+// CHECK: vfmaddsd   %xmm10, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xc3,0xf9,0x6b,0xc2,0x10]
+          vfmaddsd   %xmm10, %xmm1, %xmm0, %xmm0
+
 // CHECK: vfmaddps  (%rcx), %xmm1, %xmm0, %xmm0
 // CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
           vfmaddps  (%rcx), %xmm1, %xmm0, %xmm0