[llvm-commits] [llvm] r75749 - in /llvm/trunk/lib/Target/X86: X86InstrInfo.cpp X86InstrInfo.h

Evan Cheng evan.cheng at apple.com
Tue Jul 14 23:10:07 PDT 2009


Author: evancheng
Date: Wed Jul 15 01:10:07 2009
New Revision: 75749

URL: http://llvm.org/viewvc/llvm-project?rev=75749&view=rev
Log:
Move the load / store folding alignment requirement into the table(s).

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.h
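
For readers skimming the patch, here is a minimal self-contained sketch of the new table shape (illustrative only: the opcode numbers are made up and std::unordered_map stands in for the DenseMap<unsigned*, std::pair<unsigned,unsigned> > used in the tree). Each row gains a trailing minimum-alignment column, and the registration loop now stores std::make_pair(MemOp, Align) as the map value instead of the bare memory-form opcode:

  // Standalone sketch, not LLVM code.  Hypothetical opcode numbers.
  #include <cassert>
  #include <cstdio>
  #include <unordered_map>
  #include <utility>

  enum : unsigned { MOVAPSrr = 1, MOVAPSmr = 2, MOV32rr = 3, MOV32mr = 4 };

  int main() {
    // { reg-form opcode, mem-form opcode, folds a load?, minimum alignment }
    static const unsigned OpTbl0[][4] = {
      { MOVAPSrr, MOVAPSmr, 0, 16 },  // aligned SSE store: needs 16 bytes
      { MOV32rr,  MOV32mr,  0, 0  },  // plain GPR store: no requirement
    };

    // reg-form opcode -> (mem-form opcode, minimum alignment in bytes)
    std::unordered_map<unsigned, std::pair<unsigned, unsigned>> RegOp2MemOpTable0;

    for (const auto &Row : OpTbl0) {
      bool Inserted =
          RegOp2MemOpTable0.insert({Row[0], {Row[1], Row[3]}}).second;
      (void)Inserted;
      assert(Inserted && "Duplicated entries?");
    }

    std::printf("MOVAPSrr folds to %u, needs %u-byte alignment\n",
                RegOp2MemOpTable0[MOVAPSrr].first,
                RegOp2MemOpTable0[MOVAPSrr].second);
    return 0;
  }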

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=75749&r1=75748&r2=75749&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Wed Jul 15 01:10:07 2009
@@ -215,9 +215,10 @@
     unsigned RegOp = OpTbl2Addr[i][0];
     unsigned MemOp = OpTbl2Addr[i][1];
     if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
-                                                     MemOp)).second)
+                                               std::make_pair(MemOp,0))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0,folded load and store
+    // Index 0, folded load and store, no alignment requirement.
+    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
     if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                 std::make_pair(RegOp,
                                                               AuxInfo))).second)
@@ -225,93 +226,94 @@
   }
 
   // If the third value is 1, then it's folding either a load or a store.
-  static const unsigned OpTbl0[][3] = {
-    { X86::BT16ri8,     X86::BT16mi8, 1 },
-    { X86::BT32ri8,     X86::BT32mi8, 1 },
-    { X86::BT64ri8,     X86::BT64mi8, 1 },
-    { X86::CALL32r,     X86::CALL32m, 1 },
-    { X86::CALL64r,     X86::CALL64m, 1 },
-    { X86::CMP16ri,     X86::CMP16mi, 1 },
-    { X86::CMP16ri8,    X86::CMP16mi8, 1 },
-    { X86::CMP16rr,     X86::CMP16mr, 1 },
-    { X86::CMP32ri,     X86::CMP32mi, 1 },
-    { X86::CMP32ri8,    X86::CMP32mi8, 1 },
-    { X86::CMP32rr,     X86::CMP32mr, 1 },
-    { X86::CMP64ri32,   X86::CMP64mi32, 1 },
-    { X86::CMP64ri8,    X86::CMP64mi8, 1 },
-    { X86::CMP64rr,     X86::CMP64mr, 1 },
-    { X86::CMP8ri,      X86::CMP8mi, 1 },
-    { X86::CMP8rr,      X86::CMP8mr, 1 },
-    { X86::DIV16r,      X86::DIV16m, 1 },
-    { X86::DIV32r,      X86::DIV32m, 1 },
-    { X86::DIV64r,      X86::DIV64m, 1 },
-    { X86::DIV8r,       X86::DIV8m, 1 },
-    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
-    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0 },
-    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0 },
-    { X86::IDIV16r,     X86::IDIV16m, 1 },
-    { X86::IDIV32r,     X86::IDIV32m, 1 },
-    { X86::IDIV64r,     X86::IDIV64m, 1 },
-    { X86::IDIV8r,      X86::IDIV8m, 1 },
-    { X86::IMUL16r,     X86::IMUL16m, 1 },
-    { X86::IMUL32r,     X86::IMUL32m, 1 },
-    { X86::IMUL64r,     X86::IMUL64m, 1 },
-    { X86::IMUL8r,      X86::IMUL8m, 1 },
-    { X86::JMP32r,      X86::JMP32m, 1 },
-    { X86::JMP64r,      X86::JMP64m, 1 },
-    { X86::MOV16ri,     X86::MOV16mi, 0 },
-    { X86::MOV16rr,     X86::MOV16mr, 0 },
-    { X86::MOV32ri,     X86::MOV32mi, 0 },
-    { X86::MOV32rr,     X86::MOV32mr, 0 },
-    { X86::MOV64ri32,   X86::MOV64mi32, 0 },
-    { X86::MOV64rr,     X86::MOV64mr, 0 },
-    { X86::MOV8ri,      X86::MOV8mi, 0 },
-    { X86::MOV8rr,      X86::MOV8mr, 0 },
-    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
-    { X86::MOVAPDrr,    X86::MOVAPDmr, 0 },
-    { X86::MOVAPSrr,    X86::MOVAPSmr, 0 },
-    { X86::MOVDQArr,    X86::MOVDQAmr, 0 },
-    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
-    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0 },
-    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0 },
-    { X86::MOVSDrr,     X86::MOVSDmr, 0 },
-    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
-    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0 },
-    { X86::MOVSSrr,     X86::MOVSSmr, 0 },
-    { X86::MOVUPDrr,    X86::MOVUPDmr, 0 },
-    { X86::MOVUPSrr,    X86::MOVUPSmr, 0 },
-    { X86::MUL16r,      X86::MUL16m, 1 },
-    { X86::MUL32r,      X86::MUL32m, 1 },
-    { X86::MUL64r,      X86::MUL64m, 1 },
-    { X86::MUL8r,       X86::MUL8m, 1 },
-    { X86::SETAEr,      X86::SETAEm, 0 },
-    { X86::SETAr,       X86::SETAm, 0 },
-    { X86::SETBEr,      X86::SETBEm, 0 },
-    { X86::SETBr,       X86::SETBm, 0 },
-    { X86::SETEr,       X86::SETEm, 0 },
-    { X86::SETGEr,      X86::SETGEm, 0 },
-    { X86::SETGr,       X86::SETGm, 0 },
-    { X86::SETLEr,      X86::SETLEm, 0 },
-    { X86::SETLr,       X86::SETLm, 0 },
-    { X86::SETNEr,      X86::SETNEm, 0 },
-    { X86::SETNOr,      X86::SETNOm, 0 },
-    { X86::SETNPr,      X86::SETNPm, 0 },
-    { X86::SETNSr,      X86::SETNSm, 0 },
-    { X86::SETOr,       X86::SETOm, 0 },
-    { X86::SETPr,       X86::SETPm, 0 },
-    { X86::SETSr,       X86::SETSm, 0 },
-    { X86::TAILJMPr,    X86::TAILJMPm, 1 },
-    { X86::TEST16ri,    X86::TEST16mi, 1 },
-    { X86::TEST32ri,    X86::TEST32mi, 1 },
-    { X86::TEST64ri32,  X86::TEST64mi32, 1 },
-    { X86::TEST8ri,     X86::TEST8mi, 1 }
+  static const unsigned OpTbl0[][4] = {
+    { X86::BT16ri8,     X86::BT16mi8, 1, 0 },
+    { X86::BT32ri8,     X86::BT32mi8, 1, 0 },
+    { X86::BT64ri8,     X86::BT64mi8, 1, 0 },
+    { X86::CALL32r,     X86::CALL32m, 1, 0 },
+    { X86::CALL64r,     X86::CALL64m, 1, 0 },
+    { X86::CMP16ri,     X86::CMP16mi, 1, 0 },
+    { X86::CMP16ri8,    X86::CMP16mi8, 1, 0 },
+    { X86::CMP16rr,     X86::CMP16mr, 1, 0 },
+    { X86::CMP32ri,     X86::CMP32mi, 1, 0 },
+    { X86::CMP32ri8,    X86::CMP32mi8, 1, 0 },
+    { X86::CMP32rr,     X86::CMP32mr, 1, 0 },
+    { X86::CMP64ri32,   X86::CMP64mi32, 1, 0 },
+    { X86::CMP64ri8,    X86::CMP64mi8, 1, 0 },
+    { X86::CMP64rr,     X86::CMP64mr, 1, 0 },
+    { X86::CMP8ri,      X86::CMP8mi, 1, 0 },
+    { X86::CMP8rr,      X86::CMP8mr, 1, 0 },
+    { X86::DIV16r,      X86::DIV16m, 1, 0 },
+    { X86::DIV32r,      X86::DIV32m, 1, 0 },
+    { X86::DIV64r,      X86::DIV64m, 1, 0 },
+    { X86::DIV8r,       X86::DIV8m, 1, 0 },
+    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
+    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
+    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
+    { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
+    { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
+    { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
+    { X86::IDIV8r,      X86::IDIV8m, 1, 0 },
+    { X86::IMUL16r,     X86::IMUL16m, 1, 0 },
+    { X86::IMUL32r,     X86::IMUL32m, 1, 0 },
+    { X86::IMUL64r,     X86::IMUL64m, 1, 0 },
+    { X86::IMUL8r,      X86::IMUL8m, 1, 0 },
+    { X86::JMP32r,      X86::JMP32m, 1, 0 },
+    { X86::JMP64r,      X86::JMP64m, 1, 0 },
+    { X86::MOV16ri,     X86::MOV16mi, 0, 0 },
+    { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
+    { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
+    { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
+    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
+    { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
+    { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
+    { X86::MOV8rr,      X86::MOV8mr, 0, 0 },
+    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
+    { X86::MOVAPDrr,    X86::MOVAPDmr, 0, 16 },
+    { X86::MOVAPSrr,    X86::MOVAPSmr, 0, 16 },
+    { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
+    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
+    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
+    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0, 0 },
+    { X86::MOVSDrr,     X86::MOVSDmr, 0, 0 },
+    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
+    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
+    { X86::MOVSSrr,     X86::MOVSSmr, 0, 0 },
+    { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
+    { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
+    { X86::MUL16r,      X86::MUL16m, 1, 0 },
+    { X86::MUL32r,      X86::MUL32m, 1, 0 },
+    { X86::MUL64r,      X86::MUL64m, 1, 0 },
+    { X86::MUL8r,       X86::MUL8m, 1, 0 },
+    { X86::SETAEr,      X86::SETAEm, 0, 0 },
+    { X86::SETAr,       X86::SETAm, 0, 0 },
+    { X86::SETBEr,      X86::SETBEm, 0, 0 },
+    { X86::SETBr,       X86::SETBm, 0, 0 },
+    { X86::SETEr,       X86::SETEm, 0, 0 },
+    { X86::SETGEr,      X86::SETGEm, 0, 0 },
+    { X86::SETGr,       X86::SETGm, 0, 0 },
+    { X86::SETLEr,      X86::SETLEm, 0, 0 },
+    { X86::SETLr,       X86::SETLm, 0, 0 },
+    { X86::SETNEr,      X86::SETNEm, 0, 0 },
+    { X86::SETNOr,      X86::SETNOm, 0, 0 },
+    { X86::SETNPr,      X86::SETNPm, 0, 0 },
+    { X86::SETNSr,      X86::SETNSm, 0, 0 },
+    { X86::SETOr,       X86::SETOm, 0, 0 },
+    { X86::SETPr,       X86::SETPm, 0, 0 },
+    { X86::SETSr,       X86::SETSm, 0, 0 },
+    { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
+    { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
+    { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
+    { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
+    { X86::TEST8ri,     X86::TEST8mi, 1, 0 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
     unsigned RegOp = OpTbl0[i][0];
     unsigned MemOp = OpTbl0[i][1];
+    unsigned Align = OpTbl0[i][3];
     if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                           std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
     unsigned FoldedLoad = OpTbl0[i][2];
     // Index 0, folded load or store.
@@ -322,338 +324,342 @@
         AmbEntries.push_back(MemOp);
   }
 
-  static const unsigned OpTbl1[][2] = {
-    { X86::CMP16rr,         X86::CMP16rm },
-    { X86::CMP32rr,         X86::CMP32rm },
-    { X86::CMP64rr,         X86::CMP64rm },
-    { X86::CMP8rr,          X86::CMP8rm },
-    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm },
-    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm },
-    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm },
-    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm },
-    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm },
-    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm },
-    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm },
-    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm },
-    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm },
-    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm },
-    { X86::FsMOVAPDrr,      X86::MOVSDrm },
-    { X86::FsMOVAPSrr,      X86::MOVSSrm },
-    { X86::IMUL16rri,       X86::IMUL16rmi },
-    { X86::IMUL16rri8,      X86::IMUL16rmi8 },
-    { X86::IMUL32rri,       X86::IMUL32rmi },
-    { X86::IMUL32rri8,      X86::IMUL32rmi8 },
-    { X86::IMUL64rri32,     X86::IMUL64rmi32 },
-    { X86::IMUL64rri8,      X86::IMUL64rmi8 },
-    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm },
-    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm },
-    { X86::Int_COMISDrr,    X86::Int_COMISDrm },
-    { X86::Int_COMISSrr,    X86::Int_COMISSrm },
-    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm },
-    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm },
-    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm },
-    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm },
-    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm },
-    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm },
-    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm },
-    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm },
-    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm },
-    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm },
-    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm },
-    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm },
-    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm },
-    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm },
-    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm },
-    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm },
-    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
-    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
-    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm },
-    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
-    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm },
-    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
-    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm },
-    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm },
-    { X86::MOV16rr,         X86::MOV16rm },
-    { X86::MOV32rr,         X86::MOV32rm },
-    { X86::MOV64rr,         X86::MOV64rm },
-    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm },
-    { X86::MOV64toSDrr,     X86::MOV64toSDrm },
-    { X86::MOV8rr,          X86::MOV8rm },
-    { X86::MOVAPDrr,        X86::MOVAPDrm },
-    { X86::MOVAPSrr,        X86::MOVAPSrm },
-    { X86::MOVDDUPrr,       X86::MOVDDUPrm },
-    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm },
-    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm },
-    { X86::MOVDQArr,        X86::MOVDQArm },
-    { X86::MOVSD2PDrr,      X86::MOVSD2PDrm },
-    { X86::MOVSDrr,         X86::MOVSDrm },
-    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm },
-    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm },
-    { X86::MOVSS2PSrr,      X86::MOVSS2PSrm },
-    { X86::MOVSSrr,         X86::MOVSSrm },
-    { X86::MOVSX16rr8,      X86::MOVSX16rm8 },
-    { X86::MOVSX32rr16,     X86::MOVSX32rm16 },
-    { X86::MOVSX32rr8,      X86::MOVSX32rm8 },
-    { X86::MOVSX64rr16,     X86::MOVSX64rm16 },
-    { X86::MOVSX64rr32,     X86::MOVSX64rm32 },
-    { X86::MOVSX64rr8,      X86::MOVSX64rm8 },
-    { X86::MOVUPDrr,        X86::MOVUPDrm },
-    { X86::MOVUPSrr,        X86::MOVUPSrm },
-    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm },
-    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm },
-    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
-    { X86::MOVZX16rr8,      X86::MOVZX16rm8 },
-    { X86::MOVZX32rr16,     X86::MOVZX32rm16 },
-    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
-    { X86::MOVZX32rr8,      X86::MOVZX32rm8 },
-    { X86::MOVZX64rr16,     X86::MOVZX64rm16 },
-    { X86::MOVZX64rr32,     X86::MOVZX64rm32 },
-    { X86::MOVZX64rr8,      X86::MOVZX64rm8 },
-    { X86::PSHUFDri,        X86::PSHUFDmi },
-    { X86::PSHUFHWri,       X86::PSHUFHWmi },
-    { X86::PSHUFLWri,       X86::PSHUFLWmi },
-    { X86::RCPPSr,          X86::RCPPSm },
-    { X86::RCPPSr_Int,      X86::RCPPSm_Int },
-    { X86::RSQRTPSr,        X86::RSQRTPSm },
-    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int },
-    { X86::RSQRTSSr,        X86::RSQRTSSm },
-    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int },
-    { X86::SQRTPDr,         X86::SQRTPDm },
-    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int },
-    { X86::SQRTPSr,         X86::SQRTPSm },
-    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int },
-    { X86::SQRTSDr,         X86::SQRTSDm },
-    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int },
-    { X86::SQRTSSr,         X86::SQRTSSm },
-    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int },
-    { X86::TEST16rr,        X86::TEST16rm },
-    { X86::TEST32rr,        X86::TEST32rm },
-    { X86::TEST64rr,        X86::TEST64rm },
-    { X86::TEST8rr,         X86::TEST8rm },
+  static const unsigned OpTbl1[][3] = {
+    { X86::CMP16rr,         X86::CMP16rm, 0 },
+    { X86::CMP32rr,         X86::CMP32rm, 0 },
+    { X86::CMP64rr,         X86::CMP64rm, 0 },
+    { X86::CMP8rr,          X86::CMP8rm, 0 },
+    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm, 0 },
+    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm, 0 },
+    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm, 0 },
+    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm, 0 },
+    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm, 0 },
+    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm, 0 },
+    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm, 0 },
+    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
+    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
+    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
+    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
+    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
+    { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
+    { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
+    { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
+    { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
+    { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
+    { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
+    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
+    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
+    { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
+    { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
+    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
+    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm, 16 },
+    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm, 16 },
+    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
+    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
+    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
+    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
+    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
+    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
+    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
+    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
+    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
+    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm, 0 },
+    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
+    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
+    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
+    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
+    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
+    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
+    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
+    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
+    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
+    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm, 0 },
+    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
+    { X86::MOV16rr,         X86::MOV16rm, 0 },
+    { X86::MOV32rr,         X86::MOV32rm, 0 },
+    { X86::MOV64rr,         X86::MOV64rm, 0 },
+    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
+    { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
+    { X86::MOV8rr,          X86::MOV8rm, 0 },
+    { X86::MOVAPDrr,        X86::MOVAPDrm, 16 },
+    { X86::MOVAPSrr,        X86::MOVAPSrm, 16 },
+    { X86::MOVDDUPrr,       X86::MOVDDUPrm, 0 },
+    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
+    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
+    { X86::MOVDQArr,        X86::MOVDQArm, 16 },
+    { X86::MOVSD2PDrr,      X86::MOVSD2PDrm, 0 },
+    { X86::MOVSDrr,         X86::MOVSDrm, 0 },
+    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
+    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
+    { X86::MOVSS2PSrr,      X86::MOVSS2PSrm, 0 },
+    { X86::MOVSSrr,         X86::MOVSSrm, 0 },
+    { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
+    { X86::MOVSX32rr16,     X86::MOVSX32rm16, 0 },
+    { X86::MOVSX32rr8,      X86::MOVSX32rm8, 0 },
+    { X86::MOVSX64rr16,     X86::MOVSX64rm16, 0 },
+    { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
+    { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
+    { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
+    { X86::MOVUPSrr,        X86::MOVUPSrm, 16 },
+    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
+    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
+    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
+    { X86::MOVZX16rr8,      X86::MOVZX16rm8, 0 },
+    { X86::MOVZX32rr16,     X86::MOVZX32rm16, 0 },
+    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
+    { X86::MOVZX32rr8,      X86::MOVZX32rm8, 0 },
+    { X86::MOVZX64rr16,     X86::MOVZX64rm16, 0 },
+    { X86::MOVZX64rr32,     X86::MOVZX64rm32, 0 },
+    { X86::MOVZX64rr8,      X86::MOVZX64rm8, 0 },
+    { X86::PSHUFDri,        X86::PSHUFDmi, 16 },
+    { X86::PSHUFHWri,       X86::PSHUFHWmi, 16 },
+    { X86::PSHUFLWri,       X86::PSHUFLWmi, 16 },
+    { X86::RCPPSr,          X86::RCPPSm, 16 },
+    { X86::RCPPSr_Int,      X86::RCPPSm_Int, 16 },
+    { X86::RSQRTPSr,        X86::RSQRTPSm, 16 },
+    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int, 16 },
+    { X86::RSQRTSSr,        X86::RSQRTSSm, 0 },
+    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int, 0 },
+    { X86::SQRTPDr,         X86::SQRTPDm, 16 },
+    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int, 16 },
+    { X86::SQRTPSr,         X86::SQRTPSm, 16 },
+    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int, 16 },
+    { X86::SQRTSDr,         X86::SQRTSDm, 0 },
+    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int, 0 },
+    { X86::SQRTSSr,         X86::SQRTSSm, 0 },
+    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int, 0 },
+    { X86::TEST16rr,        X86::TEST16rm, 0 },
+    { X86::TEST32rr,        X86::TEST32rm, 0 },
+    { X86::TEST64rr,        X86::TEST64rm, 0 },
+    { X86::TEST8rr,         X86::TEST8rm, 0 },
     // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
-    { X86::UCOMISDrr,       X86::UCOMISDrm },
-    { X86::UCOMISSrr,       X86::UCOMISSrm }
+    { X86::UCOMISDrr,       X86::UCOMISDrm, 0 },
+    { X86::UCOMISSrr,       X86::UCOMISSrm, 0 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
     unsigned RegOp = OpTbl1[i][0];
     unsigned MemOp = OpTbl1[i][1];
+    unsigned Align = OpTbl1[i][2];
     if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                           std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
+    // Index 1, folded load
+    unsigned AuxInfo = 1 | (1 << 4);
     if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
       if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                      std::make_pair(RegOp, AuxInfo))).second)
         AmbEntries.push_back(MemOp);
   }
 
-  static const unsigned OpTbl2[][2] = {
-    { X86::ADC32rr,         X86::ADC32rm },
-    { X86::ADC64rr,         X86::ADC64rm },
-    { X86::ADD16rr,         X86::ADD16rm },
-    { X86::ADD32rr,         X86::ADD32rm },
-    { X86::ADD64rr,         X86::ADD64rm },
-    { X86::ADD8rr,          X86::ADD8rm },
-    { X86::ADDPDrr,         X86::ADDPDrm },
-    { X86::ADDPSrr,         X86::ADDPSrm },
-    { X86::ADDSDrr,         X86::ADDSDrm },
-    { X86::ADDSSrr,         X86::ADDSSrm },
-    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm },
-    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm },
-    { X86::AND16rr,         X86::AND16rm },
-    { X86::AND32rr,         X86::AND32rm },
-    { X86::AND64rr,         X86::AND64rm },
-    { X86::AND8rr,          X86::AND8rm },
-    { X86::ANDNPDrr,        X86::ANDNPDrm },
-    { X86::ANDNPSrr,        X86::ANDNPSrm },
-    { X86::ANDPDrr,         X86::ANDPDrm },
-    { X86::ANDPSrr,         X86::ANDPSrm },
-    { X86::CMOVA16rr,       X86::CMOVA16rm },
-    { X86::CMOVA32rr,       X86::CMOVA32rm },
-    { X86::CMOVA64rr,       X86::CMOVA64rm },
-    { X86::CMOVAE16rr,      X86::CMOVAE16rm },
-    { X86::CMOVAE32rr,      X86::CMOVAE32rm },
-    { X86::CMOVAE64rr,      X86::CMOVAE64rm },
-    { X86::CMOVB16rr,       X86::CMOVB16rm },
-    { X86::CMOVB32rr,       X86::CMOVB32rm },
-    { X86::CMOVB64rr,       X86::CMOVB64rm },
-    { X86::CMOVBE16rr,      X86::CMOVBE16rm },
-    { X86::CMOVBE32rr,      X86::CMOVBE32rm },
-    { X86::CMOVBE64rr,      X86::CMOVBE64rm },
-    { X86::CMOVE16rr,       X86::CMOVE16rm },
-    { X86::CMOVE32rr,       X86::CMOVE32rm },
-    { X86::CMOVE64rr,       X86::CMOVE64rm },
-    { X86::CMOVG16rr,       X86::CMOVG16rm },
-    { X86::CMOVG32rr,       X86::CMOVG32rm },
-    { X86::CMOVG64rr,       X86::CMOVG64rm },
-    { X86::CMOVGE16rr,      X86::CMOVGE16rm },
-    { X86::CMOVGE32rr,      X86::CMOVGE32rm },
-    { X86::CMOVGE64rr,      X86::CMOVGE64rm },
-    { X86::CMOVL16rr,       X86::CMOVL16rm },
-    { X86::CMOVL32rr,       X86::CMOVL32rm },
-    { X86::CMOVL64rr,       X86::CMOVL64rm },
-    { X86::CMOVLE16rr,      X86::CMOVLE16rm },
-    { X86::CMOVLE32rr,      X86::CMOVLE32rm },
-    { X86::CMOVLE64rr,      X86::CMOVLE64rm },
-    { X86::CMOVNE16rr,      X86::CMOVNE16rm },
-    { X86::CMOVNE32rr,      X86::CMOVNE32rm },
-    { X86::CMOVNE64rr,      X86::CMOVNE64rm },
-    { X86::CMOVNO16rr,      X86::CMOVNO16rm },
-    { X86::CMOVNO32rr,      X86::CMOVNO32rm },
-    { X86::CMOVNO64rr,      X86::CMOVNO64rm },
-    { X86::CMOVNP16rr,      X86::CMOVNP16rm },
-    { X86::CMOVNP32rr,      X86::CMOVNP32rm },
-    { X86::CMOVNP64rr,      X86::CMOVNP64rm },
-    { X86::CMOVNS16rr,      X86::CMOVNS16rm },
-    { X86::CMOVNS32rr,      X86::CMOVNS32rm },
-    { X86::CMOVNS64rr,      X86::CMOVNS64rm },
-    { X86::CMOVO16rr,       X86::CMOVO16rm },
-    { X86::CMOVO32rr,       X86::CMOVO32rm },
-    { X86::CMOVO64rr,       X86::CMOVO64rm },
-    { X86::CMOVP16rr,       X86::CMOVP16rm },
-    { X86::CMOVP32rr,       X86::CMOVP32rm },
-    { X86::CMOVP64rr,       X86::CMOVP64rm },
-    { X86::CMOVS16rr,       X86::CMOVS16rm },
-    { X86::CMOVS32rr,       X86::CMOVS32rm },
-    { X86::CMOVS64rr,       X86::CMOVS64rm },
-    { X86::CMPPDrri,        X86::CMPPDrmi },
-    { X86::CMPPSrri,        X86::CMPPSrmi },
-    { X86::CMPSDrr,         X86::CMPSDrm },
-    { X86::CMPSSrr,         X86::CMPSSrm },
-    { X86::DIVPDrr,         X86::DIVPDrm },
-    { X86::DIVPSrr,         X86::DIVPSrm },
-    { X86::DIVSDrr,         X86::DIVSDrm },
-    { X86::DIVSSrr,         X86::DIVSSrm },
-    { X86::FsANDNPDrr,      X86::FsANDNPDrm },
-    { X86::FsANDNPSrr,      X86::FsANDNPSrm },
-    { X86::FsANDPDrr,       X86::FsANDPDrm },
-    { X86::FsANDPSrr,       X86::FsANDPSrm },
-    { X86::FsORPDrr,        X86::FsORPDrm },
-    { X86::FsORPSrr,        X86::FsORPSrm },
-    { X86::FsXORPDrr,       X86::FsXORPDrm },
-    { X86::FsXORPSrr,       X86::FsXORPSrm },
-    { X86::HADDPDrr,        X86::HADDPDrm },
-    { X86::HADDPSrr,        X86::HADDPSrm },
-    { X86::HSUBPDrr,        X86::HSUBPDrm },
-    { X86::HSUBPSrr,        X86::HSUBPSrm },
-    { X86::IMUL16rr,        X86::IMUL16rm },
-    { X86::IMUL32rr,        X86::IMUL32rm },
-    { X86::IMUL64rr,        X86::IMUL64rm },
-    { X86::MAXPDrr,         X86::MAXPDrm },
-    { X86::MAXPDrr_Int,     X86::MAXPDrm_Int },
-    { X86::MAXPSrr,         X86::MAXPSrm },
-    { X86::MAXPSrr_Int,     X86::MAXPSrm_Int },
-    { X86::MAXSDrr,         X86::MAXSDrm },
-    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int },
-    { X86::MAXSSrr,         X86::MAXSSrm },
-    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int },
-    { X86::MINPDrr,         X86::MINPDrm },
-    { X86::MINPDrr_Int,     X86::MINPDrm_Int },
-    { X86::MINPSrr,         X86::MINPSrm },
-    { X86::MINPSrr_Int,     X86::MINPSrm_Int },
-    { X86::MINSDrr,         X86::MINSDrm },
-    { X86::MINSDrr_Int,     X86::MINSDrm_Int },
-    { X86::MINSSrr,         X86::MINSSrm },
-    { X86::MINSSrr_Int,     X86::MINSSrm_Int },
-    { X86::MULPDrr,         X86::MULPDrm },
-    { X86::MULPSrr,         X86::MULPSrm },
-    { X86::MULSDrr,         X86::MULSDrm },
-    { X86::MULSSrr,         X86::MULSSrm },
-    { X86::OR16rr,          X86::OR16rm },
-    { X86::OR32rr,          X86::OR32rm },
-    { X86::OR64rr,          X86::OR64rm },
-    { X86::OR8rr,           X86::OR8rm },
-    { X86::ORPDrr,          X86::ORPDrm },
-    { X86::ORPSrr,          X86::ORPSrm },
-    { X86::PACKSSDWrr,      X86::PACKSSDWrm },
-    { X86::PACKSSWBrr,      X86::PACKSSWBrm },
-    { X86::PACKUSWBrr,      X86::PACKUSWBrm },
-    { X86::PADDBrr,         X86::PADDBrm },
-    { X86::PADDDrr,         X86::PADDDrm },
-    { X86::PADDQrr,         X86::PADDQrm },
-    { X86::PADDSBrr,        X86::PADDSBrm },
-    { X86::PADDSWrr,        X86::PADDSWrm },
-    { X86::PADDWrr,         X86::PADDWrm },
-    { X86::PANDNrr,         X86::PANDNrm },
-    { X86::PANDrr,          X86::PANDrm },
-    { X86::PAVGBrr,         X86::PAVGBrm },
-    { X86::PAVGWrr,         X86::PAVGWrm },
-    { X86::PCMPEQBrr,       X86::PCMPEQBrm },
-    { X86::PCMPEQDrr,       X86::PCMPEQDrm },
-    { X86::PCMPEQWrr,       X86::PCMPEQWrm },
-    { X86::PCMPGTBrr,       X86::PCMPGTBrm },
-    { X86::PCMPGTDrr,       X86::PCMPGTDrm },
-    { X86::PCMPGTWrr,       X86::PCMPGTWrm },
-    { X86::PINSRWrri,       X86::PINSRWrmi },
-    { X86::PMADDWDrr,       X86::PMADDWDrm },
-    { X86::PMAXSWrr,        X86::PMAXSWrm },
-    { X86::PMAXUBrr,        X86::PMAXUBrm },
-    { X86::PMINSWrr,        X86::PMINSWrm },
-    { X86::PMINUBrr,        X86::PMINUBrm },
-    { X86::PMULDQrr,        X86::PMULDQrm },
-    { X86::PMULHUWrr,       X86::PMULHUWrm },
-    { X86::PMULHWrr,        X86::PMULHWrm },
-    { X86::PMULLDrr,        X86::PMULLDrm },
-    { X86::PMULLDrr_int,    X86::PMULLDrm_int },
-    { X86::PMULLWrr,        X86::PMULLWrm },
-    { X86::PMULUDQrr,       X86::PMULUDQrm },
-    { X86::PORrr,           X86::PORrm },
-    { X86::PSADBWrr,        X86::PSADBWrm },
-    { X86::PSLLDrr,         X86::PSLLDrm },
-    { X86::PSLLQrr,         X86::PSLLQrm },
-    { X86::PSLLWrr,         X86::PSLLWrm },
-    { X86::PSRADrr,         X86::PSRADrm },
-    { X86::PSRAWrr,         X86::PSRAWrm },
-    { X86::PSRLDrr,         X86::PSRLDrm },
-    { X86::PSRLQrr,         X86::PSRLQrm },
-    { X86::PSRLWrr,         X86::PSRLWrm },
-    { X86::PSUBBrr,         X86::PSUBBrm },
-    { X86::PSUBDrr,         X86::PSUBDrm },
-    { X86::PSUBSBrr,        X86::PSUBSBrm },
-    { X86::PSUBSWrr,        X86::PSUBSWrm },
-    { X86::PSUBWrr,         X86::PSUBWrm },
-    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm },
-    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm },
-    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm },
-    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm },
-    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm },
-    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm },
-    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm },
-    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm },
-    { X86::PXORrr,          X86::PXORrm },
-    { X86::SBB32rr,         X86::SBB32rm },
-    { X86::SBB64rr,         X86::SBB64rm },
-    { X86::SHUFPDrri,       X86::SHUFPDrmi },
-    { X86::SHUFPSrri,       X86::SHUFPSrmi },
-    { X86::SUB16rr,         X86::SUB16rm },
-    { X86::SUB32rr,         X86::SUB32rm },
-    { X86::SUB64rr,         X86::SUB64rm },
-    { X86::SUB8rr,          X86::SUB8rm },
-    { X86::SUBPDrr,         X86::SUBPDrm },
-    { X86::SUBPSrr,         X86::SUBPSrm },
-    { X86::SUBSDrr,         X86::SUBSDrm },
-    { X86::SUBSSrr,         X86::SUBSSrm },
+  static const unsigned OpTbl2[][3] = {
+    { X86::ADC32rr,         X86::ADC32rm, 0 },
+    { X86::ADC64rr,         X86::ADC64rm, 0 },
+    { X86::ADD16rr,         X86::ADD16rm, 0 },
+    { X86::ADD32rr,         X86::ADD32rm, 0 },
+    { X86::ADD64rr,         X86::ADD64rm, 0 },
+    { X86::ADD8rr,          X86::ADD8rm, 0 },
+    { X86::ADDPDrr,         X86::ADDPDrm, 16 },
+    { X86::ADDPSrr,         X86::ADDPSrm, 16 },
+    { X86::ADDSDrr,         X86::ADDSDrm, 0 },
+    { X86::ADDSSrr,         X86::ADDSSrm, 0 },
+    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm, 16 },
+    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm, 16 },
+    { X86::AND16rr,         X86::AND16rm, 0 },
+    { X86::AND32rr,         X86::AND32rm, 0 },
+    { X86::AND64rr,         X86::AND64rm, 0 },
+    { X86::AND8rr,          X86::AND8rm, 0 },
+    { X86::ANDNPDrr,        X86::ANDNPDrm, 16 },
+    { X86::ANDNPSrr,        X86::ANDNPSrm, 16 },
+    { X86::ANDPDrr,         X86::ANDPDrm, 16 },
+    { X86::ANDPSrr,         X86::ANDPSrm, 16 },
+    { X86::CMOVA16rr,       X86::CMOVA16rm, 0 },
+    { X86::CMOVA32rr,       X86::CMOVA32rm, 0 },
+    { X86::CMOVA64rr,       X86::CMOVA64rm, 0 },
+    { X86::CMOVAE16rr,      X86::CMOVAE16rm, 0 },
+    { X86::CMOVAE32rr,      X86::CMOVAE32rm, 0 },
+    { X86::CMOVAE64rr,      X86::CMOVAE64rm, 0 },
+    { X86::CMOVB16rr,       X86::CMOVB16rm, 0 },
+    { X86::CMOVB32rr,       X86::CMOVB32rm, 0 },
+    { X86::CMOVB64rr,       X86::CMOVB64rm, 0 },
+    { X86::CMOVBE16rr,      X86::CMOVBE16rm, 0 },
+    { X86::CMOVBE32rr,      X86::CMOVBE32rm, 0 },
+    { X86::CMOVBE64rr,      X86::CMOVBE64rm, 0 },
+    { X86::CMOVE16rr,       X86::CMOVE16rm, 0 },
+    { X86::CMOVE32rr,       X86::CMOVE32rm, 0 },
+    { X86::CMOVE64rr,       X86::CMOVE64rm, 0 },
+    { X86::CMOVG16rr,       X86::CMOVG16rm, 0 },
+    { X86::CMOVG32rr,       X86::CMOVG32rm, 0 },
+    { X86::CMOVG64rr,       X86::CMOVG64rm, 0 },
+    { X86::CMOVGE16rr,      X86::CMOVGE16rm, 0 },
+    { X86::CMOVGE32rr,      X86::CMOVGE32rm, 0 },
+    { X86::CMOVGE64rr,      X86::CMOVGE64rm, 0 },
+    { X86::CMOVL16rr,       X86::CMOVL16rm, 0 },
+    { X86::CMOVL32rr,       X86::CMOVL32rm, 0 },
+    { X86::CMOVL64rr,       X86::CMOVL64rm, 0 },
+    { X86::CMOVLE16rr,      X86::CMOVLE16rm, 0 },
+    { X86::CMOVLE32rr,      X86::CMOVLE32rm, 0 },
+    { X86::CMOVLE64rr,      X86::CMOVLE64rm, 0 },
+    { X86::CMOVNE16rr,      X86::CMOVNE16rm, 0 },
+    { X86::CMOVNE32rr,      X86::CMOVNE32rm, 0 },
+    { X86::CMOVNE64rr,      X86::CMOVNE64rm, 0 },
+    { X86::CMOVNO16rr,      X86::CMOVNO16rm, 0 },
+    { X86::CMOVNO32rr,      X86::CMOVNO32rm, 0 },
+    { X86::CMOVNO64rr,      X86::CMOVNO64rm, 0 },
+    { X86::CMOVNP16rr,      X86::CMOVNP16rm, 0 },
+    { X86::CMOVNP32rr,      X86::CMOVNP32rm, 0 },
+    { X86::CMOVNP64rr,      X86::CMOVNP64rm, 0 },
+    { X86::CMOVNS16rr,      X86::CMOVNS16rm, 0 },
+    { X86::CMOVNS32rr,      X86::CMOVNS32rm, 0 },
+    { X86::CMOVNS64rr,      X86::CMOVNS64rm, 0 },
+    { X86::CMOVO16rr,       X86::CMOVO16rm, 0 },
+    { X86::CMOVO32rr,       X86::CMOVO32rm, 0 },
+    { X86::CMOVO64rr,       X86::CMOVO64rm, 0 },
+    { X86::CMOVP16rr,       X86::CMOVP16rm, 0 },
+    { X86::CMOVP32rr,       X86::CMOVP32rm, 0 },
+    { X86::CMOVP64rr,       X86::CMOVP64rm, 0 },
+    { X86::CMOVS16rr,       X86::CMOVS16rm, 0 },
+    { X86::CMOVS32rr,       X86::CMOVS32rm, 0 },
+    { X86::CMOVS64rr,       X86::CMOVS64rm, 0 },
+    { X86::CMPPDrri,        X86::CMPPDrmi, 16 },
+    { X86::CMPPSrri,        X86::CMPPSrmi, 16 },
+    { X86::CMPSDrr,         X86::CMPSDrm, 0 },
+    { X86::CMPSSrr,         X86::CMPSSrm, 0 },
+    { X86::DIVPDrr,         X86::DIVPDrm, 16 },
+    { X86::DIVPSrr,         X86::DIVPSrm, 16 },
+    { X86::DIVSDrr,         X86::DIVSDrm, 0 },
+    { X86::DIVSSrr,         X86::DIVSSrm, 0 },
+    { X86::FsANDNPDrr,      X86::FsANDNPDrm, 16 },
+    { X86::FsANDNPSrr,      X86::FsANDNPSrm, 16 },
+    { X86::FsANDPDrr,       X86::FsANDPDrm, 16 },
+    { X86::FsANDPSrr,       X86::FsANDPSrm, 16 },
+    { X86::FsORPDrr,        X86::FsORPDrm, 16 },
+    { X86::FsORPSrr,        X86::FsORPSrm, 16 },
+    { X86::FsXORPDrr,       X86::FsXORPDrm, 16 },
+    { X86::FsXORPSrr,       X86::FsXORPSrm, 16 },
+    { X86::HADDPDrr,        X86::HADDPDrm, 16 },
+    { X86::HADDPSrr,        X86::HADDPSrm, 16 },
+    { X86::HSUBPDrr,        X86::HSUBPDrm, 16 },
+    { X86::HSUBPSrr,        X86::HSUBPSrm, 16 },
+    { X86::IMUL16rr,        X86::IMUL16rm, 0 },
+    { X86::IMUL32rr,        X86::IMUL32rm, 0 },
+    { X86::IMUL64rr,        X86::IMUL64rm, 0 },
+    { X86::MAXPDrr,         X86::MAXPDrm, 16 },
+    { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
+    { X86::MAXPSrr,         X86::MAXPSrm, 16 },
+    { X86::MAXPSrr_Int,     X86::MAXPSrm_Int, 16 },
+    { X86::MAXSDrr,         X86::MAXSDrm, 0 },
+    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int, 0 },
+    { X86::MAXSSrr,         X86::MAXSSrm, 0 },
+    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int, 0 },
+    { X86::MINPDrr,         X86::MINPDrm, 16 },
+    { X86::MINPDrr_Int,     X86::MINPDrm_Int, 16 },
+    { X86::MINPSrr,         X86::MINPSrm, 16 },
+    { X86::MINPSrr_Int,     X86::MINPSrm_Int, 16 },
+    { X86::MINSDrr,         X86::MINSDrm, 0 },
+    { X86::MINSDrr_Int,     X86::MINSDrm_Int, 0 },
+    { X86::MINSSrr,         X86::MINSSrm, 0 },
+    { X86::MINSSrr_Int,     X86::MINSSrm_Int, 0 },
+    { X86::MULPDrr,         X86::MULPDrm, 16 },
+    { X86::MULPSrr,         X86::MULPSrm, 16 },
+    { X86::MULSDrr,         X86::MULSDrm, 0 },
+    { X86::MULSSrr,         X86::MULSSrm, 0 },
+    { X86::OR16rr,          X86::OR16rm, 0 },
+    { X86::OR32rr,          X86::OR32rm, 0 },
+    { X86::OR64rr,          X86::OR64rm, 0 },
+    { X86::OR8rr,           X86::OR8rm, 0 },
+    { X86::ORPDrr,          X86::ORPDrm, 16 },
+    { X86::ORPSrr,          X86::ORPSrm, 16 },
+    { X86::PACKSSDWrr,      X86::PACKSSDWrm, 16 },
+    { X86::PACKSSWBrr,      X86::PACKSSWBrm, 16 },
+    { X86::PACKUSWBrr,      X86::PACKUSWBrm, 16 },
+    { X86::PADDBrr,         X86::PADDBrm, 16 },
+    { X86::PADDDrr,         X86::PADDDrm, 16 },
+    { X86::PADDQrr,         X86::PADDQrm, 16 },
+    { X86::PADDSBrr,        X86::PADDSBrm, 16 },
+    { X86::PADDSWrr,        X86::PADDSWrm, 16 },
+    { X86::PADDWrr,         X86::PADDWrm, 16 },
+    { X86::PANDNrr,         X86::PANDNrm, 16 },
+    { X86::PANDrr,          X86::PANDrm, 16 },
+    { X86::PAVGBrr,         X86::PAVGBrm, 16 },
+    { X86::PAVGWrr,         X86::PAVGWrm, 16 },
+    { X86::PCMPEQBrr,       X86::PCMPEQBrm, 16 },
+    { X86::PCMPEQDrr,       X86::PCMPEQDrm, 16 },
+    { X86::PCMPEQWrr,       X86::PCMPEQWrm, 16 },
+    { X86::PCMPGTBrr,       X86::PCMPGTBrm, 16 },
+    { X86::PCMPGTDrr,       X86::PCMPGTDrm, 16 },
+    { X86::PCMPGTWrr,       X86::PCMPGTWrm, 16 },
+    { X86::PINSRWrri,       X86::PINSRWrmi, 16 },
+    { X86::PMADDWDrr,       X86::PMADDWDrm, 16 },
+    { X86::PMAXSWrr,        X86::PMAXSWrm, 16 },
+    { X86::PMAXUBrr,        X86::PMAXUBrm, 16 },
+    { X86::PMINSWrr,        X86::PMINSWrm, 16 },
+    { X86::PMINUBrr,        X86::PMINUBrm, 16 },
+    { X86::PMULDQrr,        X86::PMULDQrm, 16 },
+    { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
+    { X86::PMULHWrr,        X86::PMULHWrm, 16 },
+    { X86::PMULLDrr,        X86::PMULLDrm, 16 },
+    { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 },
+    { X86::PMULLWrr,        X86::PMULLWrm, 16 },
+    { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
+    { X86::PORrr,           X86::PORrm, 16 },
+    { X86::PSADBWrr,        X86::PSADBWrm, 16 },
+    { X86::PSLLDrr,         X86::PSLLDrm, 16 },
+    { X86::PSLLQrr,         X86::PSLLQrm, 16 },
+    { X86::PSLLWrr,         X86::PSLLWrm, 16 },
+    { X86::PSRADrr,         X86::PSRADrm, 16 },
+    { X86::PSRAWrr,         X86::PSRAWrm, 16 },
+    { X86::PSRLDrr,         X86::PSRLDrm, 16 },
+    { X86::PSRLQrr,         X86::PSRLQrm, 16 },
+    { X86::PSRLWrr,         X86::PSRLWrm, 16 },
+    { X86::PSUBBrr,         X86::PSUBBrm, 16 },
+    { X86::PSUBDrr,         X86::PSUBDrm, 16 },
+    { X86::PSUBSBrr,        X86::PSUBSBrm, 16 },
+    { X86::PSUBSWrr,        X86::PSUBSWrm, 16 },
+    { X86::PSUBWrr,         X86::PSUBWrm, 16 },
+    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm, 16 },
+    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm, 16 },
+    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm, 16 },
+    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm, 16 },
+    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm, 16 },
+    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm, 16 },
+    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm, 16 },
+    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm, 16 },
+    { X86::PXORrr,          X86::PXORrm, 16 },
+    { X86::SBB32rr,         X86::SBB32rm, 0 },
+    { X86::SBB64rr,         X86::SBB64rm, 0 },
+    { X86::SHUFPDrri,       X86::SHUFPDrmi, 16 },
+    { X86::SHUFPSrri,       X86::SHUFPSrmi, 16 },
+    { X86::SUB16rr,         X86::SUB16rm, 0 },
+    { X86::SUB32rr,         X86::SUB32rm, 0 },
+    { X86::SUB64rr,         X86::SUB64rm, 0 },
+    { X86::SUB8rr,          X86::SUB8rm, 0 },
+    { X86::SUBPDrr,         X86::SUBPDrm, 16 },
+    { X86::SUBPSrr,         X86::SUBPSrm, 16 },
+    { X86::SUBSDrr,         X86::SUBSDrm, 0 },
+    { X86::SUBSSrr,         X86::SUBSSrm, 0 },
     // FIXME: TEST*rr -> swapped operand of TEST*mr.
-    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm },
-    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm },
-    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm },
-    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm },
-    { X86::XOR16rr,         X86::XOR16rm },
-    { X86::XOR32rr,         X86::XOR32rm },
-    { X86::XOR64rr,         X86::XOR64rm },
-    { X86::XOR8rr,          X86::XOR8rm },
-    { X86::XORPDrr,         X86::XORPDrm },
-    { X86::XORPSrr,         X86::XORPSrm }
+    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm, 16 },
+    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm, 16 },
+    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm, 16 },
+    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm, 16 },
+    { X86::XOR16rr,         X86::XOR16rm, 0 },
+    { X86::XOR32rr,         X86::XOR32rm, 0 },
+    { X86::XOR64rr,         X86::XOR64rm, 0 },
+    { X86::XOR8rr,          X86::XOR8rm, 0 },
+    { X86::XORPDrr,         X86::XORPDrm, 16 },
+    { X86::XORPSrr,         X86::XORPSrm, 16 }
   };
 
   for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
     unsigned RegOp = OpTbl2[i][0];
     unsigned MemOp = OpTbl2[i][1];
+    unsigned Align = OpTbl2[i][2];
     if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
-                                                 MemOp)).second)
+                                           std::make_pair(MemOp,Align))).second)
       assert(false && "Duplicated entries?");
-    unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
+    // Index 2, folded load
+    unsigned AuxInfo = 2 | (1 << 4);
     if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                    std::make_pair(RegOp, AuxInfo))).second)
       AmbEntries.push_back(MemOp);
@@ -2142,8 +2148,9 @@
 MachineInstr*
 X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                     MachineInstr *MI, unsigned i,
-                                    const SmallVectorImpl<MachineOperand> &MOs) const{
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+                                    const SmallVectorImpl<MachineOperand> &MOs,
+                                    unsigned Align) const {
+  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   bool isTwoAddrFold = false;
   unsigned NumOps = MI->getDesc().getNumOperands();
   bool isTwoAddr = NumOps > 1 &&
@@ -2179,13 +2186,16 @@
   // If table selected...
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)MI->getOpcode());
     if (I != OpcodeTablePtr->end()) {
+      unsigned MinAlign = I->second.second;
+      if (Align < MinAlign)
+        return NULL;
       if (isTwoAddrFold)
-        NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
+        NewMI = FuseTwoAddrInst(MF, I->second.first, MOs, MI, *this);
       else
-        NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
+        NewMI = FuseInst(MF, I->second.first, i, MOs, MI, *this);
       return NewMI;
     }
   }
@@ -2199,31 +2209,13 @@
 
 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
-                                                  const SmallVectorImpl<unsigned> &Ops,
+                                           const SmallVectorImpl<unsigned> &Ops,
                                                   int FrameIndex) const {
   // Check switch flag 
   if (NoFusing) return NULL;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
   unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
-  // FIXME: Move alignment requirement into tables?
-  if (Alignment < 16) {
-    switch (MI->getOpcode()) {
-    default: break;
-    // Not always safe to fold movsd into these instructions since their load
-    // folding variants expects the address to be 16 byte aligned.
-    case X86::FsANDNPDrr:
-    case X86::FsANDNPSrr:
-    case X86::FsANDPDrr:
-    case X86::FsANDPSrr:
-    case X86::FsORPDrr:
-    case X86::FsORPSrr:
-    case X86::FsXORPDrr:
-    case X86::FsXORPSrr:
-      return NULL;
-    }
-  }
-
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
     switch (MI->getOpcode()) {
@@ -2241,12 +2233,12 @@
 
   SmallVector<MachineOperand,4> MOs;
   MOs.push_back(MachineOperand::CreateFI(FrameIndex));
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
 }
 
 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                   MachineInstr *MI,
-                                            const SmallVectorImpl<unsigned> &Ops,
+                                           const SmallVectorImpl<unsigned> &Ops,
                                                   MachineInstr *LoadMI) const {
   // Check switch flag 
   if (NoFusing) return NULL;
@@ -2255,25 +2247,9 @@
   unsigned Alignment = 0;
   if (LoadMI->hasOneMemOperand())
     Alignment = LoadMI->memoperands_begin()->getAlignment();
-
-  // FIXME: Move alignment requirement into tables?
-  if (Alignment < 16) {
-    switch (MI->getOpcode()) {
-    default: break;
-    // Not always safe to fold movsd into these instructions since their load
-    // folding variants expects the address to be 16 byte aligned.
-    case X86::FsANDNPDrr:
-    case X86::FsANDNPSrr:
-    case X86::FsANDPDrr:
-    case X86::FsANDPSrr:
-    case X86::FsORPDrr:
-    case X86::FsORPSrr:
-    case X86::FsXORPDrr:
-    case X86::FsXORPSrr:
-      return NULL;
-    }
-  }
-
+  else if (LoadMI->getOpcode() == X86::V_SET0 ||
+           LoadMI->getOpcode() == X86::V_SETALLONES)
+    Alignment = 16;
   if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
     unsigned NewOpc = 0;
     switch (MI->getOpcode()) {
@@ -2325,7 +2301,7 @@
     for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
   }
-  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
+  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Alignment);
 }
 
 
@@ -2357,7 +2333,7 @@
   // Folding a memory location into the two-address part of a two-address
   // instruction is different than folding it other places.  It requires
   // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
+  const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL;
   if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 
     OpcodeTablePtr = &RegOp2MemOpTable2Addr;
   } else if (OpNum == 0) { // If operand 0
@@ -2377,7 +2353,7 @@
   
   if (OpcodeTablePtr) {
     // Find the Opcode to fuse
-    DenseMap<unsigned*, unsigned>::iterator I =
+    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
       OpcodeTablePtr->find((unsigned*)Opc);
     if (I != OpcodeTablePtr->end())
       return true;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.h?rev=75749&r1=75748&r2=75749&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.h Wed Jul 15 01:10:07 2009
@@ -427,10 +427,10 @@
   /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
   ///
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2Addr;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable0;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable1;
-  DenseMap<unsigned*, unsigned> RegOp2MemOpTable2;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2Addr;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
+  DenseMap<unsigned*, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
   
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///
@@ -602,9 +602,10 @@
 
 private:
   MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                      MachineInstr* MI,
-                                      unsigned OpNum,
-                                      const SmallVectorImpl<MachineOperand> &MOs) const;
+                                     MachineInstr* MI,
+                                     unsigned OpNum,
+                                     const SmallVectorImpl<MachineOperand> &MOs,
+                                     unsigned Alignment) const;
 };
 
 } // End llvm namespace
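
To make the folding-time change concrete, here is a hedged standalone sketch of the check that foldMemoryOperandImpl now performs (the function name foldOpcode, the opcode numbers and the plain std::unordered_map are assumptions for illustration, not the real interface). The caller passes whatever alignment it actually knows -- the frame-index object alignment, the load's memoperand alignment, or 16 for X86::V_SET0 / X86::V_SETALLONES -- and folding is rejected when it falls below the table's minimum:

  // Standalone sketch, not LLVM code.
  #include <cstdio>
  #include <unordered_map>
  #include <utility>

  using FoldTable = std::unordered_map<unsigned, std::pair<unsigned, unsigned>>;

  // Returns the memory-form opcode, or 0 if folding is not allowed.
  // 'Align' is the known alignment in bytes of the spill slot or load.
  unsigned foldOpcode(const FoldTable &Table, unsigned RegOpc, unsigned Align) {
    auto I = Table.find(RegOpc);
    if (I == Table.end())
      return 0;                      // no folding entry for this opcode
    unsigned MinAlign = I->second.second;
    if (Align < MinAlign)
      return 0;                      // operand not aligned enough to fold
    return I->second.first;          // safe to use the memory form
  }

  int main() {
    // Hypothetical opcodes: ANDPSrr wants a 16-byte-aligned memory operand,
    // ADD32rr has no requirement.
    enum : unsigned { ANDPSrr = 10, ANDPSrm = 11, ADD32rr = 12, ADD32rm = 13 };
    FoldTable Tbl = { { ANDPSrr, { ANDPSrm, 16 } },
                      { ADD32rr, { ADD32rm, 0 } } };

    std::printf("%u\n", foldOpcode(Tbl, ANDPSrr, 8));   // 0: rejected
    std::printf("%u\n", foldOpcode(Tbl, ANDPSrr, 16));  // 11: folded
    std::printf("%u\n", foldOpcode(Tbl, ADD32rr, 4));   // 13: folded
    return 0;
  }

Compared with the hard-coded switches over FsANDPSrr and friends that this patch removes, the table-driven check keeps each opcode's requirement next to its reg-to-mem mapping, so a new alignment-sensitive instruction only needs a number in its table row rather than another case in two separate switches.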




