[llvm-commits] CVS: llvm/lib/Target/X86/X86ATTAsmPrinter.h X86ISelLowering.cpp X86InstrInfo.td X86InstrSSE.td X86IntelAsmPrinter.h

Evan Cheng evan.cheng at apple.com
Tue Feb 21 18:26:42 PST 2006



Changes in directory llvm/lib/Target/X86:

X86ATTAsmPrinter.h updated: 1.7 -> 1.8
X86ISelLowering.cpp updated: 1.92 -> 1.93
X86InstrInfo.td updated: 1.246 -> 1.247
X86InstrSSE.td updated: 1.4 -> 1.5
X86IntelAsmPrinter.h updated: 1.8 -> 1.9
---
Log message:

Added MMX, SSE1, and SSE2 vector instructions and some simple patterns.
Fixed some existing bugs (wrong Requires<> predicates and wrong instruction
prefixes) at the same time.


---
Diffs of the changes:  (+542 -283)

 X86ATTAsmPrinter.h   |    3 
 X86ISelLowering.cpp  |   38 ++
 X86InstrInfo.td      |    7 
 X86InstrSSE.td       |  773 ++++++++++++++++++++++++++++++++-------------------
 X86IntelAsmPrinter.h |    4 
 5 files changed, 542 insertions(+), 283 deletions(-)


Index: llvm/lib/Target/X86/X86ATTAsmPrinter.h
diff -u llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.7 llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.8
--- llvm/lib/Target/X86/X86ATTAsmPrinter.h:1.7	Fri Feb 17 18:15:05 2006
+++ llvm/lib/Target/X86/X86ATTAsmPrinter.h	Tue Feb 21 20:26:30 2006
@@ -49,6 +49,9 @@
   void printi64mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }
+  void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+    printMemReference(MI, OpNo);
+  }
   void printf32mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
   }


Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.92 llvm/lib/Target/X86/X86ISelLowering.cpp:1.93
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.92	Tue Feb 21 18:56:39 2006
+++ llvm/lib/Target/X86/X86ISelLowering.cpp	Tue Feb 21 20:26:30 2006
@@ -236,6 +236,44 @@
     addLegalFPImmediate(-0.0); // FLD0/FCHS
     addLegalFPImmediate(-1.0); // FLD1/FCHS
   }
+
+  if (TM.getSubtarget<X86Subtarget>().hasMMX()) {
+    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
+    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
+    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
+
+    // FIXME: We don't support any ConstantVec's yet.  We should custom expand
+    // the ones we do!
+    setOperationAction(ISD::ConstantVec, MVT::v8i8,  Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v4i16, Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v2i32, Expand);
+  }
+
+  if (TM.getSubtarget<X86Subtarget>().hasSSE1()) {
+    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
+
+    // FIXME: We don't support any ConstantVec's yet.  We should custom expand
+    // the ones we do!
+    setOperationAction(ISD::ConstantVec, MVT::v4f32, Expand);
+  }
+
+  if (TM.getSubtarget<X86Subtarget>().hasSSE2()) {
+    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
+    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
+    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
+    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
+    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
+
+
+    // FIXME: We don't support any ConstantVec's yet.  We should custom expand
+    // the ones we do!
+    setOperationAction(ISD::ConstantVec, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v16i8, Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v8i16, Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v4i32, Expand);
+    setOperationAction(ISD::ConstantVec, MVT::v2i64, Expand);
+  }
+
   computeRegisterProperties();
 
   // FIXME: These should be based on subtarget info. Plus, the values should


Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.246 llvm/lib/Target/X86/X86InstrInfo.td:1.247
--- llvm/lib/Target/X86/X86InstrInfo.td:1.246	Tue Feb 21 14:00:20 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td	Tue Feb 21 20:26:30 2006
@@ -136,6 +136,7 @@
 def i16mem  : X86MemOperand<"printi16mem">;
 def i32mem  : X86MemOperand<"printi32mem">;
 def i64mem  : X86MemOperand<"printi64mem">;
+def i128mem : X86MemOperand<"printi128mem">;
 def f32mem  : X86MemOperand<"printf32mem">;
 def f64mem  : X86MemOperand<"printf64mem">;
 def f128mem : X86MemOperand<"printf128mem">;
@@ -341,6 +342,9 @@
 def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
 def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
 
+def X86loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def X86loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+
 def sextloadi16i1  : PatFrag<(ops node:$ptr), (i16 (sextload node:$ptr, i1))>;
 def sextloadi32i1  : PatFrag<(ops node:$ptr), (i32 (sextload node:$ptr, i1))>;
 def sextloadi16i8  : PatFrag<(ops node:$ptr), (i16 (sextload node:$ptr, i8))>;
@@ -360,9 +364,6 @@
 def X86loadpf32    : PatFrag<(ops node:$ptr), (f32   (X86loadp node:$ptr))>;
 def X86loadpf64    : PatFrag<(ops node:$ptr), (f64   (X86loadp node:$ptr))>;
 
-def X86loadpv4f32  : PatFrag<(ops node:$ptr), (v4f32 (X86loadp node:$ptr))>;
-def X86loadpv2f64  : PatFrag<(ops node:$ptr), (v2f64 (X86loadp node:$ptr))>;
-
 //===----------------------------------------------------------------------===//
 // Instruction templates...
 


Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.4 llvm/lib/Target/X86/X86InstrSSE.td:1.5
--- llvm/lib/Target/X86/X86InstrSSE.td:1.4	Tue Feb 21 14:00:20 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Tue Feb 21 20:26:30 2006
@@ -17,6 +17,20 @@
 // SSE scalar FP Instructions
 //===----------------------------------------------------------------------===//
 
+// Instruction templates
+// SSI - SSE1 instructions with XS prefix.
+// SDI - SSE2 instructions with XD prefix.
+// PSI - SSE1 instructions with TB prefix.
+// PDI - SSE2 instructions with TB and OpSize prefixes.
+class SSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+      : I<o, F, ops, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+      : I<o, F, ops, asm, pattern>, XD, Requires<[HasSSE2]>;
+class PSI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+      : I<o, F, ops, asm, pattern>, TB, Requires<[HasSSE1]>;
+class PDI<bits<8> o, Format F, dag ops, string asm, list<dag> pattern>
+      : I<o, F, ops, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+
 // Some 'special' instructions
 def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
                          "#IMPLICIT_DEF $dst",
@@ -39,206 +53,189 @@
 }
 
 // Move Instructions
-def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
-                "movss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def MOVSDrr : I<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
-                "movsd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE2]>, XD;
-
-def MOVSSrm : I<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
-                "movss {$src, $dst|$dst, $src}",
-                [(set FR32:$dst, (loadf32 addr:$src))]>,
-              Requires<[HasSSE1]>, XS;
-def MOVSSmr : I<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
+def MOVSSrr : SSI<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                "movss {$src, $dst|$dst, $src}", []>;
+def MOVSSrm : SSI<0x10, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                 "movss {$src, $dst|$dst, $src}",
-                [(store FR32:$src, addr:$dst)]>,
-              Requires<[HasSSE1]>, XS;
-def MOVSDrm : I<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                [(set FR32:$dst, (loadf32 addr:$src))]>;
+def MOVSDrr : SDI<0x10, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                "movsd {$src, $dst|$dst, $src}", []>;
+def MOVSDrm : SDI<0x10, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                 "movsd {$src, $dst|$dst, $src}",
-                [(set FR64:$dst, (loadf64 addr:$src))]>,
-              Requires<[HasSSE2]>, XD;
-def MOVSDmr : I<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
+                [(set FR64:$dst, (loadf64 addr:$src))]>;
+
+def MOVSSmr : SSI<0x11, MRMDestMem, (ops f32mem:$dst, FR32:$src),
+                "movss {$src, $dst|$dst, $src}",
+                [(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (ops f64mem:$dst, FR64:$src),
                 "movsd {$src, $dst|$dst, $src}",
-                [(store FR64:$src, addr:$dst)]>,
-              Requires<[HasSSE2]>, XD;
+                [(store FR64:$src, addr:$dst)]>;
 
 // Conversion instructions
-def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
+def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvttss2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint FR32:$src))]>,
-                 Requires<[HasSSE1]>, XS;
-def CVTTSS2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
+                   [(set R32:$dst, (fp_to_sint FR32:$src))]>;
+def CVTTSS2SIrm: SSI<0x2C, MRMSrcMem, (ops R32:$dst, f32mem:$src),
                    "cvttss2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>,
-                 Requires<[HasSSE1]>, XS;
-def CVTTSD2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
+                   [(set R32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
+def CVTTSD2SIrr: SDI<0x2C, MRMSrcReg, (ops R32:$dst, FR64:$src),
                    "cvttsd2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint FR64:$src))]>,
-                 Requires<[HasSSE2]>, XD;
-def CVTTSD2SIrm: I<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
+                   [(set R32:$dst, (fp_to_sint FR64:$src))]>;
+def CVTTSD2SIrm: SDI<0x2C, MRMSrcMem, (ops R32:$dst, f64mem:$src),
                    "cvttsd2si {$src, $dst|$dst, $src}",
-                   [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>,
-                 Requires<[HasSSE2]>, XD;
-def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
-                  "cvtss2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (fextend FR32:$src))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
-                  "cvtss2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSD2SSrr: I<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
+                   [(set R32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+def CVTSD2SSrr: SDI<0x5A, MRMSrcReg, (ops FR32:$dst, FR64:$src),
                   "cvtsd2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (fround FR64:$src))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSD2SSrm: I<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), 
+                  [(set FR32:$dst, (fround FR64:$src))]>;
+def CVTSD2SSrm: SDI<0x5A, MRMSrcMem, (ops FR32:$dst, f64mem:$src), 
                   "cvtsd2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (fround (loadf64 addr:$src)))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSI2SSrr: I<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
+                  [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
+def CVTSI2SSrr: SSI<0x2A, MRMSrcReg, (ops FR32:$dst, R32:$src),
                   "cvtsi2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (sint_to_fp R32:$src))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSI2SSrm: I<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
+                  [(set FR32:$dst, (sint_to_fp R32:$src))]>;
+def CVTSI2SSrm: SSI<0x2A, MRMSrcMem, (ops FR32:$dst, i32mem:$src),
                   "cvtsi2ss {$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XS;
-def CVTSI2SDrr: I<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
+                  [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+def CVTSI2SDrr: SDI<0x2A, MRMSrcReg, (ops FR64:$dst, R32:$src),
                   "cvtsi2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (sint_to_fp R32:$src))]>,
-                Requires<[HasSSE2]>, XD;
-def CVTSI2SDrm: I<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
+                  [(set FR64:$dst, (sint_to_fp R32:$src))]>;
+def CVTSI2SDrm: SDI<0x2A, MRMSrcMem, (ops FR64:$dst, i32mem:$src),
                   "cvtsi2sd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
-                Requires<[HasSSE2]>, XD;
+                  [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
+// SSE2 instructions with XS prefix
+def CVTSS2SDrr: I<0x5A, MRMSrcReg, (ops FR64:$dst, FR32:$src),
+                  "cvtss2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+                Requires<[HasSSE2]>;
+def CVTSS2SDrm: I<0x5A, MRMSrcMem, (ops FR64:$dst, f32mem:$src),
+                  "cvtss2sd {$src, $dst|$dst, $src}",
+                  [(set FR64:$dst, (fextend (loadf32 addr:$src)))]>, XS,
+                Requires<[HasSSE2]>;
 
 // Arithmetic instructions
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
-def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+def ADDSSrr : SSI<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                 "addss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def ADDSDrr : I<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                [(set FR32:$dst, (fadd FR32:$src1, FR32:$src2))]>;
+def ADDSDrr : SDI<0x58, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                 "addsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def MULSSrr : I<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                [(set FR64:$dst, (fadd FR64:$src1, FR64:$src2))]>;
+def MULSSrr : SSI<0x59, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                 "mulss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def MULSDrr : I<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                [(set FR32:$dst, (fmul FR32:$src1, FR32:$src2))]>;
+def MULSDrr : SDI<0x59, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                 "mulsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
+                [(set FR64:$dst, (fmul FR64:$src1, FR64:$src2))]>;
 }
 
-def ADDSSrm : I<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+def ADDSSrm : SSI<0x58, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                 "addss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def ADDSDrm : I<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                [(set FR32:$dst, (fadd FR32:$src1, (loadf32 addr:$src2)))]>;
+def ADDSDrm : SDI<0x58, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                 "addsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
-def MULSSrm : I<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                [(set FR64:$dst, (fadd FR64:$src1, (loadf64 addr:$src2)))]>;
+def MULSSrm : SSI<0x59, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                 "mulss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def MULSDrm : I<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                [(set FR32:$dst, (fmul FR32:$src1, (loadf32 addr:$src2)))]>;
+def MULSDrm : SDI<0x59, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                 "mulsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
+                [(set FR64:$dst, (fmul FR64:$src1, (loadf64 addr:$src2)))]>;
 
-def DIVSSrr : I<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+def DIVSSrr : SSI<0x5E, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                 "divss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def DIVSSrm : I<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                [(set FR32:$dst, (fdiv FR32:$src1, FR32:$src2))]>;
+def DIVSSrm : SSI<0x5E, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                 "divss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def DIVSDrr : I<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                [(set FR32:$dst, (fdiv FR32:$src1, (loadf32 addr:$src2)))]>;
+def DIVSDrr : SDI<0x5E, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                 "divsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def DIVSDrm : I<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                [(set FR64:$dst, (fdiv FR64:$src1, FR64:$src2))]>;
+def DIVSDrm : SDI<0x5E, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                 "divsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
+                [(set FR64:$dst, (fdiv FR64:$src1, (loadf64 addr:$src2)))]>;
 
-def SUBSSrr : I<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+def SUBSSrr : SSI<0x5C, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                 "subss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>,
-              Requires<[HasSSE1]>, XS;
-def SUBSSrm : I<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
+                [(set FR32:$dst, (fsub FR32:$src1, FR32:$src2))]>;
+def SUBSSrm : SSI<0x5C, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f32mem:$src2),
                 "subss {$src2, $dst|$dst, $src2}",
-                [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, XS;
-def SUBSDrr : I<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                [(set FR32:$dst, (fsub FR32:$src1, (loadf32 addr:$src2)))]>;
+def SUBSDrr : SDI<0x5C, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                 "subsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>,
-              Requires<[HasSSE2]>, XD;
-def SUBSDrm : I<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
+                [(set FR64:$dst, (fsub FR64:$src1, FR64:$src2))]>;
+def SUBSDrm : SDI<0x5C, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f64mem:$src2),
                 "subsd {$src2, $dst|$dst, $src2}",
-                [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, XD;
+                [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>;
 }
 
-def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+def SQRTSSrr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                  "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt FR32:$src))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                 [(set FR32:$dst, (fsqrt FR32:$src))]>;
+def SQRTSSrm : SSI<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
                  "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>;
+def SQRTSDrr : SDI<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                  "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt FR64:$src))]>,
-               Requires<[HasSSE2]>, XD;
-def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                 [(set FR64:$dst, (fsqrt FR64:$src))]>;
+def SQRTSDrm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
                  "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
-               Requires<[HasSSE2]>, XD;
+                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
+
+def RSQRTSSrr : SSI<0x52, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                   "rsqrtss {$src, $dst|$dst, $src}", []>;
+def RSQRTSSrm : SSI<0x52, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                   "rsqrtss {$src, $dst|$dst, $src}", []>;
+def RCPSSrr : SSI<0x53, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                  "rcpss {$src, $dst|$dst, $src}", []>;
+def RCPSSrm : SSI<0x53, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                  "rcpss {$src, $dst|$dst, $src}", []>;
+
+def MAXSSrr : SSI<0x5F, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                  "maxss {$src, $dst|$dst, $src}", []>;
+def MAXSSrm : SSI<0x5F, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                  "maxss {$src, $dst|$dst, $src}", []>;
+def MAXSDrr : SDI<0x5F, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                  "maxsd {$src, $dst|$dst, $src}", []>;
+def MAXSDrm : SDI<0x5F, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                  "maxsd {$src, $dst|$dst, $src}", []>;
+def MINSSrr : SSI<0x5D, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                  "minss {$src, $dst|$dst, $src}", []>;
+def MINSSrm : SSI<0x5D, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                  "minss {$src, $dst|$dst, $src}", []>;
+def MINSDrr : SDI<0x5D, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                  "minsd {$src, $dst|$dst, $src}", []>;
+def MINSDrm : SDI<0x5D, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                  "minsd {$src, $dst|$dst, $src}", []>;
 
 // Comparison instructions
 let isTwoAddress = 1 in {
-def CMPSSrr : I<0xC2, MRMSrcReg, 
+def CMPSSrr : SSI<0xC2, MRMSrcReg, 
                 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
-                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def CMPSSrm : I<0xC2, MRMSrcMem, 
+                "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
+def CMPSSrm : SSI<0xC2, MRMSrcMem, 
                 (ops FR32:$dst, FR32:$src1, f32mem:$src, SSECC:$cc),
-                "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XS;
-def CMPSDrr : I<0xC2, MRMSrcReg, 
+                "cmp${cc}ss {$src, $dst|$dst, $src}", []>;
+def CMPSDrr : SDI<0xC2, MRMSrcReg, 
                 (ops FR64:$dst, FR64:$src1, FR64:$src, SSECC:$cc),
-                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE1]>, XD;
-def CMPSDrm : I<0xC2, MRMSrcMem, 
+                "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
+def CMPSDrm : SDI<0xC2, MRMSrcMem, 
                 (ops FR64:$dst, FR64:$src1, f64mem:$src, SSECC:$cc),
-                "cmp${cc}sd {$src, $dst|$dst, $src}", []>,
-              Requires<[HasSSE2]>, XD;
+                "cmp${cc}sd {$src, $dst|$dst, $src}", []>;
 }
 
-def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
+def UCOMISSrr: PSI<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
                  "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, FR32:$src2)]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
+                 [(X86cmp FR32:$src1, FR32:$src2)]>;
+def UCOMISSrm: PSI<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
                  "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
+                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>;
+def UCOMISDrr: PDI<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
                  "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, FR64:$src2)]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
+                 [(X86cmp FR64:$src1, FR64:$src2)]>;
+def UCOMISDrm: PDI<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
                  "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
-               Requires<[HasSSE2]>, TB, OpSize;
+                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>;
 
 // Aliases of packed instructions for scalar use. These all have names that
 // start with 'Fs'.
@@ -254,89 +251,69 @@
 
 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
 // Upper bits are disregarded.
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
-                   "movaps {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
-                   "movapd {$src, $dst|$dst, $src}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                   "movaps {$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                   "movapd {$src, $dst|$dst, $src}", []>;
 
 // Alias instructions to load FR32 / FR64 from f128mem using movaps / movapd.
 // Upper bits are disregarded.
-def FsMOVAPSrm : I<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (ops FR32:$dst, f128mem:$src),
                    "movaps {$src, $dst|$dst, $src}",
-                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>,
-                 Requires<[HasSSE1]>, TB;
-def FsMOVAPDrm : I<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
+                   [(set FR32:$dst, (X86loadpf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (ops FR64:$dst, f128mem:$src),
                   "movapd {$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
+                  [(set FR64:$dst, (X86loadpf64 addr:$src))]>;
 
 // Alias bitwise logical operations using SSE logical ops on packed FP values.
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
-def FsANDPSrr : I<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+def FsANDPSrr : PSI<0x54, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "andps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrr : I<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
+def FsANDPDrr : PDI<0x54, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "andpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrr : I<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrr : I<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrr : I<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
+def FsORPSrr  : PSI<0x56, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                  "orps {$src2, $dst|$dst, $src2}", []>;
+def FsORPDrr  : PDI<0x56, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  "orpd {$src2, $dst|$dst, $src2}", []>;
+def FsXORPSrr : PSI<0x57, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                   "xorps {$src2, $dst|$dst, $src2}",
-                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrr : I<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                  [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
+def FsXORPDrr : PDI<0x57, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
                   "xorpd {$src2, $dst|$dst, $src2}",
-                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
+                  [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
 }
-def FsANDPSrm : I<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+def FsANDPSrm : PSI<0x54, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "andps {$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (X86fand FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsANDPDrm : I<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                                    (X86loadpf32 addr:$src2)))]>;
+def FsANDPDrm : PDI<0x54, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "andpd {$src2, $dst|$dst, $src2}",
                   [(set FR64:$dst, (X86fand FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
-def FsORPSrm : I<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                 "orps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def FsORPDrm : I<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                 "orpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def FsXORPSrm : I<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                                    (X86loadpf64 addr:$src2)))]>;
+def FsORPSrm  : PSI<0x56, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                  "orps {$src2, $dst|$dst, $src2}", []>;
+def FsORPDrm  : PDI<0x56, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                  "orpd {$src2, $dst|$dst, $src2}", []>;
+def FsXORPSrm : PSI<0x57, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
                   "xorps {$src2, $dst|$dst, $src2}",
                   [(set FR32:$dst, (X86fxor FR32:$src1,
-                                    (X86loadpf32 addr:$src2)))]>,
-                Requires<[HasSSE1]>, TB;
-def FsXORPDrm : I<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                                    (X86loadpf32 addr:$src2)))]>;
+def FsXORPDrm : PDI<0x57, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
                   "xorpd {$src2, $dst|$dst, $src2}",
                   [(set FR64:$dst, (X86fxor FR64:$src1,
-                                    (X86loadpf64 addr:$src2)))]>,
-                Requires<[HasSSE2]>, TB, OpSize;
+                                    (X86loadpf64 addr:$src2)))]>;
 
-def FsANDNPSrr : I<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPSrm : I<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
-                   "andnps {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE1]>, TB;
-def FsANDNPDrr : I<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
-def FsANDNPDrm : I<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
-                   "andnpd {$src2, $dst|$dst, $src2}", []>,
-                 Requires<[HasSSE2]>, TB, OpSize;
+def FsANDNPSrr : PSI<0x55, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>;
+def FsANDNPSrm : PSI<0x55, MRMSrcMem, (ops FR32:$dst, FR32:$src1, f128mem:$src2),
+                   "andnps {$src2, $dst|$dst, $src2}", []>;
+def FsANDNPDrr : PDI<0x55, MRMSrcReg, (ops FR64:$dst, FR64:$src1, FR64:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>;
+def FsANDNPDrm : PDI<0x55, MRMSrcMem, (ops FR64:$dst, FR64:$src1, f128mem:$src2),
+                   "andnpd {$src2, $dst|$dst, $src2}", []>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -344,114 +321,350 @@
 //===----------------------------------------------------------------------===//
 
 // Move Instructions
-def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
+def MOVAPSrr : PSI<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                   "movaps {$src, $dst|$dst, $src}", []>;
+def MOVAPSrm : PSI<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                   "movaps {$src, $dst|$dst, $src}",
+                   [(set V4F32:$dst, (X86loadv4f32 addr:$src))]>;
+def MOVAPDrr : PDI<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                   "movapd {$src, $dst|$dst, $src}", []>;
+def MOVAPDrm : PDI<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                   "movapd {$src, $dst|$dst, $src}",
+                   [(set V2F64:$dst, (X86loadv2f64 addr:$src))]>;
 
-def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
-                "movapd {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB, OpSize;
-def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE2]>, TB, OpSize;
+def MOVAPSmr : PSI<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
+                   "movaps {$src, $dst|$dst, $src}",
+                   [(store V4F32:$src, addr:$dst)]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
+                   "movapd {$src, $dst|$dst, $src}",
+                   [(store V2F64:$src, addr:$dst)]>;
+
+def MOVUPSrr : PSI<0x10, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                   "movups {$src, $dst|$dst, $src}", []>;
+def MOVUPSrm : PSI<0x10, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                   "movups {$src, $dst|$dst, $src}", []>;
+def MOVUPSmr : PSI<0x11, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
+                   "movups {$src, $dst|$dst, $src}", []>;
+def MOVUPDrr : PDI<0x10, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                   "movupd {$src, $dst|$dst, $src}", []>;
+def MOVUPDrm : PDI<0x10, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                   "movupd {$src, $dst|$dst, $src}", []>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
+                   "movupd {$src, $dst|$dst, $src}", []>;
+
+def MOVLPSrm : PSI<0x12, MRMSrcMem, (ops V4F32:$dst, f64mem:$src),
+                   "movlps {$src, $dst|$dst, $src}", []>;
+def MOVLPSmr : PSI<0x13, MRMDestMem, (ops f64mem:$dst, V4F32:$src),
+                   "movlps {$src, $dst|$dst, $src}", []>;
+def MOVLPDrm : PDI<0x12, MRMSrcMem, (ops V2F64:$dst, f64mem:$src),
+                   "movlpd {$src, $dst|$dst, $src}", []>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (ops f64mem:$dst, V2F64:$src),
+                   "movlpd {$src, $dst|$dst, $src}", []>;
+
+def MOVHPSrm : PSI<0x16, MRMSrcMem, (ops V4F32:$dst, f64mem:$src),
+                   "movhps {$src, $dst|$dst, $src}", []>;
+def MOVHPSmr : PSI<0x17, MRMDestMem, (ops f64mem:$dst, V4F32:$src),
+                   "movhps {$src, $dst|$dst, $src}", []>;
+def MOVHPDrm : PDI<0x16, MRMSrcMem, (ops V2F64:$dst, f64mem:$src),
+                   "movhpd {$src, $dst|$dst, $src}", []>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (ops f64mem:$dst, V2F64:$src),
+                   "movhpd {$src, $dst|$dst, $src}", []>;
+
+def MOVLHPSrr : PSI<0x16, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                    "movlhps {$src, $dst|$dst, $src}", []>;
+def MOVHLPSrr : PSI<0x12, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                    "movhlps {$src, $dst|$dst, $src}", []>; // 0F 12 /r, reg form
+
+def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (ops R32:$dst, V4F32:$src),
+                     "movmskps {$src, $dst|$dst, $src}", []>;
+def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (ops R32:$dst, V2F64:$src),
+                     "movmskpd {$src, $dst|$dst, $src}", []>; // 66 0F 50 /r
+
+// Conversion instructions
+def CVTPI2PSrr : PSI<0x2A, MRMSrcReg, (ops V4F32:$dst, V2I32:$src),
+                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+def CVTPI2PSrm : PSI<0x2A, MRMSrcMem, (ops V4F32:$dst, i64mem:$src),
+                     "cvtpi2ps {$src, $dst|$dst, $src}", []>;
+def CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (ops V2F64:$dst, V2I32:$src),
+                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+def CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (ops V2F64:$dst, i64mem:$src),
+                     "cvtpi2pd {$src, $dst|$dst, $src}", []>;
+
+// SSE2 instructions without OpSize prefix
+def CVTDQ2PSrr : I<0x5B, MRMSrcReg, (ops V4F32:$dst, V4I32:$src),
+                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
+                 Requires<[HasSSE2]>;
+def CVTDQ2PSrm : I<0x5B, MRMSrcMem, (ops V4F32:$dst, i128mem:$src),
+                   "cvtdq2ps {$src, $dst|$dst, $src}", []>, TB,
+                 Requires<[HasSSE2]>;
+
+// SSE2 instructions with XS prefix
+def CVTDQ2PDrr : I<0xE6, MRMSrcReg, (ops V2F64:$dst, V2I32:$src),
+                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
+                 XS, Requires<[HasSSE2]>;
+def CVTDQ2PDrm : I<0xE6, MRMSrcMem, (ops V2F64:$dst, i64mem:$src),
+                   "cvtdq2pd {$src, $dst|$dst, $src}", []>,
+                 XS, Requires<[HasSSE2]>; // two i32 from memory -> v2f64
+
+def CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (ops V2I32:$dst, V4F32:$src),
+                    "cvtps2pi {$src, $dst|$dst, $src}", []>;
+def CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (ops V2I32:$dst, f64mem:$src),
+                    "cvtps2pi {$src, $dst|$dst, $src}", []>;
+def CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (ops V2I32:$dst, V2F64:$src),
+                    "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+def CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (ops V2I32:$dst, f128mem:$src),
+                    "cvtpd2pi {$src, $dst|$dst, $src}", []>;
+
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (ops V4I32:$dst, V4F32:$src),
+                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (ops V4I32:$dst, f128mem:$src),
+                     "cvtps2dq {$src, $dst|$dst, $src}", []>;
+// SSE2 packed instructions with XD prefix
+def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (ops V4I32:$dst, V2F64:$src),
+                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (ops V4I32:$dst, f128mem:$src),
+                     "cvtpd2dq {$src, $dst|$dst, $src}", []>;
+
+// SSE2 instructions without OpSize prefix
+def CVTPS2PDrr : I<0x5A, MRMSrcReg, (ops V2F64:$dst, V4F32:$src),
+                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
+                 Requires<[HasSSE2]>; // packed single -> packed double
+def CVTPS2PDrm : I<0x5A, MRMSrcMem, (ops V2F64:$dst, f64mem:$src),
+                   "cvtps2pd {$src, $dst|$dst, $src}", []>, TB,
+                 Requires<[HasSSE2]>; // memory source: two packed singles
+
+def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (ops V4F32:$dst, V2F64:$src),
+                     "cvtpd2ps {$src, $dst|$dst, $src}", []>;
+def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                     "cvtpd2ps {$src, $dst|$dst, $src}", []>; // memory source
+
+// Arithmetic
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def ADDPSrr : PSI<0x58, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                  "addps {$src2, $dst|$dst, $src2}",
+                  [(set V4F32:$dst, (fadd V4F32:$src1, V4F32:$src2))]>;
+def ADDPDrr : PDI<0x58, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                  "addpd {$src2, $dst|$dst, $src2}",
+                  [(set V2F64:$dst, (fadd V2F64:$src1, V2F64:$src2))]>;
+def MULPSrr : PSI<0x59, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                  "mulps {$src2, $dst|$dst, $src2}",
+                  [(set V4F32:$dst, (fmul V4F32:$src1, V4F32:$src2))]>;
+def MULPDrr : PDI<0x59, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                  "mulpd {$src2, $dst|$dst, $src2}",
+                  [(set V2F64:$dst, (fmul V2F64:$src1, V2F64:$src2))]>;
+}
+
+def ADDPSrm : PSI<0x58, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                  "addps {$src2, $dst|$dst, $src2}",
+                  [(set V4F32:$dst, (fadd V4F32:$src1,
+                                    (X86loadv4f32 addr:$src2)))]>;
+def ADDPDrm : PDI<0x58, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                  "addpd {$src2, $dst|$dst, $src2}",
+                  [(set V2F64:$dst, (fadd V2F64:$src1,
+                                    (X86loadv2f64 addr:$src2)))]>;
+def MULPSrm : PSI<0x59, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                  "mulps {$src2, $dst|$dst, $src2}",
+                  [(set V4F32:$dst, (fmul V4F32:$src1,
+                                    (X86loadv4f32 addr:$src2)))]>;
+def MULPDrm : PDI<0x59, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                  "mulpd {$src2, $dst|$dst, $src2}",
+                  [(set V2F64:$dst, (fmul V2F64:$src1,
+                                    (X86loadv2f64 addr:$src2)))]>;
+
+def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "divps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (fdiv V4F32:$src1, V4F32:$src2))]>;
+def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "divps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (fdiv V4F32:$src1,
+                                  (X86loadv4f32 addr:$src2)))]>;
+def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "divpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (fdiv V2F64:$src1, V2F64:$src2))]>;
+def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "divpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (fdiv V2F64:$src1,
+                                  (X86loadv2f64 addr:$src2)))]>;
+
+def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "subps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (fsub V4F32:$src1, V4F32:$src2))]>;
+def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "subps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (fsub V4F32:$src1,
+                                  (X86loadv4f32 addr:$src2)))]>;
+def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "subpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (fsub V2F64:$src1, V2F64:$src2))]>;
+def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "subpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (fsub V2F64:$src1,
+                                  (X86loadv2f64 addr:$src2)))]>;
+}
+
+def SQRTPSrr : PSI<0x51, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                 "sqrtps {$src, $dst|$dst, $src}",
+                 [(set V4F32:$dst, (fsqrt V4F32:$src))]>;
+def SQRTPSrm : PSI<0x51, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                 "sqrtps {$src, $dst|$dst, $src}",
+                 [(set V4F32:$dst, (fsqrt (X86loadv4f32 addr:$src)))]>;
+def SQRTPDrr : PDI<0x51, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                 "sqrtpd {$src, $dst|$dst, $src}",
+                 [(set V2F64:$dst, (fsqrt V2F64:$src))]>;
+def SQRTPDrm : PDI<0x51, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                 "sqrtpd {$src, $dst|$dst, $src}",
+                 [(set V2F64:$dst, (fsqrt (X86loadv2f64 addr:$src)))]>;
+
+def RSQRTPSrr : PSI<0x52, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                 "rsqrtps {$src, $dst|$dst, $src}", []>;
+def RSQRTPSrm : PSI<0x52, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                 "rsqrtps {$src, $dst|$dst, $src}", []>;
+def RCPPSrr : PSI<0x53, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                  "rcpps {$src, $dst|$dst, $src}", []>;
+def RCPPSrm : PSI<0x53, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                  "rcpps {$src, $dst|$dst, $src}", []>;
+
+def MAXPSrr : PSI<0x5F, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                  "maxps {$src, $dst|$dst, $src}", []>;
+def MAXPSrm : PSI<0x5F, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                  "maxps {$src, $dst|$dst, $src}", []>;
+def MAXPDrr : PDI<0x5F, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                  "maxpd {$src, $dst|$dst, $src}", []>;
+def MAXPDrm : PDI<0x5F, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                  "maxpd {$src, $dst|$dst, $src}", []>;
+def MINPSrr : PSI<0x5D, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                  "minps {$src, $dst|$dst, $src}", []>;
+def MINPSrm : PSI<0x5D, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                  "minps {$src, $dst|$dst, $src}", []>;
+def MINPDrr : PDI<0x5D, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                  "minpd {$src, $dst|$dst, $src}", []>;
+def MINPDrm : PDI<0x5D, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                  "minpd {$src, $dst|$dst, $src}", []>;
 
 // Logical
 let isTwoAddress = 1 in {
 let isCommutable = 1 in {
-def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+def ANDPSrr : PSI<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
                 "andps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>;
+def ANDPDrr : PDI<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
                 "andpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>;
+def ORPSrr  : PSI<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>;
+def ORPDrr  : PDI<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>;
+def XORPSrr : PSI<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
                 "xorps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>;
+def XORPDrr : PDI<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
                 "xorpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
+                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>;
 }
-def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+def ANDPSrm : PSI<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
                 "andps {$src2, $dst|$dst, $src2}",
                 [(set V4F32:$dst, (X86fand V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                                  (X86loadv4f32 addr:$src2)))]>;
+def ANDPDrm : PDI<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
                 "andpd {$src2, $dst|$dst, $src2}",
                 [(set V2F64:$dst, (X86fand V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                                  (X86loadv2f64 addr:$src2)))]>;
+def ORPSrm  : PSI<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>;
+def ORPDrm  : PDI<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>;
+def XORPSrm : PSI<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
                 "xorps {$src2, $dst|$dst, $src2}",
                 [(set V4F32:$dst, (X86fxor V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                                  (X86loadv4f32 addr:$src2)))]>;
+def XORPDrm : PDI<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
                 "xorpd {$src2, $dst|$dst, $src2}",
                 [(set V2F64:$dst, (X86fxor V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
+                                  (X86loadv2f64 addr:$src2)))]>;
+def ANDNPSrr : PSI<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>;
+def ANDNPSrm : PSI<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>;
+def ANDNPDrr : PDI<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>;
+def ANDNPDrm : PDI<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>;
+}
 
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
+let isTwoAddress = 1 in {
+def CMPPSrr : PSI<0xC2, MRMSrcReg, 
+                (ops V4F32:$dst, V4F32:$src1, V4F32:$src, SSECC:$cc),
+                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
+def CMPPSrm : PSI<0xC2, MRMSrcMem, 
+                (ops V4F32:$dst, V4F32:$src1, f128mem:$src, SSECC:$cc),
+                "cmp${cc}ps {$src, $dst|$dst, $src}", []>;
+def CMPPDrr : PDI<0xC2, MRMSrcReg, 
+                (ops V2F64:$dst, V2F64:$src1, V2F64:$src, SSECC:$cc),
+                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
+def CMPPDrm : PDI<0xC2, MRMSrcMem, 
+                (ops V2F64:$dst, V2F64:$src1, f128mem:$src, SSECC:$cc),
+                "cmp${cc}pd {$src, $dst|$dst, $src}", []>;
 }
 
+// Shuffle and unpack instructions
+def SHUFPSrr : PSI<0xC6, MRMSrcReg, 
+                   (ops V4F32:$dst, V4F32:$src1, V4F32:$src2, i8imm:$src3),
+                   "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+def SHUFPSrm : PSI<0xC6, MRMSrcMem, 
+                   (ops V4F32:$dst, V4F32:$src1, f128mem:$src2, i8imm:$src3),
+                   "shufps {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+def SHUFPDrr : PDI<0xC6, MRMSrcReg, 
+                   (ops V2F64:$dst, V2F64:$src1, V2F64:$src2, i8imm:$src3),
+                   "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+def SHUFPDrm : PDI<0xC6, MRMSrcMem, 
+                   (ops V2F64:$dst, V2F64:$src1, f128mem:$src2, i8imm:$src3),
+                   "shufpd {$src3, $src2, $dst|$dst, $src2, $src3}", []>;
+
+def UNPCKHPSrr : PSI<0x15, MRMSrcReg, 
+                    (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                    "unpckhps {$src2, $dst|$dst, $src2}", []>;
+def UNPCKHPSrm : PSI<0x15, MRMSrcMem, 
+                    (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                    "unpckhps {$src2, $dst|$dst, $src2}", []>;
+def UNPCKHPDrr : PDI<0x15, MRMSrcReg, 
+                    (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                    "unpckhpd {$src2, $dst|$dst, $src2}", []>;
+def UNPCKHPDrm : PDI<0x15, MRMSrcMem, 
+                    (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                    "unpckhpd {$src2, $dst|$dst, $src2}", []>;
+def UNPCKLPSrr : PSI<0x14, MRMSrcReg, 
+                    (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+def UNPCKLPSrm : PSI<0x14, MRMSrcMem, 
+                    (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                    "unpcklps {$src2, $dst|$dst, $src2}", []>;
+def UNPCKLPDrr : PDI<0x14, MRMSrcReg, 
+                    (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                    "unpcklpd {$src2, $dst|$dst, $src2}", []>;
+def UNPCKLPDrm : PDI<0x14, MRMSrcMem, 
+                    (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                    "unpcklpd {$src2, $dst|$dst, $src2}", []>;
+
 //===----------------------------------------------------------------------===//
 // SSE integer instructions
 //===----------------------------------------------------------------------===//
 
 // Move Instructions
-def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
+def MOVD128rr : PDI<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                  "movd {$src, $dst|$dst, $src}", []>;
+def MOVD128rm : PDI<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                  "movd {$src, $dst|$dst, $src}", []>;
+def MOVD128mr : PDI<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+                  "movd {$src, $dst|$dst, $src}", []>;
 
+// SSE2 instructions with XS prefix
 def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
                   "movq {$src, $dst|$dst, $src}", []>, XS,
                 Requires<[HasSSE2]>;
 def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
                   "movq {$src, $dst|$dst, $src}", []>, XS,
                 Requires<[HasSSE2]>;
-def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
+
+def MOVQ128mr : PDI<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+                  "movq {$src, $dst|$dst, $src}", []>; // store: 66 0F D6 /r


Index: llvm/lib/Target/X86/X86IntelAsmPrinter.h
diff -u llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.8 llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.9
--- llvm/lib/Target/X86/X86IntelAsmPrinter.h:1.8	Fri Feb 17 18:15:05 2006
+++ llvm/lib/Target/X86/X86IntelAsmPrinter.h	Tue Feb 21 20:26:30 2006
@@ -65,6 +65,10 @@
     O << "QWORD PTR ";
     printMemReference(MI, OpNo);
   }
+  void printi128mem(const MachineInstr *MI, unsigned OpNo) {
+    O << "XMMWORD PTR ";  // size keyword written ahead of the memory reference
+    printMemReference(MI, OpNo);
+  }
   void printf32mem(const MachineInstr *MI, unsigned OpNo) {
     O << "DWORD PTR ";
     printMemReference(MI, OpNo);






More information about the llvm-commits mailing list