[llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td

Evan Cheng evan.cheng at apple.com
Tue Feb 21 12:00:31 PST 2006



Changes in directory llvm/lib/Target/X86:

X86InstrFPStack.td updated: 1.2 -> 1.3
X86InstrInfo.td updated: 1.245 -> 1.246
X86InstrMMX.td updated: 1.1 -> 1.2
X86InstrSSE.td updated: 1.3 -> 1.4
---
Log message:

One more round of reorg so sabre doesn't freak out. :-)


---
Diffs of the changes:  (+240 -222)

 X86InstrFPStack.td |   39 ++++++
 X86InstrInfo.td    |   94 +++-------------
 X86InstrMMX.td     |   22 ---
 X86InstrSSE.td     |  307 +++++++++++++++++++++++++++++++----------------------
 4 files changed, 240 insertions(+), 222 deletions(-)


Index: llvm/lib/Target/X86/X86InstrFPStack.td
diff -u llvm/lib/Target/X86/X86InstrFPStack.td:1.2 llvm/lib/Target/X86/X86InstrFPStack.td:1.3
--- llvm/lib/Target/X86/X86InstrFPStack.td:1.2	Tue Feb 21 13:26:52 2006
+++ llvm/lib/Target/X86/X86InstrFPStack.td	Tue Feb 21 14:00:20 2006
@@ -13,6 +13,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Some 'special' instructions
+let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
+  def FP_TO_INT16_IN_MEM : I<0, Pseudo,
+                            (ops i16mem:$dst, RFP:$src),
+                           "#FP_TO_INT16_IN_MEM PSEUDO!",
+                           [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
+  def FP_TO_INT32_IN_MEM : I<0, Pseudo,
+                            (ops i32mem:$dst, RFP:$src),
+                           "#FP_TO_INT32_IN_MEM PSEUDO!",
+                           [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
+  def FP_TO_INT64_IN_MEM : I<0, Pseudo,
+                            (ops i64mem:$dst, RFP:$src),
+                           "#FP_TO_INT64_IN_MEM PSEUDO!",
+                           [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in
+  let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+    def FP_REG_KILL  : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
+
 // All FP Stack operations are represented with two instructions here.  The
 // first instruction, generated by the instruction selector, uses "RFP"
 // registers: a traditional register file to reference floating point values.
@@ -379,3 +399,22 @@
                   (ops i16mem:$dst), "fnstcw $dst", []>;
 def FLDCW16m  : I<0xD9, MRM5m,                   // X87 control world = [mem16]
                   (ops i16mem:$dst), "fldcw $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 values.
+def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
+
+// Required for CALL which return f32 / f64 values.
+def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
+def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
+def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
+
+// Used to conv. i64 to f64 since there isn't a SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;


Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.245 llvm/lib/Target/X86/X86InstrInfo.td:1.246
--- llvm/lib/Target/X86/X86InstrInfo.td:1.245	Tue Feb 21 13:30:30 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td	Tue Feb 21 14:00:20 2006
@@ -403,47 +403,6 @@
 def IMPLICIT_DEF_R32  : I<0, Pseudo, (ops R32:$dst),
                          "#IMPLICIT_DEF $dst",
                          [(set R32:$dst, (undef))]>;
-def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
-                         "#IMPLICIT_DEF $dst",
-                         [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
-def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
-                         "#IMPLICIT_DEF $dst",
-                         [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
-
-
-// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
-  def CMOV_FR32 : I<0, Pseudo,
-                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
-                    "#CMOV_FR32 PSEUDO!",
-                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
-  def CMOV_FR64 : I<0, Pseudo,
-                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
-                    "#CMOV_FR64 PSEUDO!",
-                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
-}
-
-let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
-  def FP_TO_INT16_IN_MEM : I<0, Pseudo,
-                            (ops i16mem:$dst, RFP:$src),
-                           "#FP_TO_INT16_IN_MEM PSEUDO!",
-                           [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
-  def FP_TO_INT32_IN_MEM : I<0, Pseudo,
-                            (ops i32mem:$dst, RFP:$src),
-                           "#FP_TO_INT32_IN_MEM PSEUDO!",
-                           [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
-  def FP_TO_INT64_IN_MEM : I<0, Pseudo,
-                            (ops i64mem:$dst, RFP:$src),
-                           "#FP_TO_INT64_IN_MEM PSEUDO!",
-                           [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
-}
-
-
-let isTerminator = 1 in
-  let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
-    def FP_REG_KILL  : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
-
 
 // Nop
 def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
@@ -1690,7 +1649,6 @@
 
 
 // Double shift instructions (generalizations of rotate)
-
 def SHLD32rrCL : I<0xA5, MRMDestReg, (ops R32:$dst, R32:$src1, R32:$src2),
                    "shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
                    [(set R32:$dst, (X86shld R32:$src1, R32:$src2, CL))]>,
@@ -2390,24 +2348,6 @@
                  [(set R32:$dst, 0)]>;
 
 //===----------------------------------------------------------------------===//
-// Floating Point Stack Support
-//===----------------------------------------------------------------------===//
-
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrMMX.td"
-
-//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrSSE.td"
-
-//===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
 
@@ -2460,21 +2400,6 @@
 def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8  R8 :$src)>;
 def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
 
-// Required for RET of f32 / f64 values.
-def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
-def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
-
-// Required for CALL which return f32 / f64 values.
-def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
-def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
-
-// Floating point constant -0.0 and -1.0
-def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
-def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
-
-// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
-
 //===----------------------------------------------------------------------===//
 // Some peepholes
 //===----------------------------------------------------------------------===//
@@ -2519,3 +2444,22 @@
 def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
                      (srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
           (SHLD16mrCL addr:$dst, R16:$src2)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"


Index: llvm/lib/Target/X86/X86InstrMMX.td
diff -u llvm/lib/Target/X86/X86InstrMMX.td:1.1 llvm/lib/Target/X86/X86InstrMMX.td:1.2
--- llvm/lib/Target/X86/X86InstrMMX.td:1.1	Tue Feb 21 13:13:53 2006
+++ llvm/lib/Target/X86/X86InstrMMX.td	Tue Feb 21 14:00:20 2006
@@ -24,17 +24,6 @@
                  "movd {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
 
-def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
-                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-
-
 def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
@@ -44,14 +33,3 @@
 def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
                  "movq {$src, $dst|$dst, $src}", []>, TB,
                Requires<[HasMMX]>;
-
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, XS,
-                Requires<[HasSSE2]>;
-def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
-                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
-                Requires<[HasSSE2]>;
-


Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.3 llvm/lib/Target/X86/X86InstrSSE.td:1.4
--- llvm/lib/Target/X86/X86InstrSSE.td:1.3	Tue Feb 21 13:30:30 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td	Tue Feb 21 14:00:20 2006
@@ -13,93 +13,32 @@
 //
 //===----------------------------------------------------------------------===//
 
-def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
-                "movaps {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
-                "movaps {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE1]>, TB;
-def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
-                "movapd {$src, $dst|$dst, $src}", []>,
-               Requires<[HasSSE1]>, TB, OpSize;
-def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
-                "movapd {$src, $dst|$dst, $src}",[]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
-// Logical
-let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "andps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "andpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "xorps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "xorpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-}
-def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "andps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fand V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "andpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fand V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "orps {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "orpd {$src2, $dst|$dst, $src2}", []>,
-             Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "xorps {$src2, $dst|$dst, $src2}",
-                [(set V4F32:$dst, (X86fxor V4F32:$src1,
-                                  (X86loadpv4f32 addr:$src2)))]>,
-              Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "xorpd {$src2, $dst|$dst, $src2}",
-                [(set V2F64:$dst, (X86fxor V2F64:$src1,
-                                  (X86loadpv2f64 addr:$src2)))]>,
-              Requires<[HasSSE2]>, TB, OpSize;
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
 
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
-                "andnps {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE1]>, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
-                "andnpd {$src2, $dst|$dst, $src2}", []>,
-               Requires<[HasSSE2]>, TB, OpSize;
+// Some 'special' instructions
+def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
+                         "#IMPLICIT_DEF $dst",
+                         [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
+                         "#IMPLICIT_DEF $dst",
+                         [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
+
+// CMOV* - Used to implement the SSE SELECT DAG operation.  Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in {  // Expanded by the scheduler.
+  def CMOV_FR32 : I<0, Pseudo,
+                    (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
+                    "#CMOV_FR32 PSEUDO!",
+                    [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
+  def CMOV_FR64 : I<0, Pseudo,
+                    (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
+                    "#CMOV_FR64 PSEUDO!",
+                    [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
 }
 
+// Move Instructions
 def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                 "movss {$src, $dst|$dst, $src}", []>,
               Requires<[HasSSE1]>, XS;
@@ -124,6 +63,7 @@
                 [(store FR64:$src, addr:$dst)]>,
               Requires<[HasSSE2]>, XD;
 
+// Conversion instructions
 def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
                    "cvttss2si {$src, $dst|$dst, $src}",
                    [(set R32:$dst, (fp_to_sint FR32:$src))]>,
@@ -173,42 +113,8 @@
                   [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
                 Requires<[HasSSE2]>, XD;
 
-def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
-                 "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt FR32:$src))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
-                 "sqrtss {$src, $dst|$dst, $src}",
-                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
-               Requires<[HasSSE1]>, XS;
-def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
-                 "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt FR64:$src))]>,
-               Requires<[HasSSE2]>, XD;
-def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
-                 "sqrtsd {$src, $dst|$dst, $src}",
-                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
-               Requires<[HasSSE2]>, XD;
-
-def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
-                 "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, FR32:$src2)]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
-                 "ucomiss {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
-               Requires<[HasSSE1]>, TB;
-def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
-                 "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, FR64:$src2)]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
-                 "ucomisd {$src2, $src1|$src1, $src2}",
-                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
-               Requires<[HasSSE2]>, TB, OpSize;
-
+// Arithmetic instructions
 let isTwoAddress = 1 in {
-// SSE Scalar Arithmetic
 let isCommutable = 1 in {
 def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
                 "addss {$src2, $dst|$dst, $src2}",
@@ -278,8 +184,27 @@
                 "subsd {$src2, $dst|$dst, $src2}",
                 [(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>,
               Requires<[HasSSE2]>, XD;
+}
 
-// SSE compare
+def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+                 "sqrtss {$src, $dst|$dst, $src}",
+                 [(set FR32:$dst, (fsqrt FR32:$src))]>,
+               Requires<[HasSSE1]>, XS;
+def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+                 "sqrtss {$src, $dst|$dst, $src}",
+                 [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
+               Requires<[HasSSE1]>, XS;
+def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+                 "sqrtsd {$src, $dst|$dst, $src}",
+                 [(set FR64:$dst, (fsqrt FR64:$src))]>,
+               Requires<[HasSSE2]>, XD;
+def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+                 "sqrtsd {$src, $dst|$dst, $src}",
+                 [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
+               Requires<[HasSSE2]>, XD;
+
+// Comparison instructions
+let isTwoAddress = 1 in {
 def CMPSSrr : I<0xC2, MRMSrcReg, 
                 (ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
                 "cmp${cc}ss {$src, $dst|$dst, $src}", []>,
@@ -298,10 +223,25 @@
               Requires<[HasSSE2]>, XD;
 }
 
+def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
+                 "ucomiss {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR32:$src1, FR32:$src2)]>,
+               Requires<[HasSSE1]>, TB;
+def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
+                 "ucomiss {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
+               Requires<[HasSSE1]>, TB;
+def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
+                 "ucomisd {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR64:$src1, FR64:$src2)]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
+                 "ucomisd {$src2, $src1|$src1, $src2}",
+                 [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
+               Requires<[HasSSE2]>, TB, OpSize;
 
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
+// Aliases of packed instructions for scalar use. These all have names that
+// start with 'Fs'.
 
 // Alias instructions that map fld0 to pxor for sse.
 // FIXME: remove when we can teach regalloc that xor reg, reg is ok.
@@ -314,10 +254,10 @@
 
 // Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
 // Upper bits are disregarded.
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
                    "movaps {$src, $dst|$dst, $src}", []>,
                  Requires<[HasSSE1]>, TB;
-def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
                    "movapd {$src, $dst|$dst, $src}", []>,
                  Requires<[HasSSE2]>, TB, OpSize;
 
@@ -398,3 +338,120 @@
                    "andnpd {$src2, $dst|$dst, $src2}", []>,
                  Requires<[HasSSE2]>, TB, OpSize;
 }
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+                "movaps {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+                "movapd {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+                "movaps {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
+                "movaps {$src, $dst|$dst, $src}",[]>,
+               Requires<[HasSSE1]>, TB;
+def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+                "movapd {$src, $dst|$dst, $src}", []>,
+               Requires<[HasSSE1]>, TB, OpSize;
+def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
+                "movapd {$src, $dst|$dst, $src}",[]>,
+               Requires<[HasSSE2]>, TB, OpSize;
+
+// Logical
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "andps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
+              Requires<[HasSSE1]>, TB;
+def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "andpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE1]>, TB;
+def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "xorps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
+              Requires<[HasSSE1]>, TB;
+def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "xorpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+}
+def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "andps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fand V4F32:$src1,
+                                  (X86loadpv4f32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, TB;
+def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "andpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fand V2F64:$src1,
+                                  (X86loadpv2f64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "orps {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE1]>, TB;
+def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "orpd {$src2, $dst|$dst, $src2}", []>,
+             Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "xorps {$src2, $dst|$dst, $src2}",
+                [(set V4F32:$dst, (X86fxor V4F32:$src1,
+                                  (X86loadpv4f32 addr:$src2)))]>,
+              Requires<[HasSSE1]>, TB;
+def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "xorpd {$src2, $dst|$dst, $src2}",
+                [(set V2F64:$dst, (X86fxor V2F64:$src1,
+                                  (X86loadpv2f64 addr:$src2)))]>,
+              Requires<[HasSSE2]>, TB, OpSize;
+
+def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+                "andnps {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE1]>, TB;
+def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+                "andnpd {$src2, $dst|$dst, $src2}", []>,
+               Requires<[HasSSE2]>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE integer instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+                  "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;
+
+def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>;
+def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, XS,
+                Requires<[HasSSE2]>;
+def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
+                  "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
+                Requires<[HasSSE2]>;






More information about the llvm-commits mailing list