[llvm-commits] CVS: llvm/lib/Target/X86/X86InstrFPStack.td X86InstrInfo.td X86InstrMMX.td X86InstrSSE.td
Evan Cheng
evan.cheng at apple.com
Tue Feb 21 12:00:31 PST 2006
Changes in directory llvm/lib/Target/X86:
X86InstrFPStack.td updated: 1.2 -> 1.3
X86InstrInfo.td updated: 1.245 -> 1.246
X86InstrMMX.td updated: 1.1 -> 1.2
X86InstrSSE.td updated: 1.3 -> 1.4
---
Log message:
One more round of reorg so sabre doesn't freak out. :-)
---
Diffs of the changes: (+240 -222)
X86InstrFPStack.td | 39 ++++++
X86InstrInfo.td | 94 +++-------------
X86InstrMMX.td | 22 ---
X86InstrSSE.td | 307 +++++++++++++++++++++++++++++++----------------------
4 files changed, 240 insertions(+), 222 deletions(-)
Index: llvm/lib/Target/X86/X86InstrFPStack.td
diff -u llvm/lib/Target/X86/X86InstrFPStack.td:1.2 llvm/lib/Target/X86/X86InstrFPStack.td:1.3
--- llvm/lib/Target/X86/X86InstrFPStack.td:1.2 Tue Feb 21 13:26:52 2006
+++ llvm/lib/Target/X86/X86InstrFPStack.td Tue Feb 21 14:00:20 2006
@@ -13,6 +13,26 @@
//
//===----------------------------------------------------------------------===//
+// Some 'special' instructions
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def FP_TO_INT16_IN_MEM : I<0, Pseudo,
+ (ops i16mem:$dst, RFP:$src),
+ "#FP_TO_INT16_IN_MEM PSEUDO!",
+ [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
+ def FP_TO_INT32_IN_MEM : I<0, Pseudo,
+ (ops i32mem:$dst, RFP:$src),
+ "#FP_TO_INT32_IN_MEM PSEUDO!",
+ [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
+ def FP_TO_INT64_IN_MEM : I<0, Pseudo,
+ (ops i64mem:$dst, RFP:$src),
+ "#FP_TO_INT64_IN_MEM PSEUDO!",
+ [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
+}
+
+let isTerminator = 1 in
+ let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
+ def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
+
// All FP Stack operations are represented with two instructions here. The
// first instruction, generated by the instruction selector, uses "RFP"
// registers: a traditional register file to reference floating point values.
@@ -379,3 +399,22 @@
(ops i16mem:$dst), "fnstcw $dst", []>;
def FLDCW16m : I<0xD9, MRM5m, // X87 control world = [mem16]
(ops i16mem:$dst), "fldcw $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// Required for RET of f32 / f64 values.
+def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
+def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
+
+// Required for CALL which return f32 / f64 values.
+def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
+def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
+
+// Floating point constant -0.0 and -1.0
+def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
+def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
+
+// Used to conv. i64 to f64 since there isn't a SSE version.
+def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
Index: llvm/lib/Target/X86/X86InstrInfo.td
diff -u llvm/lib/Target/X86/X86InstrInfo.td:1.245 llvm/lib/Target/X86/X86InstrInfo.td:1.246
--- llvm/lib/Target/X86/X86InstrInfo.td:1.245 Tue Feb 21 13:30:30 2006
+++ llvm/lib/Target/X86/X86InstrInfo.td Tue Feb 21 14:00:20 2006
@@ -403,47 +403,6 @@
def IMPLICIT_DEF_R32 : I<0, Pseudo, (ops R32:$dst),
"#IMPLICIT_DEF $dst",
[(set R32:$dst, (undef))]>;
-def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
- "#IMPLICIT_DEF $dst",
- [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
-def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
- "#IMPLICIT_DEF $dst",
- [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
-
-
-// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
-// scheduler into a branch sequence.
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
- def CMOV_FR32 : I<0, Pseudo,
- (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
- "#CMOV_FR32 PSEUDO!",
- [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
- def CMOV_FR64 : I<0, Pseudo,
- (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
- "#CMOV_FR64 PSEUDO!",
- [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
-}
-
-let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
- def FP_TO_INT16_IN_MEM : I<0, Pseudo,
- (ops i16mem:$dst, RFP:$src),
- "#FP_TO_INT16_IN_MEM PSEUDO!",
- [(X86fp_to_i16mem RFP:$src, addr:$dst)]>;
- def FP_TO_INT32_IN_MEM : I<0, Pseudo,
- (ops i32mem:$dst, RFP:$src),
- "#FP_TO_INT32_IN_MEM PSEUDO!",
- [(X86fp_to_i32mem RFP:$src, addr:$dst)]>;
- def FP_TO_INT64_IN_MEM : I<0, Pseudo,
- (ops i64mem:$dst, RFP:$src),
- "#FP_TO_INT64_IN_MEM PSEUDO!",
- [(X86fp_to_i64mem RFP:$src, addr:$dst)]>;
-}
-
-
-let isTerminator = 1 in
- let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
- def FP_REG_KILL : I<0, Pseudo, (ops), "#FP_REG_KILL", []>;
-
// Nop
def NOOP : I<0x90, RawFrm, (ops), "nop", []>;
@@ -1690,7 +1649,6 @@
// Double shift instructions (generalizations of rotate)
-
def SHLD32rrCL : I<0xA5, MRMDestReg, (ops R32:$dst, R32:$src1, R32:$src2),
"shld{l} {%cl, $src2, $dst|$dst, $src2, %CL}",
[(set R32:$dst, (X86shld R32:$src1, R32:$src2, CL))]>,
@@ -2390,24 +2348,6 @@
[(set R32:$dst, 0)]>;
//===----------------------------------------------------------------------===//
-// Floating Point Stack Support
-//===----------------------------------------------------------------------===//
-
-include "X86InstrFPStack.td"
-
-//===----------------------------------------------------------------------===//
-// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrMMX.td"
-
-//===----------------------------------------------------------------------===//
-// XMM Floating point support (requires SSE / SSE2)
-//===----------------------------------------------------------------------===//
-
-include "X86InstrSSE.td"
-
-//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
@@ -2460,21 +2400,6 @@
def : Pat<(i32 (anyext R8 :$src)), (MOVZX32rr8 R8 :$src)>;
def : Pat<(i32 (anyext R16:$src)), (MOVZX32rr16 R16:$src)>;
-// Required for RET of f32 / f64 values.
-def : Pat<(X86fld addr:$src, f32), (FpLD32m addr:$src)>;
-def : Pat<(X86fld addr:$src, f64), (FpLD64m addr:$src)>;
-
-// Required for CALL which return f32 / f64 values.
-def : Pat<(X86fst RFP:$src, addr:$op, f32), (FpST32m addr:$op, RFP:$src)>;
-def : Pat<(X86fst RFP:$src, addr:$op, f64), (FpST64m addr:$op, RFP:$src)>;
-
-// Floating point constant -0.0 and -1.0
-def : Pat<(f64 fp64immneg0), (FpCHS (FpLD0))>, Requires<[FPStack]>;
-def : Pat<(f64 fp64immneg1), (FpCHS (FpLD1))>, Requires<[FPStack]>;
-
-// Used to conv. i64 to f64 since there isn't a SSE version.
-def : Pat<(X86fildflag addr:$src, i64), (FpILD64m addr:$src)>;
-
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
@@ -2519,3 +2444,22 @@
def : Pat<(store (or (shl (loadi16 addr:$dst), CL:$amt),
(srl R16:$src2, (sub 16, CL:$amt))), addr:$dst),
(SHLD16mrCL addr:$dst, R16:$src2)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating Point Stack Support
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFPStack.td"
+
+//===----------------------------------------------------------------------===//
+// MMX and XMM Packed Integer support (requires MMX, SSE, and SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrMMX.td"
+
+//===----------------------------------------------------------------------===//
+// XMM Floating point support (requires SSE / SSE2)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrSSE.td"
Index: llvm/lib/Target/X86/X86InstrMMX.td
diff -u llvm/lib/Target/X86/X86InstrMMX.td:1.1 llvm/lib/Target/X86/X86InstrMMX.td:1.2
--- llvm/lib/Target/X86/X86InstrMMX.td:1.1 Tue Feb 21 13:13:53 2006
+++ llvm/lib/Target/X86/X86InstrMMX.td Tue Feb 21 14:00:20 2006
@@ -24,17 +24,6 @@
"movd {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasMMX]>;
-def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
- "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
- Requires<[HasSSE2]>;
-def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
- "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
- Requires<[HasSSE2]>;
-def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
- "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
- Requires<[HasSSE2]>;
-
-
def MOVQ64rr : I<0x6F, MRMSrcReg, (ops VR64:$dst, VR64:$src),
"movq {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasMMX]>;
@@ -44,14 +33,3 @@
def MOVQ64mr : I<0x7F, MRMDestMem, (ops i64mem:$dst, VR64:$src),
"movq {$src, $dst|$dst, $src}", []>, TB,
Requires<[HasMMX]>;
-
-def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
- "movq {$src, $dst|$dst, $src}", []>, XS,
- Requires<[HasSSE2]>;
-def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
- "movq {$src, $dst|$dst, $src}", []>, XS,
- Requires<[HasSSE2]>;
-def MOVQ128mr : I<0xD6, MRMSrcMem, (ops i64mem:$dst, VR128:$src),
- "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
- Requires<[HasSSE2]>;
-
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.3 llvm/lib/Target/X86/X86InstrSSE.td:1.4
--- llvm/lib/Target/X86/X86InstrSSE.td:1.3 Tue Feb 21 13:30:30 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Tue Feb 21 14:00:20 2006
@@ -13,93 +13,32 @@
//
//===----------------------------------------------------------------------===//
-def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
- "movaps {$src, $dst|$dst, $src}", []>,
- Requires<[HasSSE1]>, TB;
-def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
- "movapd {$src, $dst|$dst, $src}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
- "movaps {$src, $dst|$dst, $src}", []>,
- Requires<[HasSSE1]>, TB;
-def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
- "movaps {$src, $dst|$dst, $src}",[]>,
- Requires<[HasSSE1]>, TB;
-def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
- "movapd {$src, $dst|$dst, $src}", []>,
- Requires<[HasSSE1]>, TB, OpSize;
-def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
- "movapd {$src, $dst|$dst, $src}",[]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-// Logical
-let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
- "andps {$src2, $dst|$dst, $src2}",
- [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
- Requires<[HasSSE1]>, TB;
-def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
- "andpd {$src2, $dst|$dst, $src2}",
- [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
- "orps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
- "orpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
- "xorps {$src2, $dst|$dst, $src2}",
- [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
- Requires<[HasSSE1]>, TB;
-def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
- "xorpd {$src2, $dst|$dst, $src2}",
- [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-}
-def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
- "andps {$src2, $dst|$dst, $src2}",
- [(set V4F32:$dst, (X86fand V4F32:$src1,
- (X86loadpv4f32 addr:$src2)))]>,
- Requires<[HasSSE1]>, TB;
-def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
- "andpd {$src2, $dst|$dst, $src2}",
- [(set V2F64:$dst, (X86fand V2F64:$src1,
- (X86loadpv2f64 addr:$src2)))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
- "orps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
- "orpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
- "xorps {$src2, $dst|$dst, $src2}",
- [(set V4F32:$dst, (X86fxor V4F32:$src1,
- (X86loadpv4f32 addr:$src2)))]>,
- Requires<[HasSSE1]>, TB;
-def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
- "xorpd {$src2, $dst|$dst, $src2}",
- [(set V2F64:$dst, (X86fxor V2F64:$src1,
- (X86loadpv2f64 addr:$src2)))]>,
- Requires<[HasSSE2]>, TB, OpSize;
+//===----------------------------------------------------------------------===//
+// SSE scalar FP Instructions
+//===----------------------------------------------------------------------===//
-def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
- "andnps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
- "andnps {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE1]>, TB;
-def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
- "andnpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
-def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
- "andnpd {$src2, $dst|$dst, $src2}", []>,
- Requires<[HasSSE2]>, TB, OpSize;
+// Some 'special' instructions
+def IMPLICIT_DEF_FR32 : I<0, Pseudo, (ops FR32:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set FR32:$dst, (undef))]>, Requires<[HasSSE2]>;
+def IMPLICIT_DEF_FR64 : I<0, Pseudo, (ops FR64:$dst),
+ "#IMPLICIT_DEF $dst",
+ [(set FR64:$dst, (undef))]>, Requires<[HasSSE2]>;
+
+// CMOV* - Used to implement the SSE SELECT DAG operation. Expanded by the
+// scheduler into a branch sequence.
+let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler.
+ def CMOV_FR32 : I<0, Pseudo,
+ (ops FR32:$dst, FR32:$t, FR32:$f, i8imm:$cond),
+ "#CMOV_FR32 PSEUDO!",
+ [(set FR32:$dst, (X86cmov FR32:$t, FR32:$f, imm:$cond))]>;
+ def CMOV_FR64 : I<0, Pseudo,
+ (ops FR64:$dst, FR64:$t, FR64:$f, i8imm:$cond),
+ "#CMOV_FR64 PSEUDO!",
+ [(set FR64:$dst, (X86cmov FR64:$t, FR64:$f, imm:$cond))]>;
}
+// Move Instructions
def MOVSSrr : I<0x10, MRMSrcReg, (ops FR32:$dst, FR32:$src),
"movss {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, XS;
@@ -124,6 +63,7 @@
[(store FR64:$src, addr:$dst)]>,
Requires<[HasSSE2]>, XD;
+// Conversion instructions
def CVTTSS2SIrr: I<0x2C, MRMSrcReg, (ops R32:$dst, FR32:$src),
"cvttss2si {$src, $dst|$dst, $src}",
[(set R32:$dst, (fp_to_sint FR32:$src))]>,
@@ -173,42 +113,8 @@
[(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>,
Requires<[HasSSE2]>, XD;
-def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
- "sqrtss {$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fsqrt FR32:$src))]>,
- Requires<[HasSSE1]>, XS;
-def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
- "sqrtss {$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
- Requires<[HasSSE1]>, XS;
-def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
- "sqrtsd {$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fsqrt FR64:$src))]>,
- Requires<[HasSSE2]>, XD;
-def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
- "sqrtsd {$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
- Requires<[HasSSE2]>, XD;
-
-def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
- "ucomiss {$src2, $src1|$src1, $src2}",
- [(X86cmp FR32:$src1, FR32:$src2)]>,
- Requires<[HasSSE1]>, TB;
-def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
- "ucomiss {$src2, $src1|$src1, $src2}",
- [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
- Requires<[HasSSE1]>, TB;
-def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
- "ucomisd {$src2, $src1|$src1, $src2}",
- [(X86cmp FR64:$src1, FR64:$src2)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
- "ucomisd {$src2, $src1|$src1, $src2}",
- [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
+// Arithmetic instructions
let isTwoAddress = 1 in {
-// SSE Scalar Arithmetic
let isCommutable = 1 in {
def ADDSSrr : I<0x58, MRMSrcReg, (ops FR32:$dst, FR32:$src1, FR32:$src2),
"addss {$src2, $dst|$dst, $src2}",
@@ -278,8 +184,27 @@
"subsd {$src2, $dst|$dst, $src2}",
[(set FR64:$dst, (fsub FR64:$src1, (loadf64 addr:$src2)))]>,
Requires<[HasSSE2]>, XD;
+}
-// SSE compare
+def SQRTSSrr : I<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
+ "sqrtss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fsqrt FR32:$src))]>,
+ Requires<[HasSSE1]>, XS;
+def SQRTSSrm : I<0x51, MRMSrcMem, (ops FR32:$dst, f32mem:$src),
+ "sqrtss {$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (fsqrt (loadf32 addr:$src)))]>,
+ Requires<[HasSSE1]>, XS;
+def SQRTSDrr : I<0x51, MRMSrcReg, (ops FR64:$dst, FR64:$src),
+ "sqrtsd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fsqrt FR64:$src))]>,
+ Requires<[HasSSE2]>, XD;
+def SQRTSDrm : I<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
+ "sqrtsd {$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>,
+ Requires<[HasSSE2]>, XD;
+
+// Comparison instructions
+let isTwoAddress = 1 in {
def CMPSSrr : I<0xC2, MRMSrcReg,
(ops FR32:$dst, FR32:$src1, FR32:$src, SSECC:$cc),
"cmp${cc}ss {$src, $dst|$dst, $src}", []>,
@@ -298,10 +223,25 @@
Requires<[HasSSE2]>, XD;
}
+def UCOMISSrr: I<0x2E, MRMSrcReg, (ops FR32:$src1, FR32:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, FR32:$src2)]>,
+ Requires<[HasSSE1]>, TB;
+def UCOMISSrm: I<0x2E, MRMSrcMem, (ops FR32:$src1, f32mem:$src2),
+ "ucomiss {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR32:$src1, (loadf32 addr:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def UCOMISDrr: I<0x2E, MRMSrcReg, (ops FR64:$src1, FR64:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, FR64:$src2)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def UCOMISDrm: I<0x2E, MRMSrcMem, (ops FR64:$src1, f64mem:$src2),
+ "ucomisd {$src2, $src1|$src1, $src2}",
+ [(X86cmp FR64:$src1, (loadf64 addr:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
-//===----------------------------------------------------------------------===//
-// Alias Instructions
-//===----------------------------------------------------------------------===//
+// Aliases of packed instructions for scalar use. These all have names that
+// start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
@@ -314,10 +254,10 @@
// Alias instructions to do FR32 / FR64 reg-to-reg copy using movaps / movapd.
// Upper bits are disregarded.
-def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+def FsMOVAPSrr : I<0x28, MRMSrcReg, (ops FR32:$dst, FR32:$src),
"movaps {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE1]>, TB;
-def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+def FsMOVAPDrr : I<0x28, MRMSrcReg, (ops FR64:$dst, FR64:$src),
"movapd {$src, $dst|$dst, $src}", []>,
Requires<[HasSSE2]>, TB, OpSize;
@@ -398,3 +338,120 @@
"andnpd {$src2, $dst|$dst, $src2}", []>,
Requires<[HasSSE2]>, TB, OpSize;
}
+
+//===----------------------------------------------------------------------===//
+// SSE packed FP Instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVAPSrr : I<0x28, MRMSrcReg, (ops V4F32:$dst, V4F32:$src),
+ "movaps {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE1]>, TB;
+def MOVAPDrr : I<0x28, MRMSrcReg, (ops V2F64:$dst, V2F64:$src),
+ "movapd {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+def MOVAPSrm : I<0x28, MRMSrcMem, (ops V4F32:$dst, f128mem:$src),
+ "movaps {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE1]>, TB;
+def MOVAPSmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V4F32:$src),
+ "movaps {$src, $dst|$dst, $src}",[]>,
+ Requires<[HasSSE1]>, TB;
+def MOVAPDrm : I<0x28, MRMSrcMem, (ops V2F64:$dst, f128mem:$src),
+ "movapd {$src, $dst|$dst, $src}", []>,
+ Requires<[HasSSE2]>, TB, OpSize; // movapd needs SSE2, as MOVAPDrr/MOVAPDmr
+def MOVAPDmr : I<0x29, MRMDestMem, (ops f128mem:$dst, V2F64:$src),
+ "movapd {$src, $dst|$dst, $src}",[]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+// Logical
+let isTwoAddress = 1 in {
+let isCommutable = 1 in {
+def ANDPSrr : I<0x54, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fand V4F32:$src1, V4F32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def ANDPDrr : I<0x54, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fand V2F64:$src1, V2F64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrr : I<0x56, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ORPDrr : I<0x56, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrr : I<0x57, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fxor V4F32:$src1, V4F32:$src2))]>,
+ Requires<[HasSSE1]>, TB;
+def XORPDrr : I<0x57, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fxor V2F64:$src1, V2F64:$src2))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
+def ANDPSrm : I<0x54, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "andps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fand V4F32:$src1,
+ (X86loadpv4f32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def ANDPDrm : I<0x54, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "andpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fand V2F64:$src1,
+ (X86loadpv2f64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ORPSrm : I<0x56, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "orps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ORPDrm : I<0x56, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "orpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def XORPSrm : I<0x57, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "xorps {$src2, $dst|$dst, $src2}",
+ [(set V4F32:$dst, (X86fxor V4F32:$src1,
+ (X86loadpv4f32 addr:$src2)))]>,
+ Requires<[HasSSE1]>, TB;
+def XORPDrm : I<0x57, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "xorpd {$src2, $dst|$dst, $src2}",
+ [(set V2F64:$dst, (X86fxor V2F64:$src1,
+ (X86loadpv2f64 addr:$src2)))]>,
+ Requires<[HasSSE2]>, TB, OpSize;
+
+def ANDNPSrr : I<0x55, MRMSrcReg, (ops V4F32:$dst, V4F32:$src1, V4F32:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ANDNPSrm : I<0x55, MRMSrcMem, (ops V4F32:$dst, V4F32:$src1, f128mem:$src2),
+ "andnps {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE1]>, TB;
+def ANDNPDrr : I<0x55, MRMSrcReg, (ops V2F64:$dst, V2F64:$src1, V2F64:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+def ANDNPDrm : I<0x55, MRMSrcMem, (ops V2F64:$dst, V2F64:$src1, f128mem:$src2),
+ "andnpd {$src2, $dst|$dst, $src2}", []>,
+ Requires<[HasSSE2]>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE integer instructions
+//===----------------------------------------------------------------------===//
+
+// Move Instructions
+def MOVD128rr : I<0x6E, MRMSrcReg, (ops VR128:$dst, R32:$src),
+ "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+ Requires<[HasSSE2]>;
+def MOVD128rm : I<0x6E, MRMSrcMem, (ops VR128:$dst, i32mem:$src),
+ "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+ Requires<[HasSSE2]>;
+def MOVD128mr : I<0x7E, MRMDestMem, (ops i32mem:$dst, VR128:$src),
+ "movd {$src, $dst|$dst, $src}", []>, TB, OpSize,
+ Requires<[HasSSE2]>;
+
+def MOVQ128rr : I<0x7E, MRMSrcReg, (ops VR128:$dst, VR64:$src),
+ "movq {$src, $dst|$dst, $src}", []>, XS,
+ Requires<[HasSSE2]>;
+def MOVQ128rm : I<0x7E, MRMSrcMem, (ops VR128:$dst, i64mem:$src),
+ "movq {$src, $dst|$dst, $src}", []>, XS,
+ Requires<[HasSSE2]>;
+def MOVQ128mr : I<0xD6, MRMDestMem, (ops i64mem:$dst, VR128:$src),
+ "movq {$src, $dst|$dst, $src}", []>, TB, OpSize,
+ Requires<[HasSSE2]>; // store form: memory operand is $dst => MRMDestMem
More information about the llvm-commits
mailing list