[llvm-commits] CVS: llvm/lib/Target/X86/X86InstrSSE.td
Chris Lattner
sabre at nondot.org
Sat Oct 7 14:17:28 PDT 2006
Changes in directory llvm/lib/Target/X86:
X86InstrSSE.td updated: 1.160 -> 1.161
---
Log message:
convert packed FP add/sub/mul/div to use a multiclass.
---
Diffs of the changes: (+34 -65)
X86InstrSSE.td | 99 +++++++++++++++++++--------------------------------------
1 files changed, 34 insertions(+), 65 deletions(-)
Index: llvm/lib/Target/X86/X86InstrSSE.td
diff -u llvm/lib/Target/X86/X86InstrSSE.td:1.160 llvm/lib/Target/X86/X86InstrSSE.td:1.161
--- llvm/lib/Target/X86/X86InstrSSE.td:1.160 Sat Oct 7 15:55:57 2006
+++ llvm/lib/Target/X86/X86InstrSSE.td Sat Oct 7 16:17:13 2006
@@ -288,8 +288,6 @@
"movsd {$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-let isTwoAddress = 1 in {
-
/// scalar_sse12_fp_binop_rm - Scalar SSE binops come in four basic forms:
/// 1. f32 vs f64 - These come in SSE1/SSE2 forms for float/doubles.
/// 2. rr vs rm - They include a reg+reg form and a ref+mem form.
@@ -299,6 +297,7 @@
/// leave the top elements undefined. This adds another two variants of the
/// above permutations, giving us 8 forms for 'instruction'.
///
+let isTwoAddress = 1 in {
multiclass scalar_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int,
Intrinsic F64Int, bit Commutable = 0> {
@@ -573,7 +572,6 @@
// start with 'Fs'.
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
def FsFLD0SS : I<0xEF, MRMInitReg, (ops FR32:$dst),
"pxor $dst, $dst", [(set FR32:$dst, fp32imm0)]>,
Requires<[HasSSE1]>, TB, OpSize;
@@ -912,70 +910,41 @@
Requires<[HasSSE2]>;
}
-// Arithmetic
+/// packed_sse12_fp_binop_rm - Packed SSE binops come in four basic forms:
+/// 1. v4f32 vs v2f64 - These come in SSE1/SSE2 forms for float/doubles.
+/// 2. rr vs rm - They include a reg+reg form and a reg+mem form.
+///
let isTwoAddress = 1 in {
-let isCommutable = 1 in {
-def ADDPSrr : PSI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "addps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fadd VR128:$src1, VR128:$src2)))]>;
-def ADDPDrr : PDI<0x58, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "addpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fadd VR128:$src1, VR128:$src2)))]>;
-def MULPSrr : PSI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "mulps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fmul VR128:$src1, VR128:$src2)))]>;
-def MULPDrr : PDI<0x59, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "mulpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fmul VR128:$src1, VR128:$src2)))]>;
-}
-
-def ADDPSrm : PSI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "addps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fadd VR128:$src1,
- (load addr:$src2))))]>;
-def ADDPDrm : PDI<0x58, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "addpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fadd VR128:$src1,
- (load addr:$src2))))]>;
-def MULPSrm : PSI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "mulps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fmul VR128:$src1,
- (load addr:$src2))))]>;
-def MULPDrm : PDI<0x59, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "mulpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fmul VR128:$src1,
- (load addr:$src2))))]>;
-
-def DIVPSrr : PSI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "divps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fdiv VR128:$src1, VR128:$src2)))]>;
-def DIVPSrm : PSI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "divps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fdiv VR128:$src1,
- (load addr:$src2))))]>;
-def DIVPDrr : PDI<0x5E, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "divpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fdiv VR128:$src1, VR128:$src2)))]>;
-def DIVPDrm : PDI<0x5E, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "divpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fdiv VR128:$src1,
- (load addr:$src2))))]>;
-
-def SUBPSrr : PSI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "subps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fsub VR128:$src1, VR128:$src2)))]>;
-def SUBPSrm : PSI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "subps {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v4f32 (fsub VR128:$src1,
- (load addr:$src2))))]>;
-def SUBPDrr : PDI<0x5C, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
- "subpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fsub VR128:$src1, VR128:$src2)))]>;
-def SUBPDrm : PDI<0x5C, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
- "subpd {$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2f64 (fsub VR128:$src1,
- (load addr:$src2))))]>;
+multiclass packed_sse12_fp_binop_rm<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, bit Commutable = 0> {
+ // Packed operation, reg+reg (PS matches v4f32, PD matches v2f64).
+ def PSrr : PSI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ def PDrr : PDI<opc, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
+ let isCommutable = Commutable;
+ }
+ // Packed operation, reg+mem ($src2 is loaded from addr:$src2).
+ def PSrm : PSI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "ps {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv4f32 addr:$src2)))]>;
+ def PDrm : PDI<opc, MRMSrcMem, (ops VR128:$dst, VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "pd {$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1, (loadv2f64 addr:$src2)))]>;
+}
+}
+
+defm ADD : packed_sse12_fp_binop_rm<0x58, "add", fadd, 1>;
+defm MUL : packed_sse12_fp_binop_rm<0x59, "mul", fmul, 1>;
+defm DIV : packed_sse12_fp_binop_rm<0x5E, "div", fdiv>;
+defm SUB : packed_sse12_fp_binop_rm<0x5C, "sub", fsub>;
+// Arithmetic
+let isTwoAddress = 1 in {
def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
(ops VR128:$dst, VR128:$src1, VR128:$src2),
"addsubps {$src2, $dst|$dst, $src2}",
More information about the llvm-commits
mailing list