[llvm-commits] CVS: llvm/lib/Target/X86/README-MMX.txt README-SSE.txt X86ISelLowering.cpp X86InstrMMX.td
Bill Wendling
isanbard at gmail.com
Thu Mar 22 11:43:15 PDT 2007
Changes in directory llvm/lib/Target/X86:
README-MMX.txt added (r1.1)
README-SSE.txt updated: 1.15 -> 1.16
X86ISelLowering.cpp updated: 1.374 -> 1.375
X86InstrMMX.td updated: 1.21 -> 1.22
---
Log message:
Added support for MMX shift and unpack instructions.
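
For illustration, a minimal sketch (not part of the diff; intrinsic names are
from <mmintrin.h>) that exercises the newly supported operations:

#include <mmintrin.h>

/* Sketch: one variable-count shift, one immediate-count shift, and one
   high unpack -- the kinds of operations this patch adds patterns for. */
void demo(__m64 *dst, __m64 a, __m64 count)
{
  __m64 t = _mm_sll_pi16(a, count);  /* psllw  %mm, %mm */
  t = _mm_slli_pi16(t, 2);           /* psllw  $2, %mm  */
  *dst = _mm_unpackhi_pi32(t, a);    /* punpckhdq       */
  _mm_empty();                       /* emms            */
}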
---
Diffs of the changes: (+211 -2)
README-MMX.txt | 59 ++++++++++++++++++++++++++++
README-SSE.txt | 40 +++++++++++++++++++
X86ISelLowering.cpp | 6 ++
X86InstrMMX.td | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 211 insertions(+), 2 deletions(-)
Index: llvm/lib/Target/X86/README-MMX.txt
diff -c /dev/null llvm/lib/Target/X86/README-MMX.txt:1.1
*** /dev/null Thu Mar 22 13:42:55 2007
--- llvm/lib/Target/X86/README-MMX.txt Thu Mar 22 13:42:45 2007
***************
*** 0 ****
--- 1,59 ----
+ //===---------------------------------------------------------------------===//
+ // Random ideas for the X86 backend: MMX-specific stuff.
+ //===---------------------------------------------------------------------===//
+
+ //===---------------------------------------------------------------------===//
+
+ We should compile
+
+ #include <mmintrin.h>
+
+ extern __m64 C;
+
+ void baz(__v2si *A, __v2si *B)
+ {
+ *A = __builtin_ia32_psllq(*B, C);
+ _mm_empty();
+ }
+
+ to:
+
+ .globl _baz
+ _baz:
+ call L3
+ "L00000000001$pb":
+ L3:
+ popl %ecx
+ subl $12, %esp
+ movl 20(%esp), %eax
+ movq (%eax), %mm0
+ movl L_C$non_lazy_ptr-"L00000000001$pb"(%ecx), %eax
+ movq (%eax), %mm1
+ movl 16(%esp), %eax
+ psllq %mm1, %mm0
+ movq %mm0, (%eax)
+ emms
+ addl $12, %esp
+ ret
+
+ not:
+
+ _baz:
+ subl $12, %esp
+ call "L1$pb"
+ "L1$pb":
+ popl %eax
+ movl L_C$non_lazy_ptr-"L1$pb"(%eax), %eax
+ movl (%eax), %ecx
+ movl %ecx, (%esp)
+ movl 4(%eax), %eax
+ movl %eax, 4(%esp)
+ movl 20(%esp), %eax
+ movq (%eax), %mm0
+ movq (%esp), %mm1
+ psllq %mm1, %mm0
+ movl 16(%esp), %eax
+ movq %mm0, (%eax)
+ emms
+ addl $12, %esp
+ ret
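As an aside (not in the diff), the builtin above corresponds to the portable
<mmintrin.h> intrinsic _mm_sll_si64, so the same test case can be written as:

#include <mmintrin.h>

extern __m64 C;

/* Equivalent to the __builtin_ia32_psllq form above: psllq with the
   shift count taken from a 64-bit MMX operand. */
void baz(__m64 *A, __m64 *B)
{
  *A = _mm_sll_si64(*B, C);
  _mm_empty();
}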
Index: llvm/lib/Target/X86/README-SSE.txt
diff -u llvm/lib/Target/X86/README-SSE.txt:1.15 llvm/lib/Target/X86/README-SSE.txt:1.16
--- llvm/lib/Target/X86/README-SSE.txt:1.15 Tue Feb 27 11:21:09 2007
+++ llvm/lib/Target/X86/README-SSE.txt Thu Mar 22 13:42:45 2007
@@ -571,4 +571,44 @@
movaps %xmm0, (%eax)
ret
+//===---------------------------------------------------------------------===//
+We should compile this:
+
+#include <emmintrin.h>
+
+void foo(__m128i *A, __m128i *B) {
+ *A = _mm_sll_epi16 (*A, *B);
+}
+
+to:
+
+_foo:
+ subl $12, %esp
+ movl 16(%esp), %edx
+ movl 20(%esp), %eax
+ movdqa (%edx), %xmm1
+ movdqa (%eax), %xmm0
+ psllw %xmm0, %xmm1
+ movdqa %xmm1, (%edx)
+ addl $12, %esp
+ ret
+
+not:
+
+_foo:
+ movl 8(%esp), %eax
+ movdqa (%eax), %xmm0
+ #IMPLICIT_DEF %eax
+ pinsrw $2, %eax, %xmm0
+ xorl %ecx, %ecx
+ pinsrw $3, %ecx, %xmm0
+ pinsrw $4, %eax, %xmm0
+ pinsrw $5, %ecx, %xmm0
+ pinsrw $6, %eax, %xmm0
+ pinsrw $7, %ecx, %xmm0
+ movl 4(%esp), %eax
+ movdqa (%eax), %xmm1
+ psllw %xmm0, %xmm1
+ movdqa %xmm1, (%eax)
+ ret
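A related sketch (not in the diff): when the shift count is a compile-time
constant, the immediate form sidesteps the count-register question entirely:

#include <emmintrin.h>

/* Immediate-count form: should select "psllw $3, %xmm0" directly,
   with no count register to materialize. */
__m128i shl3(__m128i v)
{
  return _mm_slli_epi16(v, 3);
}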
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
diff -u llvm/lib/Target/X86/X86ISelLowering.cpp:1.374 llvm/lib/Target/X86/X86ISelLowering.cpp:1.375
--- llvm/lib/Target/X86/X86ISelLowering.cpp:1.374 Wed Mar 21 16:51:52 2007
+++ llvm/lib/Target/X86/X86ISelLowering.cpp Thu Mar 22 13:42:45 2007
@@ -355,6 +355,10 @@
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Expand);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
}
if (Subtarget->hasSSE1()) {
@@ -2312,7 +2316,7 @@
return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
}
-/// LowerBuildVectorv16i8 - Custom lower build_vector of v8i16.
+/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
Index: llvm/lib/Target/X86/X86InstrMMX.td
diff -u llvm/lib/Target/X86/X86InstrMMX.td:1.21 llvm/lib/Target/X86/X86InstrMMX.td:1.22
--- llvm/lib/Target/X86/X86InstrMMX.td:1.21 Fri Mar 16 04:44:46 2007
+++ llvm/lib/Target/X86/X86InstrMMX.td Thu Mar 22 13:42:45 2007
@@ -44,6 +44,10 @@
def loadv2i32 : PatFrag<(ops node:$ptr), (v2i32 (load node:$ptr))>;
+def bc_v8i8 : PatFrag<(ops node:$in), (v8i8 (bitconvert node:$in))>;
+def bc_v4i16 : PatFrag<(ops node:$in), (v4i16 (bitconvert node:$in))>;
+def bc_v2i32 : PatFrag<(ops node:$in), (v2i32 (bitconvert node:$in))>;
+
//===----------------------------------------------------------------------===//
// MMX Multiclasses
//===----------------------------------------------------------------------===//
@@ -94,13 +98,28 @@
[(set VR64:$dst,
(OpNode VR64:$src1,(loadv2i32 addr:$src2)))]>;
}
+
+ multiclass MMXI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, Intrinsic IntId> {
+ def rr : MMXI<opc, MRMSrcReg, (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1, VR64:$src2))]>;
+ def rm : MMXI<opc, MRMSrcMem, (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (bitconvert (loadv2i32 addr:$src2))))]>;
+ def ri : MMXIi8<opc2, ImmForm, (ops VR64:$dst, VR64:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId VR64:$src1,
+ (scalar_to_vector (i32 imm:$src2))))]>;
+ }
}
//===----------------------------------------------------------------------===//
// MMX EMMS Instruction
//===----------------------------------------------------------------------===//
-def EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
+def MMX_EMMS : MMXI<0x77, RawFrm, (ops), "emms", [(int_x86_mmx_emms)]>;
//===----------------------------------------------------------------------===//
// MMX Scalar Instructions
@@ -132,6 +151,53 @@
defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw" , int_x86_mmx_pmulh_w , 1>;
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, 1>;
+
+def MMX_UNPCKH_shuffle_mask : PatLeaf<(build_vector), [{
+ return X86::isUNPCKHMask(N);
+}]>;
+
+let isTwoAddress = 1 in {
+def MMX_PUNPCKHBWrr : MMXI<0x68, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHBWrm : MMXI<0x68, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhbw {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v8i8 (vector_shuffle VR64:$src1,
+ (bc_v8i8 (loadv2i32 addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHWDrr : MMXI<0x69, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHWDrm : MMXI<0x69, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhwd {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v4i16 (vector_shuffle VR64:$src1,
+ (bc_v4i16 (loadv2i32 addr:$src2)),
+ MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHDQrr : MMXI<0x6A, MRMSrcReg,
+ (ops VR64:$dst, VR64:$src1, VR64:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1, VR64:$src2,
+ MMX_UNPCKH_shuffle_mask)))]>;
+def MMX_PUNPCKHDQrm : MMXI<0x6A, MRMSrcMem,
+ (ops VR64:$dst, VR64:$src1, i64mem:$src2),
+ "punpckhdq {$src2, $dst|$dst, $src2}",
+ [(set VR64:$dst,
+ (v2i32 (vector_shuffle VR64:$src1,
+ (loadv2i32 addr:$src2),
+ MMX_UNPCKH_shuffle_mask)))]>;
+}
+
// Logical Instructions
defm MMX_PAND : MMXI_binop_rm_v2i32<0xDB, "pand", and, 1>;
defm MMX_POR : MMXI_binop_rm_v2i32<0xEB, "por" , or, 1>;
@@ -150,6 +216,26 @@
(load addr:$src2))))]>;
}
+// Shift Instructions
+defm MMX_PSRLW : MMXI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
+ int_x86_mmx_psrl_w>;
+defm MMX_PSRLD : MMXI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
+ int_x86_mmx_psrl_d>;
+defm MMX_PSRLQ : MMXI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
+ int_x86_mmx_psrl_q>;
+
+defm MMX_PSLLW : MMXI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
+ int_x86_mmx_psll_w>;
+defm MMX_PSLLD : MMXI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
+ int_x86_mmx_psll_d>;
+defm MMX_PSLLQ : MMXI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
+ int_x86_mmx_psll_q>;
+
+defm MMX_PSRAW : MMXI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
+ int_x86_mmx_psra_w>;
+defm MMX_PSRAD : MMXI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
+ int_x86_mmx_psra_d>;
+
// Move Instructions
def MOVD64rr : MMXI<0x6E, MRMSrcReg, (ops VR64:$dst, GR32:$src),
"movd {$src, $dst|$dst, $src}", []>;
@@ -225,3 +311,23 @@
def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
+
+// Splat v2i32
+let AddedComplexity = 10 in {
+ def : Pat<(vector_shuffle (v2i32 VR64:$src), (undef),
+ MMX_UNPCKH_shuffle_mask:$sm),
+ (MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
+}
+
+// FIXME: Temporary workaround because 2-wide shuffle is broken.
+def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, VR64:$src2),
+ (v2i32 (MMX_PUNPCKHDQrr VR64:$src1, VR64:$src2))>;
+def : Pat<(int_x86_mmx_punpckh_dq VR64:$src1, (load addr:$src2)),
+ (v2i32 (MMX_PUNPCKHDQrm VR64:$src1, addr:$src2))>;
+
+def MMX_X86s2vec : SDNode<"X86ISD::S2VEC", SDTypeProfile<1, 1, []>, []>;
+
+// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower 8 or
+// 16 bits matter.
+def : Pat<(v4i16 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
+def : Pat<(v8i8 (MMX_X86s2vec GR32:$src)), (MOVD64rr GR32:$src)>;
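For reference, a sketch (not part of the diff) of the splat that the
AddedComplexity pattern above is meant to catch: unpacking a register with
itself duplicates its high 32-bit lane.

#include <mmintrin.h>

/* Splat the high 32-bit lane of a __m64, i.e. return {v[1], v[1]}.
   With the pattern above this can select a single
   "punpckhdq %mm0, %mm0" (caller must still run emms before x87 code). */
__m64 splat_hi32(__m64 v)
{
  return _mm_unpackhi_pi32(v, v);
}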