[llvm-commits] [llvm] r107749 - in /llvm/trunk/lib/Target/X86: X86Instr64bit.td X86InstrSSE.td
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Tue Jul 6 18:33:38 PDT 2010
Author: bruno
Date: Tue Jul 6 20:33:38 2010
New Revision: 107749
URL: http://llvm.org/viewvc/llvm-project?rev=107749&view=rev
Log:
Now that almost all SSE4.1 AVX instructions are added, move code around to more appropriate sections. No functionality changes
Modified:
llvm/trunk/lib/Target/X86/X86Instr64bit.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
Modified: llvm/trunk/lib/Target/X86/X86Instr64bit.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Instr64bit.td?rev=107749&r1=107748&r2=107749&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Instr64bit.td (original)
+++ llvm/trunk/lib/Target/X86/X86Instr64bit.td Tue Jul 6 20:33:38 2010
@@ -2353,24 +2353,6 @@
// X86-64 SSE4.1 Instructions
//===----------------------------------------------------------------------===//
-/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
-multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR64:$dst,
- (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, OpSize, REX_W;
-}
-
-defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
-
let Constraints = "$src1 = $dst" in {
multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=107749&r1=107748&r2=107749&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Jul 6 20:33:38 2010
@@ -3900,737 +3900,769 @@
(MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
-// SSE4.1 - Misc Instructions
+// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
+multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
- // Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
- OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
- // Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
- OpSize;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
}
-multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
+defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
+defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
+defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
+defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
+defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
- // Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TA, OpSize, Requires<[HasSSE41]>;
+// Common patterns involving scalar load.
+def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
+ (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
- // Vector intrinsic operation, reg
- def PDr : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
+ (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
- // Vector intrinsic operation, mem
- def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-}
+def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
+ (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
-multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr,
- Intrinsic F32Int,
- Intrinsic F64Int, bit Is2Addr = 1> {
- // Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
+def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
+ (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
- // Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- OpSize;
+def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
+ (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
- // Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
+def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
+ (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
- // Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- OpSize;
+
+multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
}
-multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- def SSr : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
- // Intrinsic operation, mem.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
+defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
+defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
+defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
- // Intrinsic operation, reg.
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
- // Intrinsic operation, mem.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
+ (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
+ (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+
+
+multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting a i16 load any extended to i32 value.
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
+ OpSize;
}
-// FP round - roundss, roundps, roundsd, roundpd
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
- // Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
- VEX;
- defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd,
- 0>, VEX_4V;
- // Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
- defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
}
+defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
-let Constraints = "$src1 = $dst" in
-defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+// Common patterns involving scalar load
+def : Pat<(int_x86_sse41_pmovsxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
-// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
-multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128> {
- def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
- def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
+def : Pat<(int_x86_sse41_pmovzxbq
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
+ (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
-defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
- int_x86_sse41_phminposuw>, VEX;
-defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
- int_x86_sse41_phminposuw>;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
+multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
- let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0>, VEX_4V;
- defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
- 0>, VEX_4V;
- defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
- 0>, VEX_4V;
- defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
- 0>, VEX_4V;
- defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
- 0>, VEX_4V;
- defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
- 0>, VEX_4V;
- defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
- 0>, VEX_4V;
- defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
- 0>, VEX_4V;
- defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
- 0>, VEX_4V;
- defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
- 0>, VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0>, VEX_4V;
-}
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
- defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
- defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
- defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
- defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
- defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
- defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
- defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
- defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
-}
+defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
-/// SS48I_binop_rm - Simple SSE41 binary operator.
-multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
+/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
+multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+// FIXME:
+// There's an AssertZext in the way of writing the store pattern
+// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
-/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
-multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
- let isCommutable = 1 in
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
- OpSize;
-}
+defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
- let isCommutable = 0 in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- 0>, VEX_4V;
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- 0>, VEX_4V;
- defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- 0>, VEX_4V;
- defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- 0>, VEX_4V;
- }
- defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- 0>, VEX_4V;
- defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- 0>, VEX_4V;
-}
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 0 in {
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
- }
- defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
- defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize;
}
-/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
- multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
- def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
- def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- }
+defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+
+/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
}
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
-/// SS41I_ternary_int - SSE 4.1 ternary operator
-let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
- def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst,
- (IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
- }
+/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
+/// destination
+multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR32:$dst,
+ (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
+ OpSize;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
+ addr:$dst)]>, OpSize;
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
+// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
+def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
+ imm:$src2))),
+ addr:$dst),
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
-multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
- OpSize;
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
-defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
- VEX;
-defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
- VEX;
-defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
- VEX;
-defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
- VEX;
-defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
- VEX;
-defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
- VEX;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
}
-defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
-defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
-defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
-defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
-defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
-defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
-// Common patterns involving scalar load.
-def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>, Requires<[HasSSE41]>;
+multiclass SS41I_insert64_avx<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize, REX_W;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize, REX_W;
+}
-def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>, Requires<[HasSSE41]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W;
-def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>, Requires<[HasSSE41]>;
+// insertps has a few different modes, there's the first two here below which
+// are optimized inserts that won't zero arbitrary elements in the destination
+// vector. The next one matches the intrinsic and could zero arbitrary elements
+// in the target vector.
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
+}
-def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>, Requires<[HasSSE41]>;
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
-def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>, Requires<[HasSSE41]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>, Requires<[HasSSE41]>;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic V4F32Int,
+ Intrinsic V2F64Int> {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+ // Vector intrinsic operation, mem
+ def PSm_Int : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
-multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ OpSize;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId (bitconvert (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
- OpSize;
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
-defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
- VEX;
-defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
- VEX;
-defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
- VEX;
-defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
- VEX;
-}
+multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
-defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
-defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
-defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
-defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>, Requires<[HasSSE41]>;
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
-def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>, Requires<[HasSSE41]>;
-def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>, Requires<[HasSSE41]>;
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
-multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId VR128:$src))]>, OpSize;
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
- // Expecting a i16 load any extended to i32 value.
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), (ins i16mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId (bitconvert
- (v4i32 (scalar_to_vector (loadi16_anyext addr:$src))))))]>,
- OpSize;
-}
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
-defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
- VEX;
-defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
- VEX;
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
}
-defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-// Common patterns involving scalar load
-def : Pat<(int_x86_sse41_pmovsxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>, Requires<[HasSSE41]>;
+multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
-def : Pat<(int_x86_sse41_pmovzxbq
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
-/// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
-multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
- OpSize;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-// FIXME:
-// There's an AssertZext in the way of writing the store pattern
-// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+// FP round - roundss, roundps, roundsd, roundpd
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd,
+ 0>, VEX_4V;
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
+ defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+}
-defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
-/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
-multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-// FIXME:
-// There's an AssertZext in the way of writing the store pattern
-// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
-
-defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
-/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
-multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst,
- (extractelt (v4i32 VR128:$src1), imm:$src2))]>, OpSize;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
- addr:$dst)]>, OpSize;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
-
-defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
-
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
-
-/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
-/// destination
-multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR32:$dst,
- (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))]>,
- OpSize;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins f32mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2),
- addr:$dst)]>, OpSize;
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
-defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
-
-// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
-def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
- imm:$src2))),
- addr:$dst),
- (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
-multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, OpSize;
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
-multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, OpSize;
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ 0>, VEX_4V;
+}
-multiclass SS41I_insert64_avx<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- OpSize, REX_W;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, OpSize, REX_W;
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VPINSRQ : SS41I_insert64_avx<0x22, "vpinsrq">, VEX_4V, VEX_W;
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
+ def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-// insertps has a few different modes, there's the first two here below which
-// are optimized inserts that won't zero arbitrary elements in the destination
-// vector. The next one matches the intrinsic and could zero arbitrary elements
-// in the target vector.
-multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1,
- (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
+ def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ }
}
-let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
-def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
@@ -4666,7 +4698,6 @@
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize;
-
//===----------------------------------------------------------------------===//
// SSE4.2 Instructions
//===----------------------------------------------------------------------===//
More information about the llvm-commits
mailing list