[llvm] r319433 - [X86][AVX512] Tag fcmp/ptest/ternlog instructions scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 30 05:18:06 PST 2017
Author: rksimon
Date: Thu Nov 30 05:18:06 2017
New Revision: 319433
URL: http://llvm.org/viewvc/llvm-project?rev=319433&view=rev
Log:
[X86][AVX512] Tag fcmp/ptest/ternlog instructions scheduler classes
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319433&r1=319432&r2=319433&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Nov 30 05:18:06 2017
@@ -369,17 +369,18 @@ multiclass AVX512_maskable_custom_cmp<bi
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
list<dag> MaskingPattern,
+ InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
- Pattern, NoItinerary>;
+ Pattern, itin>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
- MaskingPattern, NoItinerary>, EVEX_K;
+ MaskingPattern, itin>, EVEX_K;
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
@@ -388,27 +389,30 @@ multiclass AVX512_maskable_common_cmp<bi
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskingRHS,
+ InstrItinClass itin = NoItinerary,
bit IsCommutable = 0> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
- [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
+ [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, bit IsCommutable = 0> :
+ dag RHS, InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS), IsCommutable>;
+ (and _.KRCWM:$mask, RHS), itin, IsCommutable>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm> :
+ string AttSrcAsm, string IntelSrcAsm,
+ InstrItinClass itin = NoItinerary> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
- AttSrcAsm, IntelSrcAsm, [],[]>;
+ AttSrcAsm, IntelSrcAsm, [],[], itin>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -2219,15 +2223,15 @@ defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1
HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
-multiclass avx512_vcmp_common<X86VectorVTInfo _> {
-
+multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> {
defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- imm:$cc), 1>;
+ imm:$cc), itins.rr, 1>,
+ Sched<[itins.Sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
@@ -2235,7 +2239,8 @@ multiclass avx512_vcmp_common<X86VectorV
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
- imm:$cc)>;
+ imm:$cc), itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@@ -2245,28 +2250,32 @@ multiclass avx512_vcmp_common<X86VectorV
"$src1, ${src2}"##_.BroadcastStr,
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- imm:$cc)>,EVEX_B;
+ imm:$cc), itins.rm>,
+ EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
// Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">;
+ "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>,
+ Sched<[itins.Sched]>;
let mayLoad = 1 in {
defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc, $src2, $src1", "$src1, $src2, $cc">;
+ "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, ${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B;
+ "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>,
+ EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -2297,7 +2306,7 @@ multiclass avx512_vcmp_common<X86VectorV
imm:$cc)>;
}
-multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
+multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> {
// comparison code form (VCMP[EQ/LT/LE/...]
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
@@ -2306,7 +2315,8 @@ multiclass avx512_vcmp_sae<X86VectorVTIn
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
- (i32 FROUND_NO_EXC))>, EVEX_B;
+ (i32 FROUND_NO_EXC)), itins.rr>,
+ EVEX_B, Sched<[itins.Sched]>;
let isAsmParserOnly = 1, hasSideEffects = 0 in {
defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _,
@@ -2314,25 +2324,26 @@ multiclass avx512_vcmp_sae<X86VectorVTIn
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
- "$src1, $src2, {sae}, $cc">, EVEX_B;
+ "$src1, $src2, {sae}, $cc", itins.rr>,
+ EVEX_B, Sched<[itins.Sched]>;
}
}
-multiclass avx512_vcmp<AVX512VLVectorVTInfo _> {
+multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcmp_common<_.info512>,
- avx512_vcmp_sae<_.info512>, EVEX_V512;
+ defm Z : avx512_vcmp_common<itins, _.info512>,
+ avx512_vcmp_sae<itins, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512,HasVLX] in {
- defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128;
- defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256;
+ defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128;
+ defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256;
}
}
-defm VCMPPD : avx512_vcmp<avx512vl_f64_info>,
+defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
+defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
@@ -4998,34 +5009,35 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in {
let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>,
- EVEX_4V;
+ (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>,
+ EVEX_4V, Sched<[itins.Sched]>;
defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
- (_.VT (bitconvert (_.LdFrag addr:$src2))))>,
- EVEX_4V,
- EVEX_CD8<_.EltSize, CD8VF>;
+ (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>,
+ EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ OpndItins itins, X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
"$src1, ${src2}"##_.BroadcastStr,
(OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))>,
- EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ (_.ScalarLdFrag addr:$src2)))),
+ itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
@@ -5042,16 +5054,17 @@ multiclass avx512_vptest_lowering<SDNode
}
multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo _, string Suffix> {
+ OpndItins itins, AVX512VLVectorVTInfo _,
+ string Suffix> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, OpNode, _.info512>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, _.info256>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, _.info128>,
- avx512_vptest_mb<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
+ defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256;
+ defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>,
+ avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
}
let Predicates = [HasAVX512, NoVLX] in {
defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>;
@@ -5059,30 +5072,31 @@ multiclass avx512_vptest_dq_sizes<bits<8
}
}
-multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode> {
- defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode,
+multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
+ defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins,
avx512vl_i32_info, "D">;
- defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode,
+ defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins,
avx512vl_i64_info, "Q">, VEX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let Predicates = [HasBWI] in {
- defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, v32i16_info>,
+ defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>,
EVEX_V512, VEX_W;
- defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, v64i8_info>,
+ defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>,
EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
- defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, v16i16x_info>,
+ defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>,
EVEX_V256, VEX_W;
- defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, v8i16x_info>,
+ defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>,
EVEX_V128, VEX_W;
- defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, v32i8x_info>,
+ defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>,
EVEX_V256;
- defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, v16i8x_info>,
+ defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>,
EVEX_V128;
}
@@ -5092,16 +5106,17 @@ multiclass avx512_vptest_wb<bits<8> opc,
defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">;
defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">;
}
-
}
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
- SDNode OpNode> :
- avx512_vptest_wb <opc_wb, OpcodeStr, OpNode>,
- avx512_vptest_dq<opc_dq, OpcodeStr, OpNode>;
-
-defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD;
-defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS;
+ SDNode OpNode, OpndItins itins> :
+ avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
+ avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>;
+
+defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm,
+ SSE_BIT_ITINS_P>, T8PD;
+defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm,
+ SSE_BIT_ITINS_P>, T8XS;
//===----------------------------------------------------------------------===//
@@ -9702,7 +9717,7 @@ def VPTERNLOG312_imm8 : SDNodeXForm<imm,
}]>;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ OpndItins itins, X86VectorVTInfo _>{
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
@@ -9710,16 +9725,17 @@ multiclass avx512_ternlog<bits<8> opc, s
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
- (i8 imm:$src4)), NoItinerary, 1, 1>,
- AVX512AIi8Base, EVEX_4V;
+ (i8 imm:$src4)), itins.rr, 1, 1>,
+ AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
- (i8 imm:$src4)), NoItinerary, 1, 0>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ (i8 imm:$src4)), itins.rm, 1, 0>,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2",
@@ -9727,8 +9743,9 @@ multiclass avx512_ternlog<bits<8> opc, s
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
- (i8 imm:$src4)), NoItinerary, 1, 0>, EVEX_B,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
+ (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B,
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
@@ -9867,17 +9884,20 @@ multiclass avx512_ternlog<bits<8> opc, s
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
-multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
+multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins,
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512;
+ defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128;
- defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256;
+ defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128;
+ defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256;
}
}
-defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>;
-defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
+defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P,
+ avx512vl_i32_info>;
+defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P,
+ avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319433&r1=319432&r2=319433&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Nov 30 05:18:06 2017
@@ -1829,7 +1829,7 @@ define <4 x float> @f64to4f32_mask(<4 x
; GENERIC-LABEL: f64to4f32_mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1
+; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -1879,7 +1879,7 @@ define <8 x double> @f32to8f64(<8 x floa
define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
; GENERIC-LABEL: f32to4f64_mask:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1
+; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2469,8 +2469,8 @@ define <16 x double> @sbto16f64(<16 x do
; GENERIC-LABEL: sbto16f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1
+; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k1, %ymm0
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0
; GENERIC-NEXT: vpmovm2d %k0, %ymm1
@@ -2496,7 +2496,7 @@ define <8 x double> @sbto8f64(<8 x doubl
; GENERIC-LABEL: sbto8f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0
+; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %ymm0
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -2517,7 +2517,7 @@ define <8 x float> @sbto8f32(<8 x float>
; GENERIC-LABEL: sbto8f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0
+; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %ymm0
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -2538,7 +2538,7 @@ define <4 x float> @sbto4f32(<4 x float>
; GENERIC-LABEL: sbto4f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0
+; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -2559,7 +2559,7 @@ define <4 x double> @sbto4f64(<4 x doubl
; GENERIC-LABEL: sbto4f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0
+; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -2580,7 +2580,7 @@ define <2 x float> @sbto2f32(<2 x float>
; GENERIC-LABEL: sbto2f32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0
+; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -2601,7 +2601,7 @@ define <2 x double> @sbto2f64(<2 x doubl
; GENERIC-LABEL: sbto2f64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0
+; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0
; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -3263,7 +3263,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4
; GENERIC-LABEL: zext_4x8mem_to_4x32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3283,7 +3283,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4
; GENERIC-LABEL: sext_4x8mem_to_4x32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3449,7 +3449,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2
; GENERIC-LABEL: zext_2x8mem_to_2x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3468,7 +3468,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mas
; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3502,7 +3502,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4
; GENERIC-LABEL: zext_4x8mem_to_4x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3522,7 +3522,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mas
; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3612,7 +3612,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<
; GENERIC-LABEL: zext_4x16mem_to_4x32:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3632,7 +3632,7 @@ define <4 x i32> @sext_4x16mem_to_4x32ma
; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3843,7 +3843,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<
; GENERIC-LABEL: zext_2x16mem_to_2x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3863,7 +3863,7 @@ define <2 x i64> @sext_2x16mem_to_2x64ma
; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3898,7 +3898,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<
; GENERIC-LABEL: zext_4x16mem_to_4x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -3918,7 +3918,7 @@ define <4 x i64> @sext_4x16mem_to_4x64ma
; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4041,7 +4041,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<
; GENERIC-LABEL: zext_2x32mem_to_2x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4061,7 +4061,7 @@ define <2 x i64> @sext_2x32mem_to_2x64ma
; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4096,7 +4096,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<
; GENERIC-LABEL: zext_4x32mem_to_4x64:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4116,7 +4116,7 @@ define <4 x i64> @sext_4x32mem_to_4x64ma
; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4165,7 +4165,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(
; GENERIC-LABEL: zext_4x32_to_4x64mask:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1
+; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4354,7 +4354,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
; GENERIC-LABEL: trunc_16i32_to_16i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0
-; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0
+; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovd %k0, %eax
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
@@ -4377,9 +4377,9 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x
; GENERIC-LABEL: trunc_4i32_to_4i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1}
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6570,7 +6570,7 @@ define <16 x float> @mov_test40(i8 * %ad
; GENERIC-LABEL: mov_test40:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6591,7 +6591,7 @@ define <16 x float> @mov_test41(i8 * %ad
; GENERIC-LABEL: mov_test41:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6612,7 +6612,7 @@ define <16 x float> @mov_test42(i8 * %ad
; GENERIC-LABEL: mov_test42:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6633,7 +6633,7 @@ define <16 x float> @mov_test43(i8 * %ad
; GENERIC-LABEL: mov_test43:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6654,7 +6654,7 @@ define <8 x double> @mov_test44(i8 * %ad
; GENERIC-LABEL: mov_test44:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6675,7 +6675,7 @@ define <8 x double> @mov_test45(i8 * %ad
; GENERIC-LABEL: mov_test45:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1
+; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6696,7 +6696,7 @@ define <8 x double> @mov_test46(i8 * %ad
; GENERIC-LABEL: mov_test46:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6717,7 +6717,7 @@ define <8 x double> @mov_test47(i8 * %ad
; GENERIC-LABEL: mov_test47:
; GENERIC: # BB#0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1
+; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7230,7 +7230,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a,
; GENERIC-NEXT: .LBB389_1:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: .LBB389_3:
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7471,7 +7471,7 @@ define void @vmov_test22(<4 x i1> %a, <4
; GENERIC-LABEL: vmov_test22:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7489,7 +7489,7 @@ define void @vmov_test23(<2 x i1> %a, <2
; GENERIC-LABEL: vmov_test23:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7528,7 +7528,7 @@ define void @store_v2i1(<2 x i1> %c , <2
; GENERIC-LABEL: store_v2i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: knotw %k0, %k0
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -7549,7 +7549,7 @@ define void @store_v4i1(<4 x i1> %c , <4
; GENERIC-LABEL: store_v4i1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0
+; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: knotw %k0, %k0
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -7718,9 +7718,9 @@ define void @ktest_1(<8 x double> %in, d
; GENERIC-LABEL: ktest_1:
; GENERIC: # BB#0:
; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [4:0.50]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1
+; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
-; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1}
+; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: ktestb %k0, %k0
; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
; GENERIC-NEXT: # BB#1: # %L1
@@ -7781,13 +7781,13 @@ define void @ktest_2(<32 x float> %in, f
; GENERIC: # BB#0:
; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [4:0.50]
; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [4:0.50]
-; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1
-; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2
+; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
; GENERIC-NEXT: kunpckwd %k1, %k2, %k0
; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50]
; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50]
-; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1
-; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2
+; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
; GENERIC-NEXT: kunpckwd %k1, %k2, %k1
; GENERIC-NEXT: kord %k1, %k0, %k0
; GENERIC-NEXT: ktestd %k0, %k0
@@ -8590,7 +8590,7 @@ define <16 x i32> @test_vbroadcast() {
; GENERIC-LABEL: test_vbroadcast:
; GENERIC: # BB#0: # %entry
; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0
+; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0
; GENERIC-NEXT: knotw %k0, %k1
; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
More information about the llvm-commits mailing list