[llvm] r319945 - [X86][AVX512] Tag mask reg op instruction scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 11:36:01 PST 2017
Author: rksimon
Date: Wed Dec 6 11:36:00 2017
New Revision: 319945
URL: http://llvm.org/viewvc/llvm-project?rev=319945&view=rev
Log:
[X86][AVX512] Tag mask reg op instruction scheduler classes
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Dec 6 11:36:00 2017
@@ -2621,15 +2621,16 @@ defm VFPCLASS : avx512_fp_fpclass_all<"v
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
string OpcodeStr, RegisterClass KRC,
ValueType vvt, X86MemOperand x86memop> {
- let hasSideEffects = 0 in
+ let hasSideEffects = 0, SchedRW = [WriteMove] in
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVDQ>;
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (vvt (load addr:$src)))]>;
+ [(set KRC:$dst, (vvt (load addr:$src)))], IIC_SSE_MOVDQ>;
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(store KRC:$src, addr:$dst)]>;
+ [(store KRC:$src, addr:$dst)], IIC_SSE_MOVDQ>;
}
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
@@ -2637,9 +2638,11 @@ multiclass avx512_mask_mov_gpr<bits<8> o
RegisterClass KRC, RegisterClass GRC> {
let hasSideEffects = 0 in {
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
}
}
@@ -2805,26 +2808,27 @@ let Predicates = [HasAVX512] in {
// - KNOT
multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd> {
+ OpndItins itins, Predicate prd> {
let Predicates = [prd] in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set KRC:$dst, (OpNode KRC:$src))]>;
+ [(set KRC:$dst, (OpNode KRC:$src))], itins.rr>,
+ Sched<[itins.Sched]>;
}
multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode> {
+ SDPatternOperator OpNode, OpndItins itins> {
defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI>, VEX, PD;
+ itins, HasDQI>, VEX, PD;
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- HasAVX512>, VEX, PS;
+ itins, HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI>, VEX, PD, VEX_W;
+ itins, HasBWI>, VEX, PD, VEX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI>, VEX, PS, VEX_W;
+ itins, HasBWI>, VEX, PS, VEX_W;
}
-defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>;
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
let Predicates = [HasAVX512, NoDQI] in
@@ -2840,25 +2844,26 @@ def : Pat<(vnot VK2:$src),
// - KAND, KANDN, KOR, KXNOR, KXOR
multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr,
RegisterClass KRC, SDPatternOperator OpNode,
- Predicate prd, bit IsCommutable> {
+ OpndItins itins, Predicate prd, bit IsCommutable> {
let Predicates = [prd], isCommutable = IsCommutable in
def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>;
+ [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- SDPatternOperator OpNode, bit IsCommutable,
- Predicate prdW = HasAVX512> {
+ SDPatternOperator OpNode, OpndItins itins,
+ bit IsCommutable, Predicate prdW = HasAVX512> {
defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode,
- HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
+ itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD;
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- prdW, IsCommutable>, VEX_4V, VEX_L, PS;
+ itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
+ itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
+ itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
}
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
@@ -2867,12 +2872,12 @@ def xnor : PatFrag<(ops node:$i0, node:$
def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
-defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
-defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
-defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
-defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
-defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
-defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, SSE_BIT_ITINS_P, 1, HasDQI>;
multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
Instruction Inst> {
@@ -2907,13 +2912,13 @@ defm : avx512_binop_pat<xor, xor, KXO
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
- RegisterClass KRCSrc, Predicate prd> {
+ RegisterClass KRCSrc, OpndItins itins, Predicate prd> {
let Predicates = [prd] in {
let hasSideEffects = 0 in
def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst),
(ins KRC:$src1, KRC:$src2),
- "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_L;
+ "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ itins.rr>, VEX_4V, VEX_L, Sched<[itins.Sched]>;
def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)),
(!cast<Instruction>(NAME##rr)
@@ -2922,61 +2927,63 @@ multiclass avx512_mask_unpck<string Suff
}
}
-defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD;
-defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W;
+defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD;
+defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W;
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode, Predicate prd> {
+ SDNode OpNode, OpndItins itins, Predicate prd> {
let Predicates = [prd], Defs = [EFLAGS] in
def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>;
+ [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))], itins.rr>,
+ Sched<[itins.Sched]>;
}
multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Predicate prdW = HasAVX512> {
- defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, HasDQI>,
+ OpndItins itins, Predicate prdW = HasAVX512> {
+ defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>,
VEX, PD;
- defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, prdW>,
+ defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>,
VEX, PS;
- defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, HasBWI>,
+ defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>,
VEX, PS, VEX_W;
- defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, HasBWI>,
+ defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>,
VEX, PD, VEX_W;
}
-defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
-defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
+defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>;
+defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>;
// Mask shift
multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
let Predicates = [HasAVX512] in
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
- [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>;
+ [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))],
+ itins.rr>, Sched<[itins.Sched]>;
}
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
- SDNode OpNode> {
- defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode>,
- VEX, TAPD, VEX_W;
+ SDNode OpNode, OpndItins itins> {
+ defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
+ itins>, VEX, TAPD, VEX_W;
let Predicates = [HasDQI] in
- defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode>,
- VEX, TAPD;
+ defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
+ itins>, VEX, TAPD;
let Predicates = [HasBWI] in {
- defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode>,
- VEX, TAPD, VEX_W;
- defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode>,
- VEX, TAPD;
+ defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
+ itins>, VEX, TAPD, VEX_W;
+ defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
+ itins>, VEX, TAPD;
}
}
-defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl>;
-defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr>;
+defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>;
+defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>;
multiclass axv512_icmp_packed_no_vlx_lowering<SDNode OpNode, string InstStr> {
def : Pat<(v8i1 (OpNode (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
@@ -3023,7 +3030,8 @@ let Predicates = [HasAVX512, NoVLX] in {
// Mask setting all 0s or 1s
multiclass avx512_mask_setop<RegisterClass KRC, ValueType VT, PatFrag Val> {
let Predicates = [HasAVX512] in
- let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1 in
+ let isReMaterializable = 1, isAsCheapAsAMove = 1, isPseudo = 1,
+ SchedRW = [WriteZero] in
def #NAME# : I<0, Pseudo, (outs KRC:$dst), (ins), "",
[(set KRC:$dst, (VT Val))]>;
}
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Dec 6 11:36:00 2017
@@ -6172,6 +6172,11 @@ let Predicates = [UseSSE41] in {
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
+let Sched = WriteVecLogic in
+def SSE_PTEST : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
// ptest instruction we'll lower to this in X86ISelLowering primarily from
// the intel intrinsic that corresponds to this.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319945&r1=319944&r2=319945&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Dec 6 11:36:00 2017
@@ -1164,7 +1164,7 @@ define i32 @test3(float %a, float %b) {
; GENERIC-LABEL: test3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1719,7 +1719,7 @@ define <8 x double> @sito8f64(<8 x i32>
define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: i32to8f64_mask:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1741,7 +1741,7 @@ define <8 x double> @i32to8f64_mask(<8 x
define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: sito8f64_maskz:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2193,7 +2193,7 @@ define <16 x float> @ulto16f32(<16 x i64
define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; GENERIC-LABEL: uito8f64_mask:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2215,7 +2215,7 @@ define <8 x double> @uito8f64_mask(<8 x
define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
; GENERIC-LABEL: uito8f64_maskz:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2855,7 +2855,7 @@ define <16 x double> @ubto16f64(<16 x i3
; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: kshiftrw $8, %k1, %k1
+; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -4298,7 +4298,7 @@ define <8 x double> @fpext_test(<8 x flo
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4315,7 +4315,7 @@ define <16 x i32> @zext_16i1_to_16xi32
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4334,7 +4334,7 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4355,7 +4355,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i3
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -4404,7 +4404,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4443,13 +4443,13 @@ define i16 @trunc_i32_to_i1(i32 %a) {
; GENERIC-LABEL: trunc_i32_to_i1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k0
-; GENERIC-NEXT: kshiftrw $1, %k0, %k0
-; GENERIC-NEXT: kshiftlw $1, %k0, %k0
+; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %edi, %k1
-; GENERIC-NEXT: korw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4550,7 +4550,7 @@ define <64 x i16> @test21(<64 x i16> %x
; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [3:1.00]
; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrq $32, %k1, %k1
+; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5357,7 +5357,7 @@ define <32 x i16> @xor_v32i16(<32 x i16>
define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_and_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5382,7 +5382,7 @@ define <16 x float> @masked_and_v16f32(<
define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_or_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5407,7 +5407,7 @@ define <16 x float> @masked_or_v16f32(<1
define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
; GENERIC-LABEL: masked_xor_v16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5432,7 +5432,7 @@ define <16 x float> @masked_xor_v16f32(<
define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_and_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5457,7 +5457,7 @@ define <8 x double> @masked_and_v8f64(<8
define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_or_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5482,7 +5482,7 @@ define <8 x double> @masked_or_v8f64(<8
define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
; GENERIC-LABEL: masked_xor_v8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -5507,7 +5507,7 @@ define <8 x double> @masked_xor_v8f64(<8
define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_and_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5529,7 +5529,7 @@ entry:
define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_or_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5551,7 +5551,7 @@ entry:
define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
; GENERIC-LABEL: test_mm512_mask_xor_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5573,7 +5573,7 @@ entry:
define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_xor_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5595,7 +5595,7 @@ entry:
define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_xor_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5617,7 +5617,7 @@ entry:
define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_xor_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5639,7 +5639,7 @@ entry:
define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_xor_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5661,7 +5661,7 @@ entry:
define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_or_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5683,7 +5683,7 @@ entry:
define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_or_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5705,7 +5705,7 @@ entry:
define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_or_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5727,7 +5727,7 @@ entry:
define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_or_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5749,7 +5749,7 @@ entry:
define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_and_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5771,7 +5771,7 @@ entry:
define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_and_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5793,7 +5793,7 @@ entry:
define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_and_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5815,7 +5815,7 @@ entry:
define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_and_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5837,7 +5837,7 @@ entry:
define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_mask_andnot_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5860,7 +5860,7 @@ entry:
define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5883,7 +5883,7 @@ entry:
define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_mask_andnot_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -5906,7 +5906,7 @@ entry:
define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6737,9 +6737,9 @@ define <8 x double> @mov_test47(i8 * %ad
define i16 @mask16(i16 %x) {
; GENERIC-LABEL: mask16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotw %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6759,9 +6759,9 @@ define i16 @mask16(i16 %x) {
define i32 @mask16_zext(i16 %x) {
; GENERIC-LABEL: mask16_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotw %k0, %k0
-; GENERIC-NEXT: kmovw %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask16_zext:
@@ -6780,9 +6780,9 @@ define i32 @mask16_zext(i16 %x) {
define i8 @mask8(i8 %x) {
; GENERIC-LABEL: mask8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotb %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6802,9 +6802,9 @@ define i8 @mask8(i8 %x) {
define i32 @mask8_zext(i8 %x) {
; GENERIC-LABEL: mask8_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: knotb %k0, %k0
-; GENERIC-NEXT: kmovb %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_zext:
@@ -6824,7 +6824,7 @@ define void @mask16_mem(i16* %ptr) {
; GENERIC-LABEL: mask16_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw (%rdi), %k0
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6846,7 +6846,7 @@ define void @mask8_mem(i8* %ptr) {
; GENERIC-LABEL: mask8_mem:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovb (%rdi), %k0
-; GENERIC-NEXT: knotb %k0, %k0
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6896,10 +6896,10 @@ define i16 @mand16_mem(<16 x i1>* %x, <1
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovw (%rdi), %k0
; GENERIC-NEXT: kmovw (%rsi), %k1
-; GENERIC-NEXT: kandw %k1, %k0, %k2
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: korw %k0, %k2, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6925,9 +6925,9 @@ define i16 @mand16_mem(<16 x i1>* %x, <1
define i8 @shuf_test1(i16 %v) nounwind {
; GENERIC-LABEL: shuf_test1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kshiftrw $8, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -6948,9 +6948,9 @@ define i32 @zext_test1(<16 x i32> %a, <1
; GENERIC-LABEL: zext_test1:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -6974,9 +6974,9 @@ define i16 @zext_test2(<16 x i32> %a, <1
; GENERIC-LABEL: zext_test2:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
@@ -7002,9 +7002,9 @@ define i8 @zext_test3(<16 x i32> %a, <16
; GENERIC-LABEL: zext_test3:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kshiftlw $10, %k0, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kshiftlw $10, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: vzeroupper
@@ -7029,7 +7029,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
define i8 @conv1(<8 x i1>* %R) {
; GENERIC-LABEL: conv1:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kxnorw %k0, %k0, %k0
+; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
@@ -7057,7 +7057,7 @@ define <4 x i32> @test4(<4 x i64> %x, <4
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: kandnw %k0, %k1, %k0
+; GENERIC-NEXT: kandnw %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -7082,7 +7082,7 @@ define <2 x i64> @vcmp_test5(<2 x i64> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpcmpgtq %xmm3, %xmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: kandnw %k1, %k0, %k0
+; GENERIC-NEXT: kandnw %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7117,9 +7117,9 @@ define void @vcmp_test7(<8 x i1> %mask)
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $85, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
-; GENERIC-NEXT: korb %k1, %k0, %k0
-; GENERIC-NEXT: ktestb %k0, %k0
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: vcmp_test7:
@@ -7297,7 +7297,7 @@ define <16 x i1> @vmov_test15(i32 %x, i3
; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: movw $1, %cx # sched: [1:0.33]
; GENERIC-NEXT: cmovgw %ax, %cx # sched: [2:0.67]
-; GENERIC-NEXT: kmovd %ecx, %k0
+; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7322,14 +7322,14 @@ define <64 x i8> @vmov_test16(i64 %x) {
;
; GENERIC-LABEL: vmov_test16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0
+; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
@@ -7361,15 +7361,15 @@ define <64 x i8> @vmov_test17(i64 %x, i3
;
; GENERIC-LABEL: vmov_test17:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0
+; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
; GENERIC-NEXT: setg %al # sched: [1:0.50]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2b %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpsllq $40, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2b %k0, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1} # sched: [2:1.00]
; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
@@ -7402,21 +7402,21 @@ define <64 x i8> @vmov_test17(i64 %x, i3
define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
; GENERIC-LABEL: vmov_test18:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: kmovd %esi, %k2
-; GENERIC-NEXT: kshiftlw $7, %k2, %k0
-; GENERIC-NEXT: kshiftrw $15, %k0, %k0
-; GENERIC-NEXT: kshiftlw $6, %k2, %k2
-; GENERIC-NEXT: kshiftrw $15, %k2, %k2
+; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k2 # sched: [1:0.33]
+; GENERIC-NEXT: kshiftlw $7, %k2, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlw $6, %k2, %k2 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $15, %k2, %k2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2q %k1, %zmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpmovm2q %k2, %zmm1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,7] sched: [4:0.50]
; GENERIC-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 # sched: [1:1.00]
; GENERIC-NEXT: vpmovq2m %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlb $1, %k1, %k1
-; GENERIC-NEXT: kshiftrb $1, %k1, %k1
-; GENERIC-NEXT: kshiftlb $7, %k0, %k0
-; GENERIC-NEXT: korb %k0, %k1, %k0
+; GENERIC-NEXT: kshiftlb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kshiftlb $7, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: korb %k0, %k1, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -7506,9 +7506,9 @@ define void @vmov_test23(<2 x i1> %a, <2
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; GENERIC-LABEL: store_v1i1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kxnorw %k0, %k0, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rsi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7529,7 +7529,7 @@ define void @store_v2i1(<2 x i1> %c , <2
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7550,7 +7550,7 @@ define void @store_v4i1(<4 x i1> %c , <4
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7571,7 +7571,7 @@ define void @store_v8i1(<8 x i1> %c , <8
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotb %k0, %k0
+; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovb %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7592,7 +7592,7 @@ define void @store_v16i1(<16 x i1> %c ,
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0
+; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: kmovw %k0, (%rdi)
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7685,7 +7685,7 @@ define <32 x i16> @test_build_vec_v32i1(
; GENERIC: # %bb.0:
; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495
; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1
+; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -7721,7 +7721,7 @@ define void @ktest_1(<8 x double> %in, d
; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: ktestb %k0, %k0
+; GENERIC-NEXT: ktestb %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
@@ -7783,14 +7783,14 @@ define void @ktest_2(<32 x float> %in, f
; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [4:0.50]
; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k0
+; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50]
; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50]
; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k1
-; GENERIC-NEXT: kord %k1, %k0, %k0
-; GENERIC-NEXT: ktestd %k0, %k0
+; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
+; GENERIC-NEXT: kord %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: ktestd %k0, %k0 # sched: [1:1.00]
; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
; GENERIC-NEXT: # %bb.1: # %L1
; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
@@ -8080,7 +8080,7 @@ define i32 @test_bitcast_v8i1_zext(<16 x
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovb %k0, %eax
+; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -8106,7 +8106,7 @@ define i32 @test_bitcast_v16i1_zext(<16
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovw %k0, %eax
+; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -8129,10 +8129,10 @@ define i32 @test_bitcast_v16i1_zext(<16
define i16 @test_v16i1_add(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_add:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8154,10 +8154,10 @@ define i16 @test_v16i1_add(i16 %x, i16 %
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_sub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8179,10 +8179,10 @@ define i16 @test_v16i1_sub(i16 %x, i16 %
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
; GENERIC-LABEL: test_v16i1_mul:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kandw %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8204,10 +8204,10 @@ define i16 @test_v16i1_mul(i16 %x, i16 %
define i8 @test_v8i1_add(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_add:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8229,10 +8229,10 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) {
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_sub:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kxorb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8254,10 +8254,10 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) {
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
; GENERIC-LABEL: test_v8i1_mul:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0
-; GENERIC-NEXT: kmovd %esi, %k1
-; GENERIC-NEXT: kandb %k1, %k0, %k0
-; GENERIC-NEXT: kmovd %k0, %eax
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
+; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
; GENERIC-NEXT: # kill: %al<def> %al<kill> %eax<kill>
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8592,7 +8592,7 @@ define <16 x i32> @test_vbroadcast() {
; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k1
+; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:1.00]
; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list