[llvm] r319900 - [X86][AVX512] Tag BROADCAST instruction scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 6 07:48:41 PST 2017
Author: rksimon
Date: Wed Dec 6 07:48:40 2017
New Revision: 319900
URL: http://llvm.org/viewvc/llvm-project?rev=319900&view=rev
Log:
[X86][AVX512] Tag BROADCAST instruction scheduler classes
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-bugfix-23634.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319900&r1=319899&r2=319900&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Dec 6 07:48:40 2017
@@ -274,7 +274,7 @@ multiclass AVX512_maskable_split<bits<8>
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
- InstrItinClass itin = NoItinerary,
+ InstrItinClass itin,
bit IsCommutable = 0, bit IsKCommutable = 0,
SDNode Select = vselect> :
AVX512_maskable_custom<O, F, Outs, Ins,
@@ -295,7 +295,7 @@ multiclass AVX512_maskable<bits<8> O, Fo
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
- InstrItinClass itin = NoItinerary,
+ InstrItinClass itin,
bit IsCommutable = 0, bit IsKCommutable = 0,
SDNode Select = vselect> :
AVX512_maskable_common<O, F, _, Outs, Ins,
@@ -1149,6 +1149,7 @@ multiclass avx512_broadcast_scalar<bits<
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
+ SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo MaskInfo,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
@@ -1164,8 +1165,8 @@ multiclass avx512_broadcast_rm_split<bit
(MaskInfo.VT
(bitconvert
(DestInfo.VT
- (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))))>,
- T8PD, EVEX;
+ (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))))),
+ NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
let mayLoad = 1 in
defm m : AVX512_maskable_split<opc, MRMSrcMem, MaskInfo,
(outs MaskInfo.RC:$dst),
@@ -1177,8 +1178,9 @@ multiclass avx512_broadcast_rm_split<bit
(MaskInfo.VT
(bitconvert
(DestInfo.VT (X86VBroadcast
- (SrcInfo.ScalarLdFrag addr:$src)))))>,
- T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>;
+ (SrcInfo.ScalarLdFrag addr:$src))))),
+ NoItinerary>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>,
+ Sched<[SchedRM]>;
}
def : Pat<(MaskInfo.VT
@@ -1209,36 +1211,43 @@ multiclass avx512_broadcast_rm_split<bit
// Helper class to force mask and broadcast result to same type.
multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
+ SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo> :
- avx512_broadcast_rm_split<opc, OpcodeStr, DestInfo, DestInfo, SrcInfo>;
+ avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
+ DestInfo, DestInfo, SrcInfo>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
+ WriteFShuffle256Ld, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
- EVEX_V512;
+ EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
+ WriteFShuffle256Ld, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
- EVEX_V256;
+ EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
+ WriteFShuffle256Ld, _.info512, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info512, _.info128>,
EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
+ WriteFShuffle256Ld, _.info256, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info256, _.info128>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
+ WriteFShuffle256Ld, _.info128, _.info128>,
avx512_broadcast_scalar<opc, OpcodeStr, _.info128, _.info128>,
EVEX_V128;
}
@@ -1253,17 +1262,18 @@ def : Pat<(int_x86_avx512_vbroadcast_ss_
def : Pat<(int_x86_avx512_vbroadcast_sd_512 addr:$src),
(VBROADCASTSDZm addr:$src)>;
-multiclass avx512_int_broadcast_reg<bits<8> opc, X86VectorVTInfo _,
- SDPatternOperator OpNode,
+multiclass avx512_int_broadcast_reg<bits<8> opc, SchedWrite SchedRR,
+ X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC> {
let ExeDomain = _.ExeDomain in
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins SrcRC:$src),
"vpbroadcast"##_.Suffix, "$src", "$src",
- (_.VT (OpNode SrcRC:$src))>, T8PD, EVEX;
+ (_.VT (OpNode SrcRC:$src)), NoItinerary>, T8PD, EVEX,
+ Sched<[SchedRR]>;
}
-multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name,
+multiclass avx512_int_broadcastbw_reg<bits<8> opc, string Name, SchedWrite SchedRR,
X86VectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg> {
let hasSideEffects = 0, ExeDomain = _.ExeDomain in
@@ -1272,7 +1282,7 @@ multiclass avx512_int_broadcastbw_reg<bi
!con((ins _.RC:$src0, _.KRCWM:$mask), (ins GR32:$src)),
!con((ins _.KRCWM:$mask), (ins GR32:$src)),
"vpbroadcast"##_.Suffix, "$src", "$src", [], [], [],
- "$src0 = $dst">, T8PD, EVEX;
+ "$src0 = $dst", NoItinerary>, T8PD, EVEX, Sched<[SchedRR]>;
def : Pat <(_.VT (OpNode SrcRC:$src)),
(!cast<Instruction>(Name#r)
@@ -1291,13 +1301,13 @@ multiclass avx512_int_broadcastbw_reg_vl
AVX512VLVectorVTInfo _, SDPatternOperator OpNode,
RegisterClass SrcRC, SubRegIndex Subreg, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, _.info512, OpNode, SrcRC,
- Subreg>, EVEX_V512;
+ defm Z : avx512_int_broadcastbw_reg<opc, Name#Z, WriteShuffle256, _.info512,
+ OpNode, SrcRC, Subreg>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, _.info256, OpNode,
- SrcRC, Subreg>, EVEX_V256;
- defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, _.info128, OpNode,
- SrcRC, Subreg>, EVEX_V128;
+ defm Z256 : avx512_int_broadcastbw_reg<opc, Name#Z256, WriteShuffle256,
+ _.info256, OpNode, SrcRC, Subreg>, EVEX_V256;
+ defm Z128 : avx512_int_broadcastbw_reg<opc, Name#Z128, WriteShuffle,
+ _.info128, OpNode, SrcRC, Subreg>, EVEX_V128;
}
}
@@ -1305,10 +1315,13 @@ multiclass avx512_int_broadcast_reg_vl<b
SDPatternOperator OpNode,
RegisterClass SrcRC, Predicate prd> {
let Predicates = [prd] in
- defm Z : avx512_int_broadcast_reg<opc, _.info512, OpNode, SrcRC>, EVEX_V512;
+ defm Z : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info512, OpNode,
+ SrcRC>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_int_broadcast_reg<opc, _.info256, OpNode, SrcRC>, EVEX_V256;
- defm Z128 : avx512_int_broadcast_reg<opc, _.info128, OpNode, SrcRC>, EVEX_V128;
+ defm Z256 : avx512_int_broadcast_reg<opc, WriteShuffle256, _.info256, OpNode,
+ SrcRC>, EVEX_V256;
+ defm Z128 : avx512_int_broadcast_reg<opc, WriteShuffle, _.info128, OpNode,
+ SrcRC>, EVEX_V128;
}
}
@@ -1339,17 +1352,20 @@ multiclass avx512_int_broadcast_rm_lower
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd> {
let Predicates = [prd] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, _.info512, _.info128>,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
+ WriteShuffle256Ld, _.info512, _.info128>,
avx512_int_broadcast_rm_lowering<_.info512, _.info256>,
EVEX_V512;
// Defined separately to avoid redefinition.
defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>;
}
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _.info256, _.info128>,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
+ WriteShuffle256Ld, _.info256, _.info128>,
avx512_int_broadcast_rm_lowering<_.info256, _.info256>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _.info128, _.info128>,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
+ WriteShuffleLd, _.info128, _.info128>,
EVEX_V128;
}
}
@@ -1368,8 +1384,9 @@ multiclass avx512_subvec_broadcast_rm<bi
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
- AVX5128IBase, EVEX;
+ (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
+ NoItinerary>, AVX5128IBase, EVEX,
+ Sched<[WriteShuffleLd]>;
}
// This should be used for the AVX512DQ broadcast instructions. It disables
@@ -1382,8 +1399,9 @@ multiclass avx512_subvec_broadcast_rm_dq
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (X86SubVBroadcast
- (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
- AVX5128IBase, EVEX;
+ (_Src.VT (bitconvert (_Src.LdFrag addr:$src))))),
+ NoItinerary>, AVX5128IBase, EVEX,
+ Sched<[WriteShuffleLd]>;
}
let Predicates = [HasAVX512] in {
@@ -1538,11 +1556,13 @@ defm VBROADCASTF32X8 : avx512_subvec_bro
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
- defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info512,
+ defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
+ WriteShuffle256Ld, _Dst.info512,
_Src.info512, _Src.info128, null_frag>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
- defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info256,
+ defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
+ WriteShuffle256Ld, _Dst.info256,
_Src.info256, _Src.info128, null_frag>,
EVEX_V256;
}
@@ -1552,7 +1572,8 @@ multiclass avx512_common_broadcast_i32x2
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
- defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, _Dst.info128,
+ defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
+ WriteShuffleLd, _Dst.info128,
_Src.info128, _Src.info128, null_frag>,
EVEX_V128;
}
@@ -1586,7 +1607,8 @@ multiclass avx512_mask_broadcastm<bits<8
X86VectorVTInfo _, RegisterClass KRC> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.RC:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX;
+ [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))],
+ IIC_SSE_PSHUF_RI>, EVEX, Sched<[WriteShuffle]>;
}
multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
Modified: llvm/trunk/test/CodeGen/X86/avx512-bugfix-23634.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-bugfix-23634.ll?rev=319900&r1=319899&r2=319900&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-bugfix-23634.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-bugfix-23634.ll Wed Dec 6 07:48:40 2017
@@ -16,8 +16,8 @@ define void @f_fu(float* %ret, float* %
; CHECK-NEXT: movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vmovdqa32 {{.*}}(%rip), %zmm1 {%k1}
-; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: vmovups %zmm0, (%rdi)
; CHECK-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319900&r1=319899&r2=319900&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Dec 6 07:48:40 2017
@@ -2831,7 +2831,7 @@ define <16 x float> @ubto16f32(<16 x i32
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2853,10 +2853,10 @@ define <16 x double> @ubto16f64(<16 x i3
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 # sched: [3:1.00]
; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: kshiftrw $8, %k1, %k1
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2881,7 +2881,7 @@ define <8 x float> @ubto8f32(<8 x i32> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2902,7 +2902,7 @@ define <8 x double> @ubto8f64(<8 x i32>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2923,7 +2923,7 @@ define <4 x float> @ubto4f32(<4 x i32> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2944,7 +2944,7 @@ define <4 x double> @ubto4f64(<4 x i32>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2966,7 +2966,7 @@ define <2 x float> @ubto2f32(<2 x i32> %
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
; GENERIC-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -4299,7 +4299,7 @@ define <16 x i32> @zext_16i1_to_16xi32
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16i1_to_16xi32:
@@ -4316,7 +4316,7 @@ define <8 x i64> @zext_8i1_to_8xi64(i8
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: kmovd %edi, %k1
-; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8i1_to_8xi64:
@@ -4767,7 +4767,7 @@ define <4 x i32> @zext_4xi1_to_4x32(<4 x
; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_4xi1_to_4x32:
@@ -8279,7 +8279,7 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) {
define <16 x i32> @_inreg16xi32(i32 %a) {
; GENERIC-LABEL: _inreg16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd %edi, %zmm0
+; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _inreg16xi32:
@@ -8294,7 +8294,7 @@ define <16 x i32> @_inreg16xi32(i32 %a
define <8 x i64> @_inreg8xi64(i64 %a) {
; GENERIC-LABEL: _inreg8xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0
+; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _inreg8xi64:
@@ -8309,7 +8309,7 @@ define <8 x i64> @_inreg8xi64(i64 %a)
define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
; GENERIC-LABEL: _ss16xfloat_v4:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_v4:
@@ -8323,7 +8323,7 @@ define <16 x float> @_ss16xfloat_v4(<4
define <16 x float> @_inreg16xfloat(float %a) {
; GENERIC-LABEL: _inreg16xfloat:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _inreg16xfloat:
@@ -8340,7 +8340,7 @@ define <16 x float> @_ss16xfloat_mask(
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm3, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8363,7 +8363,7 @@ define <16 x float> @_ss16xfloat_maskz
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_maskz:
@@ -8382,7 +8382,7 @@ define <16 x float> @_ss16xfloat_maskz
define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
; GENERIC-LABEL: _ss16xfloat_load:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_load:
@@ -8400,7 +8400,7 @@ define <16 x float> @_ss16xfloat_mask_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_mask_load:
@@ -8422,7 +8422,7 @@ define <16 x float> @_ss16xfloat_maskz
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
+; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _ss16xfloat_maskz_load:
@@ -8442,7 +8442,7 @@ define <16 x float> @_ss16xfloat_maskz
define <8 x double> @_inreg8xdouble(double %a) {
; GENERIC-LABEL: _inreg8xdouble:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _inreg8xdouble:
@@ -8459,7 +8459,7 @@ define <8 x double> @_sd8xdouble_mask(
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm3, %ymm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8482,7 +8482,7 @@ define <8 x double> @_sd8xdouble_maskz
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_maskz:
@@ -8501,7 +8501,7 @@ define <8 x double> @_sd8xdouble_maskz
define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
; GENERIC-LABEL: _sd8xdouble_load:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_load:
@@ -8519,7 +8519,7 @@ define <8 x double> @_sd8xdouble_mask_
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm2, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_mask_load:
@@ -8541,7 +8541,7 @@ define <8 x double> @_sd8xdouble_maskz
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpneqd %ymm1, %ymm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
+; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _sd8xdouble_maskz_load:
@@ -8561,7 +8561,7 @@ define <8 x double> @_sd8xdouble_maskz
define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
; GENERIC-LABEL: _xmm16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _xmm16xi32:
@@ -8575,7 +8575,7 @@ define <16 x i32> @_xmm16xi32(<16 x i3
define <16 x float> @_xmm16xfloat(<16 x float> %a) {
; GENERIC-LABEL: _xmm16xfloat:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _xmm16xfloat:
@@ -8617,7 +8617,7 @@ entry:
define <8 x double> @test_set1_pd(double %d) #2 {
; GENERIC-LABEL: test_set1_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_set1_pd:
@@ -8639,7 +8639,7 @@ entry:
define <8 x i64> @test_set1_epi64(i64 %d) #2 {
; GENERIC-LABEL: test_set1_epi64:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0
+; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_set1_epi64:
@@ -8661,7 +8661,7 @@ entry:
define <16 x float> @test_set1_ps(float %f) #2 {
; GENERIC-LABEL: test_set1_ps:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_set1_ps:
@@ -8691,7 +8691,7 @@ entry:
define <16 x i32> @test_set1_epi32(i32 %f) #2 {
; GENERIC-LABEL: test_set1_epi32:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpbroadcastd %edi, %zmm0
+; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_set1_epi32:
@@ -8723,7 +8723,7 @@ entry:
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
; GENERIC-LABEL: test_mm512_broadcastsd_pd:
; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_mm512_broadcastsd_pd:
@@ -8746,7 +8746,7 @@ entry:
define <16 x float> @suff_test1(<8 x float>%a) {
; GENERIC-LABEL: suff_test1:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: suff_test1:
@@ -8760,7 +8760,7 @@ define <16 x float> @suff_test1(<8 x flo
define <8 x double> @suff_test2(<4 x double>%a) {
; GENERIC-LABEL: suff_test2:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: suff_test2:
@@ -8774,7 +8774,7 @@ define <8 x double> @suff_test2(<4 x dou
define <64 x i8> @_invec32xi8(<32 x i8>%a) {
; GENERIC-LABEL: _invec32xi8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0
+; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec32xi8:
@@ -8788,7 +8788,7 @@ define <64 x i8> @_invec32xi8(<32 x i8>%
define <32 x i16> @_invec16xi16(<16 x i16>%a) {
; GENERIC-LABEL: _invec16xi16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0
+; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec16xi16:
@@ -8802,7 +8802,7 @@ define <32 x i16> @_invec16xi16(<16 x i1
define <16 x i32> @_invec8xi32(<8 x i32>%a) {
; GENERIC-LABEL: _invec8xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec8xi32:
@@ -8816,7 +8816,7 @@ define <16 x i32> @_invec8xi32(<8 x i32>
define <8 x i64> @_invec4xi64(<4 x i64>%a) {
; GENERIC-LABEL: _invec4xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0
+; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: _invec4xi64:
@@ -8837,7 +8837,8 @@ define <16 x float> @broadcast_ss_spill(
; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
; GENERIC-NEXT: # sched: [5:1.00]
; GENERIC-NEXT: callq func_f32
-; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload
+; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
+; GENERIC-NEXT: # sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -8870,7 +8871,8 @@ define <8 x double> @broadcast_sd_spill(
; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00]
; GENERIC-NEXT: # sched: [5:1.00]
; GENERIC-NEXT: callq func_f64
-; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload
+; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00]
+; GENERIC-NEXT: # sched: [5:1.00]
; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list