[llvm] r324820 - [X86] Extend inputs with elements smaller than i32 to sint_to_fp/uint_to_fp before type legalization.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 10 09:58:59 PST 2018
Author: ctopper
Date: Sat Feb 10 09:58:58 2018
New Revision: 324820
URL: http://llvm.org/viewvc/llvm-project?rev=324820&view=rev
Log:
[X86] Extend inputs with elements smaller than i32 to sint_to_fp/uint_to_fp before type legalization.
This prevents extends of masks from being introduced during lowering, where it becomes difficult to combine them out.
There are a few oddities in here.
We sometimes concatenate two k-registers produced by two compares, sign_extend the combined pair, and then extract the two halves. This worked better previously because the sign_extend wasn't created until after the sint_to_fp was split, which led to a split sign_extend being created.
We probably also need to custom type legalize (v2i32 (sext v2i1)) via widening.
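As an illustration, here is a minimal IR sketch of the affected pattern (adapted from the sbto8f32 test below; the function name is only for the example):

define <8 x float> @mask_to_fp(<8 x float> %a) {
  ; The compare produces an <8 x i1> mask.
  %cmp = fcmp ogt <8 x float> %a, zeroinitializer
  ; sitofp of the i1 mask is the conversion this patch targets.
  %res = sitofp <8 x i1> %cmp to <8 x float>
  ret <8 x float> %res
}

With this change, DAG combine rewrites the conversion to (sint_to_fp (sign_extend vXi1 to vXi32)) before type legalization, so the mask extend can fold into the compare lowering instead of surviving as a separate k-register expansion.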
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324820&r1=324819&r2=324820&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Feb 10 09:58:58 2018
@@ -36836,11 +36836,11 @@ static SDValue combineUIntToFP(SDNode *N
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- EVT InSVT = InVT.getScalarType();
+ // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
- if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
@@ -36870,14 +36870,11 @@ static SDValue combineSIntToFP(SDNode *N
SDValue Op0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
- EVT InSVT = InVT.getScalarType();
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() &&
- (InSVT == MVT::i8 || InSVT == MVT::i16 ||
- (InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=324820&r1=324819&r2=324820&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Sat Feb 10 09:58:58 2018
@@ -1527,45 +1527,48 @@ define <16 x double> @sbto16f64(<16 x do
; NOVLDQ-LABEL: sbto16f64:
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
-; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT: kunpckbw %k0, %k1, %k1
; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; NOVLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sbto16f64:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
-; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
-; VLDQ-NEXT: vpmovm2d %k1, %ymm0
-; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm1
+; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
+; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
+; VLDQ-NEXT: kunpckbw %k0, %k1, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %zmm1
+; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; VLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sbto16f64:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
-; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
-; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k2} {z}
-; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
+; VLNODQ-NEXT: kunpckbw %k0, %k1, %k1
+; VLNODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; VLNODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLNODQ-NEXT: retq
;
; DQNOVL-LABEL: sbto16f64:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; DQNOVL-NEXT: vcmpltpd %zmm1, %zmm2, %k0
-; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm2, %k1
-; DQNOVL-NEXT: vpmovm2d %k1, %zmm0
-; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
+; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm2, %k0
+; DQNOVL-NEXT: vcmpltpd %zmm1, %zmm2, %k1
+; DQNOVL-NEXT: kunpckbw %k0, %k1, %k0
; DQNOVL-NEXT: vpmovm2d %k0, %zmm1
+; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0
+; DQNOVL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm1
; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <16 x double> %a, zeroinitializer
@@ -1612,96 +1615,65 @@ define <8 x double> @sbto8f64(<8 x doubl
}
define <8 x float> @sbto8f32(<8 x float> %a) {
-; NOVLDQ-LABEL: sbto8f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; NOVLDQ-NEXT: retq
+; NOVL-LABEL: sbto8f32:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
+; NOVL-NEXT: retq
;
; VLDQ-LABEL: sbto8f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm0
+; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sbto8f32:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: sbto8f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; DQNOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vcmpltps %zmm0, %zmm1, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
-; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
%1 = sitofp <8 x i1> %cmpres to <8 x float>
ret <8 x float> %1
}
define <4 x float> @sbto4f32(<4 x float> %a) {
-; NOVLDQ-LABEL: sbto4f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
+; NOVL-LABEL: sbto4f32:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
+; NOVL-NEXT: retq
;
; VLDQ-LABEL: sbto4f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sbto4f32:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: sbto4f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; DQNOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vcmpltps %zmm0, %zmm1, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <4 x float> %a, zeroinitializer
%1 = sitofp <4 x i1> %cmpres to <4 x float>
ret <4 x float> %1
}
define <4 x double> @sbto4f64(<4 x double> %a) {
-; NOVLDQ-LABEL: sbto4f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
-; NOVLDQ-NEXT: retq
+; NOVL-LABEL: sbto4f64:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; NOVL-NEXT: vpmovqd %zmm0, %ymm0
+; NOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
+; NOVL-NEXT: retq
;
; VLDQ-LABEL: sbto4f64:
; VLDQ: # %bb.0:
@@ -1719,36 +1691,25 @@ define <4 x double> @sbto4f64(<4 x doubl
; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: sbto4f64:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; DQNOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <4 x double> %a, zeroinitializer
%1 = sitofp <4 x i1> %cmpres to <4 x double>
ret <4 x double> %1
}
define <2 x float> @sbto2f32(<2 x float> %a) {
-; NOVLDQ-LABEL: sbto2f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
+; NOVL-LABEL: sbto2f32:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
+; NOVL-NEXT: retq
;
; VLDQ-LABEL: sbto2f32:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vpmovm2q %k0, %xmm0
+; VLDQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
@@ -1757,61 +1718,39 @@ define <2 x float> @sbto2f32(<2 x float>
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: sbto2f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; DQNOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vcmpltps %zmm0, %zmm1, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x float>
ret <2 x float> %1
}
define <2 x double> @sbto2f64(<2 x double> %a) {
-; NOVLDQ-LABEL: sbto2f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
+; NOVL-LABEL: sbto2f64:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; NOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; NOVL-NEXT: retq
;
; VLDQ-LABEL: sbto2f64:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sbto2f64:
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; VLNODQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0
; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: sbto2f64:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; DQNOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vcmpltpd %zmm0, %zmm1, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vcvtdq2pd %xmm0, %xmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
%1 = sitofp <2 x i1> %cmpres to <2 x double>
ret <2 x double> %1
@@ -1976,54 +1915,34 @@ define <16 x float> @ubto16f32(<16 x i32
}
define <16 x double> @ubto16f64(<16 x i32> %a) {
-; NOVLDQ-LABEL: ubto16f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; NOVLDQ-NEXT: kshiftrw $8, %k1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
-; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
-; NOVLDQ-NEXT: retq
+; NODQ-LABEL: ubto16f64:
+; NODQ: # %bb.0:
+; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
+; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
+; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
+; NODQ-NEXT: retq
;
; VLDQ-LABEL: ubto16f64:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vpmovd2m %zmm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm0
-; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLDQ-NEXT: kshiftrw $8, %k0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm1
-; VLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
+; VLDQ-NEXT: vpmovm2d %k0, %zmm0
+; VLDQ-NEXT: vpsrld $31, %zmm0, %zmm1
+; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm0
+; VLDQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT: retq
;
-; VLNODQ-LABEL: ubto16f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLNODQ-NEXT: kshiftrw $8, %k1, %k1
-; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %ymm1, %ymm1
-; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
-; VLNODQ-NEXT: retq
-;
; DQNOVL-LABEL: ubto16f64:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: vpmovd2m %zmm0, %k0
; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %ymm0, %ymm0
-; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; DQNOVL-NEXT: kshiftrw $8, %k0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm1
-; DQNOVL-NEXT: vpsrld $31, %ymm1, %ymm1
+; DQNOVL-NEXT: vpsrld $31, %zmm0, %zmm1
+; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm0
+; DQNOVL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; DQNOVL-NEXT: vcvtdq2pd %ymm1, %zmm1
; DQNOVL-NEXT: retq
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2032,268 +1951,95 @@ define <16 x double> @ubto16f64(<16 x i3
}
define <8 x float> @ubto8f32(<8 x i32> %a) {
-; NOVLDQ-LABEL: ubto8f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto8f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %ymm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm0
-; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto8f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto8f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; DQNOVL-NEXT: vpmovd2m %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %ymm0, %ymm0
-; DQNOVL-NEXT: vcvtdq2ps %ymm0, %ymm0
-; DQNOVL-NEXT: retq
+; NOVL-LABEL: ubto8f32:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
+; NOVL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
+; NOVL-NEXT: vpand %ymm1, %ymm0, %ymm0
+; NOVL-NEXT: retq
+;
+; VL-LABEL: ubto8f32:
+; VL: # %bb.0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
+; VL-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; VL-NEXT: retq
%mask = icmp slt <8 x i32> %a, zeroinitializer
%1 = uitofp <8 x i1> %mask to <8 x float>
ret <8 x float> %1
}
define <8 x double> @ubto8f64(<8 x i32> %a) {
-; NOVLDQ-LABEL: ubto8f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto8f64:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %ymm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %ymm0
-; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto8f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
-; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto8f64:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; DQNOVL-NEXT: vpmovd2m %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %ymm0, %ymm0
-; DQNOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
-; DQNOVL-NEXT: retq
+; ALL-LABEL: ubto8f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0
+; ALL-NEXT: vpsrld $31, %ymm0, %ymm0
+; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
+; ALL-NEXT: retq
%mask = icmp slt <8 x i32> %a, zeroinitializer
%1 = uitofp <8 x i1> %mask to <8 x double>
ret <8 x double> %1
}
define <4 x float> @ubto4f32(<4 x i32> %a) {
-; NOVLDQ-LABEL: ubto4f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto4f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %xmm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
-; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto4f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto4f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; DQNOVL-NEXT: vpmovd2m %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %xmm0, %xmm0
-; DQNOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
+; NOVL-LABEL: ubto4f32:
+; NOVL: # %bb.0:
+; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; NOVL-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1065353216,1065353216,1065353216,1065353216]
+; NOVL-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NOVL-NEXT: retq
+;
+; VL-LABEL: ubto4f32:
+; VL: # %bb.0:
+; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; VL-NEXT: retq
%mask = icmp slt <4 x i32> %a, zeroinitializer
%1 = uitofp <4 x i1> %mask to <4 x float>
ret <4 x float> %1
}
define <4 x double> @ubto4f64(<4 x i32> %a) {
-; NOVLDQ-LABEL: ubto4f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto4f64:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %xmm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
-; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto4f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto4f64:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; DQNOVL-NEXT: vpmovd2m %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %xmm0, %xmm0
-; DQNOVL-NEXT: vcvtdq2pd %xmm0, %ymm0
-; DQNOVL-NEXT: retq
+; ALL-LABEL: ubto4f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; ALL-NEXT: vpsrld $31, %xmm0, %xmm0
+; ALL-NEXT: vcvtdq2pd %xmm0, %ymm0
+; ALL-NEXT: retq
%mask = icmp slt <4 x i32> %a, zeroinitializer
%1 = uitofp <4 x i1> %mask to <4 x double>
ret <4 x double> %1
}
define <2 x float> @ubto2f32(<2 x i32> %a) {
-; NOVLDQ-LABEL: ubto2f32:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto2f32:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
-; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto2f32:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto2f32:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; DQNOVL-NEXT: vptestmq %zmm0, %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %xmm0, %xmm0
-; DQNOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
+; ALL-LABEL: ubto2f32:
+; ALL: # %bb.0:
+; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; ALL-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0
+; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; ALL-NEXT: retq
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float>
ret <2 x float> %1
}
define <2 x double> @ubto2f64(<2 x i32> %a) {
-; NOVLDQ-LABEL: ubto2f64:
-; NOVLDQ: # %bb.0:
-; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
-; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; NOVLDQ-NEXT: vcvtudq2pd %ymm0, %zmm0
-; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVLDQ-NEXT: vzeroupper
-; NOVLDQ-NEXT: retq
-;
-; VLDQ-LABEL: ubto2f64:
-; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
-; VLDQ-NEXT: vpmovm2d %k0, %xmm0
-; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
-; VLDQ-NEXT: retq
-;
-; VLNODQ-LABEL: ubto2f64:
-; VLNODQ: # %bb.0:
-; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
-; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
-; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0
-; VLNODQ-NEXT: retq
-;
-; DQNOVL-LABEL: ubto2f64:
-; DQNOVL: # %bb.0:
-; DQNOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; DQNOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; DQNOVL-NEXT: vptestmq %zmm0, %zmm0, %k0
-; DQNOVL-NEXT: vpmovm2d %k0, %zmm0
-; DQNOVL-NEXT: vpsrld $31, %xmm0, %xmm0
-; DQNOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
-; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; DQNOVL-NEXT: vzeroupper
-; DQNOVL-NEXT: retq
+; ALL-LABEL: ubto2f64:
+; ALL: # %bb.0:
+; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; ALL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
+; ALL-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0
+; ALL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; ALL-NEXT: vcvtdq2pd %xmm0, %xmm0
+; ALL-NEXT: retq
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
ret <2 x double> %1
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324820&r1=324819&r2=324820&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Sat Feb 10 09:58:58 2018
@@ -2427,22 +2427,24 @@ define <16 x double> @sbto16f64(<16 x do
; GENERIC-LABEL: sbto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
+; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
+; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k1, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
+; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
+; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
+; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25]
+; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
+; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <16 x double> %a, zeroinitializer
@@ -2475,16 +2477,14 @@ define <8 x float> @sbto8f32(<8 x float>
; GENERIC-LABEL: sbto8f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto8f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.33]
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <8 x float> %a, zeroinitializer
@@ -2496,16 +2496,14 @@ define <4 x float> @sbto4f32(<4 x float>
; GENERIC-LABEL: sbto4f32:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto4f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <4 x float> %a, zeroinitializer
@@ -2539,7 +2537,8 @@ define <2 x float> @sbto2f32(<2 x float>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2547,7 +2546,8 @@ define <2 x float> @sbto2f32(<2 x float>
; SKX: # %bb.0:
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x float> %a, zeroinitializer
@@ -2559,16 +2559,16 @@ define <2 x double> @sbto2f64(<2 x doubl
; GENERIC-LABEL: sbto2f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: sbto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%cmpres = fcmp ogt <2 x double> %a, zeroinitializer
@@ -2809,24 +2809,20 @@ define <16 x double> @ubto16f64(<16 x i3
; GENERIC-LABEL: ubto16f64:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [3:1.00]
+; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f64:
; SKX: # %bb.0:
; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:0.50]
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:0.50]
+; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
+; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2837,18 +2833,16 @@ define <16 x double> @ubto16f64(<16 x i3
define <8 x float> @ubto8f32(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
+; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <8 x i32> %a, zeroinitializer
%1 = uitofp <8 x i1> %mask to <8 x float>
@@ -2858,16 +2852,16 @@ define <8 x float> @ubto8f32(<8 x i32> %
define <8 x double> @ubto8f64(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -2879,18 +2873,16 @@ define <8 x double> @ubto8f64(<8 x i32>
define <4 x float> @ubto4f32(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <4 x i32> %a, zeroinitializer
%1 = uitofp <4 x i1> %mask to <4 x float>
@@ -2900,16 +2892,16 @@ define <4 x float> @ubto4f32(<4 x i32> %
define <4 x double> @ubto4f64(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -2923,20 +2915,18 @@ define <2 x float> @ubto2f32(<2 x i32> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto2f32:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float>
@@ -2948,20 +2938,20 @@ define <2 x double> @ubto2f64(<2 x i32>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
+; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
+; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto2f64:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
+; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
+; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
Modified: llvm/trunk/test/CodeGen/X86/sse-fsignum.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-fsignum.ll?rev=324820&r1=324819&r2=324820&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-fsignum.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-fsignum.ll Sat Feb 10 09:58:58 2018
@@ -10,44 +10,17 @@
;
define void @signum32a(<4 x float>*) {
-; AVX1-LABEL: signum32a:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %xmm0
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vcvtdq2ps %xmm2, %xmm2
-; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX1-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vmovaps %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signum32a:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %xmm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vcvtdq2ps %xmm2, %xmm2
-; AVX2-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX2-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vmovaps %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: signum32a:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vmovaps (%rdi), %xmm0
-; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2ps %xmm2, %xmm2
-; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
-; AVX512F-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT: vmovaps %xmm0, (%rdi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
+; AVX-LABEL: signum32a:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vcmpltps %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vcvtdq2ps %xmm2, %xmm2
+; AVX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vmovaps %xmm0, (%rdi)
+; AVX-NEXT: retq
entry:
%1 = load <4 x float>, <4 x float>* %0
%2 = fcmp olt <4 x float> %1, zeroinitializer
@@ -60,48 +33,19 @@ entry:
}
define void @signum64a(<2 x double>*) {
-; AVX1-LABEL: signum64a:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovapd (%rdi), %xmm0
-; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX1-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX1-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vmovapd %xmm0, (%rdi)
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signum64a:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovapd (%rdi), %xmm0
-; AVX2-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX2-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX2-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX2-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vmovapd %xmm0, (%rdi)
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: signum64a:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vmovapd (%rdi), %xmm0
-; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2pd %xmm2, %xmm2
-; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2pd %xmm0, %xmm0
-; AVX512F-NEXT: vsubpd %xmm0, %xmm2, %xmm0
-; AVX512F-NEXT: vmovapd %xmm0, (%rdi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
+; AVX-LABEL: signum64a:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovapd (%rdi), %xmm0
+; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vcmpltpd %xmm1, %xmm0, %xmm2
+; AVX-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; AVX-NEXT: vcvtdq2pd %xmm2, %xmm2
+; AVX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
+; AVX-NEXT: vsubpd %xmm0, %xmm2, %xmm0
+; AVX-NEXT: vmovapd %xmm0, (%rdi)
+; AVX-NEXT: retq
entry:
%1 = load <2 x double>, <2 x double>* %0
%2 = fcmp olt <2 x double> %1, zeroinitializer
@@ -118,46 +62,18 @@ entry:
;
define void @signum32b(<8 x float>*) {
-; AVX1-LABEL: signum32b:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovaps (%rdi), %ymm0
-; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX1-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
-; AVX1-NEXT: vcvtdq2ps %ymm2, %ymm2
-; AVX1-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX1-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX1-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vmovaps %ymm0, (%rdi)
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: signum32b:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovaps (%rdi), %ymm0
-; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
-; AVX2-NEXT: vcvtdq2ps %ymm2, %ymm2
-; AVX2-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX2-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vmovaps %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: signum32b:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vmovaps (%rdi), %ymm0
-; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltps %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2ps %ymm2, %ymm2
-; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vcvtdq2ps %ymm0, %ymm0
-; AVX512F-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vmovaps %ymm0, (%rdi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
+; AVX-LABEL: signum32b:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovaps (%rdi), %ymm0
+; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vcmpltps %ymm1, %ymm0, %ymm2
+; AVX-NEXT: vcvtdq2ps %ymm2, %ymm2
+; AVX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; AVX-NEXT: vmovaps %ymm0, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
entry:
%1 = load <8 x float>, <8 x float>* %0
%2 = fcmp olt <8 x float> %1, zeroinitializer
@@ -208,11 +124,11 @@ define void @signum64b(<4 x double>*) {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vmovapd (%rdi), %ymm0
; AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vcmpltpd %zmm1, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
+; AVX512F-NEXT: vcmpltpd %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT: vpmovqd %zmm2, %ymm2
; AVX512F-NEXT: vcvtdq2pd %xmm2, %ymm2
-; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512F-NEXT: vsubpd %ymm0, %ymm2, %ymm0
; AVX512F-NEXT: vmovapd %ymm0, (%rdi)