<div dir="ltr"><div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Sep 17, 2018 at 10:44 AM, Wei Mi <span dir="ltr"><<a href="mailto:wmi@google.com" target="_blank">wmi@google.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div dir="ltr"><div dir="ltr">We found a "cannot select" problem for the reduced testcase caused by the commit. Could you take a look?<div><br></div><div>/usr/local/google/home/wmi/<wbr>workarea/llvm-r324805/dbuild/<wbr>bin/clang -cc1 -triple x86_64-grtev4-linux-gnu -target-feature +avx512f -target-feature +avx512dq -target-feature +avx512vl -O3 -fexperimental-new-pass-<wbr>manager 1.cc -S</div></div></div></div></blockquote><div><br></div><div>Forget to mention, the problem exists in the head:</div><div><br></div><div>/usr/local/google/home/wmi/workarea/llvm-r342257/rbuild/bin/clang -cc1 -triple x86_64-grtev4-linux-gnu -target-feature +avx512f -target-feature +avx512dq -target-feature +avx512vl -O3 -fexperimental-new-pass-manager 1.cc -S<br></div><div><br></div><div>Same error.</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><div dir="ltr"><div dir="ltr"><div><br></div><div>1.cc is attached.</div><div><br></div><div><div>fatal error: error in backend: Cannot select: t51: v8i16 = sign_extend t50</div><div> t50: v8i1 = X86ISD::CMPM t10, t68, Constant:i8<4></div><div> t10: v8i64,ch = load<LD64[%scevgep89](align=8)<wbr>(tbaa=<0x563cb21c38b8>)> t0, t7, undef:i64</div><div> t7: i64 = add t2, t6</div><div> t2: i64,ch = CopyFromReg t0, Register:i64 %7</div><div> t1: i64 = Register %7</div><div> t6: i64 = shl t4, Constant:i8<3></div><div> t4: i64,ch = CopyFromReg t0, Register:i64 %8</div><div> t3: i64 = Register %8</div><div> t65: i8 = Constant<3></div><div> t9: i64 = undef</div><div> t68: v8i64 = bitcast 
t67</div><div> t67: v16i32 = BUILD_VECTOR Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0>, Constant:i32<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t66: i32 = Constant<0></div><div> t49: i8 = Constant<4></div><div><br></div><div>Thanks,</div><div>Wei.</div><div><div class="gmail-h5"><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div dir="ltr"><pre style="white-space:pre-wrap;color:rgb(0,0,0);text-decoration-style:initial;text-decoration-color:initial">Author: ctopper
Date: Sat Feb 10 00:06:52 2018
New Revision: 324805
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=324805&view=rev" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject?rev=324805&view=rev</a>
Log:
[X86] Legalize zero extends from vXi1 to vXi16/vXi32/vXi64 using a sign extend and a shift.
This avoids a constant pool load to create 1.
The int->float tests are showing conversions to mask and back. We probably need to widen inputs to sint_to_fp/uint_to_fp before type legalization.
Modified:
llvm/trunk/lib/Target/X86/X86I<wbr>SelLowering.cpp
llvm/trunk/test/CodeGen/X86/av<wbr>x512-cvt.ll
llvm/trunk/test/CodeGen/X86/av<wbr>x512-ext.ll
llvm/trunk/test/CodeGen/X86/av<wbr>x512-schedule.ll
llvm/trunk/test/CodeGen/X86/av<wbr>x512-vec-cmp.ll
llvm/trunk/test/CodeGen/X86/bi<wbr>tcast-int-to-vector-bool-zext.<wbr>ll
llvm/trunk/test/CodeGen/X86/pr<wbr>efer-avx256-mask-extend.ll
Modified: llvm/trunk/lib/Target/X86/X86I<wbr>SelLowering.cpp
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/lib/Target/X8<wbr>6/X86ISelLowering.cpp?rev=3248<wbr>05&r1=324804&r2=324805&view=<wbr>diff</a>
==============================<wbr>==============================<wbr>==================
--- llvm/trunk/lib/Target/X86/X86I<wbr>SelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86I<wbr>SelLowering.cpp Sat Feb 10 00:06:52 2018
@@ -16472,10 +16472,17 @@ static SDValue LowerZERO_EXTEND_Mask(SD
SDLoc DL(Op);
unsigned NumElts = VT.getVectorNumElements();
- // Extend VT if the scalar type is v8/v16 and BWI is not supported.
+ // For all vectors, but vXi8, we can just emit a sign_extend and a shift. This
+ // avoids a constant pool load.
+ if (VT.getVectorElementType() != MVT::i8) {
+ SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
+ return DAG.getNode(ISD::SRL, DL, VT, Extend,
+ DAG.getConstant(VT.getScalarSi<wbr>zeInBits() - 1, DL, VT));
+ }
+
+ // Extend VT if BWI is not supported.
MVT ExtVT = VT;
- if (!Subtarget.hasBWI() &&
- (VT.getVectorElementType().get<wbr>SizeInBits() <= 16)) {
+ if (!Subtarget.hasBWI()) {
// If v16i32 is to be avoided, we'll need to split and concatenate.
if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
return SplitAndExtendv16i1(ISD::ZERO_<wbr>EXTEND, VT, In, DL, DAG);
@@ -16499,9 +16506,9 @@ static SDValue LowerZERO_EXTEND_Mask(SD
SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);
- // Truncate if we had to extend i16/i8 above.
+ // Truncate if we had to extend above.
if (VT != ExtVT) {
- WideVT = MVT::getVectorVT(VT.getVectorE<wbr>lementType(), NumElts);
+ WideVT = MVT::getVectorVT(MVT::i8, NumElts);
SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
}
Modified: llvm/trunk/test/CodeGen/X86/av<wbr>x512-cvt.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/test/CodeGen/<wbr>X86/avx512-cvt.ll?rev=324805&<wbr>r1=324804&r2=324805&view=diff</a>
==============================<wbr>==============================<wbr>==================
--- llvm/trunk/test/CodeGen/X86/av<wbr>x512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/av<wbr>x512-cvt.ll Sat Feb 10 00:06:52 2018
@@ -1892,14 +1892,16 @@ define <16 x float> @ubto16f32(<16 x i32
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NODQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT: retq
;
; DQ-LABEL: ubto16f32:
; DQ: # %bb.0:
-; DQ-NEXT: vpmovd2m %zmm0, %k1
-; DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; DQ-NEXT: vpmovd2m %zmm0, %k0
+; DQ-NEXT: vpmovm2d %k0, %zmm0
+; DQ-NEXT: vpsrld $31, %zmm0, %zmm0
; DQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; DQ-NEXT: retq
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -1912,22 +1914,24 @@ define <16 x double> @ubto16f64(<16 x i3
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: movl {{.*}}(%rip), %eax
-; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; NOVLDQ-NEXT: kshiftrw $8, %k1, %k1
-; NOVLDQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
; NOVLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: ubto16f64:
; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %zmm0, %k1
-; VLDQ-NEXT: movl {{.*}}(%rip), %eax
-; VLDQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; VLDQ-NEXT: vpmovd2m %zmm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %ymm0
+; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; VLDQ-NEXT: kshiftrw $8, %k1, %k1
-; VLDQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; VLDQ-NEXT: kshiftrw $8, %k0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %ymm1
+; VLDQ-NEXT: vpsrld $31, %ymm1, %ymm1
; VLDQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLDQ-NEXT: retq
;
@@ -1935,22 +1939,25 @@ define <16 x double> @ubto16f64(<16 x i3
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; VLNODQ-NEXT: movl {{.*}}(%rip), %eax
-; VLNODQ-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; VLNODQ-NEXT: kshiftrw $8, %k1, %k1
-; VLNODQ-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z}
+; VLNODQ-NEXT: vmovdqa32 %ymm1, %ymm1 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %ymm1, %ymm1
; VLNODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ubto16f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: movl {{.*}}(%rip), %eax
-; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
-; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1
-; AVX512DQ-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512DQ-NEXT: kshiftrw $8, %k0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
+; AVX512DQ-NEXT: vpsrld $31, %ymm1, %ymm1
; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm1
; AVX512DQ-NEXT: retq
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -1964,14 +1971,16 @@ define <8 x float> @ubto8f32(<8 x i32> %
; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; NOVLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: ubto8f32:
; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %ymm0, %k1
-; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %ymm0
+; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLDQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; VLDQ-NEXT: retq
;
@@ -1979,15 +1988,18 @@ define <8 x float> @ubto8f32(<8 x i32> %
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLNODQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ubto8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX512DQ-NEXT: retq
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2001,14 +2013,16 @@ define <8 x double> @ubto8f64(<8 x i32>
; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: ubto8f64:
; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %ymm0, %k1
-; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLDQ-NEXT: vpmovd2m %ymm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %ymm0
+; VLDQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; VLDQ-NEXT: retq
;
@@ -2016,15 +2030,18 @@ define <8 x double> @ubto8f64(<8 x i32>
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
-; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; VLNODQ-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %ymm0, %ymm0
; VLNODQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ubto8f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; AVX512DQ-NEXT: retq
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2038,15 +2055,17 @@ define <4 x float> @ubto4f32(<4 x i32> %
; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; NOVLDQ-NEXT: vzeroupper
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: ubto4f32:
; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %xmm0, %k1
-; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
@@ -2054,15 +2073,18 @@ define <4 x float> @ubto4f32(<4 x i32> %
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ubto4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -2077,14 +2099,16 @@ define <4 x double> @ubto4f64(<4 x i32>
; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
-; NOVLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
; NOVLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: ubto4f64:
; VLDQ: # %bb.0:
-; VLDQ-NEXT: vpmovd2m %xmm0, %k1
-; VLDQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLDQ-NEXT: vpmovd2m %xmm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
; VLDQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; VLDQ-NEXT: retq
;
@@ -2092,15 +2116,18 @@ define <4 x double> @ubto4f64(<4 x i32>
; VLNODQ: # %bb.0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
; VLNODQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ubto4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX512DQ-NEXT: retq
%mask = icmp slt <4 x i32> %a, zeroinitializer
@@ -2109,49 +2136,98 @@ define <4 x double> @ubto4f64(<4 x i32>
}
define <2 x float> @ubto2f32(<2 x i32> %a) {
-; NOVL-LABEL: ubto2f32:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
-; NOVL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; NOVL-NEXT: vzeroupper
-; NOVL-NEXT: retq
-;
-; VL-LABEL: ubto2f32:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
-; VL-NEXT: vptestmq %xmm0, %xmm0, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VL-NEXT: vcvtdq2ps %xmm0, %xmm0
-; VL-NEXT: retq
+; NOVLDQ-LABEL: ubto2f32:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; NOVLDQ-NEXT: vzeroupper
+; NOVLDQ-NEXT: retq
+;
+; VLDQ-LABEL: ubto2f32:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; VLDQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto2f32:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
+; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; VLNODQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto2f32:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x float>
ret <2 x float> %1
}
define <2 x double> @ubto2f64(<2 x i32> %a) {
-; NOVL-LABEL: ubto2f64:
-; NOVL: # %bb.0:
-; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
-; NOVL-NEXT: vptestmq %zmm0, %zmm0, %k1
-; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
-; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
-; NOVL-NEXT: vzeroupper
-; NOVL-NEXT: retq
-;
-; VL-LABEL: ubto2f64:
-; VL: # %bb.0:
-; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
-; VL-NEXT: vptestmq %xmm0, %xmm0, %k1
-; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
-; VL-NEXT: vcvtudq2pd %xmm0, %xmm0
-; VL-NEXT: retq
+; NOVLDQ-LABEL: ubto2f64:
+; NOVLDQ: # %bb.0:
+; NOVLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; NOVLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; NOVLDQ-NEXT: vptestmq %zmm0, %zmm0, %k1
+; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NOVLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; NOVLDQ-NEXT: vcvtudq2pd %ymm0, %zmm0
+; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; NOVLDQ-NEXT: vzeroupper
+; NOVLDQ-NEXT: retq
+;
+; VLDQ-LABEL: ubto2f64:
+; VLDQ: # %bb.0:
+; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; VLDQ-NEXT: vptestmq %xmm0, %xmm0, %k0
+; VLDQ-NEXT: vpmovm2d %k0, %xmm0
+; VLDQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0
+; VLDQ-NEXT: retq
+;
+; VLNODQ-LABEL: ubto2f64:
+; VLNODQ: # %bb.0:
+; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; VLNODQ-NEXT: vptestmq %xmm0, %xmm0, %k1
+; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; VLNODQ-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0
+; VLNODQ-NEXT: retq
+;
+; AVX512DQ-LABEL: ubto2f64:
+; AVX512DQ: # %bb.0:
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>]
+; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT: vpsrld $31, %xmm0, %xmm0
+; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0
+; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512DQ-NEXT: vzeroupper
+; AVX512DQ-NEXT: retq
%mask = icmp ne <2 x i32> %a, zeroinitializer
%1 = uitofp <2 x i1> %mask to <2 x double>
ret <2 x double> %1
Modified: llvm/trunk/test/CodeGen/X86/av<wbr>x512-ext.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/test/CodeGen/<wbr>X86/avx512-ext.ll?rev=324805&<wbr>r1=324804&r2=324805&view=diff</a>
==============================<wbr>==============================<wbr>==================
--- llvm/trunk/test/CodeGen/X86/av<wbr>x512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/av<wbr>x512-ext.ll Sat Feb 10 00:06:52 2018
@@ -1294,13 +1294,15 @@ define <16 x i32> @zext_16i1_to_16xi32
; KNL-LABEL: zext_16i1_to_16xi32:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpsrld $31, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
; SKX-NEXT: retq
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
@@ -1311,13 +1313,15 @@ define <8 x i64> @zext_8i1_to_8xi64(i8
; KNL-LABEL: zext_8i1_to_8xi64:
; KNL: # %bb.0:
; KNL-NEXT: kmovw %edi, %k1
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT: vpsrlq $63, %zmm0, %zmm0
; KNL-NEXT: retq
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1
-; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: kmovd %edi, %k0
+; SKX-NEXT: vpmovm2q %k0, %zmm0
+; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0
; SKX-NEXT: retq
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
@@ -1685,8 +1689,9 @@ define <32 x i16> @zext_32xi1_to_32xi16(
;
; SKX-LABEL: zext_32xi1_to_32xi16:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
-; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0
+; SKX-NEXT: vpmovm2w %k0, %zmm0
+; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0
; SKX-NEXT: retq
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i16>
Modified: llvm/trunk/test/CodeGen/X86/av<wbr>x512-schedule.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/test/CodeGen/<wbr>X86/avx512-schedule.ll?rev=<wbr>324805&r1=324804&r2=324805&<wbr>view=diff</a>
==============================<wbr>==============================<wbr>==================
--- llvm/trunk/test/CodeGen/X86/av<wbr>x512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/av<wbr>x512-schedule.ll Sat Feb 10 00:06:52 2018
@@ -2787,15 +2787,17 @@ define <16 x float> @usto16f32(<16 x i16
define <16 x float> @ubto16f32(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2806,23 +2808,25 @@ define <16 x float> @ubto16f32(<16 x i32
define <16 x double> @ubto16f64(<16 x i32> %a) {
; GENERIC-LABEL: ubto16f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [1:1.00]
+; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: kshiftrw $8, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [1:1.00]
+; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto16f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: movl {{.*}}(%rip), %eax # sched: [5:0.50]
-; SKX-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z} # sched: [3:1.00]
+; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: kshiftrw $8, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd %eax, %ymm1 {%k1} {z} # sched: [3:1.00]
+; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm1 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm1, %ymm1 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <16 x i32> %a, zeroinitializer
@@ -2833,15 +2837,17 @@ define <16 x double> @ubto16f64(<16 x i3
define <8 x float> @ubto8f32(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2852,15 +2858,17 @@ define <8 x float> @ubto8f32(<8 x i32> %
define <8 x double> @ubto8f64(<8 x i32> %a) {
; GENERIC-LABEL: ubto8f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto8f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpmovd2m %ymm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <8 x i32> %a, zeroinitializer
@@ -2871,15 +2879,17 @@ define <8 x double> @ubto8f64(<8 x i32>
define <4 x float> @ubto4f32(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f32:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <4 x i32> %a, zeroinitializer
@@ -2890,15 +2900,17 @@ define <4 x float> @ubto4f32(<4 x i32> %
define <4 x double> @ubto4f64(<4 x i32> %a) {
; GENERIC-LABEL: ubto4f64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: ubto4f64:
; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp slt <4 x i32> %a, zeroinitializer
@@ -2911,8 +2923,9 @@ define <2 x float> @ubto2f32(<2 x i32> %
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2920,8 +2933,9 @@ define <2 x float> @ubto2f32(<2 x i32> %
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3<wbr>] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
@@ -2934,8 +2948,9 @@ define <2 x double> @ubto2f64(<2 x i32>
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00]
+; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2943,8 +2958,9 @@ define <2 x double> @ubto2f64(<2 x i32>
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp ne <2 x i32> %a, zeroinitializer
@@ -4244,14 +4260,16 @@ define <8 x double> @fpext_test(<8 x flo
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
; GENERIC-LABEL: zext_16i1_to_16xi32:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_16i1_to_16xi32:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
@@ -4261,14 +4279,16 @@ define <16 x i32> @zext_16i1_to_16xi32
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
; GENERIC-LABEL: zext_8i1_to_8xi64:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
+; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_8i1_to_8xi64:
; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
+; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
@@ -4653,14 +4673,16 @@ define <64 x i8> @zext_64xi1_to_64xi8(<6
define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
; GENERIC-LABEL: zext_32xi1_to_32xi16:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [4:0.50]
+; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
+; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: zext_32xi1_to_32xi16:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
+; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
+; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
%mask = icmp eq <32 x i16> %x, %y
%1 = zext <32 x i1> %mask to <32 x i16>
Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=324805&r1=324804&r2=324805&view=diff</a>
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Sat Feb 10 00:06:52 2018
@@ -288,11 +288,19 @@ define i64 @test12_v64i16(<64 x i16> %a,
}
define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
-; CHECK-LABEL: test13:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK-NEXT: retq
+; AVX512-LABEL: test13:
+; AVX512: ## %bb.0:
+; AVX512-NEXT: vcmpeqps %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpsrld $31, %zmm0, %zmm0
+; AVX512-NEXT: retq
+;
+; SKX-LABEL: test13:
+; SKX: ## %bb.0:
+; SKX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; SKX-NEXT: vpmovm2d %k0, %zmm0
+; SKX-NEXT: vpsrld $31, %zmm0, %zmm0
+; SKX-NEXT: retq
{
%cmpvector_i = fcmp oeq <16 x float> %a, %b
%conv = zext <16 x i1> %cmpvector_i to <16 x i32>
@@ -906,8 +914,9 @@ define <2 x i64> @test46(<2 x float> %x,
;
; SKX-LABEL: test46:
; SKX: ## %bb.0:
-; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k1
-; SKX-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
+; SKX-NEXT: vpmovm2q %k0, %xmm0
+; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0
; SKX-NEXT: retq
%mask = fcmp oeq <2 x float> %x, %y
%1 = zext <2 x i1> %mask to <2 x i64>
Modified: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll?rev=324805&r1=324804&r2=324805&view=diff</a>
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll Sat Feb 10 00:06:52 2018
@@ -50,15 +50,17 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i2 %a0 to <2 x i1>
%2 = zext <2 x i1> %1 to <2 x i64>
@@ -99,15 +101,17 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i32>
@@ -150,16 +154,17 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) {
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %xmm0 {%k1} {z}
+; AVX512VLBW-NEXT: kmovd %edi, %k0
+; AVX512VLBW-NEXT: vpmovm2w %k0, %xmm0
+; AVX512VLBW-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i8 %a0 to <8 x i1>
%2 = zext <8 x i1> %1 to <8 x i16>
@@ -289,14 +294,16 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i4 %a0 to <4 x i1>
%2 = zext <4 x i1> %1 to <4 x i64>
@@ -350,14 +357,16 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
-; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i8 %a0 to <8 x i1>
%2 = zext <8 x i1> %1 to <8 x i32>
@@ -413,14 +422,16 @@ define <16 x i16> @ext_i16_16i16(i16 %a0
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
+; AVX512VLBW-NEXT: kmovd %edi, %k0
+; AVX512VLBW-NEXT: vpmovm2w %k0, %ymm0
+; AVX512VLBW-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i16 %a0 to <16 x i1>
%2 = zext <16 x i1> %1 to <16 x i16>
@@ -611,13 +622,15 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) {
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i8 %a0 to <8 x i1>
%2 = zext <8 x i1> %1 to <8 x i64>
@@ -694,13 +707,15 @@ define <16 x i32> @ext_i16_16i32(i16 %a0
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512VLBW-NEXT: vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i16 %a0 to <16 x i1>
%2 = zext <16 x i1> %1 to <16 x i32>
@@ -786,17 +801,19 @@ define <32 x i16> @ext_i32_32i16(i32 %a0
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
-; AVX512F-NEXT: movl {{.*}}(%rip), %eax
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: kmovd %edi, %k1
-; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VLBW-NEXT: kmovd %edi, %k0
+; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
%1 = bitcast i32 %a0 to <32 x i1>
%2 = zext <32 x i1> %1 to <32 x i16>
Modified: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll?rev=324805&r1=324804&r2=324805&view=diff" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll?rev=324805&r1=324804&r2=324805&view=diff</a>
==============================================================================
--- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-extend.ll Sat Feb 10 00:06:52 2018
@@ -135,8 +135,10 @@ define <8 x i16> @testv8i1_zext_v8i16(<8
; AVX256: # %bb.0:
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; AVX256-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
+; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
@@ -144,8 +146,10 @@ define <8 x i16> @testv8i1_zext_v8i16(<8
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX512VL-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
@@ -169,15 +173,14 @@ define <16 x i8> @testv16i1_zext_v16i8(<
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
-; AVX256-NEXT: movl {{.*}}(%rip), %eax
-; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k2} {z}
+; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
+; AVX256-NEXT: vpmovdw %ymm1, %xmm1
+; AVX256-NEXT: vpsrlw $15, %xmm1, %xmm1
+; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
-; AVX256-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX256-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX256-NEXT: vpbroadcastd %eax, %ymm2 {%k1} {z}
-; AVX256-NEXT: vpmovdw %ymm2, %xmm2
-; AVX256-NEXT: vpshufb %xmm1, %xmm2, %xmm1
-; AVX256-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
+; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
@@ -218,12 +221,13 @@ define <16 x i16> @testv16i1_zext_v16i16
; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX256-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
; AVX256-NEXT: vpcmpeqd (%rsi), %ymm0, %k2
-; AVX256-NEXT: movl {{.*}}(%rip), %eax
-; AVX256-NEXT: vpbroadcastd %eax, %ymm0 {%k1} {z}
-; AVX256-NEXT: vpmovdw %ymm0, %xmm0
-; AVX256-NEXT: vpbroadcastd %eax, %ymm1 {%k2} {z}
+; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
-; AVX256-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
+; AVX256-NEXT: vpmovdw %ymm0, %xmm0
+; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX256-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
@@ -232,8 +236,9 @@ define <16 x i16> @testv16i1_zext_v16i16
; AVX512VL-NEXT: vpcmpeqd (%rdi), %ymm0, %k0
; AVX512VL-NEXT: vpcmpeqd (%rsi), %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
-; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512VL-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv16i1_zext_v16i16:
@@ -243,8 +248,9 @@ define <16 x i16> @testv16i1_zext_v16i16
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
</pre><br><br></div>
</blockquote></div><br></div></div></div></div></div></div></div>
</blockquote></div><br></div></div></div>