[llvm-branch-commits] [llvm-branch] r277508 - Merging r276648:

Hans Wennborg via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Aug 2 12:41:54 PDT 2016


Author: hans
Date: Tue Aug  2 14:41:53 2016
New Revision: 277508

URL: http://llvm.org/viewvc/llvm-project?rev=277508&view=rev
Log:
Merging r276648:
------------------------------------------------------------------------
r276648 | delena | 2016-07-25 09:51:00 -0700 (Mon, 25 Jul 2016) | 6 lines

AVX-512: Fixed [US]INT_TO_FP selection for i1 vectors.
It failed with assertion before this patch.

Differential Revision: https://reviews.llvm.org/D22735


------------------------------------------------------------------------

Modified:
    llvm/branches/release_39/   (props changed)
    llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
    llvm/branches/release_39/test/CodeGen/X86/avx512-cvt.ll

Propchange: llvm/branches/release_39/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Aug  2 14:41:53 2016
@@ -1,3 +1,3 @@
 /llvm/branches/Apple/Pertwee:110850,110961
 /llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276740,276956,276980,277114,277135
+/llvm/trunk:155241,275868-275870,275879,275898,275928,275935,275946,275978,275981,276015,276077,276109,276119,276181,276209,276236-276237,276358,276364,276368,276389,276435,276438,276479,276510,276648,276740,276956,276980,277114,277135

Modified: llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp?rev=277508&r1=277507&r2=277508&view=diff
==============================================================================
--- llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/branches/release_39/lib/Target/X86/X86ISelLowering.cpp Tue Aug  2 14:41:53 2016
@@ -1187,6 +1187,14 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i8, Custom);
     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1, Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v16i1, Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,  Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v8i1,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i1,  Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v4i1,  Custom);
+    setOperationAction(ISD::SINT_TO_FP,         MVT::v2i1,  Custom);
+    setOperationAction(ISD::UINT_TO_FP,         MVT::v2i1,  Custom);
     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
 
@@ -13373,6 +13381,7 @@ SDValue X86TargetLowering::LowerSINT_TO_
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (SrcVT.isVector()) {
     if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
       return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
@@ -13380,6 +13389,9 @@ SDValue X86TargetLowering::LowerSINT_TO_
                          DAG.getUNDEF(SrcVT)));
     }
     if (SrcVT.getVectorElementType() == MVT::i1) {
+      if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
+        return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
+                           DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
       MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
       return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
                          DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@@ -13694,6 +13706,15 @@ SDValue X86TargetLowering::lowerUINT_TO_
   MVT SVT = N0.getSimpleValueType();
   SDLoc dl(Op);
 
+  if (SVT.getVectorElementType() == MVT::i1) {
+    if (SVT == MVT::v2i1)
+      return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
+                         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
+    MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+    return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
+                       DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
+  }
+
   switch (SVT.SimpleTy) {
   default:
     llvm_unreachable("Custom UINT_TO_FP is not supported!");

Modified: llvm/branches/release_39/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_39/test/CodeGen/X86/avx512-cvt.ll?rev=277508&r1=277507&r2=277508&view=diff
==============================================================================
--- llvm/branches/release_39/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/branches/release_39/test/CodeGen/X86/avx512-cvt.ll Tue Aug  2 14:41:53 2016
@@ -744,6 +744,36 @@ define <8 x double> @sitofp_8i8_double(<
   ret <8 x double> %1
 }
 
+define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
+; KNL-LABEL: sitofp_16i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; KNL-NEXT:    vcmpltpd %zmm1, %zmm2, %k1
+; KNL-NEXT:    vcmpltpd %zmm0, %zmm2, %k2
+; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm0 {%k2} {z}
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm1, %zmm1 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm1, %ymm1
+; KNL-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_16i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; SKX-NEXT:    vcmpltpd %zmm1, %zmm2, %k0
+; SKX-NEXT:    vcmpltpd %zmm0, %zmm2, %k1
+; SKX-NEXT:    vpmovm2d %k1, %ymm0
+; SKX-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; SKX-NEXT:    vpmovm2d %k0, %ymm1
+; SKX-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
+  %1 = sitofp <16 x i1> %cmpres to <16 x double>
+  ret <16 x double> %1
+}
+
 define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
 ; KNL-LABEL: sitofp_8i1_double:
 ; KNL:       ## BB#0:
@@ -767,6 +797,130 @@ define <8 x double> @sitofp_8i1_double(<
   ret <8 x double> %1
 }
 
+define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
+; KNL-LABEL: sitofp_8i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; KNL-NEXT:    vcmpltps %zmm0, %zmm1, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0
+; KNL-NEXT:    vmovdqa64 %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_8i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT:    vcmpltps %ymm0, %ymm1, %k0
+; SKX-NEXT:    vpmovm2d %k0, %ymm0
+; SKX-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
+  %1 = sitofp <8 x i1> %cmpres to <8 x float>
+  ret <8 x float> %1
+}
+
+define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
+; KNL-LABEL: sitofp_4i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_4i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
+  %1 = sitofp <4 x i1> %cmpres to <4 x float>
+  ret <4 x float> %1
+}
+
+define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
+; KNL-LABEL: sitofp_4i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; KNL-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
+; KNL-NEXT:    vpsrad $31, %xmm0, %xmm0
+; KNL-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_4i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT:    vcmpltpd %ymm0, %ymm1, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
+  %1 = sitofp <4 x i1> %cmpres to <4 x double>
+  ret <4 x double> %1
+}
+
+define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
+; KNL-LABEL: sitofp_2i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; KNL-NEXT:    vpsllq $32, %xmm0, %xmm0
+; KNL-NEXT:    vpsrad $31, %xmm0, %xmm1
+; KNL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    xorl %ecx, %ecx
+; KNL-NEXT:    testb $1, %al
+; KNL-NEXT:    movl $-1, %eax
+; KNL-NEXT:    movl $0, %edx
+; KNL-NEXT:    cmovnel %eax, %edx
+; KNL-NEXT:    vcvtsi2ssl %edx, %xmm0, %xmm1
+; KNL-NEXT:    vmovq %xmm0, %rdx
+; KNL-NEXT:    testb $1, %dl
+; KNL-NEXT:    cmovnel %eax, %ecx
+; KNL-NEXT:    vcvtsi2ssl %ecx, %xmm0, %xmm0
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_2i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vcmpltps %xmm0, %xmm1, %k0
+; SKX-NEXT:    vpmovm2d %k0, %xmm0
+; SKX-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
+  %1 = sitofp <2 x i1> %cmpres to <2 x float>
+  ret <2 x float> %1
+}
+
+define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
+; KNL-LABEL: sitofp_2i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; KNL-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: sitofp_2i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
+; SKX-NEXT:    vpmovm2q %k0, %xmm0
+; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
+  %1 = sitofp <2 x i1> %cmpres to <2 x double>
+  ret <2 x double> %1
+}
+
 define <16 x float> @uitofp_16i8(<16 x i8>%a) {
 ; ALL-LABEL: uitofp_16i8:
 ; ALL:       ## BB#0:
@@ -787,3 +941,196 @@ define <16 x float> @uitofp_16i16(<16 x
   ret <16 x float>%b
 }
 
+define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
+; ALL-LABEL: uitofp_16i1_float:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; ALL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; ALL-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; ALL-NEXT:    vcvtudq2ps %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %mask = icmp slt <16 x i32> %a, zeroinitializer
+  %1 = uitofp <16 x i1> %mask to <16 x float>
+  ret <16 x float> %1
+}
+
+define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
+; KNL-LABEL: uitofp_16i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT:    movq {{.*}}(%rip), %rax
+; KNL-NEXT:    vpbroadcastq %rax, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT:    kshiftrw $8, %k1, %k1
+; KNL-NEXT:    vpbroadcastq %rax, %zmm1 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm1, %ymm1
+; KNL-NEXT:    vcvtudq2pd %ymm1, %zmm1
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_16i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; SKX-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; SKX-NEXT:    movl {{.*}}(%rip), %eax
+; SKX-NEXT:    vpbroadcastd %eax, %ymm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; SKX-NEXT:    kshiftrw $8, %k1, %k1
+; SKX-NEXT:    vpbroadcastd %eax, %ymm1 {%k1} {z}
+; SKX-NEXT:    vcvtudq2pd %ymm1, %zmm1
+; SKX-NEXT:    retq
+  %mask = icmp slt <16 x i32> %a, zeroinitializer
+  %1 = uitofp <16 x i1> %mask to <16 x double>
+  ret <16 x double> %1
+}
+
+define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
+; KNL-LABEL: uitofp_8i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vcvtudq2ps %zmm0, %zmm0
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_8i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2ps %ymm0, %ymm0
+; SKX-NEXT:    retq
+  %mask = icmp slt <8 x i32> %a, zeroinitializer
+  %1 = uitofp <8 x i1> %mask to <8 x float>
+  ret <8 x float> %1
+}
+
+define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
+; KNL-LABEL: uitofp_8i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; KNL-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1
+; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovqd %zmm0, %ymm0
+; KNL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_8i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %ymm1, %ymm1, %ymm1
+; SKX-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; SKX-NEXT:    retq
+  %mask = icmp slt <8 x i32> %a, zeroinitializer
+  %1 = uitofp <8 x i1> %mask to <8 x double>
+  ret <8 x double> %1
+}
+
+define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
+; KNL-LABEL: uitofp_4i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpsrld $31, %xmm0, %xmm0
+; KNL-NEXT:    vcvtudq2ps %zmm0, %zmm0
+; KNL-NEXT:    ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_4i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2ps %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %mask = icmp slt <4 x i32> %a, zeroinitializer
+  %1 = uitofp <4 x i1> %mask to <4 x float>
+  ret <4 x float> %1
+}
+
+define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
+; KNL-LABEL: uitofp_4i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpsrld $31, %xmm0, %xmm0
+; KNL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; KNL-NEXT:    ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_4i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2pd %xmm0, %ymm0
+; SKX-NEXT:    retq
+  %mask = icmp slt <4 x i32> %a, zeroinitializer
+  %1 = uitofp <4 x i1> %mask to <4 x double>
+  ret <4 x double> %1
+}
+
+define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
+; KNL-LABEL: uitofp_2i1_float:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm1
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    vcvtsi2ssl %eax, %xmm0, %xmm0
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_2i1_float:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT:    vcvtudq2ps %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %mask = icmp ult <2 x i32> %a, zeroinitializer
+  %1 = uitofp <2 x i1> %mask to <2 x float>
+  ret <2 x float> %1
+}
+
+define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
+; KNL-LABEL: uitofp_2i1_double:
+; KNL:       ## BB#0:
+; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; KNL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
+; KNL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    vpsrlq $63, %xmm0, %xmm0
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm0, %xmm1
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm0, %xmm0
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: uitofp_2i1_double:
+; SKX:       ## BB#0:
+; SKX-NEXT:    vpxord %xmm1, %xmm1, %xmm1
+; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
+; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
+; SKX-NEXT:    vcvtuqq2pd %xmm0, %xmm0
+; SKX-NEXT:    retq
+  %mask = icmp ult <2 x i32> %a, zeroinitializer
+  %1 = uitofp <2 x i1> %mask to <2 x double>
+  ret <2 x double> %1
+}




More information about the llvm-branch-commits mailing list