[llvm] r312921 - X86 Tests: More AVX512 conversion tests. NFC

Zvi Rackover via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 11 08:54:38 PDT 2017


Author: zvi
Date: Mon Sep 11 08:54:38 2017
New Revision: 312921

URL: http://llvm.org/viewvc/llvm-project?rev=312921&view=rev
Log:
X86 Tests: More AVX512 conversion tests. NFC

Add more tests for AVX512 fp<->int conversions that were previously missing.
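
Each added function pairs an IR conversion (sitofp/uitofp between 64-bit
integer vectors and float/double vectors) with the expected lowering for the
various AVX512 feature combinations (KNL, DQ, VLDQ, VLNODQ, AVX512BW). The
RUN lines driving these checks are not part of this diff; a sketch of how a
test like this is typically invoked, with an illustrative (not
copied-from-the-file) triple and check prefix, would be:

  ; Hypothetical RUN line, for illustration only; the real RUN lines live at
  ; the top of avx512-cvt.ll and are not shown in this patch.
  ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f \
  ; RUN:   | FileCheck %s --check-prefix=NODQ --check-prefix=KNL

The CHECK lines appear to follow the auto-generated pattern produced by
utils/update_llc_test_checks.py.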

Modified:
    llvm/trunk/test/CodeGen/X86/avx512-cvt.ll

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=312921&r1=312920&r2=312921&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Mon Sep 11 08:54:38 2017
@@ -88,6 +88,32 @@ define <4 x double> @slto4f64(<4 x i64>
   ret <4 x double> %b
 }
 
+define <2 x double> @slto2f64(<2 x i64> %a) {
+; NODQ-LABEL: slto2f64:
+; NODQ:       # BB#0:
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm1, %xmm1
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm0
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT:    retq
+;
+; VLDQ-LABEL: slto2f64:
+; VLDQ:       # BB#0:
+; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
+; VLDQ-NEXT:    retq
+;
+; AVX512DQ-LABEL: slto2f64:
+; AVX512DQ:       # BB#0:
+; AVX512DQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
+; AVX512DQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+  %b = sitofp <2 x i64> %a to <2 x double>
+  ret <2 x double> %b
+}
+
 define <2 x float> @sltof2f32(<2 x i64> %a) {
 ; NODQ-LABEL: sltof2f32:
 ; NODQ:       # BB#0:
@@ -430,6 +456,184 @@ define <8 x double> @ulto8f64(<8 x i64>
   ret <8 x double> %b
 }
 
+define <16 x double> @ulto16f64(<16 x i64> %a) {
+; KNL-LABEL: ulto16f64:
+; KNL:       # BB#0:
+; KNL-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; KNL-NEXT:    vpextrq $1, %xmm2, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
+; KNL-NEXT:    vmovq %xmm2, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
+; KNL-NEXT:    vpextrq $1, %xmm4, %rax
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vmovq %xmm4, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; KNL-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vpextrq $1, %xmm1, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm1, %rax
+; KNL-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; KNL-NEXT:    retq
+;
+; DQ-LABEL: ulto16f64:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtuqq2pd %zmm0, %zmm0
+; DQ-NEXT:    vcvtuqq2pd %zmm1, %zmm1
+; DQ-NEXT:    retq
+;
+; VLNODQ-LABEL: ulto16f64:
+; VLNODQ:       # BB#0:
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; VLNODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm1, %rax
+; VLNODQ-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; VLNODQ-NEXT:    retq
+;
+; AVX512BW-LABEL: ulto16f64:
+; AVX512BW:       # BB#0:
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm3, %xmm3
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm4, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm0
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm2
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm1, %rax
+; AVX512BW-NEXT:    vcvtusi2sdq %rax, %xmm5, %xmm1
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    retq
+  %b = uitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %b
+}
+
 define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
 ; ALL-LABEL: f64to16si:
 ; ALL:       # BB#0:
@@ -974,6 +1178,558 @@ define <16 x double> @uito16f64(<16 x i3
   %b = uitofp <16 x i32> %a to <16 x double>
   ret <16 x double> %b
 }
+
+define <8 x float> @slto8f32(<8 x i64> %a) {
+; KNL-LABEL: slto8f32:
+; KNL:       # BB#0:
+; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; KNL-NEXT:    vpextrq $1, %xmm1, %rax
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
+; KNL-NEXT:    vmovq %xmm1, %rax
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm1
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; KNL-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; KNL-NEXT:    vmovq %xmm2, %rax
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm2, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm3[0],xmm2[3]
+; KNL-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    retq
+;
+; DQ-LABEL: slto8f32:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
+; DQ-NEXT:    retq
+;
+; VLNODQ-LABEL: slto8f32:
+; VLNODQ:       # BB#0:
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm1, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm1
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm0
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; VLNODQ-NEXT:    retq
+;
+; AVX512BW-LABEL: slto8f32:
+; AVX512BW:       # BB#0:
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm1, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm1
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm0
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
+  %b = sitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+define <16 x float> @slto16f32(<16 x i64> %a) {
+; NODQ-LABEL: slto16f32:
+; NODQ:       # BB#0:
+; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm3, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm2
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
+; NODQ-NEXT:    vmovq %xmm3, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm4, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
+; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
+; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; NODQ-NEXT:    vmovq %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
+; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm1
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm2
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT:    vmovq %xmm3, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
+; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
+; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2ssq %rax, %xmm5, %xmm0
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; NODQ-NEXT:    retq
+;
+; DQ-LABEL: slto16f32:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtqq2ps %zmm0, %ymm0
+; DQ-NEXT:    vcvtqq2ps %zmm1, %ymm1
+; DQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; DQ-NEXT:    retq
+  %b = sitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+define <8 x double> @slto8f64(<8 x i64> %a) {
+; NODQ-LABEL: slto8f64:
+; NODQ:       # BB#0:
+; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm1
+; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm2, %xmm2
+; NODQ-NEXT:    vmovq %xmm1, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm1
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm3
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm0
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; NODQ-NEXT:    retq
+;
+; DQ-LABEL: slto8f64:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
+; DQ-NEXT:    retq
+  %b = sitofp <8 x i64> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+define <16 x double> @slto16f64(<16 x i64> %a) {
+; KNL-LABEL: slto16f64:
+; KNL:       # BB#0:
+; KNL-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; KNL-NEXT:    vpextrq $1, %xmm2, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
+; KNL-NEXT:    vmovq %xmm2, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm4
+; KNL-NEXT:    vpextrq $1, %xmm4, %rax
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vmovq %xmm4, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; KNL-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; KNL-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm3, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; KNL-NEXT:    vpextrq $1, %xmm1, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; KNL-NEXT:    vmovq %xmm1, %rax
+; KNL-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
+; KNL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; KNL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; KNL-NEXT:    retq
+;
+; DQ-LABEL: slto16f64:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtqq2pd %zmm0, %zmm0
+; DQ-NEXT:    vcvtqq2pd %zmm1, %zmm1
+; DQ-NEXT:    retq
+;
+; VLNODQ-LABEL: slto16f64:
+; VLNODQ:       # BB#0:
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; VLNODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; VLNODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm3, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; VLNODQ-NEXT:    vmovq %xmm1, %rax
+; VLNODQ-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
+; VLNODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; VLNODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; VLNODQ-NEXT:    retq
+;
+; AVX512BW-LABEL: slto16f64:
+; AVX512BW:       # BB#0:
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm3, %xmm3
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm4, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm0
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm2
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX512BW-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX512BW-NEXT:    vpextrq $1, %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm3, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm3
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm4
+; AVX512BW-NEXT:    vmovq %xmm1, %rax
+; AVX512BW-NEXT:    vcvtsi2sdq %rax, %xmm5, %xmm1
+; AVX512BW-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX512BW-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512BW-NEXT:    retq
+  %b = sitofp <16 x i64> %a to <16 x double>
+  ret <16 x double> %b
+}
+
+define <8 x float> @ulto8f32(<8 x i64> %a) {
+; KNL-LABEL: ulto8f32:
+; KNL:       # BB#0:
+; KNL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; KNL-NEXT:    vpextrq $1, %xmm1, %rax
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
+; KNL-NEXT:    vmovq %xmm1, %rax
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm1
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; KNL-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; KNL-NEXT:    vmovq %xmm2, %rax
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm2, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; KNL-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; KNL-NEXT:    vmovq %xmm0, %rax
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; KNL-NEXT:    vpextrq $1, %xmm0, %rax
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm3[0],xmm2[3]
+; KNL-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; KNL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0]
+; KNL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT:    retq
+;
+; DQ-LABEL: ulto8f32:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
+; DQ-NEXT:    retq
+;
+; VLNODQ-LABEL: ulto8f32:
+; VLNODQ:       # BB#0:
+; VLNODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; VLNODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm1, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm1
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; VLNODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; VLNODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; VLNODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; VLNODQ-NEXT:    vmovq %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; VLNODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm0
+; VLNODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; VLNODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; VLNODQ-NEXT:    retq
+;
+; AVX512BW-LABEL: ulto8f32:
+; AVX512BW:       # BB#0:
+; AVX512BW-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512BW-NEXT:    vpextrq $1, %xmm1, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm2, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm1, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm1
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
+; AVX512BW-NEXT:    vextracti32x4 $3, %zmm0, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
+; AVX512BW-NEXT:    vpextrq $1, %xmm2, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX512BW-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512BW-NEXT:    vmovq %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm3
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; AVX512BW-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512BW-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm0
+; AVX512BW-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512BW-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT:    retq
+  %b = uitofp <8 x i64> %a to <8 x float>
+  ret <8 x float> %b
+}
+
+define <16 x float> @ulto16f32(<16 x i64> %a) {
+; NODQ-LABEL: ulto16f32:
+; NODQ:       # BB#0:
+; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm3, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm2
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm3
+; NODQ-NEXT:    vmovq %xmm3, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm4, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
+; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm1, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
+; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm1
+; NODQ-NEXT:    vmovq %xmm1, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
+; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm1
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm2, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm2
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
+; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT:    vmovq %xmm3, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
+; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm3
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
+; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; NODQ-NEXT:    vmovq %xmm0, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm4
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
+; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
+; NODQ-NEXT:    vcvtusi2ssq %rax, %xmm5, %xmm0
+; NODQ-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
+; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; NODQ-NEXT:    retq
+;
+; DQ-LABEL: ulto16f32:
+; DQ:       # BB#0:
+; DQ-NEXT:    vcvtuqq2ps %zmm0, %ymm0
+; DQ-NEXT:    vcvtuqq2ps %zmm1, %ymm1
+; DQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; DQ-NEXT:    retq
+  %b = uitofp <16 x i64> %a to <16 x float>
+  ret <16 x float> %b
+}
+
 define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
 ; KNL-LABEL: uito8f64_mask:
 ; KNL:       # BB#0:
@@ -1070,6 +1826,15 @@ define <16 x float> @uito16f32(<16 x i32
   ret <16 x float> %b
 }
 
+define <8 x double> @uito8f64(<8 x i32> %a) {
+; ALL-LABEL: uito8f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; ALL-NEXT:    retq
+  %b = uitofp <8 x i32> %a to <8 x double>
+  ret <8 x double> %b
+}
+
 define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
 ; NOVL-LABEL: uito8f32:
 ; NOVL:       # BB#0:
@@ -1217,6 +1982,18 @@ define <8 x double> @scto8f64(<8 x i8> %
   ret <8 x double> %1
 }
 
+define <16 x double> @scto16f64(<16 x i8> %a) {
+; ALL-LABEL: scto16f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovsxbd %xmm0, %zmm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; ALL-NEXT:    retq
+  %b = sitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %b
+}
+
 define <16 x double> @sbto16f64(<16 x double> %a) {
 ; NOVLDQ-LABEL: sbto16f64:
 ; NOVLDQ:       # BB#0:
@@ -1481,6 +2258,114 @@ define <16 x float> @ucto16f32(<16 x i8>
   ret <16 x float>%b
 }
 
+define <8 x double> @ucto8f64(<8 x i8> %a) {
+; ALL-LABEL: ucto8f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; ALL-NEXT:    retq
+  %b = uitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+define <16 x float> @swto16f32(<16 x i16> %a) {
+; ALL-LABEL: swto16f32:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovsxwd %ymm0, %zmm0
+; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %b = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+define <8 x double> @swto8f64(<8 x i16> %a) {
+; ALL-LABEL: swto8f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovsxwd %xmm0, %ymm0
+; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; ALL-NEXT:    retq
+  %b = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+define <16 x double> @swto16f64(<16 x i16> %a) {
+; ALL-LABEL: swto16f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovsxwd %ymm0, %zmm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; ALL-NEXT:    retq
+  %b = sitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %b
+}
+
+define <16 x double> @ucto16f64(<16 x i8> %a) {
+; ALL-LABEL: ucto16f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; ALL-NEXT:    retq
+  %b = uitofp <16 x i8> %a to <16 x double>
+  ret <16 x double> %b
+}
+
+define <16 x float> @uwto16f32(<16 x i16> %a) {
+; ALL-LABEL: uwto16f32:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %b = uitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+define <8 x double> @uwto8f64(<8 x i16> %a) {
+; ALL-LABEL: uwto8f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm0
+; ALL-NEXT:    retq
+  %b = uitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %b
+}
+
+define <16 x double> @uwto16f64(<16 x i16> %a) {
+; ALL-LABEL: uwto16f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vcvtdq2pd %ymm1, %zmm1
+; ALL-NEXT:    retq
+  %b = uitofp <16 x i16> %a to <16 x double>
+  ret <16 x double> %b
+}
+
+define <16 x float> @sito16f32(<16 x i32> %a) {
+; ALL-LABEL: sito16f32:
+; ALL:       # BB#0:
+; ALL-NEXT:    vcvtdq2ps %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %b = sitofp <16 x i32> %a to <16 x float>
+  ret <16 x float> %b
+}
+
+define <16 x double> @sito16f64(<16 x i32> %a) {
+; ALL-LABEL: sito16f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vcvtdq2pd %ymm0, %zmm1
+; ALL-NEXT:    vmovaps %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %b = sitofp <16 x i32> %a to <16 x double>
+  ret <16 x double> %b
+}
+
 define <16 x float> @usto16f32(<16 x i16> %a) {
 ; ALL-LABEL: usto16f32:
 ; ALL:       # BB#0:



