[llvm] r337500 - [X86][AVX] Use extract_subvector to reduce vector op widths (PR36761)

Thu Jul 19 14:52:07 PDT 2018

Author: rksimon
Date: Thu Jul 19 14:52:06 2018
New Revision: 337500

URL: http://llvm.org/viewvc/llvm-project?rev=337500&view=rev
Log:
[X86][AVX] Use extract_subvector to reduce vector op widths (PR36761)

We have a number of cases where we fail to reduce vector op widths, performing the op in a larger vector and then extracting a subvector. This is often because by default it would create illegal types.

This peephole patch attempts to handle a few common cases detailed in PR36761, which typically involved extension+conversion to vX2f64 types.

Differential Revision: https://reviews.llvm.org/D49556

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
    llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
    llvm/trunk/test/CodeGen/X86/vec_fpext.ll
    llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=337500&r1=337499&r2=337500&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 19 14:52:06 2018
@@ -39275,6 +39275,31 @@ static SDValue combineExtractSubvector(S
         OpVT, SDLoc(N),
         InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements()));
 
+  // If we're extracting the lowest subvector and we're the only user,
+  // we may be able to perform this with a smaller vector width.
+  if (IdxVal == 0 && InVec.hasOneUse()) {
+    unsigned InOpcode = InVec.getOpcode();
+    if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
+      // v2f64 CVTDQ2PD(v4i32).
+      if (InOpcode == ISD::SINT_TO_FP &&
+          InVec.getOperand(0).getValueType() == MVT::v4i32) {
+        return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0));
+      }
+      // v2f64 CVTPS2PD(v4f32).
+      if (InOpcode == ISD::FP_EXTEND &&
+          InVec.getOperand(0).getValueType() == MVT::v4f32) {
+        return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
+      }
+    }
+    if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) &&
+        OpVT.is128BitVector() &&
+        InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
+      unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG
+                                                 : ISD::SIGN_EXTEND_VECTOR_INREG;
+      return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
+    }
+  }
+
   return SDValue();
 }
 

Modified: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll?rev=337500&r1=337499&r2=337500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll Thu Jul 19 14:52:06 2018
@@ -131,7 +131,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7
 ; AVX256VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX256VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
 ; AVX256VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; AVX256VL-NEXT:    vpmovsxbw %xmm1, %ymm1
+; AVX256VL-NEXT:    vpmovsxbw %xmm1, %xmm1
 ; AVX256VL-NEXT:    vpmovsxwd %xmm1, %ymm1
 ; AVX256VL-NEXT:    vptestmd %ymm1, %ymm1, %k1
 ; AVX256VL-NEXT:    vpmovsxbw %xmm0, %ymm0

Modified: llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/trunc-subvector.ll?rev=337500&r1=337499&r2=337500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/trunc-subvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/trunc-subvector.ll Thu Jul 19 14:52:06 2018
@@ -49,8 +49,7 @@ define <2 x i32> @test3(<8 x i32> %v) {
 ; AVX2-LABEL: test3:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -76,8 +75,7 @@ define <2 x i32> @test4(<8 x i32> %v) {
 ;
 ; AVX2-LABEL: test4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -175,8 +173,7 @@ define <2 x i32> @test8(<8 x i32> %v) {
 ; AVX2-LABEL: test8:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
@@ -201,8 +198,7 @@ define <2 x i32> @test9(<8 x i32> %v) {
 ;
 ; AVX2-LABEL: test9:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vec_fpext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fpext.ll?rev=337500&r1=337499&r2=337500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fpext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fpext.ll Thu Jul 19 14:52:06 2018
@@ -14,16 +14,12 @@ define <2 x double> @fpext_4f32_to_2f64(
 ;
 ; AVX-LABEL: fpext_4f32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512VL-LABEL: fpext_4f32_to_2f64:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
-; AVX512VL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %cvt = fpext <4 x float> %a to <4 x double>
   %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -38,8 +34,7 @@ define <2 x double> @fpext_8f32_to_2f64(
 ;
 ; AVX-LABEL: fpext_8f32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
 ;

Modified: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll?rev=337500&r1=337499&r2=337500&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll Thu Jul 19 14:52:06 2018
@@ -95,9 +95,7 @@ define <2 x double> @sitofp_4i32_to_2f64
 ;
 ; AVX-LABEL: sitofp_4i32_to_2f64:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %cvt = sitofp <4 x i32> %a to <4 x double>
   %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -130,21 +128,11 @@ define <2 x double> @sitofp_8i16_to_2f64
 ; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sitofp_8i16_to_2f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: sitofp_8i16_to_2f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; VEX-LABEL: sitofp_8i16_to_2f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: sitofp_8i16_to_2f64:
 ; AVX512:       # %bb.0:
@@ -186,21 +174,11 @@ define <2 x double> @sitofp_16i8_to_2f64
 ; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sitofp_16i8_to_2f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: sitofp_16i8_to_2f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; VEX-LABEL: sitofp_16i8_to_2f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: sitofp_16i8_to_2f64:
 ; AVX512:       # %bb.0:
@@ -361,17 +339,11 @@ define <4 x double> @sitofp_8i16_to_4f64
 ; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sitofp_8i16_to_4f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: sitofp_8i16_to_4f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    retq
+; VEX-LABEL: sitofp_8i16_to_4f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovsxwd %xmm0, %xmm0
+; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: sitofp_8i16_to_4f64:
 ; AVX512:       # %bb.0:
@@ -416,17 +388,11 @@ define <4 x double> @sitofp_16i8_to_4f64
 ; SSE-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: sitofp_16i8_to_4f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovsxbd %xmm0, %xmm0
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: sitofp_16i8_to_4f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    retq
+; VEX-LABEL: sitofp_16i8_to_4f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovsxbd %xmm0, %xmm0
+; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: sitofp_16i8_to_4f64:
 ; AVX512:       # %bb.0:
@@ -658,21 +624,11 @@ define <2 x double> @uitofp_8i16_to_2f64
 ; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: uitofp_8i16_to_2f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: uitofp_8i16_to_2f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; VEX-LABEL: uitofp_8i16_to_2f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: uitofp_8i16_to_2f64:
 ; AVX512:       # %bb.0:
@@ -714,21 +670,11 @@ define <2 x double> @uitofp_16i8_to_2f64
 ; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: uitofp_16i8_to_2f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: uitofp_16i8_to_2f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; VEX-LABEL: uitofp_16i8_to_2f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; VEX-NEXT:    vcvtdq2pd %xmm0, %xmm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: uitofp_16i8_to_2f64:
 ; AVX512:       # %bb.0:
@@ -939,17 +885,11 @@ define <4 x double> @uitofp_8i16_to_4f64
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: uitofp_8i16_to_4f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: uitofp_8i16_to_4f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    retq
+; VEX-LABEL: uitofp_8i16_to_4f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: uitofp_8i16_to_4f64:
 ; AVX512:       # %bb.0:
@@ -996,17 +936,11 @@ define <4 x double> @uitofp_16i8_to_4f64
 ; SSE-NEXT:    movaps %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: uitofp_16i8_to_4f64:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: uitofp_16i8_to_4f64:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT:    vcvtdq2pd %xmm0, %ymm0
-; AVX2-NEXT:    retq
+; VEX-LABEL: uitofp_16i8_to_4f64:
+; VEX:       # %bb.0:
+; VEX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; VEX-NEXT:    vcvtdq2pd %xmm0, %ymm0
+; VEX-NEXT:    retq
 ;
 ; AVX512-LABEL: uitofp_16i8_to_4f64:
 ; AVX512:       # %bb.0: