[llvm] r347175 - [X86] Add a 32-bit command line with only sse2 to vector-sext-widen.ll and vector-sext.ll to show some of the scalarized load sequences without 64-bit scalar support.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 18 13:28:47 PST 2018


Author: ctopper
Date: Sun Nov 18 13:28:47 2018
New Revision: 347175

URL: http://llvm.org/viewvc/llvm-project?rev=347175&view=rev
Log:
[X86] Add a 32-bit command line with only sse2 to vector-sext-widen.ll and vector-sext.ll to show some of the scalarized load sequences without 64-bit scalar support.

Some of these sequences look pretty bad since we have to copy the sign bit from a 32-bit register to a 64-bit register to finish a sign extend.

Modified:
    llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
    llvm/trunk/test/CodeGen/X86/vector-sext.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=347175&r1=347174&r2=347175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Sun Nov 18 13:28:47 2018
@@ -7,7 +7,8 @@
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
 ;
-; Just one 32-bit run to make sure we do reasonable things there.
+; Just two 32-bit runs to make sure we do reasonable things there.
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
 ; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
 
 define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
@@ -33,6 +34,12 @@ define <8 x i16> @sext_16i8_to_8i16(<16
 ; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
@@ -88,6 +95,15 @@ define <16 x i16> @sext_16i8_to_16i16(<1
 ; AVX512-NEXT:    vpmovsxbw %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm2
@@ -178,6 +194,21 @@ define <32 x i16> @sext_32i8_to_32i16(<3
 ; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_32i8_to_32i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT:    psraw $8, %xmm2
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm3
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_32i8_to_32i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm5
@@ -219,6 +250,13 @@ define <4 x i32> @sext_16i8_to_4i32(<16
 ; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
@@ -276,6 +314,16 @@ define <8 x i32> @sext_16i8_to_8i32(<16
 ; AVX512-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm2
@@ -361,6 +409,22 @@ define <16 x i32> @sext_16i8_to_16i32(<1
 ; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_16i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm4
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $24, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_16i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm4
@@ -408,6 +472,16 @@ define <2 x i64> @sext_16i8_to_2i64(<16
 ; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm0
@@ -479,6 +553,23 @@ define <4 x i64> @sext_16i8_to_4i64(<16
 ; AVX512-NEXT:    vpmovsxbq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm2
@@ -589,6 +680,34 @@ define <8 x i64> @sext_16i8_to_8i64(<16
 ; AVX512-NEXT:    vpmovsxbq %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm4
@@ -630,6 +749,12 @@ define <4 x i32> @sext_8i16_to_4i32(<8 x
 ; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
@@ -685,6 +810,15 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x
 ; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm2
@@ -765,6 +899,20 @@ define <16 x i32> @sext_16i16_to_16i32(<
 ; AVX512-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i16_to_16i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm4
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm5
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm5, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i16_to_16i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm5
@@ -810,6 +958,15 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
 ; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm0
@@ -877,6 +1034,21 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
 ; AVX512-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm2
@@ -982,6 +1154,32 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
 ; AVX512-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $16, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm4
@@ -1023,6 +1221,13 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
 ; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i32_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i32_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm0
@@ -1082,6 +1287,17 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i32_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i32_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
@@ -1172,6 +1388,25 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
 ; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i32_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm4
+; X32-SSE2-NEXT:    psrad $31, %xmm4
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm4
+; X32-SSE2-NEXT:    psrad $31, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i32_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm5
@@ -1246,6 +1481,22 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1301,6 +1552,19 @@ define <2 x i64> @load_sext_2i8_to_2i64(
 ; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i8_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i8_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1437,6 +1701,30 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $28, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1490,6 +1778,15 @@ define <4 x i32> @load_sext_4i8_to_4i32(
 ; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1640,6 +1937,33 @@ define <4 x i64> @load_sext_4i1_to_4i64(
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; X32-SSE2-NEXT:    psllq $63, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; X32-SSE2-NEXT:    psllq $63, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1723,6 +2047,33 @@ define <4 x i64> @load_sext_4i8_to_4i64(
 ; AVX512-NEXT:    vpmovsxbq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1780,6 +2131,22 @@ define <2 x i64> @load_sext_4i8_to_4i64_
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2000,6 +2367,49 @@ define <8 x i16> @load_sext_8i1_to_8i16(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $25, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $26, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $27, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $28, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2066,6 +2476,14 @@ define <8 x i16> @load_sext_8i8_to_8i16(
 ; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2159,6 +2577,55 @@ define <8 x i64> @load_sext_8i8_to_8i64(
 ; AVX512-NEXT:    vpmovsxbq (%rdi), %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    movsbl 5(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT:    movsbl 4(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-SSE2-NEXT:    movsbl 7(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm4
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X32-SSE2-NEXT:    movsbl 6(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm5
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2394,6 +2861,53 @@ define <8 x i32> @load_sext_8i1_to_8i32(
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $6, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $5, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $4, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    andl $1, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    pslld $31, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    pslld $31, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2490,6 +3004,19 @@ define <8 x i32> @load_sext_8i8_to_8i32(
 ; AVX512-NEXT:    vpmovsxbd (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2905,6 +3432,97 @@ define <16 x i8> @load_sext_16i1_to_16i8
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shrl $15, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shll $17, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    shll $18, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm1
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    shll $19, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $20, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    shll $21, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm6
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    shll $22, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shll $23, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm5
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $28, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movsbl %al, %eax
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X32-SSE2-NEXT:    shll $30, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X32-SSE2-NEXT:    shll $31, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $26, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT:    shll $27, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $25, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm1
+; X32-SSE2-NEXT:    shrl $7, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3430,6 +4048,93 @@ define <16 x i16> @load_sext_16i1_to_16i
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $15, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $14, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $13, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $12, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $11, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $10, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $9, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $8, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $6, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $5, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $4, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    andl $1, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm4
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psllw $15, %xmm0
+; X32-SSE2-NEXT:    psraw $15, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psllw $15, %xmm1
+; X32-SSE2-NEXT:    psraw $15, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4285,6 +4990,179 @@ define <32 x i8> @load_sext_32i1_to_32i8
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    subl $28, %esp
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl (%eax), %edx
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shrl $15, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shll $17, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm4
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    shll $18, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm1
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    shll $19, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm2
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    shll $20, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm5
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    shll $21, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm6
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shll $22, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    shll $23, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm3
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; X32-SSE2-NEXT:    shll $28, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm0
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-SSE2-NEXT:    shll $29, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm1
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    movsbl %dl, %edx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    shll $31, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $26, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm7
+; X32-SSE2-NEXT:    shll $27, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm2
+; X32-SSE2-NEXT:    shll $25, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm6
+; X32-SSE2-NEXT:    shrl $7, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm5
+; X32-SSE2-NEXT:    movswl 2(%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shrl $15, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm4
+; X32-SSE2-NEXT:    movdqu %xmm4, (%esp) # 16-byte Spill
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT:    shll $17, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm4
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $18, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT:    shll $19, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm5
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; X32-SSE2-NEXT:    shll $20, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm6
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    shll $21, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm1
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; X32-SSE2-NEXT:    shll $22, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm3
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    movdqu (%esp), %xmm2 # 16-byte Reload
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $23, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $28, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movsbl %al, %eax
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    shll $30, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT:    shll $31, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $26, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm5
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT:    shll $27, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; X32-SSE2-NEXT:    shll $25, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm4
+; X32-SSE2-NEXT:    shrl $7, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm5
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    addl $28, %esp
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pushl %esi
@@ -4465,6 +5343,17 @@ define <16 x i16> @load_sext_16i8_to_16i
 ; AVX512-NEXT:    vpmovsxbw (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i8_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i8_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4508,6 +5397,17 @@ define <2 x i64> @load_sext_2i16_to_2i64
 ; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i16_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i16_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4544,6 +5444,14 @@ define <4 x i32> @load_sext_4i16_to_4i32
 ; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i16_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4609,6 +5517,33 @@ define <4 x i64> @load_sext_4i16_to_4i64
 ; AVX512-NEXT:    vpmovsxwq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl 2(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movswl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movswl 6(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movswl 4(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4665,6 +5600,17 @@ define <8 x i32> @load_sext_8i16_to_8i32
 ; AVX512-NEXT:    vpmovsxwd (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i16_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4704,6 +5650,15 @@ define <2 x i64> @load_sext_2i32_to_2i64
 ; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i32_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4763,6 +5718,19 @@ define <4 x i64> @load_sext_4i32_to_4i64
 ; AVX512-NEXT:    vpmovsxdq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i32_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movdqa (%eax), %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4802,6 +5770,13 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_2i8_to_i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    movd %xmm0, %eax
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_2i8_to_i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
@@ -4875,6 +5850,19 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i1_to_4i64:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pslld $31, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i1_to_4i64:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pslld $31, %xmm0
@@ -4949,6 +5937,23 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxbq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i8_to_4i64:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i8_to_4i64:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm2
@@ -5013,6 +6018,24 @@ define <32 x i8> @sext_32xi1_to_32xi8(<3
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    movl %esp, %ebp
+; X32-SSE2-NEXT:    andl $-16, %esp
+; X32-SSE2-NEXT:    subl $16, %esp
+; X32-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
+; X32-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
+; X32-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
+; X32-SSE2-NEXT:    packsswb %xmm1, %xmm0
+; X32-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
+; X32-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
+; X32-SSE2-NEXT:    packsswb %xmm3, %xmm2
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    movl %ebp, %esp
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pushl %ebp
@@ -5072,6 +6095,17 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
 ; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_2i8_to_2i32:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    paddd %xmm0, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_2i8_to_2i32:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax

Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=347175&r1=347174&r2=347175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Sun Nov 18 13:28:47 2018
@@ -7,7 +7,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
 ;
-; Just one 32-bit run to make sure we do reasonable things there.
+; Just two 32-bit runs to make sure we do reasonable things there.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
 
 define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
@@ -33,6 +34,12 @@ define <8 x i16> @sext_16i8_to_8i16(<16
 ; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm0
@@ -88,6 +95,15 @@ define <16 x i16> @sext_16i8_to_16i16(<1
 ; AVX512-NEXT:    vpmovsxbw %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm2
@@ -178,6 +194,21 @@ define <32 x i16> @sext_32i8_to_32i16(<3
 ; AVX512BW-NEXT:    vpmovsxbw %ymm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_32i8_to_32i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT:    psraw $8, %xmm2
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    psraw $8, %xmm3
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_32i8_to_32i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbw %xmm0, %xmm5
@@ -219,6 +250,13 @@ define <4 x i32> @sext_16i8_to_4i32(<16
 ; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
@@ -276,6 +314,16 @@ define <8 x i32> @sext_16i8_to_8i32(<16
 ; AVX512-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm2
@@ -361,6 +409,22 @@ define <16 x i32> @sext_16i8_to_16i32(<1
 ; AVX512-NEXT:    vpmovsxbd %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_16i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm4
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $24, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_16i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbd %xmm0, %xmm4
@@ -408,6 +472,16 @@ define <2 x i64> @sext_16i8_to_2i64(<16
 ; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm0
@@ -479,6 +553,23 @@ define <4 x i64> @sext_16i8_to_4i64(<16
 ; AVX512-NEXT:    vpmovsxbq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm2
@@ -593,6 +684,36 @@ define <8 x i64> @sext_16i8_to_8i64(<16
 ; AVX512-NEXT:    vpmovsxbq %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i8_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i8_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxbq %xmm0, %xmm4
@@ -634,6 +755,12 @@ define <4 x i32> @sext_8i16_to_4i32(<8 x
 ; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm0
@@ -689,6 +816,15 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x
 ; AVX512-NEXT:    vpmovsxwd %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm2
@@ -769,6 +905,20 @@ define <16 x i32> @sext_16i16_to_16i32(<
 ; AVX512-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_16i16_to_16i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm4
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm5
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT:    psrad $16, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm5, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_16i16_to_16i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwd %xmm0, %xmm5
@@ -814,6 +964,15 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
 ; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm0
@@ -881,6 +1040,21 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
 ; AVX512-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm2
@@ -986,6 +1160,32 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
 ; AVX512-NEXT:    vpmovsxwq %xmm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i16_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    psrad $16, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT:    movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i16_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxwq %xmm0, %xmm4
@@ -1027,6 +1227,13 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
 ; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i32_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i32_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm0
@@ -1086,6 +1293,17 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i32_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i32_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm2
@@ -1176,6 +1394,25 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
 ; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_8i32_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm4
+; X32-SSE2-NEXT:    psrad $31, %xmm4
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT:    psrad $31, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT:    movdqa %xmm3, %xmm4
+; X32-SSE2-NEXT:    psrad $31, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_8i32_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pmovsxdq %xmm0, %xmm5
@@ -1250,6 +1487,22 @@ define <2 x i64> @load_sext_2i1_to_2i64(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1305,6 +1558,19 @@ define <2 x i64> @load_sext_2i8_to_2i64(
 ; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i8_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i8_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1441,6 +1707,30 @@ define <4 x i32> @load_sext_4i1_to_4i32(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $28, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1494,6 +1784,15 @@ define <4 x i32> @load_sext_4i8_to_4i32(
 ; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1644,6 +1943,33 @@ define <4 x i64> @load_sext_4i1_to_4i64(
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-SSE2-NEXT:    pand {{\.LCPI.*}}, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; X32-SSE2-NEXT:    psllq $63, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; X32-SSE2-NEXT:    psllq $63, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1727,6 +2053,33 @@ define <4 x i64> @load_sext_4i8_to_4i64(
 ; AVX512-NEXT:    vpmovsxbq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -1784,6 +2137,22 @@ define <2 x i64> @load_sext_4i8_to_4i64_
 ; AVX512-NEXT:    vzeroupper
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2004,6 +2373,49 @@ define <8 x i16> @load_sext_8i1_to_8i16(
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $25, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $26, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $27, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $28, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    shll $31, %eax
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2070,6 +2482,14 @@ define <8 x i16> @load_sext_8i8_to_8i16(
 ; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2163,6 +2583,55 @@ define <8 x i64> @load_sext_8i8_to_8i64(
 ; AVX512-NEXT:    vpmovsxbq (%rdi), %zmm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movsbl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movsbl 2(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    movsbl 5(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT:    movsbl 4(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-SSE2-NEXT:    movsbl 7(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm4
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X32-SSE2-NEXT:    movsbl 6(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm5
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2398,6 +2867,53 @@ define <8 x i32> @load_sext_8i1_to_8i32(
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzbl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $6, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $5, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $4, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    andl $1, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    pslld $31, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT:    pslld $31, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2494,6 +3010,19 @@ define <8 x i32> @load_sext_8i8_to_8i32(
 ; AVX512-NEXT:    vpmovsxbd (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i8_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -2909,6 +3438,97 @@ define <16 x i8> @load_sext_16i1_to_16i8
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shrl $15, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shll $17, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    shll $18, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm1
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    shll $19, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shll $20, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    shll $21, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm6
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    shll $22, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shll $23, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm5
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $28, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movsbl %al, %eax
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X32-SSE2-NEXT:    shll $30, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X32-SSE2-NEXT:    shll $31, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $26, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT:    shll $27, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $25, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm1
+; X32-SSE2-NEXT:    shrl $7, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -3434,6 +4054,93 @@ define <16 x i16> @load_sext_16i1_to_16i
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $15, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $14, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $13, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $12, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $11, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $10, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $9, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $8, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $7, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $6, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $5, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $4, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $3, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    shrl $2, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    andl $1, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    shrl %eax
+; X32-SSE2-NEXT:    andl $1, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm4
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psllw $15, %xmm0
+; X32-SSE2-NEXT:    psraw $15, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    psllw $15, %xmm1
+; X32-SSE2-NEXT:    psraw $15, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4289,6 +4996,179 @@ define <32 x i8> @load_sext_32i1_to_32i8
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    pushl %ebx
+; X32-SSE2-NEXT:    pushl %edi
+; X32-SSE2-NEXT:    pushl %esi
+; X32-SSE2-NEXT:    subl $28, %esp
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl (%eax), %edx
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shrl $15, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shll $17, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm4
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    shll $18, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm1
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    shll $19, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm2
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    shll $20, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm5
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    shll $21, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm6
+; X32-SSE2-NEXT:    movl %edx, %ecx
+; X32-SSE2-NEXT:    shll $22, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %edx, %ebp
+; X32-SSE2-NEXT:    shll $23, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm3
+; X32-SSE2-NEXT:    movl %edx, %esi
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; X32-SSE2-NEXT:    shll $28, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm0
+; X32-SSE2-NEXT:    movl %edx, %edi
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-SSE2-NEXT:    shll $29, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm1
+; X32-SSE2-NEXT:    movl %edx, %ebx
+; X32-SSE2-NEXT:    movsbl %dl, %edx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT:    shll $30, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    shll $31, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $26, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm7
+; X32-SSE2-NEXT:    shll $27, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm2
+; X32-SSE2-NEXT:    shll $25, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm6
+; X32-SSE2-NEXT:    shrl $7, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm5
+; X32-SSE2-NEXT:    movswl 2(%eax), %eax
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    shrl $15, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm4
+; X32-SSE2-NEXT:    movdqu %xmm4, (%esp) # 16-byte Spill
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT:    shll $17, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm4
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $18, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT:    shll $19, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm5
+; X32-SSE2-NEXT:    movl %eax, %ecx
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; X32-SSE2-NEXT:    shll $20, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm6
+; X32-SSE2-NEXT:    movl %eax, %esi
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    shll $21, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm1
+; X32-SSE2-NEXT:    movl %eax, %edi
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; X32-SSE2-NEXT:    shll $22, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm3
+; X32-SSE2-NEXT:    movl %eax, %ebx
+; X32-SSE2-NEXT:    movdqu (%esp), %xmm2 # 16-byte Reload
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; X32-SSE2-NEXT:    shll $23, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm2
+; X32-SSE2-NEXT:    movl %eax, %edx
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $28, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm7
+; X32-SSE2-NEXT:    movl %eax, %ebp
+; X32-SSE2-NEXT:    movsbl %al, %eax
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT:    shll $29, %ecx
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm3
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT:    shll $30, %esi
+; X32-SSE2-NEXT:    sarl $31, %esi
+; X32-SSE2-NEXT:    movd %esi, %xmm4
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT:    shll $31, %edi
+; X32-SSE2-NEXT:    sarl $31, %edi
+; X32-SSE2-NEXT:    movd %edi, %xmm1
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT:    shll $26, %ebx
+; X32-SSE2-NEXT:    sarl $31, %ebx
+; X32-SSE2-NEXT:    movd %ebx, %xmm5
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT:    shll $27, %edx
+; X32-SSE2-NEXT:    sarl $31, %edx
+; X32-SSE2-NEXT:    movd %edx, %xmm3
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; X32-SSE2-NEXT:    shll $25, %ebp
+; X32-SSE2-NEXT:    sarl $31, %ebp
+; X32-SSE2-NEXT:    movd %ebp, %xmm4
+; X32-SSE2-NEXT:    shrl $7, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm5
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    addl $28, %esp
+; X32-SSE2-NEXT:    popl %esi
+; X32-SSE2-NEXT:    popl %edi
+; X32-SSE2-NEXT:    popl %ebx
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pushl %esi
@@ -4469,6 +5349,17 @@ define <16 x i16> @load_sext_16i8_to_16i
 ; AVX512-NEXT:    vpmovsxbw (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_16i8_to_16i16:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_16i8_to_16i16:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4512,6 +5403,17 @@ define <2 x i64> @load_sext_2i16_to_2i64
 ; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i16_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i16_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4548,6 +5450,14 @@ define <4 x i32> @load_sext_4i16_to_4i32
 ; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i16_to_4i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4613,6 +5523,33 @@ define <4 x i64> @load_sext_4i16_to_4i64
 ; AVX512-NEXT:    vpmovsxwq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movswl 2(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT:    movswl (%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm0
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT:    movswl 6(%eax), %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm2
+; X32-SSE2-NEXT:    sarl $31, %ecx
+; X32-SSE2-NEXT:    movd %ecx, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT:    movswl 4(%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm1
+; X32-SSE2-NEXT:    sarl $31, %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm3
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4669,6 +5606,17 @@ define <8 x i32> @load_sext_8i16_to_8i32
 ; AVX512-NEXT:    vpmovsxwd (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_8i16_to_8i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm0
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $16, %xmm1
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4708,6 +5656,15 @@ define <2 x i64> @load_sext_2i32_to_2i64
 ; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_2i32_to_2i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT:    psrad $31, %xmm1
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4767,6 +5724,19 @@ define <4 x i64> @load_sext_4i32_to_4i64
 ; AVX512-NEXT:    vpmovsxdq (%rdi), %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: load_sext_4i32_to_4i64:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movdqa (%eax), %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -4806,6 +5776,17 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_2i8_to_i32:
+; X32-SSE2:       # %bb.0: # %entry
+; X32-SSE2-NEXT:    pushl %eax
+; X32-SSE2-NEXT:    .cfi_def_cfa_offset 8
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    psraw $8, %xmm0
+; X32-SSE2-NEXT:    movd %xmm0, %eax
+; X32-SSE2-NEXT:    popl %ecx
+; X32-SSE2-NEXT:    .cfi_def_cfa_offset 4
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_2i8_to_i32:
 ; X32-SSE41:       # %bb.0: # %entry
 ; X32-SSE41-NEXT:    pushl %eax
@@ -4883,6 +5864,19 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i1_to_4i64:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pslld $31, %xmm0
+; X32-SSE2-NEXT:    psrad $31, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i1_to_4i64:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pslld $31, %xmm0
@@ -4957,6 +5951,19 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
 ; AVX512-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX512-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_4i8_to_4i64:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pslld $24, %xmm0
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT:    psrad $31, %xmm2
+; X32-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_4i8_to_4i64:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pslld $24, %xmm0
@@ -5023,6 +6030,24 @@ define <32 x i8> @sext_32xi1_to_32xi8(<3
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512BW-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    pushl %ebp
+; X32-SSE2-NEXT:    movl %esp, %ebp
+; X32-SSE2-NEXT:    andl $-16, %esp
+; X32-SSE2-NEXT:    subl $16, %esp
+; X32-SSE2-NEXT:    movdqa 8(%ebp), %xmm3
+; X32-SSE2-NEXT:    pcmpeqw 40(%ebp), %xmm1
+; X32-SSE2-NEXT:    pcmpeqw 24(%ebp), %xmm0
+; X32-SSE2-NEXT:    packsswb %xmm1, %xmm0
+; X32-SSE2-NEXT:    pcmpeqw 72(%ebp), %xmm3
+; X32-SSE2-NEXT:    pcmpeqw 56(%ebp), %xmm2
+; X32-SSE2-NEXT:    packsswb %xmm3, %xmm2
+; X32-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT:    movl %ebp, %esp
+; X32-SSE2-NEXT:    popl %ebp
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    pushl %ebp
@@ -5080,6 +6105,18 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
 ; AVX-NEXT:    vpaddq %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; X32-SSE2-LABEL: sext_2i8_to_2i32:
+; X32-SSE2:       # %bb.0:
+; X32-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT:    movzwl (%eax), %eax
+; X32-SSE2-NEXT:    movd %eax, %xmm0
+; X32-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT:    psrad $24, %xmm0
+; X32-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-SSE2-NEXT:    paddq %xmm0, %xmm0
+; X32-SSE2-NEXT:    retl
+;
 ; X32-SSE41-LABEL: sext_2i8_to_2i32:
 ; X32-SSE41:       # %bb.0:
 ; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax




More information about the llvm-commits mailing list