[llvm] r347175 - [X86] Add a 32-bit command line with only sse2 to vector-sext-widen.ll and vector-sext.ll to show some of the scalarized load sequences without 64-bit scalar support.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 18 13:28:47 PST 2018
Author: ctopper
Date: Sun Nov 18 13:28:47 2018
New Revision: 347175
URL: http://llvm.org/viewvc/llvm-project?rev=347175&view=rev
Log:
[X86] Add a 32-bit command line with only sse2 to vector-sext-widen.ll and vector-sext.ll to show some of the scalarized load sequences without 64-bit scalar support.
Some of these sequences look pretty bad since we have to copy the sign bit from a 32-bit register to a 64-bit register to finish a sign extension.
Modified:
llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
llvm/trunk/test/CodeGen/X86/vector-sext.ll
Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=347175&r1=347174&r2=347175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Sun Nov 18 13:28:47 2018
@@ -7,7 +7,8 @@
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
;
-; Just one 32-bit run to make sure we do reasonable things there.
+; Just two 32-bit runs to make sure we do reasonable things there.
+; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
; RUN: llc < %s -x86-experimental-vector-widening-legalization -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
@@ -33,6 +34,12 @@ define <8 x i16> @sext_16i8_to_8i16(<16
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
@@ -88,6 +95,15 @@ define <16 x i16> @sext_16i8_to_16i16(<1
; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2
@@ -178,6 +194,21 @@ define <32 x i16> @sext_32i8_to_32i16(<3
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: sext_32i8_to_32i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT: psraw $8, %xmm2
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm3
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_32i8_to_32i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm5
@@ -219,6 +250,13 @@ define <4 x i32> @sext_16i8_to_4i32(<16
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
@@ -276,6 +314,16 @@ define <8 x i32> @sext_16i8_to_8i32(<16
; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2
@@ -361,6 +409,22 @@ define <16 x i32> @sext_16i8_to_16i32(<1
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_16i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
+; X32-SSE2-NEXT: psrad $24, %xmm4
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $24, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_16i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm4
@@ -408,6 +472,16 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0
@@ -479,6 +553,23 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
@@ -589,6 +680,34 @@ define <8 x i64> @sext_16i8_to_8i64(<16
; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm4
@@ -630,6 +749,12 @@ define <4 x i32> @sext_8i16_to_4i32(<8 x
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0
@@ -685,6 +810,15 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
@@ -765,6 +899,20 @@ define <16 x i32> @sext_16i16_to_16i32(<
; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i16_to_16i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $16, %xmm4
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $16, %xmm5
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT: psrad $16, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm5, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i16_to_16i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm5
@@ -810,6 +958,15 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0
@@ -877,6 +1034,21 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2
@@ -982,6 +1154,32 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $16, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm4
@@ -1023,6 +1221,13 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
; AVX-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i32_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i32_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0
@@ -1082,6 +1287,17 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i32_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i32_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
@@ -1172,6 +1388,25 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i32_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i32_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm5
@@ -1246,6 +1481,22 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1301,6 +1552,19 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i8_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i8_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1437,6 +1701,30 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $28, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1490,6 +1778,15 @@ define <4 x i32> @load_sext_4i8_to_4i32(
; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1640,6 +1937,33 @@ define <4 x i64> @load_sext_4i1_to_4i64(
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; X32-SSE2-NEXT: psllq $63, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; X32-SSE2-NEXT: psllq $63, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1723,6 +2047,33 @@ define <4 x i64> @load_sext_4i8_to_4i64(
; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm1
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1780,6 +2131,22 @@ define <2 x i64> @load_sext_4i8_to_4i64_
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2000,6 +2367,49 @@ define <8 x i16> @load_sext_8i1_to_8i16(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $25, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $26, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $27, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $28, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2066,6 +2476,14 @@ define <8 x i16> @load_sext_8i8_to_8i16(
; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2159,6 +2577,55 @@ define <8 x i64> @load_sext_8i8_to_8i64(
; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: movsbl 5(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT: movsbl 4(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-SSE2-NEXT: movsbl 7(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm4
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X32-SSE2-NEXT: movsbl 6(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm5
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2394,6 +2861,53 @@ define <8 x i32> @load_sext_8i1_to_8i32(
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $6, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $5, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $4, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: andl $1, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: pslld $31, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2490,6 +3004,19 @@ define <8 x i32> @load_sext_8i8_to_8i32(
; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2905,6 +3432,97 @@ define <16 x i8> @load_sext_16i1_to_16i8
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shrl $15, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm2
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shll $17, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: shll $18, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm1
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: shll $19, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $20, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: shll $21, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm6
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: shll $22, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shll $23, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm5
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT: shll $28, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm2
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movsbl %al, %eax
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X32-SSE2-NEXT: shll $30, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X32-SSE2-NEXT: shll $31, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT: shll $26, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT: shll $27, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-SSE2-NEXT: shll $25, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm1
+; X32-SSE2-NEXT: shrl $7, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -3430,6 +4048,93 @@ define <16 x i16> @load_sext_16i1_to_16i
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $15, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $14, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $13, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $12, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $11, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $10, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $9, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $8, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $6, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $5, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $4, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: andl $1, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm4
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psllw $15, %xmm0
+; X32-SSE2-NEXT: psraw $15, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psllw $15, %xmm1
+; X32-SSE2-NEXT: psraw $15, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4285,6 +4990,179 @@ define <32 x i8> @load_sext_32i1_to_32i8
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: subl $28, %esp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl (%eax), %edx
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shrl $15, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll $17, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm4
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: shll $18, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm1
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: shll $19, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm2
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: shll $20, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm5
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: shll $21, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm6
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll $22, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: shll $23, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm3
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; X32-SSE2-NEXT: shll $28, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm0
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-SSE2-NEXT: shll $29, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm1
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: movsbl %dl, %edx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: shll $31, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT: shll $26, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm7
+; X32-SSE2-NEXT: shll $27, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm2
+; X32-SSE2-NEXT: shll $25, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm6
+; X32-SSE2-NEXT: shrl $7, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm5
+; X32-SSE2-NEXT: movswl 2(%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shrl $15, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm4
+; X32-SSE2-NEXT: movdqu %xmm4, (%esp) # 16-byte Spill
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT: shll $17, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm4
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
+; X32-SSE2-NEXT: shll $18, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT: shll $19, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm5
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; X32-SSE2-NEXT: shll $20, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm6
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: shll $21, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm1
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; X32-SSE2-NEXT: shll $22, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm3
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: movdqu (%esp), %xmm2 # 16-byte Reload
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; X32-SSE2-NEXT: shll $23, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm2
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT: shll $28, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movsbl %al, %eax
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: shll $30, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: shll $31, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT: shll $26, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm5
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT: shll $27, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; X32-SSE2-NEXT: shll $25, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm4
+; X32-SSE2-NEXT: shrl $7, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm5
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: addl $28, %esp
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pushl %esi
@@ -4465,6 +5343,17 @@ define <16 x i16> @load_sext_16i8_to_16i
; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i8_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i8_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4508,6 +5397,17 @@ define <2 x i64> @load_sext_2i16_to_2i64
; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i16_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i16_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4544,6 +5444,14 @@ define <4 x i32> @load_sext_4i16_to_4i32
; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i16_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4609,6 +5517,33 @@ define <4 x i64> @load_sext_4i16_to_4i64
; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl 2(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movswl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movswl 6(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movswl 4(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm1
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4665,6 +5600,17 @@ define <8 x i32> @load_sext_8i16_to_8i32
; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i16_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4704,6 +5650,15 @@ define <2 x i64> @load_sext_2i32_to_2i64
; AVX-NEXT: vpmovsxdq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i32_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4763,6 +5718,19 @@ define <4 x i64> @load_sext_4i32_to_4i64
; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i32_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movdqa (%eax), %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4802,6 +5770,13 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_2i8_to_i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: movd %xmm0, %eax
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_2i8_to_i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
@@ -4875,6 +5850,19 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i1_to_4i64:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i1_to_4i64:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pslld $31, %xmm0
@@ -4949,6 +5937,23 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i8_to_4i64:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i8_to_4i64:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
@@ -5013,6 +6018,24 @@ define <32 x i8> @sext_32xi1_to_32xi8(<3
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: movl %esp, %ebp
+; X32-SSE2-NEXT: andl $-16, %esp
+; X32-SSE2-NEXT: subl $16, %esp
+; X32-SSE2-NEXT: movdqa 8(%ebp), %xmm3
+; X32-SSE2-NEXT: pcmpeqw 40(%ebp), %xmm1
+; X32-SSE2-NEXT: pcmpeqw 24(%ebp), %xmm0
+; X32-SSE2-NEXT: packsswb %xmm1, %xmm0
+; X32-SSE2-NEXT: pcmpeqw 72(%ebp), %xmm3
+; X32-SSE2-NEXT: pcmpeqw 56(%ebp), %xmm2
+; X32-SSE2-NEXT: packsswb %xmm3, %xmm2
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: movl %ebp, %esp
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pushl %ebp
@@ -5072,6 +6095,17 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
; AVX-NEXT: vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_2i8_to_2i32:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: paddd %xmm0, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_2i8_to_2i32:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=347175&r1=347174&r2=347175&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Sun Nov 18 13:28:47 2018
@@ -7,7 +7,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
;
-; Just one 32-bit run to make sure we do reasonable things there.
+; Just two 32-bit runs to make sure we do reasonable things there.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
@@ -33,6 +34,12 @@ define <8 x i16> @sext_16i8_to_8i16(<16
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
@@ -88,6 +95,15 @@ define <16 x i16> @sext_16i8_to_16i16(<1
; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2
@@ -178,6 +194,21 @@ define <32 x i16> @sext_32i8_to_32i16(<3
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: sext_32i8_to_32i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT: psraw $8, %xmm2
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: psraw $8, %xmm3
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_32i8_to_32i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm5
@@ -219,6 +250,13 @@ define <4 x i32> @sext_16i8_to_4i32(<16
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
@@ -276,6 +314,16 @@ define <8 x i32> @sext_16i8_to_8i32(<16
; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2
@@ -361,6 +409,22 @@ define <16 x i32> @sext_16i8_to_16i32(<1
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_16i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
+; X32-SSE2-NEXT: psrad $24, %xmm4
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $24, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_16i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm4
@@ -408,6 +472,16 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0
@@ -479,6 +553,23 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
@@ -593,6 +684,36 @@ define <8 x i64> @sext_16i8_to_8i64(<16
; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i8_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i8_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm4
@@ -634,6 +755,12 @@ define <4 x i32> @sext_8i16_to_4i32(<8 x
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0
@@ -689,6 +816,15 @@ define <8 x i32> @sext_8i16_to_8i32(<8 x
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
@@ -769,6 +905,20 @@ define <16 x i32> @sext_16i16_to_16i32(<
; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_16i16_to_16i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: psrad $16, %xmm4
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; X32-SSE2-NEXT: psrad $16, %xmm5
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT: psrad $16, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm5, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_16i16_to_16i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm5
@@ -814,6 +964,15 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0
@@ -881,6 +1040,21 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2
@@ -986,6 +1160,32 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i16_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: psrad $16, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i16_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm4
@@ -1027,6 +1227,13 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
; AVX-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i32_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i32_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0
@@ -1086,6 +1293,17 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i32_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i32_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
@@ -1176,6 +1394,25 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_8i32_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
+; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
+; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT: movdqa %xmm3, %xmm4
+; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_8i32_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm5
@@ -1250,6 +1487,22 @@ define <2 x i64> @load_sext_2i1_to_2i64(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i1_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i1_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1305,6 +1558,19 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; AVX-NEXT: vpmovsxbq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i8_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i8_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1441,6 +1707,30 @@ define <4 x i32> @load_sext_4i1_to_4i32(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $28, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i1_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1494,6 +1784,15 @@ define <4 x i32> @load_sext_4i8_to_4i32(
; AVX-NEXT: vpmovsxbd (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1644,6 +1943,33 @@ define <4 x i64> @load_sext_4i1_to_4i64(
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i1_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
+; X32-SSE2-NEXT: psllq $63, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
+; X32-SSE2-NEXT: psllq $63, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i1_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1727,6 +2053,33 @@ define <4 x i64> @load_sext_4i8_to_4i64(
; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm1
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1784,6 +2137,22 @@ define <2 x i64> @load_sext_4i8_to_4i64_
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i8_to_4i64_extract:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i8_to_4i64_extract:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2004,6 +2373,49 @@ define <8 x i16> @load_sext_8i1_to_8i16(
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $25, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $26, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $27, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $28, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: shll $31, %eax
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i1_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2070,6 +2482,14 @@ define <8 x i16> @load_sext_8i8_to_8i16(
; AVX-NEXT: vpmovsxbw (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2163,6 +2583,55 @@ define <8 x i64> @load_sext_8i8_to_8i64(
; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movsbl 1(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movsbl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movsbl 3(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movsbl 2(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: movsbl 5(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT: movsbl 4(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X32-SSE2-NEXT: movsbl 7(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm4
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; X32-SSE2-NEXT: movsbl 6(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm5
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2398,6 +2867,53 @@ define <8 x i32> @load_sext_8i1_to_8i32(
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i1_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzbl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $6, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $5, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $4, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: andl $1, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X32-SSE2-NEXT: pslld $31, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i1_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2494,6 +3010,19 @@ define <8 x i32> @load_sext_8i8_to_8i32(
; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i8_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i8_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -2909,6 +3438,97 @@ define <16 x i8> @load_sext_16i1_to_16i8
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i8:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shrl $15, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm2
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shll $17, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: shll $18, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm1
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: shll $19, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shll $20, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: shll $21, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm6
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: shll $22, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shll $23, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm5
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT: shll $28, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm2
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movsbl %al, %eax
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
+; X32-SSE2-NEXT: shll $30, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; X32-SSE2-NEXT: shll $31, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT: shll $26, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT: shll $27, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; X32-SSE2-NEXT: shll $25, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm1
+; X32-SSE2-NEXT: shrl $7, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i1_to_16i8:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -3434,6 +4054,93 @@ define <16 x i16> @load_sext_16i1_to_16i
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i1_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $15, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $14, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $13, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $12, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $11, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $10, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $9, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $8, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $7, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $6, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $5, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $4, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $3, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: shrl $2, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: andl $1, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: shrl %eax
+; X32-SSE2-NEXT: andl $1, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm4
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psllw $15, %xmm0
+; X32-SSE2-NEXT: psraw $15, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: psllw $15, %xmm1
+; X32-SSE2-NEXT: psraw $15, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i1_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4289,6 +4996,179 @@ define <32 x i8> @load_sext_32i1_to_32i8
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_32i1_to_32i8:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: pushl %ebx
+; X32-SSE2-NEXT: pushl %edi
+; X32-SSE2-NEXT: pushl %esi
+; X32-SSE2-NEXT: subl $28, %esp
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl (%eax), %edx
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shrl $15, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll $17, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm4
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: shll $18, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm1
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: shll $19, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm2
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: shll $20, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm5
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: shll $21, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm6
+; X32-SSE2-NEXT: movl %edx, %ecx
+; X32-SSE2-NEXT: shll $22, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %edx, %ebp
+; X32-SSE2-NEXT: shll $23, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm3
+; X32-SSE2-NEXT: movl %edx, %esi
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
+; X32-SSE2-NEXT: shll $28, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm0
+; X32-SSE2-NEXT: movl %edx, %edi
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X32-SSE2-NEXT: shll $29, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm1
+; X32-SSE2-NEXT: movl %edx, %ebx
+; X32-SSE2-NEXT: movsbl %dl, %edx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: shll $30, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: shll $31, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X32-SSE2-NEXT: shll $26, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm7
+; X32-SSE2-NEXT: shll $27, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm2
+; X32-SSE2-NEXT: shll $25, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm6
+; X32-SSE2-NEXT: shrl $7, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm5
+; X32-SSE2-NEXT: movswl 2(%eax), %eax
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: shrl $15, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm4
+; X32-SSE2-NEXT: movdqu %xmm4, (%esp) # 16-byte Spill
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT: shll $17, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm4
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3],xmm2[4],xmm7[4],xmm2[5],xmm7[5],xmm2[6],xmm7[6],xmm2[7],xmm7[7]
+; X32-SSE2-NEXT: shll $18, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
+; X32-SSE2-NEXT: shll $19, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm5
+; X32-SSE2-NEXT: movl %eax, %ecx
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3]
+; X32-SSE2-NEXT: shll $20, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm6
+; X32-SSE2-NEXT: movl %eax, %esi
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: shll $21, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm1
+; X32-SSE2-NEXT: movl %eax, %edi
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; X32-SSE2-NEXT: shll $22, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm3
+; X32-SSE2-NEXT: movl %eax, %ebx
+; X32-SSE2-NEXT: movdqu (%esp), %xmm2 # 16-byte Reload
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; X32-SSE2-NEXT: shll $23, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm2
+; X32-SSE2-NEXT: movl %eax, %edx
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
+; X32-SSE2-NEXT: shll $28, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm7
+; X32-SSE2-NEXT: movl %eax, %ebp
+; X32-SSE2-NEXT: movsbl %al, %eax
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X32-SSE2-NEXT: shll $29, %ecx
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm3
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
+; X32-SSE2-NEXT: shll $30, %esi
+; X32-SSE2-NEXT: sarl $31, %esi
+; X32-SSE2-NEXT: movd %esi, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: shll $31, %edi
+; X32-SSE2-NEXT: sarl $31, %edi
+; X32-SSE2-NEXT: movd %edi, %xmm1
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1],xmm3[2],xmm7[2],xmm3[3],xmm7[3],xmm3[4],xmm7[4],xmm3[5],xmm7[5],xmm3[6],xmm7[6],xmm3[7],xmm7[7]
+; X32-SSE2-NEXT: shll $26, %ebx
+; X32-SSE2-NEXT: sarl $31, %ebx
+; X32-SSE2-NEXT: movd %ebx, %xmm5
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
+; X32-SSE2-NEXT: shll $27, %edx
+; X32-SSE2-NEXT: sarl $31, %edx
+; X32-SSE2-NEXT: movd %edx, %xmm3
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
+; X32-SSE2-NEXT: shll $25, %ebp
+; X32-SSE2-NEXT: sarl $31, %ebp
+; X32-SSE2-NEXT: movd %ebp, %xmm4
+; X32-SSE2-NEXT: shrl $7, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm5
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: addl $28, %esp
+; X32-SSE2-NEXT: popl %esi
+; X32-SSE2-NEXT: popl %edi
+; X32-SSE2-NEXT: popl %ebx
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_32i1_to_32i8:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pushl %esi
@@ -4469,6 +5349,17 @@ define <16 x i16> @load_sext_16i8_to_16i
; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_16i8_to_16i16:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_16i8_to_16i16:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4512,6 +5403,17 @@ define <2 x i64> @load_sext_2i16_to_2i64
; AVX-NEXT: vpmovsxwq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i16_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i16_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4548,6 +5450,14 @@ define <4 x i32> @load_sext_4i16_to_4i32
; AVX-NEXT: vpmovsxwd (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i16_to_4i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4613,6 +5523,33 @@ define <4 x i64> @load_sext_4i16_to_4i64
; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movswl 2(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: movswl (%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm0
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X32-SSE2-NEXT: movswl 6(%eax), %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm2
+; X32-SSE2-NEXT: sarl $31, %ecx
+; X32-SSE2-NEXT: movd %ecx, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X32-SSE2-NEXT: movswl 4(%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm1
+; X32-SSE2-NEXT: sarl $31, %eax
+; X32-SSE2-NEXT: movd %eax, %xmm3
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; X32-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4669,6 +5606,17 @@ define <8 x i32> @load_sext_8i16_to_8i32
; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_8i16_to_8i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm0
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $16, %xmm1
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_8i16_to_8i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4708,6 +5656,15 @@ define <2 x i64> @load_sext_2i32_to_2i64
; AVX-NEXT: vpmovsxdq (%rdi), %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_2i32_to_2i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_2i32_to_2i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4767,6 +5724,19 @@ define <4 x i64> @load_sext_4i32_to_4i64
; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: load_sext_4i32_to_4i64:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movdqa (%eax), %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: load_sext_4i32_to_4i64:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -4806,6 +5776,17 @@ define i32 @sext_2i8_to_i32(<16 x i8> %A
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_2i8_to_i32:
+; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: pushl %eax
+; X32-SSE2-NEXT: .cfi_def_cfa_offset 8
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: psraw $8, %xmm0
+; X32-SSE2-NEXT: movd %xmm0, %eax
+; X32-SSE2-NEXT: popl %ecx
+; X32-SSE2-NEXT: .cfi_def_cfa_offset 4
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_2i8_to_i32:
; X32-SSE41: # %bb.0: # %entry
; X32-SSE41-NEXT: pushl %eax
@@ -4883,6 +5864,19 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i1_to_4i64:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pslld $31, %xmm0
+; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i1_to_4i64:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pslld $31, %xmm0
@@ -4957,6 +5951,19 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
+; X32-SSE2-LABEL: sext_4i8_to_4i64:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pslld $24, %xmm0
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
+; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_4i8_to_4i64:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pslld $24, %xmm0
@@ -5023,6 +6030,24 @@ define <32 x i8> @sext_32xi1_to_32xi8(<3
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT: retq
;
+; X32-SSE2-LABEL: sext_32xi1_to_32xi8:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: pushl %ebp
+; X32-SSE2-NEXT: movl %esp, %ebp
+; X32-SSE2-NEXT: andl $-16, %esp
+; X32-SSE2-NEXT: subl $16, %esp
+; X32-SSE2-NEXT: movdqa 8(%ebp), %xmm3
+; X32-SSE2-NEXT: pcmpeqw 40(%ebp), %xmm1
+; X32-SSE2-NEXT: pcmpeqw 24(%ebp), %xmm0
+; X32-SSE2-NEXT: packsswb %xmm1, %xmm0
+; X32-SSE2-NEXT: pcmpeqw 72(%ebp), %xmm3
+; X32-SSE2-NEXT: pcmpeqw 56(%ebp), %xmm2
+; X32-SSE2-NEXT: packsswb %xmm3, %xmm2
+; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
+; X32-SSE2-NEXT: movl %ebp, %esp
+; X32-SSE2-NEXT: popl %ebp
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: pushl %ebp
@@ -5080,6 +6105,18 @@ define <2 x i32> @sext_2i8_to_2i32(<2 x
; AVX-NEXT: vpaddq %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
+; X32-SSE2-LABEL: sext_2i8_to_2i32:
+; X32-SSE2: # %bb.0:
+; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-SSE2-NEXT: movzwl (%eax), %eax
+; X32-SSE2-NEXT: movd %eax, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-SSE2-NEXT: paddq %xmm0, %xmm0
+; X32-SSE2-NEXT: retl
+;
; X32-SSE41-LABEL: sext_2i8_to_2i32:
; X32-SSE41: # %bb.0:
; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
More information about the llvm-commits
mailing list