[llvm] 45878ed - [X86] Regenerate vector-shift-*.ll tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 8 07:37:13 PST 2020


Author: Simon Pilgrim
Date: 2020-12-08T15:36:50Z
New Revision: 45878ede7e05b5e255eca3dee1d0b23830fc55b3

URL: https://github.com/llvm/llvm-project/commit/45878ede7e05b5e255eca3dee1d0b23830fc55b3
DIFF: https://github.com/llvm/llvm-project/commit/45878ede7e05b5e255eca3dee1d0b23830fc55b3.diff

LOG: [X86] Regenerate vector-shift-*.ll tests

Replace X32 check prefixes with X86 - X32 is generally used for gnux32 triple tests
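
For context, an illustrative contrast of the two conventions (the first RUN line is taken from the updated tests; the gnux32 RUN line is hypothetical and not part of this commit): X86 prefixes cover ordinary 32-bit i686 runs, while X32 is conventionally reserved for the x32 ILP32 ABI, i.e. gnux32 triples.

; 32-bit i686 run - check lines now use the X86 prefix
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; x32 ABI run (64-bit ISA, 32-bit pointers) - the kind of test an X32 prefix would denote
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE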

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
    llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
    llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
    llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
    llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
    llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
    llvm/test/CodeGen/X86/vector-shift-shl-128.ll
    llvm/test/CodeGen/X86/vector-shift-shl-256.ll
    llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
index 9d5b70d8048b..46d2d97ca86e 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -94,21 +94,21 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsravq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm3
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm2
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm3
-; X32-SSE-NEXT:    psrlq %xmm4, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
-; X32-SSE-NEXT:    xorpd %xmm2, %xmm0
-; X32-SSE-NEXT:    psubq %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm3
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    psrlq %xmm4, %xmm2
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm3
+; X86-SSE-NEXT:    psrlq %xmm4, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X86-SSE-NEXT:    xorpd %xmm2, %xmm0
+; X86-SSE-NEXT:    psubq %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, %b
   ret <2 x i64> %shift
 }
@@ -196,25 +196,25 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
-; X32-SSE-NEXT:    psrad %xmm2, %xmm3
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrad %xmm4, %xmm2
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X32-SSE-NEXT:    psrad %xmm3, %xmm4
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    psrad %xmm1, %xmm0
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
-; X32-SSE-NEXT:    movaps %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psrad %xmm2, %xmm3
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrad %xmm4, %xmm2
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
+; X86-SSE-NEXT:    psrad %xmm3, %xmm4
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    psrad %xmm1, %xmm0
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
+; X86-SSE-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i32> %a, %b
   ret <4 x i32> %shift
 }
@@ -354,40 +354,40 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i16> %a, %b
   ret <8 x i16> %shift
 }
@@ -559,63 +559,63 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $4, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $2, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm4
-; X32-SSE-NEXT:    pandn %xmm2, %xmm4
-; X32-SSE-NEXT:    psraw $1, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm4, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $4, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $2, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm4
+; X86-SSE-NEXT:    pandn %xmm2, %xmm4
+; X86-SSE-NEXT:    psraw $1, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm4, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <16 x i8> %a, %b
   ret <16 x i8> %shift
 }
@@ -672,14 +672,14 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsraq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
-; X32-SSE-NEXT:    pxor %xmm2, %xmm0
-; X32-SSE-NEXT:    psubq %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm0
+; X86-SSE-NEXT:    pxor %xmm2, %xmm0
+; X86-SSE-NEXT:    psubq %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
   %shift = ashr <2 x i64> %a, %splat
   ret <2 x i64> %shift
@@ -723,12 +723,12 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    psrad %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    psrad %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
   %shift = ashr <4 x i32> %a, %splat
   ret <4 x i32> %shift
@@ -772,12 +772,12 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psraw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psraw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = ashr <8 x i16> %a, %splat
   ret <8 x i16> %shift
@@ -898,23 +898,23 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    pxor %xmm2, %xmm0
-; X32-SSE-NEXT:    psubb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    pxor %xmm2, %xmm0
+; X86-SSE-NEXT:    psubb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = ashr <16 x i8> %a, %splat
   ret <16 x i8> %shift
@@ -984,16 +984,16 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsravq {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq $1, %xmm1
-; X32-SSE-NEXT:    psrlq $7, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-SSE-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
-; X32-SSE-NEXT:    xorpd %xmm1, %xmm0
-; X32-SSE-NEXT:    psubq %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlq $1, %xmm1
+; X86-SSE-NEXT:    psrlq $7, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    movapd {{.*#+}} xmm1 = [2.0E+0,7.2911220195563975E-304]
+; X86-SSE-NEXT:    xorpd %xmm1, %xmm0
+; X86-SSE-NEXT:    psubq %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, <i64 1, i64 7>
   ret <2 x i64> %shift
 }
@@ -1063,19 +1063,19 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrad $7, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrad $6, %xmm2
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrad $5, %xmm1
-; X32-SSE-NEXT:    psrad $4, %xmm0
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrad $7, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrad $6, %xmm2
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrad $5, %xmm1
+; X86-SSE-NEXT:    psrad $4, %xmm0
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
   ret <4 x i32> %shift
 }
@@ -1152,23 +1152,23 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $4, %xmm1
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; X32-SSE-NEXT:    movapd %xmm1, %xmm2
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3]
-; X32-SSE-NEXT:    psraw $2, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
-; X32-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
-; X32-SSE-NEXT:    movaps %xmm2, %xmm0
-; X32-SSE-NEXT:    andps %xmm1, %xmm0
-; X32-SSE-NEXT:    psraw $1, %xmm2
-; X32-SSE-NEXT:    andnps %xmm2, %xmm1
-; X32-SSE-NEXT:    orps %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $4, %xmm1
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; X86-SSE-NEXT:    movapd %xmm1, %xmm2
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[2,3]
+; X86-SSE-NEXT:    psraw $2, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
+; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0]
+; X86-SSE-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE-NEXT:    andps %xmm1, %xmm0
+; X86-SSE-NEXT:    psraw $1, %xmm2
+; X86-SSE-NEXT:    andnps %xmm2, %xmm1
+; X86-SSE-NEXT:    orps %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
 }
@@ -1250,19 +1250,19 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
-; X32-SSE-NEXT:    psraw $8, %xmm1
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    psrlw $8, %xmm1
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; X86-SSE-NEXT:    psraw $8, %xmm1
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT:    psrlw $8, %xmm1
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
 }
@@ -1322,15 +1322,15 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraq $7, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrad $7, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; X32-SSE-NEXT:    psrlq $7, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrad $7, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; X86-SSE-NEXT:    psrlq $7, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
@@ -1361,10 +1361,10 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrad $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrad $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrad $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
@@ -1395,10 +1395,10 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psraw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psraw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
@@ -1444,14 +1444,14 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-SSE-NEXT:    pxor %xmm1, %xmm0
-; X32-SSE-NEXT:    psubb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-SSE-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE-NEXT:    psubb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }

diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 293adea10ee2..a182335f06d1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -9,8 +9,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; 32-bit runs to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X32-AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X86-AVX1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2
 
 ;
 ; Variable Shifts
@@ -86,41 +86,41 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsravq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
-; X32-AVX1-NEXT:    # xmm3 = mem[0,0]
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
-; X32-AVX1-NEXT:    vpsrlq %xmm5, %xmm3, %xmm6
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm6, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm5, %xmm6, %xmm5
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm5[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; X32-AVX1-NEXT:    vpsrlq %xmm5, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlq %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT:    vpsrlvq %ymm1, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT:    # xmm3 = mem[0,0]
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm2[2,3,2,3]
+; X86-AVX1-NEXT:    vpsrlq %xmm5, %xmm3, %xmm6
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm6, %xmm2
+; X86-AVX1-NEXT:    vpsrlq %xmm5, %xmm6, %xmm5
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm5[4,5,6,7]
+; X86-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm4
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm5 = xmm1[2,3,2,3]
+; X86-AVX1-NEXT:    vpsrlq %xmm5, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlq %xmm5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; X86-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <4 x i64> %a, %b
   ret <4 x i64> %shift
 }
@@ -188,40 +188,40 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; X32-AVX1-NEXT:    vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vpsrad %xmm4, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm5
-; X32-AVX1-NEXT:    vpsrad %xmm5, %xmm2, %xmm5
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
-; X32-AVX1-NEXT:    vpsrad %xmm6, %xmm2, %xmm6
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
-; X32-AVX1-NEXT:    vpsrad %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
-; X32-AVX1-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vpsrad %xmm3, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm4
-; X32-AVX1-NEXT:    vpsrad %xmm4, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; X32-AVX1-NEXT:    vpsrad %xmm4, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; X86-AVX1-NEXT:    vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vpsrad %xmm4, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm5
+; X86-AVX1-NEXT:    vpsrad %xmm5, %xmm2, %xmm5
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; X86-AVX1-NEXT:    vpsrad %xmm6, %xmm2, %xmm6
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
+; X86-AVX1-NEXT:    vpsrad %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; X86-AVX1-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vpsrad %xmm3, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm4
+; X86-AVX1-NEXT:    vpsrad %xmm4, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; X86-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX1-NEXT:    vpsrad %xmm4, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <8 x i32> %a, %b
   ret <8 x i32> %shift
 }
@@ -329,54 +329,54 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; X32-AVX1-NEXT:    vpsraw $8, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X32-AVX1-NEXT:    vpsraw $4, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsraw $2, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsraw $1, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpsraw $8, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $4, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; X32-AVX2-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
-; X32-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; X32-AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; X86-AVX1-NEXT:    vpsraw $8, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpsraw $4, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsraw $2, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsraw $1, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
+; X86-AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
+; X86-AVX1-NEXT:    vpsraw $8, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $4, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $2, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; X86-AVX2-NEXT:    vpsravd %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; X86-AVX2-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <16 x i16> %a, %b
   ret <16 x i16> %shift
 }
@@ -561,88 +561,88 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vpsllw $5, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpsraw $4, %xmm5, %xmm6
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpsraw $2, %xmm5, %xmm6
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpsraw $1, %xmm5, %xmm6
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpsraw $4, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $2, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; X32-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; X32-AVX2-NEXT:    vpsraw $4, %ymm3, %ymm4
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; X32-AVX2-NEXT:    vpsraw $2, %ymm3, %ymm4
-; X32-AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; X32-AVX2-NEXT:    vpsraw $1, %ymm3, %ymm4
-; X32-AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; X32-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; X32-AVX2-NEXT:    vpsraw $4, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsraw $2, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsraw $1, %ymm0, %ymm3
-; X32-AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vpsllw $5, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm5 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpsraw $4, %xmm5, %xmm6
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpsraw $2, %xmm5, %xmm6
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpsraw $1, %xmm5, %xmm6
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm6, %xmm5, %xmm3
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpsraw $4, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpsraw $2, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpsraw $1, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm5, %xmm4, %xmm3
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpsraw $4, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $2, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; X86-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; X86-AVX2-NEXT:    vpsraw $4, %ymm3, %ymm4
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT:    vpsraw $2, %ymm3, %ymm4
+; X86-AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; X86-AVX2-NEXT:    vpsraw $1, %ymm3, %ymm4
+; X86-AVX2-NEXT:    vpaddw %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; X86-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; X86-AVX2-NEXT:    vpsraw $4, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsraw $2, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsraw $1, %ymm0, %ymm3
+; X86-AVX2-NEXT:    vpaddw %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <32 x i8> %a, %b
   ret <32 x i8> %shift
 }
@@ -707,28 +707,28 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsraq %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT:    vpsrlq %xmm1, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
   %shift = ashr <4 x i64> %a, %splat
   ret <4 x i64> %shift
@@ -777,20 +777,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrad %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
   %shift = ashr <8 x i32> %a, %splat
   ret <8 x i32> %shift
@@ -839,20 +839,20 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsraw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
   %shift = ashr <16 x i16> %a, %splat
   ret <16 x i16> %shift
@@ -961,40 +961,40 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubb %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
-; X32-AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
-; X32-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsubb %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubb %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpbroadcastb %xmm2, %ymm2
+; X86-AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %ymm2, %ymm1
+; X86-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
   %shift = ashr <32 x i8> %a, %splat
   ret <32 x i8> %shift
@@ -1060,33 +1060,33 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsravq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,0]
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1073741824,0,16777216]
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,7,0,31,0,62,0]
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
-; X32-AVX2-NEXT:    vpsrlvq %ymm1, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,0]
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1073741824,0,16777216]
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,7,0,31,0,62,0]
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
+; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsubq %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
   ret <4 x i64> %shift
 }
@@ -1138,28 +1138,28 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrad $6, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrad $9, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrad $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsravd {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrad $6, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrad $4, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrad $9, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrad $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsravd {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
   ret <8 x i32> %shift
 }
@@ -1228,25 +1228,25 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmulhw {{\.LCPI.*}}, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; X32-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpmulhw {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; X32-AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
-; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmulhw {{\.LCPI.*}}, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; X86-AVX1-NEXT:    vpsraw $1, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpmulhw {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmulhw {{\.LCPI.*}}, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; X86-AVX2-NEXT:    vpsraw $1, %xmm0, %xmm0
+; X86-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7]
+; X86-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X86-AVX2-NEXT:    retl
   %shift = ashr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
   ret <16 x i16> %shift
 }
@@ -1349,44 +1349,44 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2,4,8,16,32,64,128,256]
-; X32-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpsraw $8, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [256,128,64,32,16,8,4,2]
-; X32-AVX1-NEXT:    vpmullw %xmm4, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpsraw $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpmullw %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; X32-AVX2-NEXT:    vpsraw $8, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; X32-AVX2-NEXT:    vpsraw $8, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [2,4,8,16,32,64,128,256]
+; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpsraw $8, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [256,128,64,32,16,8,4,2]
+; X86-AVX1-NEXT:    vpmullw %xmm4, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpsraw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpsraw $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpmullw %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; X86-AVX2-NEXT:    vpsraw $8, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; X86-AVX2-NEXT:    vpsraw $8, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <32 x i8> %shift
 }
@@ -1443,24 +1443,24 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraq $7, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrad $7, %xmm1, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
-; X32-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrad $7, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrad $7, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vpsrad $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrad $7, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; X86-AVX2-NEXT:    retl
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
@@ -1502,18 +1502,18 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrad $5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrad $5, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrad $5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrad $5, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
@@ -1555,18 +1555,18 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraw $3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsraw $3, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsraw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsraw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsraw $3, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsraw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsraw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
@@ -1632,30 +1632,30 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubb %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }

diff  --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index 8cbe0fc3d943..a55dc2e16185 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -100,25 +100,25 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
-; X32-SSE-NEXT:    psrad %xmm2, %xmm3
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrad %xmm4, %xmm2
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X32-SSE-NEXT:    psrad %xmm3, %xmm4
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    psrad %xmm1, %xmm0
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
-; X32-SSE-NEXT:    movaps %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psrad %xmm2, %xmm3
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrad %xmm4, %xmm2
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
+; X86-SSE-NEXT:    psrad %xmm3, %xmm4
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    psrad %xmm1, %xmm0
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
+; X86-SSE-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i32> %a, %b
   ret <2 x i32> %shift
 }
@@ -258,40 +258,40 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i16> %a, %b
   ret <4 x i16> %shift
 }
@@ -431,40 +431,40 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i16> %a, %b
   ret <2 x i16> %shift
 }
@@ -636,63 +636,63 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $4, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $2, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm4
-; X32-SSE-NEXT:    pandn %xmm2, %xmm4
-; X32-SSE-NEXT:    psraw $1, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm4, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $4, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $2, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm4
+; X86-SSE-NEXT:    pandn %xmm2, %xmm4
+; X86-SSE-NEXT:    psraw $1, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm4, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i8> %a, %b
   ret <8 x i8> %shift
 }
@@ -864,63 +864,63 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $4, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $2, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm4
-; X32-SSE-NEXT:    pandn %xmm2, %xmm4
-; X32-SSE-NEXT:    psraw $1, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm4, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $4, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $2, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm4
+; X86-SSE-NEXT:    pandn %xmm2, %xmm4
+; X86-SSE-NEXT:    psraw $1, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm4, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i8> %a, %b
   ret <4 x i8> %shift
 }
@@ -1092,63 +1092,63 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $4, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm6
-; X32-SSE-NEXT:    pandn %xmm2, %xmm6
-; X32-SSE-NEXT:    psraw $2, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm6, %xmm2
-; X32-SSE-NEXT:    paddw %xmm4, %xmm4
-; X32-SSE-NEXT:    pxor %xmm5, %xmm5
-; X32-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
-; X32-SSE-NEXT:    movdqa %xmm5, %xmm4
-; X32-SSE-NEXT:    pandn %xmm2, %xmm4
-; X32-SSE-NEXT:    psraw $1, %xmm2
-; X32-SSE-NEXT:    pand %xmm5, %xmm2
-; X32-SSE-NEXT:    por %xmm4, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm4, %xmm4
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
-; X32-SSE-NEXT:    movdqa %xmm4, %xmm5
-; X32-SSE-NEXT:    pandn %xmm0, %xmm5
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm4, %xmm0
-; X32-SSE-NEXT:    por %xmm5, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15]
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $4, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm6
+; X86-SSE-NEXT:    pandn %xmm2, %xmm6
+; X86-SSE-NEXT:    psraw $2, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm6, %xmm2
+; X86-SSE-NEXT:    paddw %xmm4, %xmm4
+; X86-SSE-NEXT:    pxor %xmm5, %xmm5
+; X86-SSE-NEXT:    pcmpgtw %xmm4, %xmm5
+; X86-SSE-NEXT:    movdqa %xmm5, %xmm4
+; X86-SSE-NEXT:    pandn %xmm2, %xmm4
+; X86-SSE-NEXT:    psraw $1, %xmm2
+; X86-SSE-NEXT:    pand %xmm5, %xmm2
+; X86-SSE-NEXT:    por %xmm4, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm4, %xmm4
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm4
+; X86-SSE-NEXT:    movdqa %xmm4, %xmm5
+; X86-SSE-NEXT:    pandn %xmm0, %xmm5
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm4, %xmm0
+; X86-SSE-NEXT:    por %xmm5, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtw %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i8> %a, %b
   ret <2 x i8> %shift
 }
@@ -1195,12 +1195,12 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrad %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    psrad %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    psrad %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
   %shift = ashr <2 x i32> %a, %splat
   ret <2 x i32> %shift
@@ -1244,12 +1244,12 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psraw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psraw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
   %shift = ashr <4 x i16> %a, %splat
   ret <4 x i16> %shift
@@ -1293,12 +1293,12 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsraw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psraw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psraw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
   %shift = ashr <2 x i16> %a, %splat
   ret <2 x i16> %shift
@@ -1420,23 +1420,23 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    pxor %xmm2, %xmm0
-; X32-SSE-NEXT:    psubb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    pxor %xmm2, %xmm0
+; X86-SSE-NEXT:    psubb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
   %shift = ashr <8 x i8> %a, %splat
   ret <8 x i8> %shift
@@ -1558,23 +1558,23 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    pxor %xmm2, %xmm0
-; X32-SSE-NEXT:    psubb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    pxor %xmm2, %xmm0
+; X86-SSE-NEXT:    psubb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
   %shift = ashr <4 x i8> %a, %splat
   ret <4 x i8> %shift
@@ -1687,23 +1687,23 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    pxor %xmm2, %xmm0
-; X32-SSE-NEXT:    psubb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32896,32896,32896,32896,32896,32896,32896,32896]
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    pxor %xmm2, %xmm0
+; X86-SSE-NEXT:    psubb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
   %shift = ashr <2 x i8> %a, %splat
   ret <2 x i8> %shift
@@ -1764,15 +1764,15 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrad $4, %xmm1
-; X32-SSE-NEXT:    psrad $5, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrad $4, %xmm1
+; X86-SSE-NEXT:    psrad $5, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i32> %a, <i32 4, i32 5>
   ret <2 x i32> %shift
 }
@@ -1845,19 +1845,19 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $2, %xmm1
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
-; X32-SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,65535,65535,65535]
-; X32-SSE-NEXT:    movaps %xmm1, %xmm0
-; X32-SSE-NEXT:    andps %xmm2, %xmm0
-; X32-SSE-NEXT:    psraw $1, %xmm1
-; X32-SSE-NEXT:    andnps %xmm1, %xmm2
-; X32-SSE-NEXT:    orps %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $2, %xmm1
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
+; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,65535,65535,65535]
+; X86-SSE-NEXT:    movaps %xmm1, %xmm0
+; X86-SSE-NEXT:    andps %xmm2, %xmm0
+; X86-SSE-NEXT:    psraw $1, %xmm1
+; X86-SSE-NEXT:    andnps %xmm1, %xmm2
+; X86-SSE-NEXT:    orps %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
   ret <4 x i16> %shift
 }
@@ -1922,16 +1922,16 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsravw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psraw $3, %xmm1
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
-; X32-SSE-NEXT:    psraw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pandn %xmm1, %xmm2
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psraw $3, %xmm1
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
+; X86-SSE-NEXT:    psraw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pandn %xmm1, %xmm2
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i16> %a, <i16 2, i16 3>
   ret <2 x i16> %shift
 }
@@ -2009,17 +2009,17 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
   ret <8 x i8> %shift
 }
@@ -2097,17 +2097,17 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
   ret <4 x i8> %shift
 }
@@ -2185,17 +2185,17 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    psraw $8, %xmm0
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    psraw $8, %xmm0
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i8> %a, <i8 2, i8 3>
   ret <2 x i8> %shift
 }
@@ -2230,10 +2230,10 @@ define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrad $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrad $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrad $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i32> %a, <i32 5, i32 5>
   ret <2 x i32> %shift
 }
@@ -2264,10 +2264,10 @@ define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psraw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psraw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
   ret <4 x i16> %shift
 }
@@ -2298,10 +2298,10 @@ define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsraw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psraw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psraw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i16> %a, <i16 3, i16 3>
   ret <2 x i16> %shift
 }
@@ -2347,14 +2347,14 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-SSE-NEXT:    pxor %xmm1, %xmm0
-; X32-SSE-NEXT:    psubb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-SSE-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE-NEXT:    psubb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <8 x i8> %shift
 }
@@ -2400,14 +2400,14 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-SSE-NEXT:    pxor %xmm1, %xmm0
-; X32-SSE-NEXT:    psubb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-SSE-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE-NEXT:    psubb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
   ret <4 x i8> %shift
 }
@@ -2453,14 +2453,14 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; X32-SSE-NEXT:    pxor %xmm1, %xmm0
-; X32-SSE-NEXT:    psubb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; X86-SSE-NEXT:    pxor %xmm1, %xmm0
+; X86-SSE-NEXT:    psubb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = ashr <2 x i8> %a, <i8 3, i8 3>
   ret <2 x i8> %shift
 }

diff  --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
index 9669b4cded7f..61908e2241b6 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -71,14 +71,14 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm2
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm2
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i64> %a, %b
   ret <2 x i64> %shift
 }
@@ -166,25 +166,25 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
-; X32-SSE-NEXT:    psrld %xmm2, %xmm3
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrld %xmm4, %xmm2
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X32-SSE-NEXT:    psrld %xmm3, %xmm4
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    psrld %xmm1, %xmm0
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
-; X32-SSE-NEXT:    movaps %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psrld %xmm2, %xmm3
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrld %xmm4, %xmm2
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
+; X86-SSE-NEXT:    psrld %xmm3, %xmm4
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    psrld %xmm1, %xmm0
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
+; X86-SSE-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i32> %a, %b
   ret <4 x i32> %shift
 }
@@ -324,40 +324,40 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i16> %a, %b
   ret <8 x i16> %shift
 }
@@ -478,36 +478,36 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <16 x i8> %a, %b
   ret <16 x i8> %shift
 }
@@ -542,10 +542,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlq %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlq %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
   %shift = lshr <2 x i64> %a, %splat
   ret <2 x i64> %shift
@@ -589,12 +589,12 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    psrld %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    psrld %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
   %shift = lshr <4 x i32> %a, %splat
   ret <4 x i32> %shift
@@ -638,12 +638,12 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = lshr <8 x i16> %a, %splat
   ret <8 x i16> %shift
@@ -748,19 +748,19 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = lshr <16 x i8> %a, %splat
   ret <16 x i8> %shift
@@ -819,13 +819,13 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlvq {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlq $1, %xmm1
-; X32-SSE-NEXT:    psrlq $7, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlq $1, %xmm1
+; X86-SSE-NEXT:    psrlq $7, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i64> %a, <i64 1, i64 7>
   ret <2 x i64> %shift
 }
@@ -895,19 +895,19 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrld $7, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrld $6, %xmm2
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrld $5, %xmm1
-; X32-SSE-NEXT:    psrld $4, %xmm0
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrld $7, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrld $6, %xmm2
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrld $5, %xmm1
+; X86-SSE-NEXT:    psrld $4, %xmm0
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
   ret <4 x i32> %shift
 }
@@ -967,15 +967,15 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    pmulhuw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    pmulhuw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
 }
@@ -1068,18 +1068,18 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
 }
@@ -1114,10 +1114,10 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlq $7, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlq $7, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlq $7, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
@@ -1148,10 +1148,10 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrld $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrld $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrld $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
@@ -1182,10 +1182,10 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
@@ -1220,11 +1220,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }
@@ -1337,28 +1337,28 @@ define <4 x i32> @vector_variable_shift_right(<4 x i1> %cond, <4 x i32> %x, <4 x
 ; AVX512BWVL-NEXT:    vpsrlvd %xmm0, %xmm3, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: vector_variable_shift_right:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pushl %ebp
-; X32-SSE-NEXT:    movl %esp, %ebp
-; X32-SSE-NEXT:    andl $-16, %esp
-; X32-SSE-NEXT:    subl $16, %esp
-; X32-SSE-NEXT:    xorps %xmm3, %xmm3
-; X32-SSE-NEXT:    xorps %xmm4, %xmm4
-; X32-SSE-NEXT:    movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
-; X32-SSE-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
-; X32-SSE-NEXT:    pslld $31, %xmm0
-; X32-SSE-NEXT:    psrad $31, %xmm0
-; X32-SSE-NEXT:    movdqa 8(%ebp), %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psrld %xmm3, %xmm2
-; X32-SSE-NEXT:    psrld %xmm4, %xmm1
-; X32-SSE-NEXT:    pand %xmm0, %xmm2
-; X32-SSE-NEXT:    pandn %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    movl %ebp, %esp
-; X32-SSE-NEXT:    popl %ebp
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: vector_variable_shift_right:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pushl %ebp
+; X86-SSE-NEXT:    movl %esp, %ebp
+; X86-SSE-NEXT:    andl $-16, %esp
+; X86-SSE-NEXT:    subl $16, %esp
+; X86-SSE-NEXT:    xorps %xmm3, %xmm3
+; X86-SSE-NEXT:    xorps %xmm4, %xmm4
+; X86-SSE-NEXT:    movss {{.*#+}} xmm4 = xmm2[0],xmm4[1,2,3]
+; X86-SSE-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
+; X86-SSE-NEXT:    pslld $31, %xmm0
+; X86-SSE-NEXT:    psrad $31, %xmm0
+; X86-SSE-NEXT:    movdqa 8(%ebp), %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psrld %xmm3, %xmm2
+; X86-SSE-NEXT:    psrld %xmm4, %xmm1
+; X86-SSE-NEXT:    pand %xmm0, %xmm2
+; X86-SSE-NEXT:    pandn %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    movl %ebp, %esp
+; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    retl
   %splat1 = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
   %splat2 = shufflevector <4 x i32> %y, <4 x i32> undef, <4 x i32> zeroinitializer
   %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2

diff  --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
index 81b7677bc96e..c995af7c430d 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll
@@ -9,8 +9,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; 32-bit runs to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X32-AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X86-AVX1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2
 
 ;
 ; Variable Shifts
@@ -64,25 +64,25 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X32-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <4 x i64> %a, %b
   ret <4 x i64> %shift
 }
@@ -150,40 +150,40 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; X32-AVX1-NEXT:    vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vpsrld %xmm4, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm5
-; X32-AVX1-NEXT:    vpsrld %xmm5, %xmm2, %xmm5
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
-; X32-AVX1-NEXT:    vpsrld %xmm6, %xmm2, %xmm6
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
-; X32-AVX1-NEXT:    vpsrld %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
-; X32-AVX1-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm4
-; X32-AVX1-NEXT:    vpsrld %xmm4, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
-; X32-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
-; X32-AVX1-NEXT:    vpsrld %xmm4, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; X86-AVX1-NEXT:    vpsrldq {{.*#+}} xmm4 = xmm3[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vpsrld %xmm4, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm3, %xmm5
+; X86-AVX1-NEXT:    vpsrld %xmm5, %xmm2, %xmm5
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm4 = xmm5[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm6 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
+; X86-AVX1-NEXT:    vpsrld %xmm6, %xmm2, %xmm6
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
+; X86-AVX1-NEXT:    vpsrld %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
+; X86-AVX1-NEXT:    vpsrldq {{.*#+}} xmm3 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vpsrld %xmm3, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpsrlq $32, %xmm1, %xmm4
+; X86-AVX1-NEXT:    vpsrld %xmm4, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; X86-AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm4 = xmm1[2],xmm5[2],xmm1[3],xmm5[3]
+; X86-AVX1-NEXT:    vpsrld %xmm4, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm3[2,3],xmm0[4,5],xmm3[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <8 x i32> %a, %b
   ret <8 x i32> %shift
 }
@@ -291,54 +291,54 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm4
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm4
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; X32-AVX2-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
-; X32-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; X32-AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vpsllw $12, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpaddw %xmm2, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm2, %xmm5, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm4
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw $12, %xmm1, %xmm3
+; X86-AVX1-NEXT:    vpsllw $4, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm1, %xmm1, %xmm3
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm4
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpaddw %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpblendvb %xmm3, %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; X86-AVX2-NEXT:    vpsrlvd %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; X86-AVX2-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <16 x i16> %a, %b
   ret <16 x i16> %shift
 }
@@ -466,55 +466,55 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; X32-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
-; X32-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; X32-AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrlw $1, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $4, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; X86-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [63,63,63,63,63,63,63,63,63,63,63,63,63,63,63,63]
+; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $1, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; X86-AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $1, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm7, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpsrlw $4, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrlw $2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrlw $1, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <32 x i8> %a, %b
   ret <32 x i8> %shift
 }
@@ -560,18 +560,18 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
   %shift = lshr <4 x i64> %a, %splat
   ret <4 x i64> %shift
@@ -620,20 +620,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrld %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
   %shift = lshr <8 x i32> %a, %splat
   ret <8 x i32> %shift
@@ -682,20 +682,20 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
   %shift = lshr <16 x i16> %a, %splat
   ret <16 x i16> %shift
@@ -787,30 +787,30 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
-; X32-AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; X32-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
-; X32-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpsrlw %xmm1, %xmm2, %xmm1
+; X86-AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
+; X86-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; X86-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
   %shift = lshr <32 x i8> %a, %splat
   ret <32 x i8> %shift
@@ -861,22 +861,22 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlvq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlvq {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlq $62, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $31, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrlq $1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlvq {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
   ret <4 x i64> %shift
 }
@@ -928,28 +928,28 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrld $6, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrld $4, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrld $6, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrld $4, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrld $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsrld $9, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsrld $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
   ret <8 x i32> %shift
 }
@@ -1013,21 +1013,21 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmulhuw {{\.LCPI.*}}, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
-; X32-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpmulhuw {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmulhuw {{\.LCPI.*}}, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; X86-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; X86-AVX2-NEXT:    retl
   %shift = lshr <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
   ret <16 x i16> %shift
 }
@@ -1124,40 +1124,40 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256]
-; X32-AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [256,128,64,32,16,8,4,2]
-; X32-AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
-; X32-AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X32-AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
-; X32-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
-; X32-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256]
+; X86-AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [256,128,64,32,16,8,4,2]
+; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
+; X86-AVX1-NEXT:    vpmullw %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; X86-AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <32 x i8> %shift
 }
@@ -1203,18 +1203,18 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlq $7, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
@@ -1256,18 +1256,18 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrld $5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrld $5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrld $5, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
@@ -1309,18 +1309,18 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
@@ -1370,22 +1370,22 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }
@@ -1448,25 +1448,25 @@ define <4 x i32> @sh_trunc_sh_vec(<4 x i64> %x) {
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: sh_trunc_sh_vec:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsrlq $36, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; X32-AVX1-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vzeroupper
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: sh_trunc_sh_vec:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsrlq $36, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
-; X32-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; X32-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
-; X32-AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
-; X32-AVX2-NEXT:    vzeroupper
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: sh_trunc_sh_vec:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsrlq $36, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsrlq $36, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X86-AVX1-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vzeroupper
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: sh_trunc_sh_vec:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsrlq $36, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; X86-AVX2-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1048575,1048575,1048575,1048575]
+; X86-AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; X86-AVX2-NEXT:    vzeroupper
+; X86-AVX2-NEXT:    retl
   %s = lshr <4 x i64> %x, <i64 24, i64 24, i64 24, i64 24>
   %t = trunc <4 x i64> %s to <4 x i32>
   %r = lshr <4 x i32> %t, <i32 12, i32 12, i32 12, i32 12>

diff  --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
index 9bf668ef948d..49cf4c0793d1 100644
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -100,25 +100,25 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm3
-; X32-SSE-NEXT:    psrld %xmm2, %xmm3
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psrld %xmm4, %xmm2
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X32-SSE-NEXT:    psrld %xmm3, %xmm4
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; X32-SSE-NEXT:    psrld %xmm1, %xmm0
-; X32-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
-; X32-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
-; X32-SSE-NEXT:    movaps %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psrld %xmm2, %xmm3
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm4 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psrld %xmm4, %xmm2
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm3 = xmm1[2,3,3,3,4,5,6,7]
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
+; X86-SSE-NEXT:    psrld %xmm3, %xmm4
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
+; X86-SSE-NEXT:    psrld %xmm1, %xmm0
+; X86-SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm4[1]
+; X86-SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3]
+; X86-SSE-NEXT:    movaps %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i32> %a, %b
   ret <2 x i32> %shift
 }
@@ -258,40 +258,40 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i16> %a, %b
   ret <4 x i16> %shift
 }
@@ -431,40 +431,40 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $12, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    psraw $15, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm3
-; X32-SSE-NEXT:    pandn %xmm0, %xmm3
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm3, %xmm0
-; X32-SSE-NEXT:    paddw %xmm1, %xmm1
-; X32-SSE-NEXT:    psraw $15, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $12, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    psraw $15, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm3
+; X86-SSE-NEXT:    pandn %xmm0, %xmm3
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm3, %xmm0
+; X86-SSE-NEXT:    paddw %xmm1, %xmm1
+; X86-SSE-NEXT:    psraw $15, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i16> %a, %b
   ret <2 x i16> %shift
 }
@@ -585,36 +585,36 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i8> %a, %b
   ret <8 x i8> %shift
 }
@@ -735,36 +735,36 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i8> %a, %b
   ret <4 x i8> %shift
 }
@@ -885,36 +885,36 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlw $1, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlw $1, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i8> %a, %b
   ret <2 x i8> %shift
 }
@@ -961,12 +961,12 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    psrld %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    psrld %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
   %shift = lshr <2 x i32> %a, %splat
   ret <2 x i32> %shift
@@ -1010,12 +1010,12 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
   %shift = lshr <4 x i16> %a, %splat
   ret <4 x i16> %shift
@@ -1059,12 +1059,12 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
   %shift = lshr <2 x i16> %a, %splat
   ret <2 x i16> %shift
@@ -1170,19 +1170,19 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
   %shift = lshr <8 x i8> %a, %splat
   ret <8 x i8> %shift
@@ -1288,19 +1288,19 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
   %shift = lshr <4 x i8> %a, %splat
   ret <4 x i8> %shift
@@ -1397,19 +1397,19 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psrlw %xmm1, %xmm2
-; X32-SSE-NEXT:    psrlw $8, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psrlw %xmm1, %xmm2
+; X86-SSE-NEXT:    psrlw $8, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
   %shift = lshr <2 x i8> %a, %splat
   ret <2 x i8> %shift
@@ -1470,15 +1470,15 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrld $4, %xmm1
-; X32-SSE-NEXT:    psrld $5, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrld $4, %xmm1
+; X86-SSE-NEXT:    psrld $5, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i32> %a, <i32 4, i32 5>
   ret <2 x i32> %shift
 }
@@ -1538,15 +1538,15 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    pandn %xmm0, %xmm2
-; X32-SSE-NEXT:    pmulhuw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    pandn %xmm0, %xmm2
+; X86-SSE-NEXT:    pmulhuw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
   ret <4 x i16> %shift
 }
@@ -1611,16 +1611,16 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsrlvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psrlw $3, %xmm1
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
-; X32-SSE-NEXT:    psrlw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    pandn %xmm1, %xmm2
-; X32-SSE-NEXT:    por %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psrlw $3, %xmm1
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
+; X86-SSE-NEXT:    psrlw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    pandn %xmm1, %xmm2
+; X86-SSE-NEXT:    por %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i16> %a, <i16 2, i16 3>
   ret <2 x i16> %shift
 }
@@ -1707,16 +1707,16 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
   ret <8 x i8> %shift
 }
@@ -1803,16 +1803,16 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
   ret <4 x i8> %shift
 }
@@ -1899,16 +1899,16 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    psrlw $8, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    psrlw $8, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i8> %a, <i8 2, i8 3>
   ret <2 x i8> %shift
 }
@@ -1943,10 +1943,10 @@ define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrld $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrld $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrld $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i32> %a, <i32 5, i32 5>
   ret <2 x i32> %shift
 }
@@ -1977,10 +1977,10 @@ define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
   ret <4 x i16> %shift
 }
@@ -2011,10 +2011,10 @@ define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i16> %a, <i16 3, i16 3>
   ret <2 x i16> %shift
 }
@@ -2049,11 +2049,11 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <8 x i8> %shift
 }
@@ -2088,11 +2088,11 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
   ret <4 x i8> %shift
 }
@@ -2127,11 +2127,11 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psrlw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psrlw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = lshr <2 x i8> %a, <i8 3, i8 3>
   ret <2 x i8> %shift
 }

diff  --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
index f099b7ff3cb9..d2d958bf3286 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -69,14 +69,14 @@ define <2 x i64> @var_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X32-SSE-NEXT:    psllq %xmm1, %xmm2
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-SSE-NEXT:    psllq %xmm1, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE-NEXT:    psllq %xmm1, %xmm2
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-SSE-NEXT:    psllq %xmm1, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i64> %a, %b
   ret <2 x i64> %shift
 }
@@ -137,19 +137,19 @@ define <4 x i32> @var_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslld $23, %xmm1
-; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm1, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslld $23, %xmm1
+; X86-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i32> %a, %b
   ret <4 x i32> %shift
 }
@@ -256,27 +256,27 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pslld $23, %xmm2
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
-; X32-SSE-NEXT:    paddd %xmm3, %xmm2
-; X32-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X32-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; X32-SSE-NEXT:    pslld $23, %xmm1
-; X32-SSE-NEXT:    paddd %xmm3, %xmm1
-; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X32-SSE-NEXT:    pmullw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pslld $23, %xmm2
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
+; X86-SSE-NEXT:    paddd %xmm3, %xmm2
+; X86-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X86-SSE-NEXT:    pslld $23, %xmm1
+; X86-SSE-NEXT:    paddd %xmm3, %xmm1
+; X86-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    pmullw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i16> %a, %b
   ret <8 x i16> %shift
 }
@@ -392,35 +392,35 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    paddb %xmm0, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    paddb %xmm0, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <16 x i8> %a, %b
   ret <16 x i8> %shift
 }
@@ -455,10 +455,10 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllq %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllq %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> zeroinitializer
   %shift = shl <2 x i64> %a, %splat
   ret <2 x i64> %shift
@@ -502,12 +502,12 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    pslld %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    pslld %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
   %shift = shl <4 x i32> %a, %splat
   ret <4 x i32> %shift
@@ -551,12 +551,12 @@ define <8 x i16> @splatvar_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> zeroinitializer
   %shift = shl <8 x i16> %a, %splat
   ret <8 x i16> %shift
@@ -658,18 +658,18 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psllw %xmm1, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psllw %xmm1, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <16 x i8> %b, <16 x i8> undef, <16 x i32> zeroinitializer
   %shift = shl <16 x i8> %a, %splat
   ret <16 x i8> %shift
@@ -728,13 +728,13 @@ define <2 x i64> @constant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    psllq $1, %xmm1
-; X32-SSE-NEXT:    psllq $7, %xmm0
-; X32-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    psllq $1, %xmm1
+; X86-SSE-NEXT:    psllq $7, %xmm0
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i64> %a, <i64 1, i64 7>
   ret <2 x i64> %shift
 }
@@ -787,17 +787,17 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm1, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i32> %a, <i32 4, i32 5, i32 6, i32 7>
   ret <4 x i32> %shift
 }
@@ -842,10 +842,10 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
   ret <8 x i16> %shift
 }
@@ -938,18 +938,18 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
-; X32-SSE-NEXT:    pand %xmm2, %xmm1
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    packuswb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; X86-SSE-NEXT:    pand %xmm2, %xmm1
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    packuswb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <16 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <16 x i8> %shift
 }
@@ -984,10 +984,10 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllq $7, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i64:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllq $7, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i64:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllq $7, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
   ret <2 x i64> %shift
 }
@@ -1018,10 +1018,10 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpslld $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslld $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslld $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
   ret <4 x i32> %shift
 }
@@ -1052,10 +1052,10 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <8 x i16> %shift
 }
@@ -1090,11 +1090,11 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v16i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v16i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <16 x i8> %shift
 }

diff  --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
index 0ab4efc92e72..c033d0fd6181 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll
@@ -9,8 +9,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; 32-bit runs to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X32-AVX1
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=X86-AVX1
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X86-AVX2
 
 ;
 ; Variable Shifts
@@ -61,25 +61,25 @@ define <4 x i64> @var_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
-; X32-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; X32-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
+; X86-AVX1-NEXT:    vpsllq %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm3[0,1,2,3],xmm0[4,5,6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <4 x i64> %a, %b
   ret <4 x i64> %shift
 }
@@ -130,26 +130,26 @@ define <8 x i32> @var_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
-; X32-AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; X32-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
-; X32-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
+; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; X86-AVX1-NEXT:    vpmulld %xmm2, %xmm4, %xmm2
+; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <8 x i32> %a, %b
   ret <8 x i32> %shift
 }
@@ -244,47 +244,47 @@ define <16 x i16> @var_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; X32-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
-; X32-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
-; X32-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; X32-AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
-; X32-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
-; X32-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
-; X32-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
-; X32-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
-; X32-AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
-; X32-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
-; X32-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
-; X32-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm2[4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1065353216,1065353216,1065353216,1065353216]
+; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
+; X86-AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; X86-AVX1-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
+; X86-AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm1[4,4,5,5,6,6,7,7]
+; X86-AVX1-NEXT:    vpslld $23, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vcvttps2dq %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; X86-AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpaddd %xmm4, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpackusdw %xmm3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm3 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15]
+; X86-AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm4 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15]
+; X86-AVX2-NEXT:    vpsllvd %ymm3, %ymm4, %ymm3
+; X86-AVX2-NEXT:    vpsrld $16, %ymm3, %ymm3
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11]
+; X86-AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11]
+; X86-AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsrld $16, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpackusdw %ymm3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <16 x i16> %a, %b
   ret <16 x i16> %shift
 }
@@ -400,51 +400,51 @@ define <32 x i8> @var_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: var_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; X32-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; X32-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
-; X32-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
-; X32-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: var_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
-; X32-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: var_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsllw $4, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; X86-AVX1-NEXT:    vpsllw $5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw $2, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpaddb %xmm2, %xmm2, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm5, %xmm5, %xmm5
+; X86-AVX1-NEXT:    vpblendvb %xmm5, %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw $4, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsllw $2, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpaddb %xmm0, %xmm0, %xmm3
+; X86-AVX1-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendvb %xmm1, %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: var_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
+; X86-AVX2-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpblendvb %ymm1, %ymm2, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <32 x i8> %a, %b
   ret <32 x i8> %shift
 }
@@ -490,18 +490,18 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, <4 x i64> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> zeroinitializer
   %shift = shl <4 x i64> %a, %splat
   ret <4 x i64> %shift
@@ -550,20 +550,20 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
-; X32-AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpslld %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpslld %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> zeroinitializer
   %shift = shl <8 x i32> %a, %splat
   ret <8 x i32> %shift
@@ -612,20 +612,20 @@ define <16 x i16> @splatvar_shift_v16i16(<16 x i16> %a, <16 x i16> %b) nounwind
 ; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
-; X32-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
+; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <16 x i16> %b, <16 x i16> undef, <16 x i32> zeroinitializer
   %shift = shl <16 x i16> %a, %splat
   ret <16 x i16> %shift
@@ -712,30 +712,30 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatvar_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; X32-AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatvar_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
-; X32-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
-; X32-AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
-; X32-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
-; X32-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatvar_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; X86-AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatvar_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
+; X86-AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
+; X86-AVX2-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
+; X86-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; X86-AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %splat = shufflevector <32 x i8> %b, <32 x i8> undef, <32 x i32> zeroinitializer
   %shift = shl <32 x i8> %a, %splat
   ret <32 x i8> %shift
@@ -786,22 +786,22 @@ define <4 x i64> @constant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllvq {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
-; X32-AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
-; X32-AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsllq $62, %xmm1, %xmm2
+; X86-AVX1-NEXT:    vpsllq $31, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm2
+; X86-AVX1-NEXT:    vpsllq $1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllvq {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <4 x i64> %a, <i64 1, i64 7, i64 31, i64 62>
   ret <4 x i64> %shift
 }
@@ -843,18 +843,18 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpmulld {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <8 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
   ret <8 x i32> %shift
 }
@@ -909,18 +909,18 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpmullw {{\.LCPI.*}}, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpmullw {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <16 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
   ret <16 x i16> %shift
 }
@@ -1029,44 +1029,44 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpmovwb %zmm0, %ymm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: constant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1]
-; X32-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
-; X32-AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; X32-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
-; X32-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; X32-AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: constant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
-; X32-AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
-; X32-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
-; X32-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; X32-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: constant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,64,32,16,8,4,2,1]
+; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255]
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,2,4,8,16,32,64,128]
+; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; X86-AVX1-NEXT:    vpmullw %xmm5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: constant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $4, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vbroadcasti128 {{.*#+}} ymm2 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
+; X86-AVX2-NEXT:    # ymm2 = mem[0,1,0,1]
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpsllw $2, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm1, %ymm1
+; X86-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpaddb %ymm0, %ymm0, %ymm1
+; X86-AVX2-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
+; X86-AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <32 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
   ret <32 x i8> %shift
 }
@@ -1112,18 +1112,18 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v4i64:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v4i64:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v4i64:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsllq $7, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v4i64:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
   ret <4 x i64> %shift
 }
@@ -1165,18 +1165,18 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpslld $5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v8i32:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpslld $5, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpslld $5, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v8i32:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpslld $5, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v8i32:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpslld $5, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpslld $5, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v8i32:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpslld $5, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   ret <8 x i32> %shift
 }
@@ -1218,18 +1218,18 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllw $3, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v16i16:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; X32-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v16i16:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v16i16:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm1
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v16i16:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ret <16 x i16> %shift
 }
@@ -1279,22 +1279,22 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-AVX1-LABEL: splatconstant_shift_v32i8:
-; X32-AVX1:       # %bb.0:
-; X32-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; X32-AVX1-NEXT:    vpsllw $3, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X32-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; X32-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX1-NEXT:    retl
-;
-; X32-AVX2-LABEL: splatconstant_shift_v32i8:
-; X32-AVX2:       # %bb.0:
-; X32-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
-; X32-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X86-AVX1-LABEL: splatconstant_shift_v32i8:
+; X86-AVX1:       # %bb.0:
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; X86-AVX1-NEXT:    vpsllw $3, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
+; X86-AVX1-NEXT:    vpsllw $3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    retl
+;
+; X86-AVX2-LABEL: splatconstant_shift_v32i8:
+; X86-AVX2:       # %bb.0:
+; X86-AVX2-NEXT:    vpsllw $3, %ymm0, %ymm0
+; X86-AVX2-NEXT:    vpand {{\.LCPI.*}}, %ymm0, %ymm0
+; X86-AVX2-NEXT:    retl
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <32 x i8> %shift
 }

diff  --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
index 2b0f5a70cdbf..0861a5f4e402 100644
--- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll
@@ -11,7 +11,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=AVX512VL,AVX512BWVL
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 shifts.
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32-SSE
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
 
 ;
 ; Variable Shifts
@@ -73,19 +73,19 @@ define <2 x i32> @var_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslld $23, %xmm1
-; X32-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm1, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; X32-SSE-NEXT:    pmuludq %xmm2, %xmm1
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslld $23, %xmm1
+; X86-SSE-NEXT:    paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm1, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i32> %a, %b
   ret <2 x i32> %shift
 }
@@ -192,27 +192,27 @@ define <4 x i16> @var_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pslld $23, %xmm2
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
-; X32-SSE-NEXT:    paddd %xmm3, %xmm2
-; X32-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X32-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; X32-SSE-NEXT:    pslld $23, %xmm1
-; X32-SSE-NEXT:    paddd %xmm3, %xmm1
-; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X32-SSE-NEXT:    pmullw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pslld $23, %xmm2
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
+; X86-SSE-NEXT:    paddd %xmm3, %xmm2
+; X86-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X86-SSE-NEXT:    pslld $23, %xmm1
+; X86-SSE-NEXT:    paddd %xmm3, %xmm1
+; X86-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    pmullw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i16> %a, %b
   ret <4 x i16> %shift
 }
@@ -319,27 +319,27 @@ define <2 x i16> @var_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm2
-; X32-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pslld $23, %xmm2
-; X32-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
-; X32-SSE-NEXT:    paddd %xmm3, %xmm2
-; X32-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; X32-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
-; X32-SSE-NEXT:    pslld $23, %xmm1
-; X32-SSE-NEXT:    paddd %xmm3, %xmm1
-; X32-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; X32-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; X32-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X32-SSE-NEXT:    pmullw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pslld $23, %xmm2
+; X86-SSE-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
+; X86-SSE-NEXT:    paddd %xmm3, %xmm2
+; X86-SSE-NEXT:    cvttps2dq %xmm2, %xmm2
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X86-SSE-NEXT:    pslld $23, %xmm1
+; X86-SSE-NEXT:    paddd %xmm3, %xmm1
+; X86-SSE-NEXT:    cvttps2dq %xmm1, %xmm1
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; X86-SSE-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-SSE-NEXT:    pmullw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i16> %a, %b
   ret <2 x i16> %shift
 }
@@ -455,35 +455,35 @@ define <8 x i8> @var_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    paddb %xmm0, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    paddb %xmm0, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i8> %a, %b
   ret <8 x i8> %shift
 }
@@ -599,35 +599,35 @@ define <4 x i8> @var_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    paddb %xmm0, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    paddb %xmm0, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i8> %a, %b
   ret <4 x i8> %shift
 }
@@ -743,35 +743,35 @@ define <2 x i8> @var_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: var_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $5, %xmm1
-; X32-SSE-NEXT:    pxor %xmm2, %xmm2
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $4, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pxor %xmm3, %xmm3
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
-; X32-SSE-NEXT:    movdqa %xmm3, %xmm4
-; X32-SSE-NEXT:    pandn %xmm0, %xmm4
-; X32-SSE-NEXT:    psllw $2, %xmm0
-; X32-SSE-NEXT:    pand %xmm3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    por %xmm4, %xmm0
-; X32-SSE-NEXT:    paddb %xmm1, %xmm1
-; X32-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
-; X32-SSE-NEXT:    movdqa %xmm2, %xmm1
-; X32-SSE-NEXT:    pandn %xmm0, %xmm1
-; X32-SSE-NEXT:    paddb %xmm0, %xmm0
-; X32-SSE-NEXT:    pand %xmm2, %xmm0
-; X32-SSE-NEXT:    por %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: var_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $5, %xmm1
+; X86-SSE-NEXT:    pxor %xmm2, %xmm2
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $4, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pxor %xmm3, %xmm3
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm3
+; X86-SSE-NEXT:    movdqa %xmm3, %xmm4
+; X86-SSE-NEXT:    pandn %xmm0, %xmm4
+; X86-SSE-NEXT:    psllw $2, %xmm0
+; X86-SSE-NEXT:    pand %xmm3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    por %xmm4, %xmm0
+; X86-SSE-NEXT:    paddb %xmm1, %xmm1
+; X86-SSE-NEXT:    pcmpgtb %xmm1, %xmm2
+; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
+; X86-SSE-NEXT:    pandn %xmm0, %xmm1
+; X86-SSE-NEXT:    paddb %xmm0, %xmm0
+; X86-SSE-NEXT:    pand %xmm2, %xmm0
+; X86-SSE-NEXT:    por %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i8> %a, %b
   ret <2 x i8> %shift
 }
@@ -818,12 +818,12 @@ define <2 x i32> @splatvar_shift_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
 ; AVX512VL-NEXT:    vpslld %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    xorps %xmm2, %xmm2
-; X32-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X32-SSE-NEXT:    pslld %xmm2, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    xorps %xmm2, %xmm2
+; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
+; X86-SSE-NEXT:    pslld %xmm2, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <2 x i32> zeroinitializer
   %shift = shl <2 x i32> %a, %splat
   ret <2 x i32> %shift
@@ -867,12 +867,12 @@ define <4 x i16> @splatvar_shift_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i16> %b, <4 x i16> undef, <4 x i32> zeroinitializer
   %shift = shl <4 x i16> %a, %splat
   ret <4 x i16> %shift
@@ -916,12 +916,12 @@ define <2 x i16> @splatvar_shift_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
 ; AVX512VL-NEXT:    vpsllw %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> zeroinitializer
   %shift = shl <2 x i16> %a, %splat
   ret <2 x i16> %shift
@@ -1023,18 +1023,18 @@ define <8 x i8> @splatvar_shift_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psllw %xmm1, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psllw %xmm1, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <8 x i8> %b, <8 x i8> undef, <8 x i32> zeroinitializer
   %shift = shl <8 x i8> %a, %splat
   ret <8 x i8> %shift
@@ -1136,18 +1136,18 @@ define <4 x i8> @splatvar_shift_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psllw %xmm1, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psllw %xmm1, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <4 x i8> %b, <4 x i8> undef, <4 x i32> zeroinitializer
   %shift = shl <4 x i8> %a, %splat
   ret <4 x i8> %shift
@@ -1242,18 +1242,18 @@ define <2 x i8> @splatvar_shift_v2i8(<2 x i8> %a, <2 x i8> %b) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatvar_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
-; X32-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X32-SSE-NEXT:    psllw %xmm1, %xmm0
-; X32-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
-; X32-SSE-NEXT:    psllw %xmm1, %xmm2
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; X32-SSE-NEXT:    pand %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatvar_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; X86-SSE-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; X86-SSE-NEXT:    psllw %xmm1, %xmm0
+; X86-SSE-NEXT:    pcmpeqd %xmm2, %xmm2
+; X86-SSE-NEXT:    psllw %xmm1, %xmm2
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm2[0,0,0,0,4,5,6,7]
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X86-SSE-NEXT:    pand %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %splat = shufflevector <2 x i8> %b, <2 x i8> undef, <2 x i32> zeroinitializer
   %shift = shl <2 x i8> %a, %splat
   ret <2 x i8> %shift
@@ -1314,15 +1314,15 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    movdqa %xmm0, %xmm1
-; X32-SSE-NEXT:    pslld $4, %xmm1
-; X32-SSE-NEXT:    pslld $5, %xmm0
-; X32-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X32-SSE-NEXT:    movdqa %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE-NEXT:    pslld $4, %xmm1
+; X86-SSE-NEXT:    pslld $5, %xmm0
+; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-SSE-NEXT:    movdqa %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i32> %a, <i32 4, i32 5>
   ret <2 x i32> %shift
 }
@@ -1367,10 +1367,10 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>
   ret <4 x i16> %shift
 }
@@ -1429,10 +1429,10 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512BWVL-NEXT:    vpsllvw {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i16> %a, <i16 2, i16 3>
   ret <2 x i16> %shift
 }
@@ -1514,14 +1514,14 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    packuswb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    packuswb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
   ret <8 x i8> %shift
 }
@@ -1603,14 +1603,14 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    packuswb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    packuswb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i8> %a, <i8 0, i8 1, i8 2, i8 3>
   ret <4 x i8> %shift
 }
@@ -1692,14 +1692,14 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512BWVL-NEXT:    vzeroupper
 ; AVX512BWVL-NEXT:    retq
 ;
-; X32-SSE-LABEL: constant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    pxor %xmm1, %xmm1
-; X32-SSE-NEXT:    packuswb %xmm1, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: constant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; X86-SSE-NEXT:    pmullw {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    pxor %xmm1, %xmm1
+; X86-SSE-NEXT:    packuswb %xmm1, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i8> %a, <i8 2, i8 3>
   ret <2 x i8> %shift
 }
@@ -1734,10 +1734,10 @@ define <2 x i32> @splatconstant_shift_v2i32(<2 x i32> %a) nounwind {
 ; AVX512VL-NEXT:    vpslld $5, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i32:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    pslld $5, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i32:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    pslld $5, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i32> %a, <i32 5, i32 5>
   ret <2 x i32> %shift
 }
@@ -1768,10 +1768,10 @@ define <4 x i16> @splatconstant_shift_v4i16(<4 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
   ret <4 x i16> %shift
 }
@@ -1802,10 +1802,10 @@ define <2 x i16> @splatconstant_shift_v2i16(<2 x i16> %a) nounwind {
 ; AVX512VL-NEXT:    vpsllw $3, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i16:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i16:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i16> %a, <i16 3, i16 3>
   ret <2 x i16> %shift
 }
@@ -1840,11 +1840,11 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v8i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v8i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <8 x i8> %shift
 }
@@ -1879,11 +1879,11 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v4i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v4i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <4 x i8> %a, <i8 3, i8 3, i8 3, i8 3>
   ret <4 x i8> %shift
 }
@@ -1918,11 +1918,11 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
-; X32-SSE-LABEL: splatconstant_shift_v2i8:
-; X32-SSE:       # %bb.0:
-; X32-SSE-NEXT:    psllw $3, %xmm0
-; X32-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT:    retl
+; X86-SSE-LABEL: splatconstant_shift_v2i8:
+; X86-SSE:       # %bb.0:
+; X86-SSE-NEXT:    psllw $3, %xmm0
+; X86-SSE-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-SSE-NEXT:    retl
   %shift = shl <2 x i8> %a, <i8 3, i8 3>
   ret <2 x i8> %shift
 }

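For readers puzzling over the X86-SSE sequences in these checks: the var_shift_* lowerings perform a variable left shift without per-lane shift instructions by materialising 2^amt in each lane and multiplying. The shift amount is moved into a float's exponent field (pslld $23), biased by 1.0f (0x3F800000 == 1065353216, the paddd constant visible in the checks), truncated back to integer with cvttps2dq, and fed to pmuludq/pmullw. Below is a rough scalar model of that trick, as a hedged sketch only; the helper name and the standalone program are illustrative and not part of the tests or the commit.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Scalar model of the pslld $23 / paddd 1065353216 / cvttps2dq / multiply
 * sequence seen in the X86-SSE checks above: build (float)(1 << amt) by
 * writing amt into the exponent field of 1.0f, then multiply. Works for
 * shift amounts up to 31. */
static uint32_t shl_via_float_exponent(uint32_t x, uint32_t amt) {
    uint32_t bits = (amt << 23) + 0x3F800000u;   /* pslld $23 + paddd bias   */
    float pow2;
    memcpy(&pow2, &bits, sizeof(pow2));          /* reinterpret: pow2 = 2^amt */
    uint32_t mul = (uint32_t)pow2;               /* cvttps2dq-style truncate  */
    return x * mul;                              /* pmuludq / pmullw          */
}

int main(void) {
    printf("%u\n", shl_via_float_exponent(3u, 5u));   /* prints 96 == 3 << 5 */
    return 0;
}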
More information about the llvm-commits mailing list