[llvm] b483349 - [X86] Add v2i64/v8i16/v16i8 + AVX2 coverage to saturated shift tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 22 03:35:54 PDT 2022


Author: Simon Pilgrim
Date: 2022-10-22T11:35:37+01:00
New Revision: b483349c5f4f61a67cfd9add21dbee2d69833b77

URL: https://github.com/llvm/llvm-project/commit/b483349c5f4f61a67cfd9add21dbee2d69833b77
DIFF: https://github.com/llvm/llvm-project/commit/b483349c5f4f61a67cfd9add21dbee2d69833b77.diff

LOG: [X86] Add v2i64/v8i16/v16i8 + AVX2 coverage to saturated shift tests

To help better test the effects of D136478
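
For reference (illustrative sketch, not part of this patch): the saturating
shift intrinsics exercised by these tests clamp the result instead of wrapping
when the left shift overflows — sshl.sat saturates to the signed min/max,
ushl.sat to the unsigned max. A minimal scalar form, using hypothetical i16
operands:

    ; assumed example, not taken from the modified test files
    declare i16 @llvm.sshl.sat.i16(i16, i16)
    declare i16 @llvm.ushl.sat.i16(i16, i16)

    define i16 @sshl_sat_example(i16 %x, i16 %y) {
      ; If (%x << %y) cannot be represented in i16, the result is
      ; clamped to 32767 or -32768 depending on the sign of %x.
      %r = call i16 @llvm.sshl.sat.i16(i16 %x, i16 %y)
      ret i16 %r
    }

The new tests below apply the same intrinsics to <2 x i64>, <8 x i16> and
<16 x i8> vectors, with an additional AVX2 run line.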

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/sshl_sat_vec.ll
    llvm/test/CodeGen/X86/ushl_sat_vec.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 40f9025097586..bdae47e3970ee 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -1,11 +1,179 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
 
-declare  <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8>, <16 x i8>)
 
-define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X64-LABEL: vec:
+define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
+; X64-LABEL: vec_v2i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    sets %dl
+; X64-NEXT:    movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
+; X64-NEXT:    addq %rsi, %rdx
+; X64-NEXT:    movq %xmm1, %rcx
+; X64-NEXT:    movq %rax, %rdi
+; X64-NEXT:    shlq %cl, %rdi
+; X64-NEXT:    movq %rdi, %r8
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    sarq %cl, %r8
+; X64-NEXT:    cmpq %r8, %rax
+; X64-NEXT:    cmovneq %rdx, %rdi
+; X64-NEXT:    movq %rdi, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    testq %rax, %rax
+; X64-NEXT:    sets %dl
+; X64-NEXT:    addq %rsi, %rdx
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; X64-NEXT:    movq %xmm0, %rcx
+; X64-NEXT:    movq %rax, %rsi
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    sarq %cl, %rdi
+; X64-NEXT:    cmpq %rdi, %rax
+; X64-NEXT:    cmovneq %rdx, %rsi
+; X64-NEXT:    movq %rsi, %xmm0
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v2i64:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; X64-AVX2-NEXT:    xorl %edx, %edx
+; X64-AVX2-NEXT:    testq %rax, %rax
+; X64-AVX2-NEXT:    sets %dl
+; X64-AVX2-NEXT:    movabsq $9223372036854775807, %rsi # imm = 0x7FFFFFFFFFFFFFFF
+; X64-AVX2-NEXT:    addq %rsi, %rdx
+; X64-AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
+; X64-AVX2-NEXT:    movq %rax, %rdi
+; X64-AVX2-NEXT:    shlq %cl, %rdi
+; X64-AVX2-NEXT:    movq %rdi, %r8
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    sarq %cl, %r8
+; X64-AVX2-NEXT:    cmpq %r8, %rax
+; X64-AVX2-NEXT:    cmovneq %rdx, %rdi
+; X64-AVX2-NEXT:    vmovq %rdi, %xmm2
+; X64-AVX2-NEXT:    vmovq %xmm0, %rax
+; X64-AVX2-NEXT:    xorl %edx, %edx
+; X64-AVX2-NEXT:    testq %rax, %rax
+; X64-AVX2-NEXT:    sets %dl
+; X64-AVX2-NEXT:    addq %rsi, %rdx
+; X64-AVX2-NEXT:    vmovq %xmm1, %rcx
+; X64-AVX2-NEXT:    movq %rax, %rsi
+; X64-AVX2-NEXT:    shlq %cl, %rsi
+; X64-AVX2-NEXT:    movq %rsi, %rdi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    sarq %cl, %rdi
+; X64-AVX2-NEXT:    cmpq %rdi, %rax
+; X64-AVX2-NEXT:    cmovneq %rdx, %rsi
+; X64-AVX2-NEXT:    vmovq %rsi, %xmm0
+; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v2i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $20, %esp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    shldl %cl, %edx, %esi
+; X86-NEXT:    xorl %edi, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovnel %eax, %esi
+; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %esi, %ebx
+; X86-NEXT:    sarl %cl, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovel %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl %ebp, %ebx
+; X86-NEXT:    shldl %cl, %esi, %ebx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    sarl %cl, %edi
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovel %edi, %esi
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebx, %edx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovnel %edi, %edx
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    xorl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    sarl $31, %edi
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    xorl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    notl %edi
+; X86-NEXT:    cmovel (%esp), %edi # 4-byte Folded Reload
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %ebp, %esi
+; X86-NEXT:    sarl $31, %ebp
+; X86-NEXT:    movl %ebp, %edi
+; X86-NEXT:    xorl $2147483647, %edi # imm = 0x7FFFFFFF
+; X86-NEXT:    orl %esi, %edx
+; X86-NEXT:    notl %ebp
+; X86-NEXT:    cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; X86-NEXT:    cmovel %ebx, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %edi, 12(%eax)
+; X86-NEXT:    movl %ebp, 8(%eax)
+; X86-NEXT:    movl %ecx, 4(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    addl $20, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec_v4i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
 ; X64-NEXT:    movd %xmm2, %eax
@@ -75,7 +243,67 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movdqa %xmm2, %xmm0
 ; X64-NEXT:    retq
 ;
-; X86-LABEL: vec:
+; X64-AVX2-LABEL: vec_v4i32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrd $1, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrd $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %edi, %edi
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sets %dil
+; X64-AVX2-NEXT:    addl $2147483647, %edi # imm = 0x7FFFFFFF
+; X64-AVX2-NEXT:    cmpl %esi, %eax
+; X64-AVX2-NEXT:    cmovel %edx, %edi
+; X64-AVX2-NEXT:    vmovd %xmm0, %eax
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-AVX2-NEXT:    cmpl %esi, %eax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX2-NEXT:    vpinsrd $1, %edi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrd $2, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrd $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-AVX2-NEXT:    cmpl %esi, %eax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrd $2, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrd $3, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testl %eax, %eax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X64-AVX2-NEXT:    cmpl %esi, %eax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrd $3, %ecx, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v4i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
@@ -144,3 +372,1119 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
   %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %tmp
 }
+
+define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
+; X64-LABEL: vec_v8i16:
+; X64:       # %bb.0:
+; X64-NEXT:    pextrw $7, %xmm0, %eax
+; X64-NEXT:    pextrw $7, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    pextrw $6, %xmm0, %eax
+; X64-NEXT:    pextrw $6, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    pextrw $5, %xmm0, %eax
+; X64-NEXT:    pextrw $5, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm4
+; X64-NEXT:    pextrw $4, %xmm0, %eax
+; X64-NEXT:    pextrw $4, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT:    pextrw $3, %xmm0, %eax
+; X64-NEXT:    pextrw $3, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm4
+; X64-NEXT:    pextrw $2, %xmm0, %eax
+; X64-NEXT:    pextrw $2, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X64-NEXT:    pextrw $1, %xmm0, %eax
+; X64-NEXT:    pextrw $1, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm4
+; X64-NEXT:    movd %xmm0, %eax
+; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movswl %dx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    sarl %cl, %esi
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testw %ax, %ax
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v8i16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrw $1, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movswl %si, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %edi
+; X64-AVX2-NEXT:    xorl %eax, %eax
+; X64-AVX2-NEXT:    testw %dx, %dx
+; X64-AVX2-NEXT:    sets %al
+; X64-AVX2-NEXT:    addl $32767, %eax # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovel %esi, %eax
+; X64-AVX2-NEXT:    vmovd %xmm0, %edx
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movswl %si, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %edi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %dx, %dx
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovel %esi, %ecx
+; X64-AVX2-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX2-NEXT:    vpinsrw $1, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $2, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $2, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $3, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $3, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $4, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $4, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $4, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $5, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $5, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $5, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $6, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $6, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $6, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $7, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $7, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movswl %dx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarl %cl, %esi
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testw %ax, %ax
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrw $7, %ecx, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v8i16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %edi, %ebx
+; X86-NEXT:    shll %cl, %ebx
+; X86-NEXT:    movswl %bx, %ebp
+; X86-NEXT:    sarl %cl, %ebp
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testw %di, %di
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT:    cmpw %bp, %di
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovel %ebx, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movswl %di, %ebx
+; X86-NEXT:    sarl %cl, %ebx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testw %si, %si
+; X86-NEXT:    sets %al
+; X86-NEXT:    addl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    cmpw %bx, %si
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovel %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movswl %si, %edi
+; X86-NEXT:    sarl %cl, %edi
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testw %dx, %dx
+; X86-NEXT:    sets %al
+; X86-NEXT:    addl $32767, %eax # imm = 0x7FFF
+; X86-NEXT:    cmpw %di, %dx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovel %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movswl %dx, %esi
+; X86-NEXT:    sarl %cl, %esi
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    sets %bl
+; X86-NEXT:    addl $32767, %ebx # imm = 0x7FFF
+; X86-NEXT:    cmpw %si, %ax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovel %edx, %ebx
+; X86-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movswl %dx, %esi
+; X86-NEXT:    sarl %cl, %esi
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT:    cmpw %si, %ax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovel %edx, %ecx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movswl %dx, %esi
+; X86-NEXT:    sarl %cl, %esi
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    sets %bl
+; X86-NEXT:    addl $32767, %ebx # imm = 0x7FFF
+; X86-NEXT:    cmpw %si, %ax
+; X86-NEXT:    cmovel %edx, %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movswl %si, %edi
+; X86-NEXT:    sarl %cl, %edi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    sets %dl
+; X86-NEXT:    addl $32767, %edx # imm = 0x7FFF
+; X86-NEXT:    cmpw %di, %ax
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movswl %si, %edi
+; X86-NEXT:    sarl %cl, %edi
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testw %ax, %ax
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT:    cmpw %di, %ax
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movw %cx, 14(%eax)
+; X86-NEXT:    movw %dx, 12(%eax)
+; X86-NEXT:    movw %bx, 10(%eax)
+; X86-NEXT:    movw %bp, 8(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, 6(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, 2(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, (%eax)
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
+  ret <8 x i16> %tmp
+}
+
+define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
+; X64-LABEL: vec_v16i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm1
+; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm1
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm1
+; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm0
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    sarb %cl, %sil
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %al, %al
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %sil, %al
+; X64-NEXT:    cmovel %edx, %ecx
+; X64-NEXT:    movd %ecx, %xmm3
+; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shlb %cl, %dil
+; X64-NEXT:    movzbl %dil, %edi
+; X64-NEXT:    movl %edi, %r8d
+; X64-NEXT:    sarb %cl, %r8b
+; X64-NEXT:    xorl %ecx, %ecx
+; X64-NEXT:    testb %sil, %sil
+; X64-NEXT:    sets %cl
+; X64-NEXT:    addl $127, %ecx
+; X64-NEXT:    cmpb %r8b, %sil
+; X64-NEXT:    cmovel %edi, %ecx
+; X64-NEXT:    movd %ecx, %xmm4
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    sarb %cl, %dil
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    testb %dl, %dl
+; X64-NEXT:    sets %al
+; X64-NEXT:    addl $127, %eax
+; X64-NEXT:    cmpb %dil, %dl
+; X64-NEXT:    cmovel %esi, %eax
+; X64-NEXT:    movd %eax, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v16i8:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrb $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $1, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %eax
+; X64-AVX2-NEXT:    shlb %cl, %al
+; X64-AVX2-NEXT:    movzbl %al, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %dil
+; X64-AVX2-NEXT:    xorl %eax, %eax
+; X64-AVX2-NEXT:    testb %dl, %dl
+; X64-AVX2-NEXT:    sets %al
+; X64-AVX2-NEXT:    addl $127, %eax
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovel %esi, %eax
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    vmovd %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %dil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %dl, %dl
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovel %esi, %ecx
+; X64-AVX2-NEXT:    vmovd %ecx, %xmm2
+; X64-AVX2-NEXT:    vpinsrb $1, %eax, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $2, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $3, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $3, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $4, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $4, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $5, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $5, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $5, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $6, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $6, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $7, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $7, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $7, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $8, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $8, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $9, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $9, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $10, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $10, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $11, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $11, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $12, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $12, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $13, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $13, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $13, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $14, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $14, %ecx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $15, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    sarb %cl, %sil
+; X64-AVX2-NEXT:    xorl %ecx, %ecx
+; X64-AVX2-NEXT:    testb %al, %al
+; X64-AVX2-NEXT:    sets %cl
+; X64-AVX2-NEXT:    addl $127, %ecx
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    cmovel %edx, %ecx
+; X64-AVX2-NEXT:    vpinsrb $15, %ecx, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v16i8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $44, %esp
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movb %ch, %bh
+; X86-NEXT:    shlb %cl, %bh
+; X86-NEXT:    movzbl %bh, %esi
+; X86-NEXT:    sarb %cl, %bh
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    testb %ch, %ch
+; X86-NEXT:    sets %al
+; X86-NEXT:    addl $127, %eax
+; X86-NEXT:    cmpb %bh, %ch
+; X86-NEXT:    cmovel %esi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %ebx, %eax
+; X86-NEXT:    movl %edx, %ecx
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    movzbl %al, %esi
+; X86-NEXT:    sarb %cl, %al
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %bl, %bl
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %al, %bl
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %dh, %al
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    movzbl %al, %esi
+; X86-NEXT:    sarb %cl, %al
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %dh, %dh
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %al, %dh
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb %ah, %al
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shlb %cl, %al
+; X86-NEXT:    movzbl %al, %esi
+; X86-NEXT:    sarb %cl, %al
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    testb %ah, %ah
+; X86-NEXT:    sets %dl
+; X86-NEXT:    addl $127, %edx
+; X86-NEXT:    cmpb %al, %ah
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, %ebp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %esi
+; X86-NEXT:    sarb %cl, %dl
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %bl
+; X86-NEXT:    addl $127, %ebx
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovel %esi, %ebx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %al, %ah
+; X86-NEXT:    shlb %cl, %ah
+; X86-NEXT:    movzbl %ah, %esi
+; X86-NEXT:    sarb %cl, %ah
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %dl
+; X86-NEXT:    addl $127, %edx
+; X86-NEXT:    cmpb %ah, %al
+; X86-NEXT:    cmovel %esi, %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %al, %ah
+; X86-NEXT:    shlb %cl, %ah
+; X86-NEXT:    movzbl %ah, %esi
+; X86-NEXT:    sarb %cl, %ah
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    testb %al, %al
+; X86-NEXT:    sets %cl
+; X86-NEXT:    addl $127, %ecx
+; X86-NEXT:    cmpb %ah, %al
+; X86-NEXT:    cmovel %esi, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %cl, 15(%eax)
+; X86-NEXT:    movb %dl, 14(%eax)
+; X86-NEXT:    movb %bl, 13(%eax)
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movb %cl, 12(%eax)
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movb %cl, 11(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 10(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 9(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 7(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 5(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 3(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 2(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, (%eax)
+; X86-NEXT:    addl $44, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i8> %tmp
+}

diff  --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
index 871e9059c20cd..ad9fc85eaeb18 100644
--- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
@@ -1,11 +1,151 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx2 | FileCheck %s --check-prefix=X64-AVX2
 ; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
 
-declare  <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8>, <16 x i8>)
 
-define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
-; X64-LABEL: vec:
+define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
+; X64-LABEL: vec_v2i64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %xmm0, %rax
+; X64-NEXT:    movq %xmm1, %rcx
+; X64-NEXT:    movq %rax, %rdx
+; X64-NEXT:    shlq %cl, %rdx
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rsi
+; X64-NEXT:    cmpq %rsi, %rax
+; X64-NEXT:    movq $-1, %rax
+; X64-NEXT:    cmovneq %rax, %rdx
+; X64-NEXT:    movq %rdx, %xmm2
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; X64-NEXT:    movq %xmm0, %rdx
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; X64-NEXT:    movq %xmm0, %rcx
+; X64-NEXT:    movq %rdx, %rsi
+; X64-NEXT:    shlq %cl, %rsi
+; X64-NEXT:    movq %rsi, %rdi
+; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NEXT:    shrq %cl, %rdi
+; X64-NEXT:    cmpq %rdi, %rdx
+; X64-NEXT:    cmovneq %rax, %rsi
+; X64-NEXT:    movq %rsi, %xmm0
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v2i64:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; X64-AVX2-NEXT:    vpextrq $1, %xmm1, %rcx
+; X64-AVX2-NEXT:    movq %rax, %rdx
+; X64-AVX2-NEXT:    shlq %cl, %rdx
+; X64-AVX2-NEXT:    movq %rdx, %rsi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    shrq %cl, %rsi
+; X64-AVX2-NEXT:    cmpq %rsi, %rax
+; X64-AVX2-NEXT:    movq $-1, %rax
+; X64-AVX2-NEXT:    cmovneq %rax, %rdx
+; X64-AVX2-NEXT:    vmovq %rdx, %xmm2
+; X64-AVX2-NEXT:    vmovq %xmm0, %rdx
+; X64-AVX2-NEXT:    vmovq %xmm1, %rcx
+; X64-AVX2-NEXT:    movq %rdx, %rsi
+; X64-AVX2-NEXT:    shlq %cl, %rsi
+; X64-AVX2-NEXT:    movq %rsi, %rdi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX2-NEXT:    shrq %cl, %rdi
+; X64-AVX2-NEXT:    cmpq %rdi, %rdx
+; X64-AVX2-NEXT:    cmovneq %rax, %rsi
+; X64-AVX2-NEXT:    vmovq %rsi, %xmm0
+; X64-AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v2i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $16, %esp
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    shldl %cl, %esi, %edx
+; X86-NEXT:    xorl %ebx, %ebx
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    cmovnel %ebx, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %edx, %ebp
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovnel %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shldl %cl, %eax, %edx
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovnel %esi, %edx
+; X86-NEXT:    cmovnel %ebx, %esi
+; X86-NEXT:    movl %edx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    testb $32, %cl
+; X86-NEXT:    cmovel %edi, %ebx
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shrdl %cl, %ebp, %eax
+; X86-NEXT:    testb $32, %ch
+; X86-NEXT:    cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrdl %cl, %edx, %ebp
+; X86-NEXT:    testb $32, {{[0-9]+}}(%esp)
+; X86-NEXT:    cmovnel %edi, %ebp
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl %eax, %ecx
+; X86-NEXT:    movl $-1, %ecx
+; X86-NEXT:    movl (%esp), %edi # 4-byte Reload
+; X86-NEXT:    cmovnel %ecx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    cmovnel %ecx, %eax
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    orl %ebp, %ebx
+; X86-NEXT:    cmovnel %ecx, %esi
+; X86-NEXT:    cmovnel %ecx, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %edx, 12(%ecx)
+; X86-NEXT:    movl %esi, 8(%ecx)
+; X86-NEXT:    movl %eax, 4(%ecx)
+; X86-NEXT:    movl %edi, (%ecx)
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <2 x i64> @llvm.ushl.sat.v2i64(<2 x i64> %x, <2 x i64> %y)
+  ret <2 x i64> %tmp
+}
+
+define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec_v4i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[3,3,3,3]
 ; X64-NEXT:    movd %xmm2, %eax
@@ -60,7 +200,52 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-NEXT:    movdqa %xmm2, %xmm0
 ; X64-NEXT:    retq
 ;
-; X86-LABEL: vec:
+; X64-AVX2-LABEL: vec_v4i32:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrd $1, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrd $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %esi
+; X64-AVX2-NEXT:    cmpl %esi, %eax
+; X64-AVX2-NEXT:    movl $-1, %eax
+; X64-AVX2-NEXT:    cmovnel %eax, %edx
+; X64-AVX2-NEXT:    vmovd %xmm0, %esi
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    shll %cl, %edi
+; X64-AVX2-NEXT:    movl %edi, %r8d
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %r8d
+; X64-AVX2-NEXT:    cmpl %r8d, %esi
+; X64-AVX2-NEXT:    cmovnel %eax, %edi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm2
+; X64-AVX2-NEXT:    vpinsrd $1, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrd $2, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrd $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpl %edi, %edx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrd $2, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrd $3, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpl %edi, %edx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrd $3, %esi, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v4i32:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebp
 ; X86-NEXT:    pushl %ebx
@@ -114,3 +299,867 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
   %tmp = call <4 x i32> @llvm.ushl.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
   ret <4 x i32> %tmp
 }
+
+define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
+; X64-LABEL: vec_v8i16:
+; X64:       # %bb.0:
+; X64-NEXT:    pextrw $7, %xmm0, %eax
+; X64-NEXT:    pextrw $7, %xmm1, %ecx
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shll %cl, %edx
+; X64-NEXT:    movzwl %dx, %edx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %esi
+; X64-NEXT:    cmpw %si, %ax
+; X64-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X64-NEXT:    cmovnel %eax, %edx
+; X64-NEXT:    movd %edx, %xmm2
+; X64-NEXT:    pextrw $6, %xmm0, %edx
+; X64-NEXT:    pextrw $6, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; X64-NEXT:    pextrw $5, %xmm0, %edx
+; X64-NEXT:    pextrw $5, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm4
+; X64-NEXT:    pextrw $4, %xmm0, %edx
+; X64-NEXT:    pextrw $4, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; X64-NEXT:    pextrw $3, %xmm0, %edx
+; X64-NEXT:    pextrw $3, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm4
+; X64-NEXT:    pextrw $2, %xmm0, %edx
+; X64-NEXT:    pextrw $2, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3]
+; X64-NEXT:    pextrw $1, %xmm0, %edx
+; X64-NEXT:    pextrw $1, %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm4
+; X64-NEXT:    movd %xmm0, %edx
+; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    movl %edx, %esi
+; X64-NEXT:    shll %cl, %esi
+; X64-NEXT:    movzwl %si, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    cmpw %di, %dx
+; X64-NEXT:    cmovnel %eax, %esi
+; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v8i16:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrw $1, %xmm0, %eax
+; X64-AVX2-NEXT:    vpextrw $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shll %cl, %edx
+; X64-AVX2-NEXT:    movzwl %dx, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %esi
+; X64-AVX2-NEXT:    cmpw %si, %ax
+; X64-AVX2-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X64-AVX2-NEXT:    cmovnel %eax, %edx
+; X64-AVX2-NEXT:    vmovd %xmm0, %esi
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    shll %cl, %edi
+; X64-AVX2-NEXT:    movzwl %di, %edi
+; X64-AVX2-NEXT:    movl %edi, %r8d
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %r8d
+; X64-AVX2-NEXT:    cmpw %r8w, %si
+; X64-AVX2-NEXT:    cmovnel %eax, %edi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm2
+; X64-AVX2-NEXT:    vpinsrw $1, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $2, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $2, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $3, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $3, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $4, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $4, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $4, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $5, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $5, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $5, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $6, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $6, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $6, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrw $7, %xmm0, %edx
+; X64-AVX2-NEXT:    vpextrw $7, %xmm1, %ecx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shll %cl, %esi
+; X64-AVX2-NEXT:    movzwl %si, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrl %cl, %edi
+; X64-AVX2-NEXT:    cmpw %di, %dx
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrw $7, %esi, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v8i16:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movzwl %di, %ebx
+; X86-NEXT:    movl %ebx, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    cmpw %di, %ax
+; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %esi, %edi
+; X86-NEXT:    movb %ch, %cl
+; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    movzwl %di, %eax
+; X86-NEXT:    movl %eax, %edi
+; X86-NEXT:    shrl %cl, %edi
+; X86-NEXT:    cmpw %di, %si
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $65535, %edi # imm = 0xFFFF
+; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movzwl %si, %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    cmpw %si, %dx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovnel %edi, %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl $65535, %esi # imm = 0xFFFF
+; X86-NEXT:    movl %ebp, %eax
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movzwl %dx, %ebp
+; X86-NEXT:    movl %ebp, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    cmpw %dx, %ax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmovnel %esi, %ebp
+; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movzwl %si, %ebx
+; X86-NEXT:    movl %ebx, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    cmpw %si, %dx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmovnel %eax, %ebx
+; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    shll %cl, %esi
+; X86-NEXT:    movzwl %si, %edi
+; X86-NEXT:    movl %edi, %esi
+; X86-NEXT:    shrl %cl, %esi
+; X86-NEXT:    cmpw %si, %dx
+; X86-NEXT:    cmovnel %eax, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movzwl %dx, %esi
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    cmpw %dx, %ax
+; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NEXT:    cmovnel %eax, %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    movzwl %dx, %edx
+; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    cmpw %ax, %cx
+; X86-NEXT:    movl $65535, %eax # imm = 0xFFFF
+; X86-NEXT:    cmovnel %eax, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movw %dx, 14(%eax)
+; X86-NEXT:    movw %si, 12(%eax)
+; X86-NEXT:    movw %di, 10(%eax)
+; X86-NEXT:    movw %bx, 8(%eax)
+; X86-NEXT:    movw %bp, 6(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, 2(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movw %cx, (%eax)
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <8 x i16> @llvm.ushl.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
+  ret <8 x i16> %tmp
+}
+
+define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
+; X64-LABEL: vec_v16i8:
+; X64:       # %bb.0:
+; X64-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %edx
+; X64-NEXT:    shlb %cl, %dl
+; X64-NEXT:    movzbl %dl, %esi
+; X64-NEXT:    movl %esi, %edx
+; X64-NEXT:    shrb %cl, %dl
+; X64-NEXT:    cmpb %dl, %al
+; X64-NEXT:    movl $255, %edx
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm1
+; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm1
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm1
+; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm0
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movl %eax, %esi
+; X64-NEXT:    shlb %cl, %sil
+; X64-NEXT:    movzbl %sil, %esi
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    shrb %cl, %dil
+; X64-NEXT:    cmpb %dil, %al
+; X64-NEXT:    cmovnel %edx, %esi
+; X64-NEXT:    movd %esi, %xmm3
+; X64-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edi
+; X64-NEXT:    movl %edi, %r8d
+; X64-NEXT:    shlb %cl, %r8b
+; X64-NEXT:    movzbl %r8b, %r8d
+; X64-NEXT:    movl %r8d, %r9d
+; X64-NEXT:    shrb %cl, %r9b
+; X64-NEXT:    cmpb %r9b, %dil
+; X64-NEXT:    cmovnel %edx, %r8d
+; X64-NEXT:    movd %r8d, %xmm4
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    shlb %cl, %dil
+; X64-NEXT:    movzbl %dil, %edi
+; X64-NEXT:    movl %edi, %r8d
+; X64-NEXT:    shrb %cl, %r8b
+; X64-NEXT:    cmpb %r8b, %sil
+; X64-NEXT:    cmovnel %edx, %edi
+; X64-NEXT:    movd %edi, %xmm0
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    retq
+;
+; X64-AVX2-LABEL: vec_v16i8:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpextrb $1, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $1, %xmm0, %eax
+; X64-AVX2-NEXT:    movl %eax, %edx
+; X64-AVX2-NEXT:    shlb %cl, %dl
+; X64-AVX2-NEXT:    movzbl %dl, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %sil
+; X64-AVX2-NEXT:    cmpb %sil, %al
+; X64-AVX2-NEXT:    movl $255, %eax
+; X64-AVX2-NEXT:    cmovnel %eax, %edx
+; X64-AVX2-NEXT:    vmovd %xmm1, %ecx
+; X64-AVX2-NEXT:    vmovd %xmm0, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    shlb %cl, %dil
+; X64-AVX2-NEXT:    movzbl %dil, %edi
+; X64-AVX2-NEXT:    movl %edi, %r8d
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %r8b
+; X64-AVX2-NEXT:    cmpb %r8b, %sil
+; X64-AVX2-NEXT:    cmovnel %eax, %edi
+; X64-AVX2-NEXT:    vmovd %edi, %xmm2
+; X64-AVX2-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $2, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $2, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $2, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $3, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $3, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $3, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $4, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $4, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $4, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $5, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $5, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $5, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $6, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $6, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $6, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $7, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $7, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $7, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $8, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $8, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $8, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $9, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $9, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $9, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $10, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $10, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $10, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $11, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $11, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $11, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $12, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $12, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $12, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $13, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $13, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $13, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $14, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $14, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $14, %esi, %xmm2, %xmm2
+; X64-AVX2-NEXT:    vpextrb $15, %xmm1, %ecx
+; X64-AVX2-NEXT:    vpextrb $15, %xmm0, %edx
+; X64-AVX2-NEXT:    movl %edx, %esi
+; X64-AVX2-NEXT:    shlb %cl, %sil
+; X64-AVX2-NEXT:    movzbl %sil, %esi
+; X64-AVX2-NEXT:    movl %esi, %edi
+; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX2-NEXT:    shrb %cl, %dil
+; X64-AVX2-NEXT:    cmpb %dil, %dl
+; X64-AVX2-NEXT:    cmovnel %eax, %esi
+; X64-AVX2-NEXT:    vpinsrb $15, %esi, %xmm2, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X86-LABEL: vec_v16i8:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $48, %esp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movb %bl, %bh
+; X86-NEXT:    shlb %cl, %bh
+; X86-NEXT:    movzbl %bh, %edi
+; X86-NEXT:    shrb %cl, %bh
+; X86-NEXT:    cmpb %bh, %bl
+; X86-NEXT:    movl $255, %esi
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %dh, %bl
+; X86-NEXT:    movb %ah, %cl
+; X86-NEXT:    shlb %cl, %bl
+; X86-NEXT:    movzbl %bl, %edi
+; X86-NEXT:    shrb %cl, %bl
+; X86-NEXT:    cmpb %bl, %dh
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %ch, %ah
+; X86-NEXT:    movb %dl, %cl
+; X86-NEXT:    shlb %cl, %ah
+; X86-NEXT:    movzbl %ah, %edi
+; X86-NEXT:    shrb %cl, %ah
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    cmpb %ah, %ch
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movb %dl, %ah
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    shlb %cl, %ah
+; X86-NEXT:    movzbl %ah, %edi
+; X86-NEXT:    shrb %cl, %ah
+; X86-NEXT:    cmpb %ah, %dl
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movl %edi, (%esp) # 4-byte Spill
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %ebp
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %ebp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %edi
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %edi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    shlb %cl, %dl
+; X86-NEXT:    movzbl %dl, %ebx
+; X86-NEXT:    shrb %cl, %dl
+; X86-NEXT:    cmpb %dl, %al
+; X86-NEXT:    cmovnel %esi, %ebx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %al, %ah
+; X86-NEXT:    shlb %cl, %ah
+; X86-NEXT:    movzbl %ah, %edx
+; X86-NEXT:    shrb %cl, %ah
+; X86-NEXT:    cmpb %ah, %al
+; X86-NEXT:    cmovnel %esi, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movb %dl, 15(%eax)
+; X86-NEXT:    movb %bl, 14(%eax)
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    movb %cl, 13(%eax)
+; X86-NEXT:    movl %ebp, %ecx
+; X86-NEXT:    movb %cl, 12(%eax)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 11(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 10(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 9(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 8(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 7(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 6(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 5(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 4(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 3(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 2(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, 1(%eax)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movb %cl, (%eax)
+; X86-NEXT:    addl $48, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+  %tmp = call <16 x i8> @llvm.ushl.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
+  ret <16 x i8> %tmp
+}
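
For reference, every scalarized sequence checked above follows the same per-element pattern: shift left, shift back with the same amount, and saturate when the round trip changes the value. A rough single-lane IR equivalent for the unsigned case (illustrative only, not part of this patch; the function name is made up) would be:

define i64 @ushl_sat_i64_expand(i64 %x, i64 %y) {
  ; Shift left, then shift back; if any set bits were lost, saturate the
  ; result to the unsigned maximum (all-ones), otherwise keep the shift.
  %shl  = shl i64 %x, %y
  %back = lshr i64 %shl, %y
  %ovf  = icmp ne i64 %back, %x
  %res  = select i1 %ovf, i64 -1, i64 %shl
  ret i64 %res
}

This is what the shlq/shrq/cmpq/cmovneq $-1 runs in the X64 CHECK lines correspond to; the signed tests are analogous but shift back arithmetically and saturate toward the signed min/max instead of all-ones.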

More information about the llvm-commits mailing list