[llvm] 65292fe - [X86] Add SSE2+SSSE3 common check prefix to psubus tests

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 15 06:07:29 PST 2021


Author: Simon Pilgrim
Date: 2021-02-15T14:07:11Z
New Revision: 65292fe3a2101a5ce9b01f089cdc077320e53b13

URL: https://github.com/llvm/llvm-project/commit/65292fe3a2101a5ce9b01f089cdc077320e53b13
DIFF: https://github.com/llvm/llvm-project/commit/65292fe3a2101a5ce9b01f089cdc077320e53b13.diff

LOG: [X86] Add SSE2+SSSE3 common check prefix to psubus tests

Noticed by @pengfei on D96703

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/psubus.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 098ecc158e0c..d2d8176838fc 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSSE3
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
@@ -602,95 +602,50 @@ vector.ph:
 
 ; FIXME: match this to UMIN+TRUNC+PSUBUS
 define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
-; SSE2-LABEL: test14:
-; SSE2:       # %bb.0: # %vector.ph
-; SSE2-NEXT:    pxor %xmm8, %xmm8
-; SSE2-NEXT:    movdqa %xmm0, %xmm6
-; SSE2-NEXT:    movdqa %xmm4, %xmm9
-; SSE2-NEXT:    movdqa %xmm3, %xmm10
-; SSE2-NEXT:    movdqa %xmm2, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2-NEXT:    pand %xmm5, %xmm4
-; SSE2-NEXT:    pand %xmm5, %xmm3
-; SSE2-NEXT:    packuswb %xmm4, %xmm3
-; SSE2-NEXT:    movdqa %xmm1, %xmm4
-; SSE2-NEXT:    pand %xmm5, %xmm2
-; SSE2-NEXT:    pand %xmm5, %xmm1
-; SSE2-NEXT:    packuswb %xmm2, %xmm1
-; SSE2-NEXT:    packuswb %xmm3, %xmm1
-; SSE2-NEXT:    psubb %xmm0, %xmm1
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
-; SSE2-NEXT:    movdqa %xmm6, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm5, %xmm9
-; SSE2-NEXT:    por %xmm5, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm9, %xmm6
-; SSE2-NEXT:    pxor %xmm5, %xmm10
-; SSE2-NEXT:    por %xmm5, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm10, %xmm3
-; SSE2-NEXT:    packssdw %xmm6, %xmm3
-; SSE2-NEXT:    pxor %xmm5, %xmm7
-; SSE2-NEXT:    por %xmm5, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm7, %xmm2
-; SSE2-NEXT:    pxor %xmm5, %xmm4
-; SSE2-NEXT:    por %xmm5, %xmm0
-; SSE2-NEXT:    pcmpgtd %xmm4, %xmm0
-; SSE2-NEXT:    packssdw %xmm2, %xmm0
-; SSE2-NEXT:    packsswb %xmm3, %xmm0
-; SSE2-NEXT:    pandn %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: test14:
-; SSSE3:       # %bb.0: # %vector.ph
-; SSSE3-NEXT:    pxor %xmm8, %xmm8
-; SSSE3-NEXT:    movdqa %xmm0, %xmm6
-; SSSE3-NEXT:    movdqa %xmm4, %xmm9
-; SSSE3-NEXT:    movdqa %xmm3, %xmm10
-; SSSE3-NEXT:    movdqa %xmm2, %xmm7
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSSE3-NEXT:    pand %xmm5, %xmm4
-; SSSE3-NEXT:    pand %xmm5, %xmm3
-; SSSE3-NEXT:    packuswb %xmm4, %xmm3
-; SSSE3-NEXT:    movdqa %xmm1, %xmm4
-; SSSE3-NEXT:    pand %xmm5, %xmm2
-; SSSE3-NEXT:    pand %xmm5, %xmm1
-; SSSE3-NEXT:    packuswb %xmm2, %xmm1
-; SSSE3-NEXT:    packuswb %xmm3, %xmm1
-; SSSE3-NEXT:    psubb %xmm0, %xmm1
-; SSSE3-NEXT:    movdqa %xmm0, %xmm2
-; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT:    movdqa %xmm2, %xmm0
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
-; SSSE3-NEXT:    movdqa %xmm6, %xmm3
-; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
-; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT:    pxor %xmm5, %xmm9
-; SSSE3-NEXT:    por %xmm5, %xmm6
-; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm6
-; SSSE3-NEXT:    pxor %xmm5, %xmm10
-; SSSE3-NEXT:    por %xmm5, %xmm3
-; SSSE3-NEXT:    pcmpgtd %xmm10, %xmm3
-; SSSE3-NEXT:    packssdw %xmm6, %xmm3
-; SSSE3-NEXT:    pxor %xmm5, %xmm7
-; SSSE3-NEXT:    por %xmm5, %xmm2
-; SSSE3-NEXT:    pcmpgtd %xmm7, %xmm2
-; SSSE3-NEXT:    pxor %xmm5, %xmm4
-; SSSE3-NEXT:    por %xmm5, %xmm0
-; SSSE3-NEXT:    pcmpgtd %xmm4, %xmm0
-; SSSE3-NEXT:    packssdw %xmm2, %xmm0
-; SSSE3-NEXT:    packsswb %xmm3, %xmm0
-; SSSE3-NEXT:    pandn %xmm1, %xmm0
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: test14:
+; SSE2OR3:       # %bb.0: # %vector.ph
+; SSE2OR3-NEXT:    pxor %xmm8, %xmm8
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm4, %xmm9
+; SSE2OR3-NEXT:    movdqa %xmm3, %xmm10
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2OR3-NEXT:    pand %xmm5, %xmm4
+; SSE2OR3-NEXT:    pand %xmm5, %xmm3
+; SSE2OR3-NEXT:    packuswb %xmm4, %xmm3
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm4
+; SSE2OR3-NEXT:    pand %xmm5, %xmm2
+; SSE2OR3-NEXT:    pand %xmm5, %xmm1
+; SSE2OR3-NEXT:    packuswb %xmm2, %xmm1
+; SSE2OR3-NEXT:    packuswb %xmm3, %xmm1
+; SSE2OR3-NEXT:    psubb %xmm0, %xmm1
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm2
+; SSE2OR3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm0
+; SSE2OR3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
+; SSE2OR3-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSE2OR3-NEXT:    punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
+; SSE2OR3-NEXT:    movdqa %xmm6, %xmm3
+; SSE2OR3-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
+; SSE2OR3-NEXT:    punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm9
+; SSE2OR3-NEXT:    por %xmm5, %xmm6
+; SSE2OR3-NEXT:    pcmpgtd %xmm9, %xmm6
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm10
+; SSE2OR3-NEXT:    por %xmm5, %xmm3
+; SSE2OR3-NEXT:    pcmpgtd %xmm10, %xmm3
+; SSE2OR3-NEXT:    packssdw %xmm6, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm7
+; SSE2OR3-NEXT:    por %xmm5, %xmm2
+; SSE2OR3-NEXT:    pcmpgtd %xmm7, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm4
+; SSE2OR3-NEXT:    por %xmm5, %xmm0
+; SSE2OR3-NEXT:    pcmpgtd %xmm4, %xmm0
+; SSE2OR3-NEXT:    packssdw %xmm2, %xmm0
+; SSE2OR3-NEXT:    packsswb %xmm3, %xmm0
+; SSE2OR3-NEXT:    pandn %xmm1, %xmm0
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: test14:
 ; SSE41:       # %bb.0: # %vector.ph
@@ -1475,145 +1430,75 @@ vector.ph:
 }
 
 define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind {
-; SSE2-LABEL: psubus_8i64_max:
-; SSE2:       # %bb.0: # %vector.ph
-; SSE2-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT:    movdqa %xmm2, %xmm7
-; SSE2-NEXT:    pxor %xmm5, %xmm7
-; SSE2-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
-; SSE2-NEXT:    movdqa %xmm8, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm7, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm8, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; SSE2-NEXT:    pand %xmm9, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    por %xmm7, %xmm6
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [65535,65535]
-; SSE2-NEXT:    pand %xmm6, %xmm2
-; SSE2-NEXT:    pandn %xmm9, %xmm6
-; SSE2-NEXT:    por %xmm2, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    movdqa %xmm1, %xmm6
-; SSE2-NEXT:    pxor %xmm5, %xmm6
-; SSE2-NEXT:    movdqa %xmm8, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm8, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm2, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
-; SSE2-NEXT:    por %xmm6, %xmm2
-; SSE2-NEXT:    pand %xmm2, %xmm1
-; SSE2-NEXT:    pandn %xmm9, %xmm2
-; SSE2-NEXT:    por %xmm1, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
-; SSE2-NEXT:    movdqa %xmm4, %xmm2
-; SSE2-NEXT:    pxor %xmm5, %xmm2
-; SSE2-NEXT:    movdqa %xmm8, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm8, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT:    pand %xmm7, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    por %xmm2, %xmm6
-; SSE2-NEXT:    pand %xmm6, %xmm4
-; SSE2-NEXT:    pandn %xmm9, %xmm6
-; SSE2-NEXT:    por %xmm4, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSE2-NEXT:    pxor %xmm3, %xmm5
-; SSE2-NEXT:    movdqa %xmm8, %xmm4
-; SSE2-NEXT:    pcmpgtd %xmm5, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm8, %xmm5
-; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT:    pand %xmm6, %xmm5
-; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    por %xmm5, %xmm4
-; SSE2-NEXT:    pand %xmm4, %xmm3
-; SSE2-NEXT:    pandn %xmm9, %xmm4
-; SSE2-NEXT:    por %xmm3, %xmm4
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
-; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT:    psubusw %xmm3, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: psubus_8i64_max:
-; SSSE3:       # %bb.0: # %vector.ph
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT:    movdqa %xmm2, %xmm7
-; SSSE3-NEXT:    pxor %xmm5, %xmm7
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
-; SSSE3-NEXT:    movdqa %xmm8, %xmm6
-; SSSE3-NEXT:    pcmpgtd %xmm7, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm8, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm9, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    por %xmm7, %xmm6
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [65535,65535]
-; SSSE3-NEXT:    pand %xmm6, %xmm2
-; SSSE3-NEXT:    pandn %xmm9, %xmm6
-; SSSE3-NEXT:    por %xmm2, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSSE3-NEXT:    pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm6
-; SSSE3-NEXT:    pxor %xmm5, %xmm6
-; SSSE3-NEXT:    movdqa %xmm8, %xmm7
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm8, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm2, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
-; SSSE3-NEXT:    por %xmm6, %xmm2
-; SSSE3-NEXT:    pand %xmm2, %xmm1
-; SSSE3-NEXT:    pandn %xmm9, %xmm2
-; SSSE3-NEXT:    por %xmm1, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
-; SSSE3-NEXT:    movdqa %xmm4, %xmm2
-; SSSE3-NEXT:    pxor %xmm5, %xmm2
-; SSSE3-NEXT:    movdqa %xmm8, %xmm6
-; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm8, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm7, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    por %xmm2, %xmm6
-; SSSE3-NEXT:    pand %xmm6, %xmm4
-; SSSE3-NEXT:    pandn %xmm9, %xmm6
-; SSSE3-NEXT:    por %xmm4, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSSE3-NEXT:    pxor %xmm3, %xmm5
-; SSSE3-NEXT:    movdqa %xmm8, %xmm4
-; SSSE3-NEXT:    pcmpgtd %xmm5, %xmm4
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm8, %xmm5
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm6, %xmm5
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    por %xmm5, %xmm4
-; SSSE3-NEXT:    pand %xmm4, %xmm3
-; SSSE3-NEXT:    pandn %xmm9, %xmm4
-; SSSE3-NEXT:    por %xmm3, %xmm4
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
-; SSSE3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSSE3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT:    psubusw %xmm3, %xmm0
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: psubus_8i64_max:
+; SSE2OR3:       # %bb.0: # %vector.ph
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm7
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
+; SSE2OR3-NEXT:    movdqa %xmm8, %xmm6
+; SSE2OR3-NEXT:    pcmpgtd %xmm7, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm8, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm9, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm7, %xmm6
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm9 = [65535,65535]
+; SSE2OR3-NEXT:    pand %xmm6, %xmm2
+; SSE2OR3-NEXT:    pandn %xmm9, %xmm6
+; SSE2OR3-NEXT:    por %xmm2, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
+; SSE2OR3-NEXT:    pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm6
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm8, %xmm7
+; SSE2OR3-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm8, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm2, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm6, %xmm2
+; SSE2OR3-NEXT:    pand %xmm2, %xmm1
+; SSE2OR3-NEXT:    pandn %xmm9, %xmm2
+; SSE2OR3-NEXT:    por %xmm1, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2OR3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
+; SSE2OR3-NEXT:    movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm5, %xmm2
+; SSE2OR3-NEXT:    movdqa %xmm8, %xmm6
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm8, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm7, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm2, %xmm6
+; SSE2OR3-NEXT:    pand %xmm6, %xmm4
+; SSE2OR3-NEXT:    pandn %xmm9, %xmm6
+; SSE2OR3-NEXT:    por %xmm4, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
+; SSE2OR3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
+; SSE2OR3-NEXT:    pxor %xmm3, %xmm5
+; SSE2OR3-NEXT:    movdqa %xmm8, %xmm4
+; SSE2OR3-NEXT:    pcmpgtd %xmm5, %xmm4
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm8, %xmm5
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm6, %xmm5
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm5, %xmm4
+; SSE2OR3-NEXT:    pand %xmm4, %xmm3
+; SSE2OR3-NEXT:    pandn %xmm9, %xmm4
+; SSE2OR3-NEXT:    por %xmm3, %xmm4
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; SSE2OR3-NEXT:    pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2OR3-NEXT:    movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; SSE2OR3-NEXT:    psubusw %xmm3, %xmm0
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: psubus_8i64_max:
 ; SSE41:       # %bb.0: # %vector.ph
@@ -1730,95 +1615,50 @@ vector.ph:
 }
 
 define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
-; SSE2-LABEL: psubus_16i32_max:
-; SSE2:       # %bb.0: # %vector.ph
-; SSE2-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    movdqa %xmm3, %xmm8
-; SSE2-NEXT:    pxor %xmm9, %xmm8
-; SSE2-NEXT:    movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
-; SSE2-NEXT:    movdqa %xmm10, %xmm6
-; SSE2-NEXT:    pcmpgtd %xmm8, %xmm6
-; SSE2-NEXT:    pcmpeqd %xmm8, %xmm8
-; SSE2-NEXT:    pand %xmm6, %xmm3
-; SSE2-NEXT:    pxor %xmm8, %xmm6
-; SSE2-NEXT:    por %xmm3, %xmm6
-; SSE2-NEXT:    pslld $16, %xmm6
-; SSE2-NEXT:    psrad $16, %xmm6
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pxor %xmm9, %xmm3
-; SSE2-NEXT:    movdqa %xmm10, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm7
-; SSE2-NEXT:    pand %xmm7, %xmm2
-; SSE2-NEXT:    pxor %xmm8, %xmm7
-; SSE2-NEXT:    por %xmm2, %xmm7
-; SSE2-NEXT:    pslld $16, %xmm7
-; SSE2-NEXT:    psrad $16, %xmm7
-; SSE2-NEXT:    packssdw %xmm6, %xmm7
-; SSE2-NEXT:    psubusw %xmm7, %xmm0
-; SSE2-NEXT:    movdqa %xmm5, %xmm2
-; SSE2-NEXT:    pxor %xmm9, %xmm2
-; SSE2-NEXT:    movdqa %xmm10, %xmm3
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm3
-; SSE2-NEXT:    pand %xmm3, %xmm5
-; SSE2-NEXT:    pxor %xmm8, %xmm3
-; SSE2-NEXT:    por %xmm5, %xmm3
-; SSE2-NEXT:    pslld $16, %xmm3
-; SSE2-NEXT:    psrad $16, %xmm3
-; SSE2-NEXT:    pxor %xmm4, %xmm9
-; SSE2-NEXT:    pcmpgtd %xmm9, %xmm10
-; SSE2-NEXT:    pxor %xmm10, %xmm8
-; SSE2-NEXT:    pand %xmm4, %xmm10
-; SSE2-NEXT:    por %xmm8, %xmm10
-; SSE2-NEXT:    pslld $16, %xmm10
-; SSE2-NEXT:    psrad $16, %xmm10
-; SSE2-NEXT:    packssdw %xmm3, %xmm10
-; SSE2-NEXT:    psubusw %xmm10, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: psubus_16i32_max:
-; SSSE3:       # %bb.0: # %vector.ph
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT:    movdqa %xmm3, %xmm8
-; SSSE3-NEXT:    pxor %xmm9, %xmm8
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
-; SSSE3-NEXT:    movdqa %xmm10, %xmm6
-; SSSE3-NEXT:    pcmpgtd %xmm8, %xmm6
-; SSSE3-NEXT:    pcmpeqd %xmm8, %xmm8
-; SSSE3-NEXT:    pand %xmm6, %xmm3
-; SSSE3-NEXT:    pxor %xmm8, %xmm6
-; SSSE3-NEXT:    por %xmm3, %xmm6
-; SSSE3-NEXT:    pslld $16, %xmm6
-; SSSE3-NEXT:    psrad $16, %xmm6
-; SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSSE3-NEXT:    pxor %xmm9, %xmm3
-; SSSE3-NEXT:    movdqa %xmm10, %xmm7
-; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm7
-; SSSE3-NEXT:    pand %xmm7, %xmm2
-; SSSE3-NEXT:    pxor %xmm8, %xmm7
-; SSSE3-NEXT:    por %xmm2, %xmm7
-; SSSE3-NEXT:    pslld $16, %xmm7
-; SSSE3-NEXT:    psrad $16, %xmm7
-; SSSE3-NEXT:    packssdw %xmm6, %xmm7
-; SSSE3-NEXT:    psubusw %xmm7, %xmm0
-; SSSE3-NEXT:    movdqa %xmm5, %xmm2
-; SSSE3-NEXT:    pxor %xmm9, %xmm2
-; SSSE3-NEXT:    movdqa %xmm10, %xmm3
-; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm3
-; SSSE3-NEXT:    pand %xmm3, %xmm5
-; SSSE3-NEXT:    pxor %xmm8, %xmm3
-; SSSE3-NEXT:    por %xmm5, %xmm3
-; SSSE3-NEXT:    pslld $16, %xmm3
-; SSSE3-NEXT:    psrad $16, %xmm3
-; SSSE3-NEXT:    pxor %xmm4, %xmm9
-; SSSE3-NEXT:    pcmpgtd %xmm9, %xmm10
-; SSSE3-NEXT:    pxor %xmm10, %xmm8
-; SSSE3-NEXT:    pand %xmm4, %xmm10
-; SSSE3-NEXT:    por %xmm8, %xmm10
-; SSSE3-NEXT:    pslld $16, %xmm10
-; SSSE3-NEXT:    psrad $16, %xmm10
-; SSSE3-NEXT:    packssdw %xmm3, %xmm10
-; SSSE3-NEXT:    psubusw %xmm10, %xmm1
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: psubus_16i32_max:
+; SSE2OR3:       # %bb.0: # %vector.ph
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT:    movdqa %xmm3, %xmm8
+; SSE2OR3-NEXT:    pxor %xmm9, %xmm8
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
+; SSE2OR3-NEXT:    movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT:    pcmpgtd %xmm8, %xmm6
+; SSE2OR3-NEXT:    pcmpeqd %xmm8, %xmm8
+; SSE2OR3-NEXT:    pand %xmm6, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm8, %xmm6
+; SSE2OR3-NEXT:    por %xmm3, %xmm6
+; SSE2OR3-NEXT:    pslld $16, %xmm6
+; SSE2OR3-NEXT:    psrad $16, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm9, %xmm3
+; SSE2OR3-NEXT:    movdqa %xmm10, %xmm7
+; SSE2OR3-NEXT:    pcmpgtd %xmm3, %xmm7
+; SSE2OR3-NEXT:    pand %xmm7, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm8, %xmm7
+; SSE2OR3-NEXT:    por %xmm2, %xmm7
+; SSE2OR3-NEXT:    pslld $16, %xmm7
+; SSE2OR3-NEXT:    psrad $16, %xmm7
+; SSE2OR3-NEXT:    packssdw %xmm6, %xmm7
+; SSE2OR3-NEXT:    psubusw %xmm7, %xmm0
+; SSE2OR3-NEXT:    movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm9, %xmm2
+; SSE2OR3-NEXT:    movdqa %xmm10, %xmm3
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm3
+; SSE2OR3-NEXT:    pand %xmm3, %xmm5
+; SSE2OR3-NEXT:    pxor %xmm8, %xmm3
+; SSE2OR3-NEXT:    por %xmm5, %xmm3
+; SSE2OR3-NEXT:    pslld $16, %xmm3
+; SSE2OR3-NEXT:    psrad $16, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm4, %xmm9
+; SSE2OR3-NEXT:    pcmpgtd %xmm9, %xmm10
+; SSE2OR3-NEXT:    pxor %xmm10, %xmm8
+; SSE2OR3-NEXT:    pand %xmm4, %xmm10
+; SSE2OR3-NEXT:    por %xmm8, %xmm10
+; SSE2OR3-NEXT:    pslld $16, %xmm10
+; SSE2OR3-NEXT:    psrad $16, %xmm10
+; SSE2OR3-NEXT:    packssdw %xmm3, %xmm10
+; SSE2OR3-NEXT:    psubusw %xmm10, %xmm1
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: psubus_16i32_max:
 ; SSE41:       # %bb.0: # %vector.ph
@@ -2115,23 +1955,14 @@ define void @subus_v4i8(<4 x i8>* %p1, <4 x i8>* %p2) {
 }
 
 define void @subus_v2i8(<2 x i8>* %p1, <2 x i8>* %p2) {
-; SSE2-LABEL: subus_v2i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT:    psubusb %xmm1, %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    movw %ax, (%rdi)
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: subus_v2i8:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
-; SSSE3-NEXT:    psubusb %xmm1, %xmm0
-; SSSE3-NEXT:    movd %xmm0, %eax
-; SSSE3-NEXT:    movw %ax, (%rdi)
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: subus_v2i8:
+; SSE2OR3:       # %bb.0:
+; SSE2OR3-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2OR3-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2OR3-NEXT:    psubusb %xmm1, %xmm0
+; SSE2OR3-NEXT:    movd %xmm0, %eax
+; SSE2OR3-NEXT:    movw %ax, (%rdi)
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: subus_v2i8:
 ; SSE41:       # %bb.0:
@@ -2663,159 +2494,82 @@ define <8 x i16> @test32(<8 x i16> %a0, <8 x i32> %a1) {
 
 ; v8i32/v8i64 - sub(x,trunc(umin(y,zext(x))))
 define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
-; SSE2-LABEL: test33:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    pxor %xmm7, %xmm7
-; SSE2-NEXT:    movdqa %xmm1, %xmm8
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm9
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSE2-NEXT:    movdqa %xmm0, %xmm10
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSE2-NEXT:    movdqa %xmm0, %xmm11
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT:    movdqa %xmm3, %xmm6
-; SSE2-NEXT:    pxor %xmm12, %xmm6
-; SSE2-NEXT:    movdqa %xmm11, %xmm7
-; SSE2-NEXT:    pxor %xmm12, %xmm7
-; SSE2-NEXT:    movdqa %xmm7, %xmm13
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm13
-; SSE2-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm6, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT:    pand %xmm14, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSE2-NEXT:    por %xmm6, %xmm7
-; SSE2-NEXT:    pand %xmm7, %xmm3
-; SSE2-NEXT:    pandn %xmm11, %xmm7
-; SSE2-NEXT:    por %xmm3, %xmm7
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pxor %xmm12, %xmm3
-; SSE2-NEXT:    movdqa %xmm10, %xmm6
-; SSE2-NEXT:    pxor %xmm12, %xmm6
-; SSE2-NEXT:    movdqa %xmm6, %xmm11
-; SSE2-NEXT:    pcmpgtd %xmm3, %xmm11
-; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm13, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
-; SSE2-NEXT:    por %xmm6, %xmm3
-; SSE2-NEXT:    pand %xmm3, %xmm2
-; SSE2-NEXT:    pandn %xmm10, %xmm3
-; SSE2-NEXT:    por %xmm2, %xmm3
-; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
-; SSE2-NEXT:    movdqa %xmm5, %xmm2
-; SSE2-NEXT:    pxor %xmm12, %xmm2
-; SSE2-NEXT:    movdqa %xmm9, %xmm6
-; SSE2-NEXT:    pxor %xmm12, %xmm6
-; SSE2-NEXT:    movdqa %xmm6, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm10, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT:    por %xmm2, %xmm6
-; SSE2-NEXT:    pand %xmm6, %xmm5
-; SSE2-NEXT:    pandn %xmm9, %xmm6
-; SSE2-NEXT:    por %xmm5, %xmm6
-; SSE2-NEXT:    movdqa %xmm4, %xmm2
-; SSE2-NEXT:    pxor %xmm12, %xmm2
-; SSE2-NEXT:    pxor %xmm8, %xmm12
-; SSE2-NEXT:    movdqa %xmm12, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm2, %xmm5
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm12
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
-; SSE2-NEXT:    pand %xmm7, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT:    por %xmm2, %xmm5
-; SSE2-NEXT:    pand %xmm5, %xmm4
-; SSE2-NEXT:    pandn %xmm8, %xmm5
-; SSE2-NEXT:    por %xmm4, %xmm5
-; SSE2-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSE2-NEXT:    psubd %xmm3, %xmm0
-; SSE2-NEXT:    psubd %xmm5, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: test33:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    pxor %xmm7, %xmm7
-; SSSE3-NEXT:    movdqa %xmm1, %xmm8
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm9
-; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm10
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm11
-; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT:    movdqa %xmm3, %xmm6
-; SSSE3-NEXT:    pxor %xmm12, %xmm6
-; SSSE3-NEXT:    movdqa %xmm11, %xmm7
-; SSSE3-NEXT:    pxor %xmm12, %xmm7
-; SSSE3-NEXT:    movdqa %xmm7, %xmm13
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm13
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm6, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm14, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSSE3-NEXT:    por %xmm6, %xmm7
-; SSSE3-NEXT:    pand %xmm7, %xmm3
-; SSSE3-NEXT:    pandn %xmm11, %xmm7
-; SSSE3-NEXT:    por %xmm3, %xmm7
-; SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSSE3-NEXT:    pxor %xmm12, %xmm3
-; SSSE3-NEXT:    movdqa %xmm10, %xmm6
-; SSSE3-NEXT:    pxor %xmm12, %xmm6
-; SSSE3-NEXT:    movdqa %xmm6, %xmm11
-; SSSE3-NEXT:    pcmpgtd %xmm3, %xmm11
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm13, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
-; SSSE3-NEXT:    por %xmm6, %xmm3
-; SSSE3-NEXT:    pand %xmm3, %xmm2
-; SSSE3-NEXT:    pandn %xmm10, %xmm3
-; SSSE3-NEXT:    por %xmm2, %xmm3
-; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
-; SSSE3-NEXT:    movdqa %xmm5, %xmm2
-; SSSE3-NEXT:    pxor %xmm12, %xmm2
-; SSSE3-NEXT:    movdqa %xmm9, %xmm6
-; SSSE3-NEXT:    pxor %xmm12, %xmm6
-; SSSE3-NEXT:    movdqa %xmm6, %xmm7
-; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm10, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT:    por %xmm2, %xmm6
-; SSSE3-NEXT:    pand %xmm6, %xmm5
-; SSSE3-NEXT:    pandn %xmm9, %xmm6
-; SSSE3-NEXT:    por %xmm5, %xmm6
-; SSSE3-NEXT:    movdqa %xmm4, %xmm2
-; SSSE3-NEXT:    pxor %xmm12, %xmm2
-; SSSE3-NEXT:    pxor %xmm8, %xmm12
-; SSSE3-NEXT:    movdqa %xmm12, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm2, %xmm5
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm12
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm7, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT:    por %xmm2, %xmm5
-; SSSE3-NEXT:    pand %xmm5, %xmm4
-; SSSE3-NEXT:    pandn %xmm8, %xmm5
-; SSSE3-NEXT:    por %xmm4, %xmm5
-; SSSE3-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSSE3-NEXT:    psubd %xmm3, %xmm0
-; SSSE3-NEXT:    psubd %xmm5, %xmm1
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: test33:
+; SSE2OR3:       # %bb.0:
+; SSE2OR3-NEXT:    pxor %xmm7, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm8
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm9
+; SSE2OR3-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm10
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm11
+; SSE2OR3-NEXT:    punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT:    movdqa %xmm3, %xmm6
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm11, %xmm7
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm7, %xmm13
+; SSE2OR3-NEXT:    pcmpgtd %xmm6, %xmm13
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm6, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm14, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm6, %xmm7
+; SSE2OR3-NEXT:    pand %xmm7, %xmm3
+; SSE2OR3-NEXT:    pandn %xmm11, %xmm7
+; SSE2OR3-NEXT:    por %xmm3, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm3
+; SSE2OR3-NEXT:    movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm6, %xmm11
+; SSE2OR3-NEXT:    pcmpgtd %xmm3, %xmm11
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm13, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm6, %xmm3
+; SSE2OR3-NEXT:    pand %xmm3, %xmm2
+; SSE2OR3-NEXT:    pandn %xmm10, %xmm3
+; SSE2OR3-NEXT:    por %xmm2, %xmm3
+; SSE2OR3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
+; SSE2OR3-NEXT:    movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm2
+; SSE2OR3-NEXT:    movdqa %xmm9, %xmm6
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm6, %xmm7
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm2, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm10, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm2, %xmm6
+; SSE2OR3-NEXT:    pand %xmm6, %xmm5
+; SSE2OR3-NEXT:    pandn %xmm9, %xmm6
+; SSE2OR3-NEXT:    por %xmm5, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm12, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm8, %xmm12
+; SSE2OR3-NEXT:    movdqa %xmm12, %xmm5
+; SSE2OR3-NEXT:    pcmpgtd %xmm2, %xmm5
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm2, %xmm12
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm7, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm2, %xmm5
+; SSE2OR3-NEXT:    pand %xmm5, %xmm4
+; SSE2OR3-NEXT:    pandn %xmm8, %xmm5
+; SSE2OR3-NEXT:    por %xmm4, %xmm5
+; SSE2OR3-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
+; SSE2OR3-NEXT:    psubd %xmm3, %xmm0
+; SSE2OR3-NEXT:    psubd %xmm5, %xmm1
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: test33:
 ; SSE41:       # %bb.0:
@@ -2970,165 +2724,85 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
 
 ; v8i32/v8i64 - sub(x,trunc(umin(zext(and(x,1)),y)))
 define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) {
-; SSE2-LABEL: test34:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm6 = [1,1,1,1]
-; SSE2-NEXT:    pand %xmm6, %xmm0
-; SSE2-NEXT:    pand %xmm6, %xmm1
-; SSE2-NEXT:    pxor %xmm7, %xmm7
-; SSE2-NEXT:    movdqa %xmm1, %xmm8
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSE2-NEXT:    movdqa %xmm1, %xmm9
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSE2-NEXT:    movdqa %xmm0, %xmm10
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSE2-NEXT:    movdqa %xmm0, %xmm12
-; SSE2-NEXT:    punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
-; SSE2-NEXT:    movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT:    movdqa %xmm3, %xmm7
-; SSE2-NEXT:    pxor %xmm11, %xmm7
-; SSE2-NEXT:    movdqa %xmm12, %xmm6
-; SSE2-NEXT:    por %xmm11, %xmm6
-; SSE2-NEXT:    movdqa %xmm7, %xmm13
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm13
-; SSE2-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm7, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm14, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSE2-NEXT:    por %xmm6, %xmm7
-; SSE2-NEXT:    pand %xmm7, %xmm12
-; SSE2-NEXT:    pandn %xmm3, %xmm7
-; SSE2-NEXT:    por %xmm12, %xmm7
-; SSE2-NEXT:    movdqa %xmm2, %xmm3
-; SSE2-NEXT:    pxor %xmm11, %xmm3
-; SSE2-NEXT:    movdqa %xmm10, %xmm6
-; SSE2-NEXT:    por %xmm11, %xmm6
-; SSE2-NEXT:    movdqa %xmm3, %xmm12
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm12
-; SSE2-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm13, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
-; SSE2-NEXT:    por %xmm6, %xmm3
-; SSE2-NEXT:    pand %xmm3, %xmm10
-; SSE2-NEXT:    pandn %xmm2, %xmm3
-; SSE2-NEXT:    por %xmm10, %xmm3
-; SSE2-NEXT:    packuswb %xmm7, %xmm3
-; SSE2-NEXT:    movdqa %xmm5, %xmm2
-; SSE2-NEXT:    pxor %xmm11, %xmm2
-; SSE2-NEXT:    movdqa %xmm9, %xmm6
-; SSE2-NEXT:    por %xmm11, %xmm6
-; SSE2-NEXT:    movdqa %xmm2, %xmm7
-; SSE2-NEXT:    pcmpgtd %xmm6, %xmm7
-; SSE2-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm6
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSE2-NEXT:    pand %xmm10, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT:    por %xmm2, %xmm6
-; SSE2-NEXT:    pand %xmm6, %xmm9
-; SSE2-NEXT:    pandn %xmm5, %xmm6
-; SSE2-NEXT:    por %xmm9, %xmm6
-; SSE2-NEXT:    movdqa %xmm4, %xmm2
-; SSE2-NEXT:    pxor %xmm11, %xmm2
-; SSE2-NEXT:    por %xmm8, %xmm11
-; SSE2-NEXT:    movdqa %xmm2, %xmm5
-; SSE2-NEXT:    pcmpgtd %xmm11, %xmm5
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSE2-NEXT:    pcmpeqd %xmm2, %xmm11
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
-; SSE2-NEXT:    pand %xmm7, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT:    por %xmm2, %xmm5
-; SSE2-NEXT:    pand %xmm5, %xmm8
-; SSE2-NEXT:    pandn %xmm4, %xmm5
-; SSE2-NEXT:    por %xmm8, %xmm5
-; SSE2-NEXT:    packuswb %xmm6, %xmm5
-; SSE2-NEXT:    psubd %xmm3, %xmm0
-; SSE2-NEXT:    psubd %xmm5, %xmm1
-; SSE2-NEXT:    retq
-;
-; SSSE3-LABEL: test34:
-; SSSE3:       # %bb.0:
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm6 = [1,1,1,1]
-; SSSE3-NEXT:    pand %xmm6, %xmm0
-; SSSE3-NEXT:    pand %xmm6, %xmm1
-; SSSE3-NEXT:    pxor %xmm7, %xmm7
-; SSSE3-NEXT:    movdqa %xmm1, %xmm8
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSSE3-NEXT:    movdqa %xmm1, %xmm9
-; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm10
-; SSSE3-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSSE3-NEXT:    movdqa %xmm0, %xmm12
-; SSSE3-NEXT:    punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
-; SSSE3-NEXT:    movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT:    movdqa %xmm3, %xmm7
-; SSSE3-NEXT:    pxor %xmm11, %xmm7
-; SSSE3-NEXT:    movdqa %xmm12, %xmm6
-; SSSE3-NEXT:    por %xmm11, %xmm6
-; SSSE3-NEXT:    movdqa %xmm7, %xmm13
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm13
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm7, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm14, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSSE3-NEXT:    por %xmm6, %xmm7
-; SSSE3-NEXT:    pand %xmm7, %xmm12
-; SSSE3-NEXT:    pandn %xmm3, %xmm7
-; SSSE3-NEXT:    por %xmm12, %xmm7
-; SSSE3-NEXT:    movdqa %xmm2, %xmm3
-; SSSE3-NEXT:    pxor %xmm11, %xmm3
-; SSSE3-NEXT:    movdqa %xmm10, %xmm6
-; SSSE3-NEXT:    por %xmm11, %xmm6
-; SSSE3-NEXT:    movdqa %xmm3, %xmm12
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm12
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm3, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm13, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
-; SSSE3-NEXT:    por %xmm6, %xmm3
-; SSSE3-NEXT:    pand %xmm3, %xmm10
-; SSSE3-NEXT:    pandn %xmm2, %xmm3
-; SSSE3-NEXT:    por %xmm10, %xmm3
-; SSSE3-NEXT:    packuswb %xmm7, %xmm3
-; SSSE3-NEXT:    movdqa %xmm5, %xmm2
-; SSSE3-NEXT:    pxor %xmm11, %xmm2
-; SSSE3-NEXT:    movdqa %xmm9, %xmm6
-; SSSE3-NEXT:    por %xmm11, %xmm6
-; SSSE3-NEXT:    movdqa %xmm2, %xmm7
-; SSSE3-NEXT:    pcmpgtd %xmm6, %xmm7
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm6
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm10, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT:    por %xmm2, %xmm6
-; SSSE3-NEXT:    pand %xmm6, %xmm9
-; SSSE3-NEXT:    pandn %xmm5, %xmm6
-; SSSE3-NEXT:    por %xmm9, %xmm6
-; SSSE3-NEXT:    movdqa %xmm4, %xmm2
-; SSSE3-NEXT:    pxor %xmm11, %xmm2
-; SSSE3-NEXT:    por %xmm8, %xmm11
-; SSSE3-NEXT:    movdqa %xmm2, %xmm5
-; SSSE3-NEXT:    pcmpgtd %xmm11, %xmm5
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSSE3-NEXT:    pcmpeqd %xmm2, %xmm11
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
-; SSSE3-NEXT:    pand %xmm7, %xmm2
-; SSSE3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT:    por %xmm2, %xmm5
-; SSSE3-NEXT:    pand %xmm5, %xmm8
-; SSSE3-NEXT:    pandn %xmm4, %xmm5
-; SSSE3-NEXT:    por %xmm8, %xmm5
-; SSSE3-NEXT:    packuswb %xmm6, %xmm5
-; SSSE3-NEXT:    psubd %xmm3, %xmm0
-; SSSE3-NEXT:    psubd %xmm5, %xmm1
-; SSSE3-NEXT:    retq
+; SSE2OR3-LABEL: test34:
+; SSE2OR3:       # %bb.0:
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm6 = [1,1,1,1]
+; SSE2OR3-NEXT:    pand %xmm6, %xmm0
+; SSE2OR3-NEXT:    pand %xmm6, %xmm1
+; SSE2OR3-NEXT:    pxor %xmm7, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm8
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
+; SSE2OR3-NEXT:    movdqa %xmm1, %xmm9
+; SSE2OR3-NEXT:    punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm10
+; SSE2OR3-NEXT:    punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+; SSE2OR3-NEXT:    movdqa %xmm0, %xmm12
+; SSE2OR3-NEXT:    punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+; SSE2OR3-NEXT:    movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT:    movdqa %xmm3, %xmm7
+; SSE2OR3-NEXT:    pxor %xmm11, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm12, %xmm6
+; SSE2OR3-NEXT:    por %xmm11, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm7, %xmm13
+; SSE2OR3-NEXT:    pcmpgtd %xmm6, %xmm13
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm7, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm14, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm6, %xmm7
+; SSE2OR3-NEXT:    pand %xmm7, %xmm12
+; SSE2OR3-NEXT:    pandn %xmm3, %xmm7
+; SSE2OR3-NEXT:    por %xmm12, %xmm7
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT:    pxor %xmm11, %xmm3
+; SSE2OR3-NEXT:    movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT:    por %xmm11, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm3, %xmm12
+; SSE2OR3-NEXT:    pcmpgtd %xmm6, %xmm12
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm3, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm13, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm6, %xmm3
+; SSE2OR3-NEXT:    pand %xmm3, %xmm10
+; SSE2OR3-NEXT:    pandn %xmm2, %xmm3
+; SSE2OR3-NEXT:    por %xmm10, %xmm3
+; SSE2OR3-NEXT:    packuswb %xmm7, %xmm3
+; SSE2OR3-NEXT:    movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm11, %xmm2
+; SSE2OR3-NEXT:    movdqa %xmm9, %xmm6
+; SSE2OR3-NEXT:    por %xmm11, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT:    pcmpgtd %xmm6, %xmm7
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm2, %xmm6
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm10, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm2, %xmm6
+; SSE2OR3-NEXT:    pand %xmm6, %xmm9
+; SSE2OR3-NEXT:    pandn %xmm5, %xmm6
+; SSE2OR3-NEXT:    por %xmm9, %xmm6
+; SSE2OR3-NEXT:    movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT:    pxor %xmm11, %xmm2
+; SSE2OR3-NEXT:    por %xmm8, %xmm11
+; SSE2OR3-NEXT:    movdqa %xmm2, %xmm5
+; SSE2OR3-NEXT:    pcmpgtd %xmm11, %xmm5
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
+; SSE2OR3-NEXT:    pcmpeqd %xmm2, %xmm11
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
+; SSE2OR3-NEXT:    pand %xmm7, %xmm2
+; SSE2OR3-NEXT:    pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT:    por %xmm2, %xmm5
+; SSE2OR3-NEXT:    pand %xmm5, %xmm8
+; SSE2OR3-NEXT:    pandn %xmm4, %xmm5
+; SSE2OR3-NEXT:    por %xmm8, %xmm5
+; SSE2OR3-NEXT:    packuswb %xmm6, %xmm5
+; SSE2OR3-NEXT:    psubd %xmm3, %xmm0
+; SSE2OR3-NEXT:    psubd %xmm5, %xmm1
+; SSE2OR3-NEXT:    retq
 ;
 ; SSE41-LABEL: test34:
 ; SSE41:       # %bb.0:


        


More information about the llvm-commits mailing list