[llvm] 65292fe - [X86] Add SSE2+SSE3 common check prefix to psubus tests
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 15 06:07:29 PST 2021
Author: Simon Pilgrim
Date: 2021-02-15T14:07:11Z
New Revision: 65292fe3a2101a5ce9b01f089cdc077320e53b13
URL: https://github.com/llvm/llvm-project/commit/65292fe3a2101a5ce9b01f089cdc077320e53b13
DIFF: https://github.com/llvm/llvm-project/commit/65292fe3a2101a5ce9b01f089cdc077320e53b13.diff
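LOG: [X86] Add SSE2+SSE3 common check prefix to psubus tests
The SSE2 and SSSE3 RUN lines now share an additional SSE2OR3 check prefix, so tests whose codegen is identical for both targets get one merged CHECK block instead of two duplicated ones.
Noticed by @pengfei on D96703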
Added:
Modified:
llvm/test/CodeGen/X86/psubus.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 098ecc158e0c..d2d8176838fc 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2OR3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW
@@ -602,95 +602,50 @@ vector.ph:
; FIXME: match this to UMIN+TRUNC+PSUBUS
define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
-; SSE2-LABEL: test14:
-; SSE2: # %bb.0: # %vector.ph
-; SSE2-NEXT: pxor %xmm8, %xmm8
-; SSE2-NEXT: movdqa %xmm0, %xmm6
-; SSE2-NEXT: movdqa %xmm4, %xmm9
-; SSE2-NEXT: movdqa %xmm3, %xmm10
-; SSE2-NEXT: movdqa %xmm2, %xmm7
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2-NEXT: pand %xmm5, %xmm4
-; SSE2-NEXT: pand %xmm5, %xmm3
-; SSE2-NEXT: packuswb %xmm4, %xmm3
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: pand %xmm5, %xmm2
-; SSE2-NEXT: pand %xmm5, %xmm1
-; SSE2-NEXT: packuswb %xmm2, %xmm1
-; SSE2-NEXT: packuswb %xmm3, %xmm1
-; SSE2-NEXT: psubb %xmm0, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
-; SSE2-NEXT: movdqa %xmm6, %xmm3
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: pxor %xmm5, %xmm9
-; SSE2-NEXT: por %xmm5, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm6
-; SSE2-NEXT: pxor %xmm5, %xmm10
-; SSE2-NEXT: por %xmm5, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm10, %xmm3
-; SSE2-NEXT: packssdw %xmm6, %xmm3
-; SSE2-NEXT: pxor %xmm5, %xmm7
-; SSE2-NEXT: por %xmm5, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm2
-; SSE2-NEXT: pxor %xmm5, %xmm4
-; SSE2-NEXT: por %xmm5, %xmm0
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm0
-; SSE2-NEXT: packssdw %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm3, %xmm0
-; SSE2-NEXT: pandn %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: test14:
-; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: pxor %xmm8, %xmm8
-; SSSE3-NEXT: movdqa %xmm0, %xmm6
-; SSSE3-NEXT: movdqa %xmm4, %xmm9
-; SSSE3-NEXT: movdqa %xmm3, %xmm10
-; SSSE3-NEXT: movdqa %xmm2, %xmm7
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSSE3-NEXT: pand %xmm5, %xmm4
-; SSSE3-NEXT: pand %xmm5, %xmm3
-; SSSE3-NEXT: packuswb %xmm4, %xmm3
-; SSSE3-NEXT: movdqa %xmm1, %xmm4
-; SSSE3-NEXT: pand %xmm5, %xmm2
-; SSSE3-NEXT: pand %xmm5, %xmm1
-; SSSE3-NEXT: packuswb %xmm2, %xmm1
-; SSSE3-NEXT: packuswb %xmm3, %xmm1
-; SSSE3-NEXT: psubb %xmm0, %xmm1
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
-; SSSE3-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
-; SSSE3-NEXT: movdqa %xmm6, %xmm3
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT: pxor %xmm5, %xmm9
-; SSSE3-NEXT: por %xmm5, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm9, %xmm6
-; SSSE3-NEXT: pxor %xmm5, %xmm10
-; SSSE3-NEXT: por %xmm5, %xmm3
-; SSSE3-NEXT: pcmpgtd %xmm10, %xmm3
-; SSSE3-NEXT: packssdw %xmm6, %xmm3
-; SSSE3-NEXT: pxor %xmm5, %xmm7
-; SSSE3-NEXT: por %xmm5, %xmm2
-; SSSE3-NEXT: pcmpgtd %xmm7, %xmm2
-; SSSE3-NEXT: pxor %xmm5, %xmm4
-; SSSE3-NEXT: por %xmm5, %xmm0
-; SSSE3-NEXT: pcmpgtd %xmm4, %xmm0
-; SSSE3-NEXT: packssdw %xmm2, %xmm0
-; SSSE3-NEXT: packsswb %xmm3, %xmm0
-; SSSE3-NEXT: pandn %xmm1, %xmm0
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: test14:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: pxor %xmm8, %xmm8
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm9
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm10
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2OR3-NEXT: pand %xmm5, %xmm4
+; SSE2OR3-NEXT: pand %xmm5, %xmm3
+; SSE2OR3-NEXT: packuswb %xmm4, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm4
+; SSE2OR3-NEXT: pand %xmm5, %xmm2
+; SSE2OR3-NEXT: pand %xmm5, %xmm1
+; SSE2OR3-NEXT: packuswb %xmm2, %xmm1
+; SSE2OR3-NEXT: packuswb %xmm3, %xmm1
+; SSE2OR3-NEXT: psubb %xmm0, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm2
+; SSE2OR3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm0
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1],xmm0[2],xmm8[2],xmm0[3],xmm8[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
+; SSE2OR3-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm8[8],xmm6[9],xmm8[9],xmm6[10],xmm8[10],xmm6[11],xmm8[11],xmm6[12],xmm8[12],xmm6[13],xmm8[13],xmm6[14],xmm8[14],xmm6[15],xmm8[15]
+; SSE2OR3-NEXT: movdqa %xmm6, %xmm3
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm6 = xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT: pxor %xmm5, %xmm9
+; SSE2OR3-NEXT: por %xmm5, %xmm6
+; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm6
+; SSE2OR3-NEXT: pxor %xmm5, %xmm10
+; SSE2OR3-NEXT: por %xmm5, %xmm3
+; SSE2OR3-NEXT: pcmpgtd %xmm10, %xmm3
+; SSE2OR3-NEXT: packssdw %xmm6, %xmm3
+; SSE2OR3-NEXT: pxor %xmm5, %xmm7
+; SSE2OR3-NEXT: por %xmm5, %xmm2
+; SSE2OR3-NEXT: pcmpgtd %xmm7, %xmm2
+; SSE2OR3-NEXT: pxor %xmm5, %xmm4
+; SSE2OR3-NEXT: por %xmm5, %xmm0
+; SSE2OR3-NEXT: pcmpgtd %xmm4, %xmm0
+; SSE2OR3-NEXT: packssdw %xmm2, %xmm0
+; SSE2OR3-NEXT: packsswb %xmm3, %xmm0
+; SSE2OR3-NEXT: pandn %xmm1, %xmm0
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: test14:
; SSE41: # %bb.0: # %vector.ph
@@ -1475,145 +1430,75 @@ vector.ph:
}
define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind {
-; SSE2-LABEL: psubus_8i64_max:
-; SSE2: # %bb.0: # %vector.ph
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT: movdqa %xmm2, %xmm7
-; SSE2-NEXT: pxor %xmm5, %xmm7
-; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
-; SSE2-NEXT: movdqa %xmm8, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm8, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; SSE2-NEXT: pand %xmm9, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: por %xmm7, %xmm6
-; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [65535,65535]
-; SSE2-NEXT: pand %xmm6, %xmm2
-; SSE2-NEXT: pandn %xmm9, %xmm6
-; SSE2-NEXT: por %xmm2, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm1, %xmm6
-; SSE2-NEXT: pxor %xmm5, %xmm6
-; SSE2-NEXT: movdqa %xmm8, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm8, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm2, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm2
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pandn %xmm9, %xmm2
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
-; SSE2-NEXT: movdqa %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm5, %xmm2
-; SSE2-NEXT: movdqa %xmm8, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT: pand %xmm7, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: por %xmm2, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm4
-; SSE2-NEXT: pandn %xmm9, %xmm6
-; SSE2-NEXT: por %xmm4, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSE2-NEXT: pxor %xmm3, %xmm5
-; SSE2-NEXT: movdqa %xmm8, %xmm4
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm6, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: por %xmm5, %xmm4
-; SSE2-NEXT: pand %xmm4, %xmm3
-; SSE2-NEXT: pandn %xmm9, %xmm4
-; SSE2-NEXT: por %xmm3, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSE2-NEXT: psubusw %xmm3, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: psubus_8i64_max:
-; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT: movdqa %xmm2, %xmm7
-; SSSE3-NEXT: pxor %xmm5, %xmm7
-; SSSE3-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
-; SSSE3-NEXT: movdqa %xmm8, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm7, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm8, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; SSSE3-NEXT: pand %xmm9, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: por %xmm7, %xmm6
-; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [65535,65535]
-; SSSE3-NEXT: pand %xmm6, %xmm2
-; SSSE3-NEXT: pandn %xmm9, %xmm6
-; SSSE3-NEXT: por %xmm2, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm1, %xmm6
-; SSSE3-NEXT: pxor %xmm5, %xmm6
-; SSSE3-NEXT: movdqa %xmm8, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm8, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm2, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
-; SSSE3-NEXT: por %xmm6, %xmm2
-; SSSE3-NEXT: pand %xmm2, %xmm1
-; SSSE3-NEXT: pandn %xmm9, %xmm2
-; SSSE3-NEXT: por %xmm1, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
-; SSSE3-NEXT: movdqa %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm5, %xmm2
-; SSSE3-NEXT: movdqa %xmm8, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm8, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT: pand %xmm7, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: por %xmm2, %xmm6
-; SSSE3-NEXT: pand %xmm6, %xmm4
-; SSSE3-NEXT: pandn %xmm9, %xmm6
-; SSSE3-NEXT: por %xmm4, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
-; SSSE3-NEXT: pxor %xmm3, %xmm5
-; SSSE3-NEXT: movdqa %xmm8, %xmm4
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm4
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm8, %xmm5
-; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT: pand %xmm6, %xmm5
-; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: por %xmm5, %xmm4
-; SSSE3-NEXT: pand %xmm4, %xmm3
-; SSSE3-NEXT: pandn %xmm9, %xmm4
-; SSSE3-NEXT: por %xmm3, %xmm4
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
-; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSSE3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
-; SSSE3-NEXT: psubusw %xmm3, %xmm0
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: psubus_8i64_max:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT: pxor %xmm5, %xmm7
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
+; SSE2OR3-NEXT: movdqa %xmm8, %xmm6
+; SSE2OR3-NEXT: pcmpgtd %xmm7, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm8, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm9, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm7, %xmm6
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm9 = [65535,65535]
+; SSE2OR3-NEXT: pand %xmm6, %xmm2
+; SSE2OR3-NEXT: pandn %xmm9, %xmm6
+; SSE2OR3-NEXT: por %xmm2, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm6
+; SSE2OR3-NEXT: pxor %xmm5, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm8, %xmm7
+; SSE2OR3-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm8, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm2, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm6, %xmm2
+; SSE2OR3-NEXT: pand %xmm2, %xmm1
+; SSE2OR3-NEXT: pandn %xmm9, %xmm2
+; SSE2OR3-NEXT: por %xmm1, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT: pxor %xmm5, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm8, %xmm6
+; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm8, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm7, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm2, %xmm6
+; SSE2OR3-NEXT: pand %xmm6, %xmm4
+; SSE2OR3-NEXT: pandn %xmm9, %xmm6
+; SSE2OR3-NEXT: por %xmm4, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
+; SSE2OR3-NEXT: pxor %xmm3, %xmm5
+; SSE2OR3-NEXT: movdqa %xmm8, %xmm4
+; SSE2OR3-NEXT: pcmpgtd %xmm5, %xmm4
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm8, %xmm5
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm6, %xmm5
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm5, %xmm4
+; SSE2OR3-NEXT: pand %xmm4, %xmm3
+; SSE2OR3-NEXT: pandn %xmm9, %xmm4
+; SSE2OR3-NEXT: por %xmm3, %xmm4
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2OR3-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
+; SSE2OR3-NEXT: psubusw %xmm3, %xmm0
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: psubus_8i64_max:
; SSE41: # %bb.0: # %vector.ph
@@ -1730,95 +1615,50 @@ vector.ph:
}
define <16 x i16> @psubus_16i32_max(<16 x i16> %x, <16 x i32> %y) nounwind {
-; SSE2-LABEL: psubus_16i32_max:
-; SSE2: # %bb.0: # %vector.ph
-; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm3, %xmm8
-; SSE2-NEXT: pxor %xmm9, %xmm8
-; SSE2-NEXT: movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
-; SSE2-NEXT: movdqa %xmm10, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm8, %xmm6
-; SSE2-NEXT: pcmpeqd %xmm8, %xmm8
-; SSE2-NEXT: pand %xmm6, %xmm3
-; SSE2-NEXT: pxor %xmm8, %xmm6
-; SSE2-NEXT: por %xmm3, %xmm6
-; SSE2-NEXT: pslld $16, %xmm6
-; SSE2-NEXT: psrad $16, %xmm6
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pxor %xmm9, %xmm3
-; SSE2-NEXT: movdqa %xmm10, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm7
-; SSE2-NEXT: pand %xmm7, %xmm2
-; SSE2-NEXT: pxor %xmm8, %xmm7
-; SSE2-NEXT: por %xmm2, %xmm7
-; SSE2-NEXT: pslld $16, %xmm7
-; SSE2-NEXT: psrad $16, %xmm7
-; SSE2-NEXT: packssdw %xmm6, %xmm7
-; SSE2-NEXT: psubusw %xmm7, %xmm0
-; SSE2-NEXT: movdqa %xmm5, %xmm2
-; SSE2-NEXT: pxor %xmm9, %xmm2
-; SSE2-NEXT: movdqa %xmm10, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm3
-; SSE2-NEXT: pand %xmm3, %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm3
-; SSE2-NEXT: por %xmm5, %xmm3
-; SSE2-NEXT: pslld $16, %xmm3
-; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: pxor %xmm4, %xmm9
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
-; SSE2-NEXT: pxor %xmm10, %xmm8
-; SSE2-NEXT: pand %xmm4, %xmm10
-; SSE2-NEXT: por %xmm8, %xmm10
-; SSE2-NEXT: pslld $16, %xmm10
-; SSE2-NEXT: psrad $16, %xmm10
-; SSE2-NEXT: packssdw %xmm3, %xmm10
-; SSE2-NEXT: psubusw %xmm10, %xmm1
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: psubus_16i32_max:
-; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT: movdqa %xmm3, %xmm8
-; SSSE3-NEXT: pxor %xmm9, %xmm8
-; SSSE3-NEXT: movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
-; SSSE3-NEXT: movdqa %xmm10, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm8, %xmm6
-; SSSE3-NEXT: pcmpeqd %xmm8, %xmm8
-; SSSE3-NEXT: pand %xmm6, %xmm3
-; SSSE3-NEXT: pxor %xmm8, %xmm6
-; SSSE3-NEXT: por %xmm3, %xmm6
-; SSSE3-NEXT: pslld $16, %xmm6
-; SSSE3-NEXT: psrad $16, %xmm6
-; SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSSE3-NEXT: pxor %xmm9, %xmm3
-; SSSE3-NEXT: movdqa %xmm10, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm7
-; SSSE3-NEXT: pand %xmm7, %xmm2
-; SSSE3-NEXT: pxor %xmm8, %xmm7
-; SSSE3-NEXT: por %xmm2, %xmm7
-; SSSE3-NEXT: pslld $16, %xmm7
-; SSSE3-NEXT: psrad $16, %xmm7
-; SSSE3-NEXT: packssdw %xmm6, %xmm7
-; SSSE3-NEXT: psubusw %xmm7, %xmm0
-; SSSE3-NEXT: movdqa %xmm5, %xmm2
-; SSSE3-NEXT: pxor %xmm9, %xmm2
-; SSSE3-NEXT: movdqa %xmm10, %xmm3
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm3
-; SSSE3-NEXT: pand %xmm3, %xmm5
-; SSSE3-NEXT: pxor %xmm8, %xmm3
-; SSSE3-NEXT: por %xmm5, %xmm3
-; SSSE3-NEXT: pslld $16, %xmm3
-; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: pxor %xmm4, %xmm9
-; SSSE3-NEXT: pcmpgtd %xmm9, %xmm10
-; SSSE3-NEXT: pxor %xmm10, %xmm8
-; SSSE3-NEXT: pand %xmm4, %xmm10
-; SSSE3-NEXT: por %xmm8, %xmm10
-; SSSE3-NEXT: pslld $16, %xmm10
-; SSSE3-NEXT: psrad $16, %xmm10
-; SSSE3-NEXT: packssdw %xmm3, %xmm10
-; SSSE3-NEXT: psubusw %xmm10, %xmm1
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: psubus_16i32_max:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm9 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm8
+; SSE2OR3-NEXT: pxor %xmm9, %xmm8
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm10 = [2147549183,2147549183,2147549183,2147549183]
+; SSE2OR3-NEXT: movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT: pcmpgtd %xmm8, %xmm6
+; SSE2OR3-NEXT: pcmpeqd %xmm8, %xmm8
+; SSE2OR3-NEXT: pand %xmm6, %xmm3
+; SSE2OR3-NEXT: pxor %xmm8, %xmm6
+; SSE2OR3-NEXT: por %xmm3, %xmm6
+; SSE2OR3-NEXT: pslld $16, %xmm6
+; SSE2OR3-NEXT: psrad $16, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT: pxor %xmm9, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm10, %xmm7
+; SSE2OR3-NEXT: pcmpgtd %xmm3, %xmm7
+; SSE2OR3-NEXT: pand %xmm7, %xmm2
+; SSE2OR3-NEXT: pxor %xmm8, %xmm7
+; SSE2OR3-NEXT: por %xmm2, %xmm7
+; SSE2OR3-NEXT: pslld $16, %xmm7
+; SSE2OR3-NEXT: psrad $16, %xmm7
+; SSE2OR3-NEXT: packssdw %xmm6, %xmm7
+; SSE2OR3-NEXT: psubusw %xmm7, %xmm0
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT: pxor %xmm9, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm10, %xmm3
+; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE2OR3-NEXT: pand %xmm3, %xmm5
+; SSE2OR3-NEXT: pxor %xmm8, %xmm3
+; SSE2OR3-NEXT: por %xmm5, %xmm3
+; SSE2OR3-NEXT: pslld $16, %xmm3
+; SSE2OR3-NEXT: psrad $16, %xmm3
+; SSE2OR3-NEXT: pxor %xmm4, %xmm9
+; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm10
+; SSE2OR3-NEXT: pxor %xmm10, %xmm8
+; SSE2OR3-NEXT: pand %xmm4, %xmm10
+; SSE2OR3-NEXT: por %xmm8, %xmm10
+; SSE2OR3-NEXT: pslld $16, %xmm10
+; SSE2OR3-NEXT: psrad $16, %xmm10
+; SSE2OR3-NEXT: packssdw %xmm3, %xmm10
+; SSE2OR3-NEXT: psubusw %xmm10, %xmm1
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: psubus_16i32_max:
; SSE41: # %bb.0: # %vector.ph
@@ -2115,23 +1955,14 @@ define void @subus_v4i8(<4 x i8>* %p1, <4 x i8>* %p2) {
}
define void @subus_v2i8(<2 x i8>* %p1, <2 x i8>* %p2) {
-; SSE2-LABEL: subus_v2i8:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSE2-NEXT: psubusb %xmm1, %xmm0
-; SSE2-NEXT: movd %xmm0, %eax
-; SSE2-NEXT: movw %ax, (%rdi)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: subus_v2i8:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; SSSE3-NEXT: psubusb %xmm1, %xmm0
-; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: movw %ax, (%rdi)
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: subus_v2i8:
+; SSE2OR3: # %bb.0:
+; SSE2OR3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2OR3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; SSE2OR3-NEXT: psubusb %xmm1, %xmm0
+; SSE2OR3-NEXT: movd %xmm0, %eax
+; SSE2OR3-NEXT: movw %ax, (%rdi)
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: subus_v2i8:
; SSE41: # %bb.0:
@@ -2663,159 +2494,82 @@ define <8 x i16> @test32(<8 x i16> %a0, <8 x i32> %a1) {
; v8i32/v8i64 - sub(x,trunc(umin(y,zext(x))))
define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
-; SSE2-LABEL: test33:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pxor %xmm7, %xmm7
-; SSE2-NEXT: movdqa %xmm1, %xmm8
-; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm9
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSE2-NEXT: movdqa %xmm0, %xmm10
-; SSE2-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm11
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT: movdqa %xmm3, %xmm6
-; SSE2-NEXT: pxor %xmm12, %xmm6
-; SSE2-NEXT: movdqa %xmm11, %xmm7
-; SSE2-NEXT: pxor %xmm12, %xmm7
-; SSE2-NEXT: movdqa %xmm7, %xmm13
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm13
-; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT: pand %xmm14, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm7
-; SSE2-NEXT: pand %xmm7, %xmm3
-; SSE2-NEXT: pandn %xmm11, %xmm7
-; SSE2-NEXT: por %xmm3, %xmm7
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pxor %xmm12, %xmm3
-; SSE2-NEXT: movdqa %xmm10, %xmm6
-; SSE2-NEXT: pxor %xmm12, %xmm6
-; SSE2-NEXT: movdqa %xmm6, %xmm11
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm11
-; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm13, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm3
-; SSE2-NEXT: pand %xmm3, %xmm2
-; SSE2-NEXT: pandn %xmm10, %xmm3
-; SSE2-NEXT: por %xmm2, %xmm3
-; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
-; SSE2-NEXT: movdqa %xmm5, %xmm2
-; SSE2-NEXT: pxor %xmm12, %xmm2
-; SSE2-NEXT: movdqa %xmm9, %xmm6
-; SSE2-NEXT: pxor %xmm12, %xmm6
-; SSE2-NEXT: movdqa %xmm6, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm10, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT: por %xmm2, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm5
-; SSE2-NEXT: pandn %xmm9, %xmm6
-; SSE2-NEXT: por %xmm5, %xmm6
-; SSE2-NEXT: movdqa %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm12, %xmm2
-; SSE2-NEXT: pxor %xmm8, %xmm12
-; SSE2-NEXT: movdqa %xmm12, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm12
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
-; SSE2-NEXT: pand %xmm7, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm2, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm4
-; SSE2-NEXT: pandn %xmm8, %xmm5
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSE2-NEXT: psubd %xmm3, %xmm0
-; SSE2-NEXT: psubd %xmm5, %xmm1
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: test33:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: pxor %xmm7, %xmm7
-; SSSE3-NEXT: movdqa %xmm1, %xmm8
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm9
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm10
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSSE3-NEXT: movdqa %xmm0, %xmm11
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT: movdqa %xmm3, %xmm6
-; SSSE3-NEXT: pxor %xmm12, %xmm6
-; SSSE3-NEXT: movdqa %xmm11, %xmm7
-; SSSE3-NEXT: pxor %xmm12, %xmm7
-; SSSE3-NEXT: movdqa %xmm7, %xmm13
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm13
-; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm6, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT: pand %xmm14, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSSE3-NEXT: por %xmm6, %xmm7
-; SSSE3-NEXT: pand %xmm7, %xmm3
-; SSSE3-NEXT: pandn %xmm11, %xmm7
-; SSSE3-NEXT: por %xmm3, %xmm7
-; SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSSE3-NEXT: pxor %xmm12, %xmm3
-; SSSE3-NEXT: movdqa %xmm10, %xmm6
-; SSSE3-NEXT: pxor %xmm12, %xmm6
-; SSSE3-NEXT: movdqa %xmm6, %xmm11
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm11
-; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm3, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm13, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
-; SSSE3-NEXT: por %xmm6, %xmm3
-; SSSE3-NEXT: pand %xmm3, %xmm2
-; SSSE3-NEXT: pandn %xmm10, %xmm3
-; SSSE3-NEXT: por %xmm2, %xmm3
-; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
-; SSSE3-NEXT: movdqa %xmm5, %xmm2
-; SSSE3-NEXT: pxor %xmm12, %xmm2
-; SSSE3-NEXT: movdqa %xmm9, %xmm6
-; SSSE3-NEXT: pxor %xmm12, %xmm6
-; SSSE3-NEXT: movdqa %xmm6, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm2, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm10, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT: por %xmm2, %xmm6
-; SSSE3-NEXT: pand %xmm6, %xmm5
-; SSSE3-NEXT: pandn %xmm9, %xmm6
-; SSSE3-NEXT: por %xmm5, %xmm6
-; SSSE3-NEXT: movdqa %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm12, %xmm2
-; SSSE3-NEXT: pxor %xmm8, %xmm12
-; SSSE3-NEXT: movdqa %xmm12, %xmm5
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm2, %xmm12
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
-; SSSE3-NEXT: pand %xmm7, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT: por %xmm2, %xmm5
-; SSSE3-NEXT: pand %xmm5, %xmm4
-; SSSE3-NEXT: pandn %xmm8, %xmm5
-; SSSE3-NEXT: por %xmm4, %xmm5
-; SSSE3-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
-; SSSE3-NEXT: psubd %xmm3, %xmm0
-; SSSE3-NEXT: psubd %xmm5, %xmm1
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: test33:
+; SSE2OR3: # %bb.0:
+; SSE2OR3-NEXT: pxor %xmm7, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm8
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm9
+; SSE2OR3-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm10
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm11
+; SSE2OR3-NEXT: punpckhdq {{.*#+}} xmm11 = xmm11[2],xmm7[2],xmm11[3],xmm7[3]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm12 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm6
+; SSE2OR3-NEXT: pxor %xmm12, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm11, %xmm7
+; SSE2OR3-NEXT: pxor %xmm12, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm7, %xmm13
+; SSE2OR3-NEXT: pcmpgtd %xmm6, %xmm13
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm14, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm6, %xmm7
+; SSE2OR3-NEXT: pand %xmm7, %xmm3
+; SSE2OR3-NEXT: pandn %xmm11, %xmm7
+; SSE2OR3-NEXT: por %xmm3, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT: pxor %xmm12, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT: pxor %xmm12, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm6, %xmm11
+; SSE2OR3-NEXT: pcmpgtd %xmm3, %xmm11
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm13 = xmm11[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm3, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm13, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm6, %xmm3
+; SSE2OR3-NEXT: pand %xmm3, %xmm2
+; SSE2OR3-NEXT: pandn %xmm10, %xmm3
+; SSE2OR3-NEXT: por %xmm2, %xmm3
+; SSE2OR3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm7[0,2]
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT: pxor %xmm12, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm9, %xmm6
+; SSE2OR3-NEXT: pxor %xmm12, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm6, %xmm7
+; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm2, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm10, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm2, %xmm6
+; SSE2OR3-NEXT: pand %xmm6, %xmm5
+; SSE2OR3-NEXT: pandn %xmm9, %xmm6
+; SSE2OR3-NEXT: por %xmm5, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT: pxor %xmm12, %xmm2
+; SSE2OR3-NEXT: pxor %xmm8, %xmm12
+; SSE2OR3-NEXT: movdqa %xmm12, %xmm5
+; SSE2OR3-NEXT: pcmpgtd %xmm2, %xmm5
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm2, %xmm12
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm12[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm7, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm2, %xmm5
+; SSE2OR3-NEXT: pand %xmm5, %xmm4
+; SSE2OR3-NEXT: pandn %xmm8, %xmm5
+; SSE2OR3-NEXT: por %xmm4, %xmm5
+; SSE2OR3-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
+; SSE2OR3-NEXT: psubd %xmm3, %xmm0
+; SSE2OR3-NEXT: psubd %xmm5, %xmm1
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: test33:
; SSE41: # %bb.0:
@@ -2970,165 +2724,85 @@ define <8 x i32> @test33(<8 x i32> %a0, <8 x i64> %a1) {
; v8i32/v8i64 - sub(x,trunc(umin(zext(and(x,1)),y)))
define <8 x i32> @test34(<8 x i32> %a0, <8 x i64> %a1) {
-; SSE2-LABEL: test34:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [1,1,1,1]
-; SSE2-NEXT: pand %xmm6, %xmm0
-; SSE2-NEXT: pand %xmm6, %xmm1
-; SSE2-NEXT: pxor %xmm7, %xmm7
-; SSE2-NEXT: movdqa %xmm1, %xmm8
-; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm9
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSE2-NEXT: movdqa %xmm0, %xmm10
-; SSE2-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm12
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
-; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
-; SSE2-NEXT: movdqa %xmm3, %xmm7
-; SSE2-NEXT: pxor %xmm11, %xmm7
-; SSE2-NEXT: movdqa %xmm12, %xmm6
-; SSE2-NEXT: por %xmm11, %xmm6
-; SSE2-NEXT: movdqa %xmm7, %xmm13
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm13
-; SSE2-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm7, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm14, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm7
-; SSE2-NEXT: pand %xmm7, %xmm12
-; SSE2-NEXT: pandn %xmm3, %xmm7
-; SSE2-NEXT: por %xmm12, %xmm7
-; SSE2-NEXT: movdqa %xmm2, %xmm3
-; SSE2-NEXT: pxor %xmm11, %xmm3
-; SSE2-NEXT: movdqa %xmm10, %xmm6
-; SSE2-NEXT: por %xmm11, %xmm6
-; SSE2-NEXT: movdqa %xmm3, %xmm12
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm12
-; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm13, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm3
-; SSE2-NEXT: pand %xmm3, %xmm10
-; SSE2-NEXT: pandn %xmm2, %xmm3
-; SSE2-NEXT: por %xmm10, %xmm3
-; SSE2-NEXT: packuswb %xmm7, %xmm3
-; SSE2-NEXT: movdqa %xmm5, %xmm2
-; SSE2-NEXT: pxor %xmm11, %xmm2
-; SSE2-NEXT: movdqa %xmm9, %xmm6
-; SSE2-NEXT: por %xmm11, %xmm6
-; SSE2-NEXT: movdqa %xmm2, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSE2-NEXT: pand %xmm10, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSE2-NEXT: por %xmm2, %xmm6
-; SSE2-NEXT: pand %xmm6, %xmm9
-; SSE2-NEXT: pandn %xmm5, %xmm6
-; SSE2-NEXT: por %xmm9, %xmm6
-; SSE2-NEXT: movdqa %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm11, %xmm2
-; SSE2-NEXT: por %xmm8, %xmm11
-; SSE2-NEXT: movdqa %xmm2, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm11, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm11
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
-; SSE2-NEXT: pand %xmm7, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm2, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm8
-; SSE2-NEXT: pandn %xmm4, %xmm5
-; SSE2-NEXT: por %xmm8, %xmm5
-; SSE2-NEXT: packuswb %xmm6, %xmm5
-; SSE2-NEXT: psubd %xmm3, %xmm0
-; SSE2-NEXT: psubd %xmm5, %xmm1
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: test34:
-; SSSE3: # %bb.0:
-; SSSE3-NEXT: movdqa {{.*#+}} xmm6 = [1,1,1,1]
-; SSSE3-NEXT: pand %xmm6, %xmm0
-; SSSE3-NEXT: pand %xmm6, %xmm1
-; SSSE3-NEXT: pxor %xmm7, %xmm7
-; SSSE3-NEXT: movdqa %xmm1, %xmm8
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm9
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm10
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
-; SSSE3-NEXT: movdqa %xmm0, %xmm12
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
-; SSSE3-NEXT: movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
-; SSSE3-NEXT: movdqa %xmm3, %xmm7
-; SSSE3-NEXT: pxor %xmm11, %xmm7
-; SSSE3-NEXT: movdqa %xmm12, %xmm6
-; SSSE3-NEXT: por %xmm11, %xmm6
-; SSSE3-NEXT: movdqa %xmm7, %xmm13
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm13
-; SSSE3-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm7, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm14, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
-; SSSE3-NEXT: por %xmm6, %xmm7
-; SSSE3-NEXT: pand %xmm7, %xmm12
-; SSSE3-NEXT: pandn %xmm3, %xmm7
-; SSSE3-NEXT: por %xmm12, %xmm7
-; SSSE3-NEXT: movdqa %xmm2, %xmm3
-; SSSE3-NEXT: pxor %xmm11, %xmm3
-; SSSE3-NEXT: movdqa %xmm10, %xmm6
-; SSSE3-NEXT: por %xmm11, %xmm6
-; SSSE3-NEXT: movdqa %xmm3, %xmm12
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm12
-; SSSE3-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm3, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm13, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
-; SSSE3-NEXT: por %xmm6, %xmm3
-; SSSE3-NEXT: pand %xmm3, %xmm10
-; SSSE3-NEXT: pandn %xmm2, %xmm3
-; SSSE3-NEXT: por %xmm10, %xmm3
-; SSSE3-NEXT: packuswb %xmm7, %xmm3
-; SSSE3-NEXT: movdqa %xmm5, %xmm2
-; SSSE3-NEXT: pxor %xmm11, %xmm2
-; SSSE3-NEXT: movdqa %xmm9, %xmm6
-; SSSE3-NEXT: por %xmm11, %xmm6
-; SSSE3-NEXT: movdqa %xmm2, %xmm7
-; SSSE3-NEXT: pcmpgtd %xmm6, %xmm7
-; SSSE3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm2, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSSE3-NEXT: pand %xmm10, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
-; SSSE3-NEXT: por %xmm2, %xmm6
-; SSSE3-NEXT: pand %xmm6, %xmm9
-; SSSE3-NEXT: pandn %xmm5, %xmm6
-; SSSE3-NEXT: por %xmm9, %xmm6
-; SSSE3-NEXT: movdqa %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm11, %xmm2
-; SSSE3-NEXT: por %xmm8, %xmm11
-; SSSE3-NEXT: movdqa %xmm2, %xmm5
-; SSSE3-NEXT: pcmpgtd %xmm11, %xmm5
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
-; SSSE3-NEXT: pcmpeqd %xmm2, %xmm11
-; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
-; SSSE3-NEXT: pand %xmm7, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSSE3-NEXT: por %xmm2, %xmm5
-; SSSE3-NEXT: pand %xmm5, %xmm8
-; SSSE3-NEXT: pandn %xmm4, %xmm5
-; SSSE3-NEXT: por %xmm8, %xmm5
-; SSSE3-NEXT: packuswb %xmm6, %xmm5
-; SSSE3-NEXT: psubd %xmm3, %xmm0
-; SSSE3-NEXT: psubd %xmm5, %xmm1
-; SSSE3-NEXT: retq
+; SSE2OR3-LABEL: test34:
+; SSE2OR3: # %bb.0:
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm6 = [1,1,1,1]
+; SSE2OR3-NEXT: pand %xmm6, %xmm0
+; SSE2OR3-NEXT: pand %xmm6, %xmm1
+; SSE2OR3-NEXT: pxor %xmm7, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm8
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm7[0],xmm8[1],xmm7[1]
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm9
+; SSE2OR3-NEXT: punpckhdq {{.*#+}} xmm9 = xmm9[2],xmm7[2],xmm9[3],xmm7[3]
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm10
+; SSE2OR3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm7[0],xmm10[1],xmm7[1]
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm12
+; SSE2OR3-NEXT: punpckhdq {{.*#+}} xmm12 = xmm12[2],xmm7[2],xmm12[3],xmm7[3]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm7
+; SSE2OR3-NEXT: pxor %xmm11, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm12, %xmm6
+; SSE2OR3-NEXT: por %xmm11, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm7, %xmm13
+; SSE2OR3-NEXT: pcmpgtd %xmm6, %xmm13
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm14 = xmm13[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm7, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm14, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm13[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm6, %xmm7
+; SSE2OR3-NEXT: pand %xmm7, %xmm12
+; SSE2OR3-NEXT: pandn %xmm3, %xmm7
+; SSE2OR3-NEXT: por %xmm12, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm3
+; SSE2OR3-NEXT: pxor %xmm11, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm10, %xmm6
+; SSE2OR3-NEXT: por %xmm11, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm12
+; SSE2OR3-NEXT: pcmpgtd %xmm6, %xmm12
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm13 = xmm12[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm3, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm13, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm3 = xmm12[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm6, %xmm3
+; SSE2OR3-NEXT: pand %xmm3, %xmm10
+; SSE2OR3-NEXT: pandn %xmm2, %xmm3
+; SSE2OR3-NEXT: por %xmm10, %xmm3
+; SSE2OR3-NEXT: packuswb %xmm7, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm2
+; SSE2OR3-NEXT: pxor %xmm11, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm9, %xmm6
+; SSE2OR3-NEXT: por %xmm11, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT: pcmpgtd %xmm6, %xmm7
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm10 = xmm7[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm2, %xmm6
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm10, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm2, %xmm6
+; SSE2OR3-NEXT: pand %xmm6, %xmm9
+; SSE2OR3-NEXT: pandn %xmm5, %xmm6
+; SSE2OR3-NEXT: por %xmm9, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm2
+; SSE2OR3-NEXT: pxor %xmm11, %xmm2
+; SSE2OR3-NEXT: por %xmm8, %xmm11
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm5
+; SSE2OR3-NEXT: pcmpgtd %xmm11, %xmm5
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm7 = xmm5[0,0,2,2]
+; SSE2OR3-NEXT: pcmpeqd %xmm2, %xmm11
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm11[1,1,3,3]
+; SSE2OR3-NEXT: pand %xmm7, %xmm2
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2OR3-NEXT: por %xmm2, %xmm5
+; SSE2OR3-NEXT: pand %xmm5, %xmm8
+; SSE2OR3-NEXT: pandn %xmm4, %xmm5
+; SSE2OR3-NEXT: por %xmm8, %xmm5
+; SSE2OR3-NEXT: packuswb %xmm6, %xmm5
+; SSE2OR3-NEXT: psubd %xmm3, %xmm0
+; SSE2OR3-NEXT: psubd %xmm5, %xmm1
+; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: test34:
; SSE41: # %bb.0:
More information about the llvm-commits
mailing list