[llvm] r347181 - [X86] Use compare with 0 to fill an element with sign bits when sign extending to v2i64 pre-sse4.1
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 18 20:33:20 PST 2018
Author: ctopper
Date: Sun Nov 18 20:33:20 2018
New Revision: 347181
URL: http://llvm.org/viewvc/llvm-project?rev=347181&view=rev
Log:
[X86] Use compare with 0 to fill an element with sign bits when sign extending to v2i64 pre-sse4.1
Previously we used an arithmetic shift right by 31, but that requires a copy to preserve the input. So we might as well materialize a zero and compare the input against it, since the comparison overwrites the register that held the zeros. This should be one byte shorter.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/pmul.ll
llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
llvm/trunk/test/CodeGen/X86/vector-sext.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 18 20:33:20 2018
@@ -20083,8 +20083,8 @@ static SDValue LowerEXTEND_VECTOR_INREG(
return SignExt;
if (VT == MVT::v2i64 && CurrVT == MVT::v4i32) {
- SDValue Sign = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
- DAG.getConstant(31, dl, MVT::i8));
+ SDValue Zero = DAG.getConstant(0, dl, CurrVT);
+ SDValue Sign = DAG.getSetCC(dl, CurrVT, Zero, Curr, ISD::SETGT);
SDValue Ext = DAG.getVectorShuffle(CurrVT, dl, SignExt, Sign, {0, 4, 1, 5});
return DAG.getBitcast(VT, Ext);
}
@@ -26358,8 +26358,8 @@ void X86TargetLowering::ReplaceNodeResul
In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In);
// Fill a vector with sign bits for each element.
- SDValue SignBits = DAG.getNode(ISD::SRA, dl, MVT::v4i32, In,
- DAG.getConstant(31, dl, MVT::v4i32));
+ SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
+ SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT);
// Create an unpackl and unpackh to interleave the sign bits then bitcast
// to v2i64.
Modified: llvm/trunk/test/CodeGen/X86/pmul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pmul.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pmul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pmul.ll Sun Nov 18 20:33:20 2018
@@ -1302,74 +1302,76 @@ entry:
define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) {
; SSE2-LABEL: mul_v8i64_sext:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm12 = xmm12[4],xmm0[4],xmm12[5],xmm0[5],xmm12[6],xmm0[6],xmm12[7],xmm0[7]
-; SSE2-NEXT: movdqa %xmm12, %xmm9
-; SSE2-NEXT: psrad $31, %xmm9
-; SSE2-NEXT: psrad $16, %xmm12
-; SSE2-NEXT: punpckldq {{.*#+}} xmm12 = xmm12[0],xmm9[0],xmm12[1],xmm9[1]
+; SSE2-NEXT: movdqa %xmm1, %xmm4
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
+; SSE2-NEXT: pxor %xmm8, %xmm8
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm15 = xmm5[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm15, %xmm8
-; SSE2-NEXT: psrad $31, %xmm8
-; SSE2-NEXT: psrad $16, %xmm15
-; SSE2-NEXT: punpckldq {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm7 = xmm3[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm7, %xmm13
-; SSE2-NEXT: psrad $31, %xmm13
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm4, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm0, %xmm6
+; SSE2-NEXT: paddq %xmm3, %xmm6
+; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm5[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pmuludq %xmm4, %xmm0
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
+; SSE2-NEXT: psrad $16, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE2-NEXT: psllq $32, %xmm6
+; SSE2-NEXT: paddq %xmm6, %xmm0
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm1, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm3, %xmm6
+; SSE2-NEXT: paddq %xmm4, %xmm6
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm4
; SSE2-NEXT: psrad $16, %xmm7
-; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm2[2,3,0,1]
-; SSE2-NEXT: movdqa %xmm11, %xmm10
-; SSE2-NEXT: psrad $31, %xmm10
-; SSE2-NEXT: punpckldq {{.*#+}} xmm11 = xmm11[0],xmm10[0],xmm11[1],xmm10[1]
-; SSE2-NEXT: movdqa %xmm2, %xmm14
-; SSE2-NEXT: psrad $31, %xmm14
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm14[0],xmm2[1],xmm14[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; SSE2-NEXT: movdqa %xmm3, %xmm6
-; SSE2-NEXT: psrad $31, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1]
+; SSE2-NEXT: pmuludq %xmm3, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
+; SSE2-NEXT: psllq $32, %xmm6
+; SSE2-NEXT: paddq %xmm6, %xmm1
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm8[0],xmm4[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm2, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm7, %xmm6
+; SSE2-NEXT: paddq %xmm4, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,3,0,1]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
+; SSE2-NEXT: pmuludq %xmm7, %xmm2
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
+; SSE2-NEXT: psrad $16, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: psllq $32, %xmm6
+; SSE2-NEXT: paddq %xmm6, %xmm2
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm5
-; SSE2-NEXT: psrad $31, %xmm5
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm1, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm0, %xmm5
-; SSE2-NEXT: paddq %xmm4, %xmm5
-; SSE2-NEXT: psllq $32, %xmm5
-; SSE2-NEXT: pmuludq %xmm1, %xmm0
-; SSE2-NEXT: paddq %xmm5, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm7, %xmm1
-; SSE2-NEXT: pmuludq %xmm3, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm13[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm3, %xmm4
-; SSE2-NEXT: paddq %xmm4, %xmm1
-; SSE2-NEXT: psllq $32, %xmm1
-; SSE2-NEXT: paddq %xmm7, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm9[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm2, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm14[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm12, %xmm4
-; SSE2-NEXT: paddq %xmm3, %xmm4
-; SSE2-NEXT: psllq $32, %xmm4
-; SSE2-NEXT: pmuludq %xmm12, %xmm2
-; SSE2-NEXT: paddq %xmm4, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm10[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm15, %xmm3
-; SSE2-NEXT: pmuludq %xmm11, %xmm15
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm8[0,1,1,3]
-; SSE2-NEXT: pmuludq %xmm11, %xmm4
-; SSE2-NEXT: paddq %xmm4, %xmm3
-; SSE2-NEXT: psllq $32, %xmm3
-; SSE2-NEXT: paddq %xmm15, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm8[0],xmm5[1],xmm8[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE2-NEXT: pmuludq %xmm3, %xmm5
+; SSE2-NEXT: pmuludq %xmm4, %xmm6
+; SSE2-NEXT: paddq %xmm5, %xmm6
+; SSE2-NEXT: pmuludq %xmm4, %xmm3
+; SSE2-NEXT: psllq $32, %xmm6
+; SSE2-NEXT: paddq %xmm6, %xmm3
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v8i64_sext:
Modified: llvm/trunk/test/CodeGen/X86/trunc-subvector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/trunc-subvector.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/trunc-subvector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/trunc-subvector.ll Sun Nov 18 20:33:20 2018
@@ -41,7 +41,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
; SSE2-LABEL: test3:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
@@ -67,8 +68,8 @@ define <2 x i32> @test3(<8 x i32> %v) {
define <2 x i32> @test4(<8 x i32> %v) {
; SSE2-LABEL: test4:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
@@ -93,12 +94,12 @@ define <2 x i32> @test4(<8 x i32> %v) {
define <2 x i32> @test5(<8 x i32> %v) {
; SSE2-LABEL: test5:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
; SSE2-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_fp_to_int.ll Sun Nov 18 20:33:20 2018
@@ -2387,8 +2387,8 @@ define <2 x i8> @fptosi_2f32_to_2i8(<2 x
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
@@ -2430,8 +2430,8 @@ define <2 x i16> @fptosi_2f32_to_2i16(<2
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
@@ -2557,9 +2557,9 @@ define <2 x i8> @fptosi_2f64_to_2i8(<2 x
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i8:
@@ -2600,9 +2600,9 @@ define <2 x i16> @fptosi_2f64_to_2i16(<2
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: pxor %xmm1, %xmm1
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i16:
Modified: llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext-widen.ll Sun Nov 18 20:33:20 2018
@@ -446,8 +446,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -456,8 +456,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -476,8 +476,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $24, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -498,8 +498,8 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -510,8 +510,8 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -548,8 +548,8 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -574,18 +574,18 @@ define <8 x i64> @sext_16i8_to_8i64(<16
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-NEXT: psrad $24, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: retq
;
@@ -594,18 +594,18 @@ define <8 x i64> @sext_16i8_to_8i64(<16
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSSE3-NEXT: psrad $24, %xmm3
-; SSSE3-NEXT: movdqa %xmm3, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSSE3-NEXT: movdqa %xmm3, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: retq
;
@@ -654,18 +654,18 @@ define <8 x i64> @sext_16i8_to_8i64(<16
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X32-SSE2-NEXT: psrad $24, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
; X32-SSE2-NEXT: retl
;
@@ -894,8 +894,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; SSE2-LABEL: sext_8i16_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -903,8 +903,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; SSSE3-LABEL: sext_8i16_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -922,8 +922,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; X32-SSE2-LABEL: sext_8i16_to_2i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -943,8 +943,8 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -954,8 +954,8 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -991,8 +991,8 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $16, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -1016,18 +1016,18 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: retq
;
@@ -1035,18 +1035,18 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: movdqa %xmm3, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSSE3-NEXT: movdqa %xmm3, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: retq
;
@@ -1093,18 +1093,18 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $16, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; X32-SSE2-NEXT: psrad $16, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm0[2],xmm3[3],xmm0[3]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
; X32-SSE2-NEXT: retl
;
@@ -1127,15 +1127,15 @@ entry:
define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_2i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i32_to_2i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
@@ -1151,8 +1151,8 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
;
; X32-SSE2-LABEL: sext_4i32_to_2i64:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
;
@@ -1169,23 +1169,23 @@ entry:
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_4i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i32_to_4i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -1217,12 +1217,12 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
;
; X32-SSE2-LABEL: sext_4i32_to_4i64:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -1242,38 +1242,38 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; SSE2-LABEL: sext_8i32_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i32_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: movdqa %xmm0, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
-; SSSE3-NEXT: movdqa %xmm1, %xmm4
-; SSSE3-NEXT: psrad $31, %xmm4
+; SSSE3-NEXT: pxor %xmm4, %xmm4
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSSE3-NEXT: movdqa %xmm3, %xmm4
-; SSSE3-NEXT: psrad $31, %xmm4
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT: retq
;
@@ -1319,19 +1319,19 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; X32-SSE2-LABEL: sext_8i32_to_8i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
-; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: pxor %xmm4, %xmm4
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm5
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm4
-; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X32-SSE2-NEXT: retl
;
@@ -1452,8 +1452,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -1464,8 +1464,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -1487,8 +1487,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $24, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -1980,8 +1980,8 @@ define <4 x i64> @load_sext_4i8_to_4i64(
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -2049,8 +2049,8 @@ define <2 x i64> @load_sext_4i8_to_4i64_
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X32-SSE2-NEXT: psrad $24, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X32-SSE2-NEXT: retl
;
@@ -2487,15 +2487,15 @@ define <8 x i64> @load_sext_8i8_to_8i64(
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: pxor %xmm4, %xmm4
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; X32-SSE2-NEXT: psrad $24, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm4
-; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3]
@@ -5238,8 +5238,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -5248,8 +5248,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -5269,8 +5269,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -5336,8 +5336,8 @@ define <4 x i64> @load_sext_4i16_to_4i64
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -5348,8 +5348,8 @@ define <4 x i64> @load_sext_4i16_to_4i64
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -5384,8 +5384,8 @@ define <4 x i64> @load_sext_4i16_to_4i64
; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $16, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -5471,16 +5471,16 @@ define <2 x i64> @load_sext_2i32_to_2i64
; SSE2-LABEL: load_sext_2i32_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_2i32_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
@@ -5498,8 +5498,8 @@ define <2 x i64> @load_sext_2i32_to_2i64
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
;
@@ -5518,24 +5518,24 @@ define <4 x i64> @load_sext_4i32_to_4i64
; SSE2-LABEL: load_sext_4i32_to_4i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i32_to_4i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -5566,12 +5566,12 @@ define <4 x i64> @load_sext_4i32_to_4i64
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movdqa (%eax), %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -5638,12 +5638,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
@@ -5651,12 +5651,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; SSSE3: # %bb.0:
; SSSE3-NEXT: pslld $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -5698,12 +5698,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pslld $31, %xmm0
; X32-SSE2-NEXT: psrad $31, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -5726,8 +5726,8 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -5738,8 +5738,8 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
@@ -5776,8 +5776,8 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
Modified: llvm/trunk/test/CodeGen/X86/vector-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-sext.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll Sun Nov 18 20:33:20 2018
@@ -446,8 +446,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -456,8 +456,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -476,8 +476,8 @@ define <2 x i64> @sext_16i8_to_2i64(<16
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $24, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -497,16 +497,16 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm2, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: psrad $24, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
@@ -514,16 +514,16 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: psrad $24, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
@@ -557,16 +557,16 @@ define <4 x i64> @sext_16i8_to_4i64(<16
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X32-SSE2-NEXT: psrad $24, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
; X32-SSE2-NEXT: retl
;
@@ -586,62 +586,62 @@ entry:
define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_8i64:
; SSE2: # %bb.0: # %entry
+; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm4, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: psrad $24, %xmm4
-; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: psrad $24, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: psrad $24, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSE2-NEXT: movdqa %xmm2, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
; SSE2-NEXT: psrad $24, %xmm2
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: psrad $24, %xmm3
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; SSE2-NEXT: movdqa %xmm4, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_8i64:
; SSSE3: # %bb.0: # %entry
+; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm4, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
-; SSSE3-NEXT: psrad $24, %xmm4
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT: pxor %xmm4, %xmm4
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm2
+; SSSE3-NEXT: psrad $24, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: psrad $24, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; SSSE3-NEXT: movdqa %xmm2, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
; SSSE3-NEXT: psrad $24, %xmm2
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm3, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
; SSSE3-NEXT: psrad $24, %xmm3
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; SSSE3-NEXT: movdqa %xmm4, %xmm0
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_8i64:
@@ -686,32 +686,32 @@ define <8 x i64> @sext_16i8_to_8i64(<16
;
; X32-SSE2-LABEL: sext_16i8_to_8i64:
; X32-SSE2: # %bb.0: # %entry
+; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
-; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
-; X32-SSE2-NEXT: psrad $24, %xmm4
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X32-SSE2-NEXT: pxor %xmm4, %xmm4
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; X32-SSE2-NEXT: psrad $24, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: psrad $24, %xmm1
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
-; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pcmpgtd %xmm2, %xmm5
; X32-SSE2-NEXT: psrad $24, %xmm2
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
-; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
-; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,2,3]
+; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X32-SSE2-NEXT: psrad $24, %xmm3
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
-; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X32-SSE2-NEXT: retl
;
; X32-SSE41-LABEL: sext_16i8_to_8i64:
@@ -939,8 +939,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; SSE2-LABEL: sext_8i16_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -948,8 +948,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; SSSE3-LABEL: sext_8i16_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -967,8 +967,8 @@ define <2 x i64> @sext_8i16_to_2i64(<8 x
; X32-SSE2-LABEL: sext_8i16_to_2i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -987,30 +987,30 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; SSE2-LABEL: sext_8i16_to_4i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm2, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm1, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: psrad $16, %xmm1
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_4i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: psrad $16, %xmm1
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
@@ -1043,15 +1043,15 @@ define <4 x i64> @sext_8i16_to_4i64(<8 x
; X32-SSE2-LABEL: sext_8i16_to_4i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: psrad $16, %xmm1
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-SSE2-NEXT: movdqa %xmm2, %xmm0
; X32-SSE2-NEXT: retl
;
@@ -1072,52 +1072,52 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; SSE2-LABEL: sext_8i16_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; SSE2-NEXT: movdqa %xmm4, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT: movdqa %xmm2, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm0
-; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: psrad $16, %xmm3
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; SSSE3-NEXT: movdqa %xmm4, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm1
; SSSE3-NEXT: psrad $16, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSSE3-NEXT: movdqa %xmm2, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm1, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSSE3-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm3, %xmm0
-; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
; SSSE3-NEXT: psrad $16, %xmm3
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: retq
;
@@ -1163,26 +1163,26 @@ define <8 x i64> @sext_8i16_to_8i64(<8 x
; X32-SSE2-LABEL: sext_8i16_to_8i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
-; X32-SSE2-NEXT: movdqa %xmm4, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm4, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm4
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; X32-SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; X32-SSE2-NEXT: movdqa %xmm2, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: psrad $16, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm0[0,2,2,3,4,5,6,7]
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm0
-; X32-SSE2-NEXT: psrad $31, %xmm0
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; X32-SSE2-NEXT: psrad $16, %xmm3
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; X32-SSE2-NEXT: movdqa %xmm4, %xmm0
; X32-SSE2-NEXT: retl
;
@@ -1205,15 +1205,15 @@ entry:
define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_2i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i32_to_2i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
@@ -1229,8 +1229,8 @@ define <2 x i64> @sext_4i32_to_2i64(<4 x
;
; X32-SSE2-LABEL: sext_4i32_to_2i64:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
;
@@ -1247,23 +1247,23 @@ entry:
define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_4i32_to_4i64:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i32_to_4i64:
; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -1295,12 +1295,12 @@ define <4 x i64> @sext_4i32_to_4i64(<4 x
;
; X32-SSE2-LABEL: sext_4i32_to_4i64:
; X32-SSE2: # %bb.0: # %entry
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -1320,38 +1320,38 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; SSE2-LABEL: sext_8i32_to_8i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrad $31, %xmm4
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i32_to_8i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: movdqa %xmm0, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
-; SSSE3-NEXT: movdqa %xmm1, %xmm4
-; SSSE3-NEXT: psrad $31, %xmm4
+; SSSE3-NEXT: pxor %xmm4, %xmm4
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm3
-; SSSE3-NEXT: psrad $31, %xmm3
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; SSSE3-NEXT: movdqa %xmm3, %xmm4
-; SSSE3-NEXT: psrad $31, %xmm4
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT: retq
;
@@ -1397,19 +1397,19 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x
; X32-SSE2-LABEL: sext_8i32_to_8i64:
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm4
-; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: pxor %xmm4, %xmm4
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; X32-SSE2-NEXT: pxor %xmm5, %xmm5
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm5
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm3
-; X32-SSE2-NEXT: psrad $31, %xmm3
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm4
-; X32-SSE2-NEXT: psrad $31, %xmm4
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X32-SSE2-NEXT: retl
;
@@ -1530,8 +1530,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; SSE2-NEXT: movd %eax, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -1542,8 +1542,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; SSSE3-NEXT: movd %eax, %xmm0
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -1565,8 +1565,8 @@ define <2 x i64> @load_sext_2i8_to_2i64(
; X32-SSE2-NEXT: movd %eax, %xmm0
; X32-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X32-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $24, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -5377,8 +5377,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
@@ -5387,8 +5387,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
@@ -5408,8 +5408,8 @@ define <2 x i64> @load_sext_2i16_to_2i64
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: psrad $16, %xmm0
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
@@ -5633,16 +5633,16 @@ define <2 x i64> @load_sext_2i32_to_2i64
; SSE2-LABEL: load_sext_2i32_to_2i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_2i32_to_2i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: psrad $31, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
@@ -5660,8 +5660,8 @@ define <2 x i64> @load_sext_2i32_to_2i64
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrad $31, %xmm1
+; X32-SSE2-NEXT: pxor %xmm1, %xmm1
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-SSE2-NEXT: retl
;
@@ -5680,24 +5680,24 @@ define <4 x i64> @load_sext_4i32_to_4i64
; SSE2-LABEL: load_sext_4i32_to_4i64:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: load_sext_4i32_to_4i64:
; SSSE3: # %bb.0: # %entry
; SSSE3-NEXT: movdqa (%rdi), %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -5728,12 +5728,12 @@ define <4 x i64> @load_sext_4i32_to_4i64
; X32-SSE2: # %bb.0: # %entry
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movdqa (%eax), %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -5808,12 +5808,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
@@ -5821,12 +5821,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; SSSE3: # %bb.0:
; SSSE3-NEXT: pslld $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -5868,12 +5868,12 @@ define <4 x i64> @sext_4i1_to_4i64(<4 x
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pslld $31, %xmm0
; X32-SSE2-NEXT: psrad $31, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
@@ -5895,12 +5895,12 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; SSE2: # %bb.0:
; SSE2-NEXT: pslld $24, %xmm0
; SSE2-NEXT: psrad $24, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: movdqa %xmm1, %xmm2
-; SSE2-NEXT: psrad $31, %xmm2
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
@@ -5908,12 +5908,12 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; SSSE3: # %bb.0:
; SSSE3-NEXT: pslld $24, %xmm0
; SSSE3-NEXT: psrad $24, %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: pxor %xmm2, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: movdqa %xmm1, %xmm2
-; SSSE3-NEXT: psrad $31, %xmm2
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
@@ -5955,12 +5955,12 @@ define <4 x i64> @sext_4i8_to_4i64(<4 x
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pslld $24, %xmm0
; X32-SSE2-NEXT: psrad $24, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: pxor %xmm2, %xmm2
+; X32-SSE2-NEXT: pxor %xmm3, %xmm3
+; X32-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; X32-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; X32-SSE2-NEXT: movdqa %xmm1, %xmm2
-; X32-SSE2-NEXT: psrad $31, %xmm2
+; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
+; X32-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
; X32-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-SSE2-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math-widen.ll Sun Nov 18 20:33:20 2018
@@ -5569,39 +5569,40 @@ define <4 x i32> @mul_add_const_v4i64_v4
define <4 x i32> @mul_add_self_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_self_v4i64_v4i32:
; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psrad $31, %xmm4
-; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psrad $31, %xmm5
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
-; SSE-NEXT: movdqa %xmm6, %xmm7
-; SSE-NEXT: psrad $31, %xmm7
-; SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm0, %xmm2
-; SSE-NEXT: pmuludq %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm1, %xmm5
-; SSE-NEXT: paddq %xmm5, %xmm2
-; SSE-NEXT: psllq $32, %xmm2
-; SSE-NEXT: paddq %xmm0, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm3, %xmm0
-; SSE-NEXT: pmuludq %xmm6, %xmm3
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm6, %xmm1
-; SSE-NEXT: paddq %xmm1, %xmm0
-; SSE-NEXT: psllq $32, %xmm0
-; SSE-NEXT: paddq %xmm3, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
-; SSE-NEXT: paddd %xmm2, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: pxor %xmm8, %xmm8
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: pxor %xmm7, %xmm7
+; SSE-NEXT: pcmpgtd %xmm0, %xmm7
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE-NEXT: pxor %xmm6, %xmm6
+; SSE-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE-NEXT: pxor %xmm5, %xmm5
+; SSE-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm1, %xmm7
+; SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm8[0],xmm5[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm0, %xmm5
+; SSE-NEXT: paddq %xmm7, %xmm5
+; SSE-NEXT: psllq $32, %xmm5
+; SSE-NEXT: pmuludq %xmm0, %xmm1
+; SSE-NEXT: paddq %xmm5, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm4, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm2, %xmm6
+; SSE-NEXT: paddq %xmm3, %xmm6
+; SSE-NEXT: psllq $32, %xmm6
+; SSE-NEXT: pmuludq %xmm2, %xmm4
+; SSE-NEXT: paddq %xmm6, %xmm4
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2]
+; SSE-NEXT: paddd %xmm1, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_add_self_v4i64_v4i32:
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=347181&r1=347180&r2=347181&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Sun Nov 18 20:33:20 2018
@@ -5569,39 +5569,40 @@ define <4 x i32> @mul_add_const_v4i64_v4
define <4 x i32> @mul_add_self_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: mul_add_self_v4i64_v4i32:
; SSE: # %bb.0:
-; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
-; SSE-NEXT: movdqa %xmm3, %xmm4
-; SSE-NEXT: psrad $31, %xmm4
-; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; SSE-NEXT: movdqa %xmm0, %xmm5
-; SSE-NEXT: psrad $31, %xmm5
-; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm1[2,3,0,1]
-; SSE-NEXT: movdqa %xmm6, %xmm7
-; SSE-NEXT: psrad $31, %xmm7
-; SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm0, %xmm2
-; SSE-NEXT: pmuludq %xmm1, %xmm0
-; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm1, %xmm5
-; SSE-NEXT: paddq %xmm5, %xmm2
-; SSE-NEXT: psllq $32, %xmm2
-; SSE-NEXT: paddq %xmm0, %xmm2
-; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm3, %xmm0
-; SSE-NEXT: pmuludq %xmm6, %xmm3
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,1,1,3]
-; SSE-NEXT: pmuludq %xmm6, %xmm1
-; SSE-NEXT: paddq %xmm1, %xmm0
-; SSE-NEXT: psllq $32, %xmm0
-; SSE-NEXT: paddq %xmm3, %xmm0
-; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
-; SSE-NEXT: paddd %xmm2, %xmm2
-; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE-NEXT: pxor %xmm8, %xmm8
+; SSE-NEXT: pxor %xmm3, %xmm3
+; SSE-NEXT: pcmpgtd %xmm2, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; SSE-NEXT: pxor %xmm7, %xmm7
+; SSE-NEXT: pcmpgtd %xmm0, %xmm7
+; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; SSE-NEXT: pxor %xmm6, %xmm6
+; SSE-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
+; SSE-NEXT: pxor %xmm5, %xmm5
+; SSE-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
+; SSE-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm1, %xmm7
+; SSE-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm8[0],xmm5[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm0, %xmm5
+; SSE-NEXT: paddq %xmm7, %xmm5
+; SSE-NEXT: psllq $32, %xmm5
+; SSE-NEXT: pmuludq %xmm0, %xmm1
+; SSE-NEXT: paddq %xmm5, %xmm1
+; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm4, %xmm3
+; SSE-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1]
+; SSE-NEXT: pmuludq %xmm2, %xmm6
+; SSE-NEXT: paddq %xmm3, %xmm6
+; SSE-NEXT: psllq $32, %xmm6
+; SSE-NEXT: pmuludq %xmm2, %xmm4
+; SSE-NEXT: paddq %xmm6, %xmm4
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm4[0,2]
+; SSE-NEXT: paddd %xmm1, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_add_self_v4i64_v4i32:
More information about the llvm-commits
mailing list