[llvm] r349057 - [X86][SSE] Add SSE vector imm/var shift support to SimplifyDemandedVectorEltsForTargetNode
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 13 08:39:29 PST 2018
Author: rksimon
Date: Thu Dec 13 08:39:29 2018
New Revision: 349057
URL: http://llvm.org/viewvc/llvm-project?rev=349057&view=rev
Log:
[X86][SSE] Add SSE vector imm/var shift support to SimplifyDemandedVectorEltsForTargetNode
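This adds the X86ISD::VSHLI/VSRLI/VSRAI immediate shift opcodes to
SimplifyDemandedVectorEltsForTargetNode, simplifying the demanded elements of
the shift's source operand. The existing variable shift case, which already
simplifies the demanded elements of its shift amount operand, now falls
through into the new immediate shift cases so both forms share the source
operand handling. The generic target shuffle handling later in the function
also clears KnownZero/KnownUndef before analysing the shuffle mask, so bits
set by the opcode-specific handling above cannot leak into the shuffle
simplification.

As a rough illustration of the pattern this helps (a hypothetical example in
the spirit of the signbits_ashr_insert_ashr_extract_sitofp diff below, not one
of the committed tests): when only one lane of a uniform vector shift is ever
extracted, the inserts building the other lanes become dead and can now be
dropped at the DAG level.

  ; Hypothetical llc-level sketch; the function name and values are made up.
  ; Only lane 0 of the shifted vector is used, so the insert into lane 1 is a
  ; dead element that the new VSRLI demanded-elts handling can remove.
  define i64 @demanded_lane0_of_shift(i64 %a, i64 %b) {
    %v0 = insertelement <2 x i64> undef, i64 %a, i32 0
    %v1 = insertelement <2 x i64> %v0, i64 %b, i32 1
    %s = lshr <2 x i64> %v1, <i64 3, i64 3>
    %r = extractelement <2 x i64> %s, i32 0
    ret i64 %r
  }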
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
llvm/trunk/test/CodeGen/X86/packss.ll
llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 13 08:39:29 2018
@@ -32285,6 +32285,17 @@ bool X86TargetLowering::SimplifyDemanded
if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
Depth + 1))
return true;
+ LLVM_FALLTHROUGH;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI: {
+ SDValue Src = Op.getOperand(0);
+ APInt SrcUndef;
+ if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
+ Depth + 1))
+ return true;
+ // TODO convert SrcUndef to KnownUndef.
break;
}
case X86ISD::CVTSI2P:
@@ -32359,6 +32370,10 @@ bool X86TargetLowering::SimplifyDemanded
[VT](SDValue V) { return VT != V.getValueType(); }))
return false;
+ // Clear known elts that might have been set above.
+ KnownZero.clearAllBits();
+ KnownUndef.clearAllBits();
+
// Check if shuffle mask can be simplified to undef/zero/identity.
int NumSrcs = OpInputs.size();
for (int i = 0; i != NumElts; ++i) {
Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Thu Dec 13 08:39:29 2018
@@ -165,8 +165,6 @@ define float @signbits_ashr_insert_ashr_
; X32-NEXT: sarl $30, %ecx
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: vpsrlq $3, %xmm0, %xmm0
; X32-NEXT: vmovd %xmm0, %eax
; X32-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
@@ -178,12 +176,10 @@ define float @signbits_ashr_insert_ashr_
; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
; X64: # %bb.0:
; X64-NEXT: sarq $30, %rdi
-; X64-NEXT: vmovq %rsi, %xmm0
-; X64-NEXT: vmovq %rdi, %xmm1
-; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vmovq %rdi, %xmm0
; X64-NEXT: vpsrlq $3, %xmm0, %xmm0
; X64-NEXT: vmovq %xmm0, %rax
-; X64-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0
; X64-NEXT: retq
%1 = ashr i64 %a0, 30
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Thu Dec 13 08:39:29 2018
@@ -158,21 +158,17 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X86-SSE: # %bb.0:
; X86-SSE-NEXT: movdqa %xmm1, %xmm2
; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm1, %xmm3
-; X86-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X86-SSE-NEXT: movdqa %xmm0, %xmm2
-; X86-SSE-NEXT: psllq $63, %xmm2
-; X86-SSE-NEXT: movdqa %xmm0, %xmm4
-; X86-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
-; X86-SSE-NEXT: psrlq $63, %xmm4
-; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X86-SSE-NEXT: movapd {{.*#+}} xmm2 = [4.9406564584124654E-324,-0.0E+0]
-; X86-SSE-NEXT: xorpd %xmm2, %xmm0
-; X86-SSE-NEXT: psubq %xmm2, %xmm0
+; X86-SSE-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE-NEXT: psllq $63, %xmm3
; X86-SSE-NEXT: psrlq $63, %xmm3
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
-; X86-SSE-NEXT: xorpd %xmm2, %xmm1
-; X86-SSE-NEXT: psubq %xmm2, %xmm1
+; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X86-SSE-NEXT: movapd {{.*#+}} xmm3 = [4.9406564584124654E-324,-0.0E+0]
+; X86-SSE-NEXT: xorpd %xmm3, %xmm0
+; X86-SSE-NEXT: psubq %xmm3, %xmm0
+; X86-SSE-NEXT: psrlq $63, %xmm2
+; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X86-SSE-NEXT: xorpd %xmm3, %xmm1
+; X86-SSE-NEXT: psubq %xmm3, %xmm1
; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X86-SSE-NEXT: packssdw %xmm1, %xmm0
@@ -181,10 +177,8 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X86-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X86-AVX1: # %bb.0:
; X86-AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X86-AVX1-NEXT: vpsllq $63, %xmm2, %xmm3
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; X86-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,0,0,0,0,0,0,32768]
@@ -220,21 +214,17 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movdqa %xmm1, %xmm2
; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm1, %xmm3
-; X64-SSE-NEXT: movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X64-SSE-NEXT: movdqa %xmm0, %xmm2
-; X64-SSE-NEXT: psllq $63, %xmm2
-; X64-SSE-NEXT: movdqa %xmm0, %xmm4
-; X64-SSE-NEXT: movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
-; X64-SSE-NEXT: psrlq $63, %xmm4
-; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X64-SSE-NEXT: movapd {{.*#+}} xmm2 = [1,9223372036854775808]
-; X64-SSE-NEXT: xorpd %xmm2, %xmm0
-; X64-SSE-NEXT: psubq %xmm2, %xmm0
+; X64-SSE-NEXT: movdqa %xmm0, %xmm3
+; X64-SSE-NEXT: psllq $63, %xmm3
; X64-SSE-NEXT: psrlq $63, %xmm3
-; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
-; X64-SSE-NEXT: xorpd %xmm2, %xmm1
-; X64-SSE-NEXT: psubq %xmm2, %xmm1
+; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X64-SSE-NEXT: movapd {{.*#+}} xmm3 = [1,9223372036854775808]
+; X64-SSE-NEXT: xorpd %xmm3, %xmm0
+; X64-SSE-NEXT: psubq %xmm3, %xmm0
+; X64-SSE-NEXT: psrlq $63, %xmm2
+; X64-SSE-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X64-SSE-NEXT: xorpd %xmm3, %xmm1
+; X64-SSE-NEXT: psubq %xmm3, %xmm1
; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-SSE-NEXT: packssdw %xmm1, %xmm0
@@ -243,10 +233,8 @@ define <8 x i16> @trunc_ashr_v4i64_deman
; X64-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vpsllq $63, %xmm0, %xmm1
-; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-NEXT: vpsllq $63, %xmm2, %xmm3
-; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
; X64-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; X64-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,9223372036854775808]
Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll Thu Dec 13 08:39:29 2018
@@ -595,8 +595,7 @@ define void @test_sdiv_pow2_v2i32(<2 x i
; X64-LABEL: test_sdiv_pow2_v2i32:
; X64: # %bb.0:
; X64-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT: pxor %xmm1, %xmm1
-; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,1]
; X64-NEXT: psrad $31, %xmm1
; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
@@ -619,14 +618,13 @@ define void @test_sdiv_pow2_v2i32(<2 x i
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT: pxor %xmm1, %xmm1
-; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT: psrad $31, %xmm1
-; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
-; X86-NEXT: movdqa %xmm0, %xmm1
-; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT: psrad $31, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
+; X86-NEXT: movdqa %xmm1, %xmm0
+; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
; X86-NEXT: movdqa {{.*#+}} xmm2 = [31,0,31,0]
; X86-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
; X86-NEXT: movdqa %xmm3, %xmm4
@@ -635,22 +633,22 @@ define void @test_sdiv_pow2_v2i32(<2 x i
; X86-NEXT: movd %ecx, %xmm5
; X86-NEXT: psrlq %xmm5, %xmm3
; X86-NEXT: movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
-; X86-NEXT: movdqa %xmm1, %xmm4
+; X86-NEXT: movdqa %xmm0, %xmm4
; X86-NEXT: psrlq %xmm2, %xmm4
-; X86-NEXT: psrlq %xmm5, %xmm1
-; X86-NEXT: movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
-; X86-NEXT: xorpd %xmm3, %xmm1
-; X86-NEXT: psubq %xmm3, %xmm1
-; X86-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT: psrlq $29, %xmm1
-; X86-NEXT: paddq %xmm0, %xmm1
-; X86-NEXT: psllq $32, %xmm1
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
-; X86-NEXT: psrad $31, %xmm1
-; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-NEXT: psrlq $3, %xmm0
-; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT: psrlq %xmm5, %xmm0
+; X86-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
+; X86-NEXT: xorpd %xmm3, %xmm0
+; X86-NEXT: psubq %xmm3, %xmm0
+; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X86-NEXT: psrlq $29, %xmm0
+; X86-NEXT: paddq %xmm1, %xmm0
+; X86-NEXT: psllq $32, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; X86-NEXT: psrad $31, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT: psrlq $3, %xmm1
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
; X86-NEXT: movq %xmm0, (%eax)
; X86-NEXT: retl
;
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll Thu Dec 13 08:39:29 2018
@@ -720,11 +720,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; SSE41-NEXT: psllq $32, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE41-NEXT: psrad $31, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
+; SSE41-NEXT: psrad $31, %xmm3
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm3, %xmm2
; SSE41-NEXT: pxor %xmm0, %xmm2
@@ -746,11 +745,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
@@ -761,11 +759,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX2-NEXT: vpsllq $32, %xmm0, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll Thu Dec 13 08:39:29 2018
@@ -719,11 +719,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; SSE41-NEXT: psllq $32, %xmm1
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT: pxor %xmm2, %xmm2
-; SSE41-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE41-NEXT: psrad $31, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
-; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
+; SSE41-NEXT: psrad $31, %xmm3
+; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pxor %xmm0, %xmm2
@@ -745,11 +744,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX1-NEXT: vpsllq $32, %xmm0, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %eax
@@ -760,11 +758,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
; AVX2-NEXT: vpsllq $32, %xmm0, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX2-NEXT: vpsrad $31, %xmm2, %xmm2
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %eax