[llvm] r349057 - [X86][SSE] Add SSE vector imm/var shift support to SimplifyDemandedVectorEltsForTargetNode

Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org>
Thu Dec 13 08:39:29 PST 2018


Author: rksimon
Date: Thu Dec 13 08:39:29 2018
New Revision: 349057

URL: http://llvm.org/viewvc/llvm-project?rev=349057&view=rev
Log:
[X86][SSE] Add SSE vector imm/var shift support to SimplifyDemandedVectorEltsForTargetNode
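
This lets SimplifyDemandedVectorElts look through the X86 immediate shift nodes
(X86ISD::VSHLI/VSRLI/VSRAI) and, via the added fallthrough, the variable-shift
forms as well, so that the caller's demanded elements propagate into the shifted
source operand. A minimal LLVM IR sketch of the kind of pattern that now
simplifies (a hypothetical reduction in the spirit of the
signbits_ashr_insert_ashr_extract_sitofp test below, not the actual test body):

  define i64 @demanded_elt_shift_example(i64 %x, i64 %y) {
    %v0 = insertelement <2 x i64> undef, i64 %x, i32 0
    %v1 = insertelement <2 x i64> %v0, i64 %y, i32 1   ; lane 1 is never read below
    %s  = lshr <2 x i64> %v1, <i64 3, i64 3>           ; selected as an X86ISD::VSRLI
    %r  = extractelement <2 x i64> %s, i32 0           ; only lane 0 is demanded
    ret i64 %r
  }

Since only lane 0 of the shift result is demanded, the insert into lane 1 is
recognised as dead, which is why the extra vmovq/vpunpcklqdq pair (and the spare
vpinsrd lanes on X32) disappear from the real test's codegen in the diffs below.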

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
    llvm/trunk/test/CodeGen/X86/packss.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
    llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
    llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Dec 13 08:39:29 2018
@@ -32285,6 +32285,17 @@ bool X86TargetLowering::SimplifyDemanded
     if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO,
                                    Depth + 1))
       return true;
+    LLVM_FALLTHROUGH;
+  }
+  case X86ISD::VSHLI:
+  case X86ISD::VSRLI:
+  case X86ISD::VSRAI: {
+    SDValue Src = Op.getOperand(0);
+    APInt SrcUndef;
+    if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO,
+                                   Depth + 1))
+      return true;
+    // TODO convert SrcUndef to KnownUndef.
     break;
   }
   case X86ISD::CVTSI2P:
@@ -32359,6 +32370,10 @@ bool X86TargetLowering::SimplifyDemanded
                    [VT](SDValue V) { return VT != V.getValueType(); }))
     return false;
 
+  // Clear known elts that might have been set above.
+  KnownZero.clearAllBits();
+  KnownUndef.clearAllBits();
+
   // Check if shuffle mask can be simplified to undef/zero/identity.
   int NumSrcs = OpInputs.size();
   for (int i = 0; i != NumElts; ++i) {

Modified: llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-signbits-vector.ll Thu Dec 13 08:39:29 2018
@@ -165,8 +165,6 @@ define float @signbits_ashr_insert_ashr_
 ; X32-NEXT:    sarl $30, %ecx
 ; X32-NEXT:    vmovd %eax, %xmm0
 ; X32-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
 ; X32-NEXT:    vpsrlq $3, %xmm0, %xmm0
 ; X32-NEXT:    vmovd %xmm0, %eax
 ; X32-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
@@ -178,12 +176,10 @@ define float @signbits_ashr_insert_ashr_
 ; X64-LABEL: signbits_ashr_insert_ashr_extract_sitofp:
 ; X64:       # %bb.0:
 ; X64-NEXT:    sarq $30, %rdi
-; X64-NEXT:    vmovq %rsi, %xmm0
-; X64-NEXT:    vmovq %rdi, %xmm1
-; X64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT:    vmovq %rdi, %xmm0
 ; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0
 ; X64-NEXT:    vmovq %xmm0, %rax
-; X64-NEXT:    vcvtsi2ssl %eax, %xmm2, %xmm0
+; X64-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
 ; X64-NEXT:    retq
   %1 = ashr i64 %a0, 30
   %2 = insertelement <2 x i64> undef, i64 %1, i32 0

Modified: llvm/trunk/test/CodeGen/X86/packss.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/packss.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/packss.ll (original)
+++ llvm/trunk/test/CodeGen/X86/packss.ll Thu Dec 13 08:39:29 2018
@@ -158,21 +158,17 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X86-SSE:       # %bb.0:
 ; X86-SSE-NEXT:    movdqa %xmm1, %xmm2
 ; X86-SSE-NEXT:    psllq $63, %xmm2
-; X86-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X86-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE-NEXT:    psllq $63, %xmm2
-; X86-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
-; X86-SSE-NEXT:    psrlq $63, %xmm4
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X86-SSE-NEXT:    movapd {{.*#+}} xmm2 = [4.9406564584124654E-324,-0.0E+0]
-; X86-SSE-NEXT:    xorpd %xmm2, %xmm0
-; X86-SSE-NEXT:    psubq %xmm2, %xmm0
+; X86-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X86-SSE-NEXT:    psllq $63, %xmm3
 ; X86-SSE-NEXT:    psrlq $63, %xmm3
-; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
-; X86-SSE-NEXT:    xorpd %xmm2, %xmm1
-; X86-SSE-NEXT:    psubq %xmm2, %xmm1
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X86-SSE-NEXT:    movapd {{.*#+}} xmm3 = [4.9406564584124654E-324,-0.0E+0]
+; X86-SSE-NEXT:    xorpd %xmm3, %xmm0
+; X86-SSE-NEXT:    psubq %xmm3, %xmm0
+; X86-SSE-NEXT:    psrlq $63, %xmm2
+; X86-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X86-SSE-NEXT:    xorpd %xmm3, %xmm1
+; X86-SSE-NEXT:    psubq %xmm3, %xmm1
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X86-SSE-NEXT:    packssdw %xmm1, %xmm0
@@ -181,10 +177,8 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X86-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
 ; X86-AVX1:       # %bb.0:
 ; X86-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
-; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X86-AVX1-NEXT:    vpsllq $63, %xmm2, %xmm3
-; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
 ; X86-AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
 ; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
 ; X86-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,0,0,0,0,0,0,32768]
@@ -220,21 +214,17 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X64-SSE:       # %bb.0:
 ; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
 ; X64-SSE-NEXT:    psllq $63, %xmm2
-; X64-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm3 = xmm2[0],xmm3[1]
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm2
-; X64-SSE-NEXT:    psllq $63, %xmm2
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm4 = xmm2[0],xmm4[1]
-; X64-SSE-NEXT:    psrlq $63, %xmm4
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
-; X64-SSE-NEXT:    movapd {{.*#+}} xmm2 = [1,9223372036854775808]
-; X64-SSE-NEXT:    xorpd %xmm2, %xmm0
-; X64-SSE-NEXT:    psubq %xmm2, %xmm0
+; X64-SSE-NEXT:    movdqa %xmm0, %xmm3
+; X64-SSE-NEXT:    psllq $63, %xmm3
 ; X64-SSE-NEXT:    psrlq $63, %xmm3
-; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm3[0],xmm1[1]
-; X64-SSE-NEXT:    xorpd %xmm2, %xmm1
-; X64-SSE-NEXT:    psubq %xmm2, %xmm1
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = xmm3[0],xmm0[1]
+; X64-SSE-NEXT:    movapd {{.*#+}} xmm3 = [1,9223372036854775808]
+; X64-SSE-NEXT:    xorpd %xmm3, %xmm0
+; X64-SSE-NEXT:    psubq %xmm3, %xmm0
+; X64-SSE-NEXT:    psrlq $63, %xmm2
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
+; X64-SSE-NEXT:    xorpd %xmm3, %xmm1
+; X64-SSE-NEXT:    psubq %xmm3, %xmm1
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; X64-SSE-NEXT:    packssdw %xmm1, %xmm0
@@ -243,10 +233,8 @@ define <8 x i16> @trunc_ashr_v4i64_deman
 ; X64-AVX1-LABEL: trunc_ashr_v4i64_demandedelts:
 ; X64-AVX1:       # %bb.0:
 ; X64-AVX1-NEXT:    vpsllq $63, %xmm0, %xmm1
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
 ; X64-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
 ; X64-AVX1-NEXT:    vpsllq $63, %xmm2, %xmm3
-; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
 ; X64-AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
 ; X64-AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
 ; X64-AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,9223372036854775808]

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-v2i32.ll Thu Dec 13 08:39:29 2018
@@ -595,8 +595,7 @@ define void @test_sdiv_pow2_v2i32(<2 x i
 ; X64-LABEL: test_sdiv_pow2_v2i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    pxor %xmm1, %xmm1
-; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,2,1]
 ; X64-NEXT:    psrad $31, %xmm1
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
 ; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
@@ -619,14 +618,13 @@ define void @test_sdiv_pow2_v2i32(<2 x i
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
-; X86-NEXT:    pxor %xmm1, %xmm1
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-NEXT:    psrad $31, %xmm1
-; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
-; X86-NEXT:    movdqa %xmm0, %xmm1
-; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-NEXT:    psrad $31, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,3,2,3]
+; X86-NEXT:    movdqa %xmm1, %xmm0
+; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
 ; X86-NEXT:    movdqa {{.*#+}} xmm2 = [31,0,31,0]
 ; X86-NEXT:    movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
 ; X86-NEXT:    movdqa %xmm3, %xmm4
@@ -635,22 +633,22 @@ define void @test_sdiv_pow2_v2i32(<2 x i
 ; X86-NEXT:    movd %ecx, %xmm5
 ; X86-NEXT:    psrlq %xmm5, %xmm3
 ; X86-NEXT:    movsd {{.*#+}} xmm3 = xmm4[0],xmm3[1]
-; X86-NEXT:    movdqa %xmm1, %xmm4
+; X86-NEXT:    movdqa %xmm0, %xmm4
 ; X86-NEXT:    psrlq %xmm2, %xmm4
-; X86-NEXT:    psrlq %xmm5, %xmm1
-; X86-NEXT:    movsd {{.*#+}} xmm1 = xmm4[0],xmm1[1]
-; X86-NEXT:    xorpd %xmm3, %xmm1
-; X86-NEXT:    psubq %xmm3, %xmm1
-; X86-NEXT:    pand {{\.LCPI.*}}, %xmm1
-; X86-NEXT:    psrlq $29, %xmm1
-; X86-NEXT:    paddq %xmm0, %xmm1
-; X86-NEXT:    psllq $32, %xmm1
-; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,3,2,3]
-; X86-NEXT:    psrad $31, %xmm1
-; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
-; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-NEXT:    psrlq $3, %xmm0
-; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-NEXT:    psrlq %xmm5, %xmm0
+; X86-NEXT:    movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
+; X86-NEXT:    xorpd %xmm3, %xmm0
+; X86-NEXT:    psubq %xmm3, %xmm0
+; X86-NEXT:    pand {{\.LCPI.*}}, %xmm0
+; X86-NEXT:    psrlq $29, %xmm0
+; X86-NEXT:    paddq %xmm1, %xmm0
+; X86-NEXT:    psllq $32, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
+; X86-NEXT:    psrad $31, %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT:    psrlq $3, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X86-NEXT:    movq %xmm0, (%eax)
 ; X86-NEXT:    retl
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll Thu Dec 13 08:39:29 2018
@@ -720,11 +720,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; SSE41-NEXT:    psllq $32, %xmm1
 ; SSE41-NEXT:    psrad $31, %xmm1
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE41-NEXT:    psrad $31, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
+; SSE41-NEXT:    psrad $31, %xmm3
+; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
 ; SSE41-NEXT:    movdqa %xmm3, %xmm2
 ; SSE41-NEXT:    pxor %xmm0, %xmm2
@@ -746,11 +745,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm1
 ; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
 ; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -761,11 +759,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX2-NEXT:    vpsllq $32, %xmm0, %xmm1
 ; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
 ; AVX2-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm2
 ; AVX2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax

Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll?rev=349057&r1=349056&r2=349057&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll Thu Dec 13 08:39:29 2018
@@ -719,11 +719,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; SSE41-NEXT:    psllq $32, %xmm1
 ; SSE41-NEXT:    psrad $31, %xmm1
 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; SSE41-NEXT:    pxor %xmm2, %xmm2
-; SSE41-NEXT:    punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; SSE41-NEXT:    psrad $31, %xmm2
-; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,2,3,3]
-; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
+; SSE41-NEXT:    psrad $31, %xmm3
+; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm0 = [2147483648,2147483648]
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    pxor %xmm0, %xmm2
@@ -745,11 +744,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX1-NEXT:    vpsllq $32, %xmm0, %xmm1
 ; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
 ; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX1-NEXT:    vpsrad $31, %xmm2, %xmm2
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
 ; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX1-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
@@ -760,11 +758,10 @@ define i32 @test_v2i32(<2 x i32> %a0) {
 ; AVX2-NEXT:    vpsllq $32, %xmm0, %xmm1
 ; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm2[2],xmm0[2],xmm2[3],xmm0[3]
-; AVX2-NEXT:    vpsrad $31, %xmm2, %xmm2
-; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
-; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[2,2,3,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
 ; AVX2-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm2
 ; AVX2-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vmovd %xmm0, %eax
