[llvm] r341762 - [SelectionDAG] enhance vector demanded elements to look at a vector select condition operand
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 9 07:13:22 PDT 2018
Author: spatel
Date: Sun Sep 9 07:13:22 2018
New Revision: 341762
URL: http://llvm.org/viewvc/llvm-project?rev=341762&view=rev
Log:
[SelectionDAG] enhance vector demanded elements to look at a vector select condition operand
This is the DAG equivalent of D51433.
If we know we're not using all vector lanes, use that knowledge to potentially simplify a vselect condition.
The reduction/horizontal tests show that we now eliminate AVX1 operations on the upper half of 256-bit
vectors because those upper lanes are not demanded anyway.
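For reference, here is a hypothetical IR sketch of the kind of pattern this helps (names are illustrative,
modeled loosely on the test_v4i64 reductions in the modified tests): only lane 0 of the final select is
extracted, so the condition lanes covering the upper 128-bit half of the first compare never need to be
materialized on AVX1.
; Hypothetical smax reduction: the final extractelement demands only lane 0,
; so SimplifyDemandedVectorElts can drop the upper-half lanes of %cmp1.
define i64 @smax_v4i64(<4 x i64> %a0) {
  %shuf1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %cmp1 = icmp sgt <4 x i64> %a0, %shuf1
  %sel1 = select <4 x i1> %cmp1, <4 x i64> %a0, <4 x i64> %shuf1
  %shuf2 = shufflevector <4 x i64> %sel1, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %cmp2 = icmp sgt <4 x i64> %sel1, %shuf2
  %sel2 = select <4 x i1> %cmp2, <4 x i64> %sel1, <4 x i64> %shuf2
  %res = extractelement <4 x i64> %sel2, i32 0
  ret i64 %res
}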
I'm not sure what the pr34592 test is showing. It runs at -O0; is SimplifyDemandedVectorElts even supposed
to be running there?
Differential Revision: https://reviews.llvm.org/D51696
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
llvm/trunk/test/CodeGen/X86/pr34592.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sun Sep 9 07:13:22 2018
@@ -1532,12 +1532,20 @@ bool TargetLowering::SimplifyDemandedVec
break;
}
case ISD::VSELECT: {
- APInt DemandedLHS(DemandedElts);
- APInt DemandedRHS(DemandedElts);
-
- // TODO - add support for constant vselect masks.
+ // Try to transform the select condition based on the current demanded
+ // elements.
+ // TODO: If a condition element is undef, we can choose from one arm of the
+ // select (and if one arm is undef, then we can propagate that to the
+ // result).
+ // TODO - add support for constant vselect masks (see IR version of this).
+ APInt UnusedUndef, UnusedZero;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
+ UnusedZero, TLO, Depth + 1))
+ return true;
// See if we can simplify either vselect operand.
+ APInt DemandedLHS(DemandedElts);
+ APInt DemandedRHS(DemandedElts);
APInt UndefLHS, ZeroLHS;
APInt UndefRHS, ZeroRHS;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll Sun Sep 9 07:13:22 2018
@@ -469,9 +469,6 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -548,9 +545,6 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1159,9 +1153,6 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1283,9 +1274,6 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll Sun Sep 9 07:13:22 2018
@@ -472,9 +472,6 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -552,9 +549,6 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1163,9 +1157,6 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1287,9 +1278,6 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll Sun Sep 9 07:13:22 2018
@@ -535,12 +535,8 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -631,12 +627,8 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1270,12 +1262,8 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1422,12 +1410,8 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll Sun Sep 9 07:13:22 2018
@@ -473,12 +473,8 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -571,12 +567,8 @@ define i64 @test_reduce_v4i64(<4 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
@@ -1172,12 +1164,8 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X86-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X86-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X86-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X86-AVX1-NEXT: vmovd %xmm0, %eax
; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
@@ -1326,12 +1314,8 @@ define i64 @test_reduce_v8i64(<8 x i64>
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; X64-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; X64-AVX1-NEXT: vmovq %xmm0, %rax
; X64-AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/pr34592.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34592.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34592.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34592.ll Sun Sep 9 07:13:22 2018
@@ -19,31 +19,30 @@ define <16 x i64> @pluto(<16 x i64> %arg
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7]
; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
-; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1],ymm8[2,3,4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm11 = ymm6[0,1,2,3],ymm11[4,5],ymm6[6,7]
; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
-; CHECK-NEXT: vmovdqa %xmm9, %xmm11
-; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
-; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
+; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovdqa %xmm9, %xmm0
+; CHECK-NEXT: # kill: def $ymm0 killed $xmm0
+; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # implicit-def: $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5],ymm11[6,7]
; CHECK-NEXT: vmovaps %xmm2, %xmm9
; CHECK-NEXT: # implicit-def: $ymm2
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
-; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm6 # 32-byte Reload
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm6[0],ymm7[2],ymm6[2]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm7[0,1],ymm6[2,3],ymm7[4,5],ymm6[6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vmovaps %xmm7, %xmm9
; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
; CHECK-NEXT: # implicit-def: $ymm6
; CHECK-NEXT: vmovaps %xmm9, %xmm6
+; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
@@ -56,9 +55,9 @@ define <16 x i64> @pluto(<16 x i64> %arg
; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm6, %ymm3
; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smax.ll Sun Sep 9 07:13:22 2018
@@ -158,9 +158,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -343,9 +340,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -645,9 +639,6 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-smin.ll Sun Sep 9 07:13:22 2018
@@ -157,9 +157,6 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -342,9 +339,6 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -644,9 +638,6 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umax.ll Sun Sep 9 07:13:22 2018
@@ -164,12 +164,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -364,12 +360,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -693,10 +685,6 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll?rev=341762&r1=341761&r2=341762&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-umin.ll Sun Sep 9 07:13:22 2018
@@ -163,12 +163,8 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
-; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
+; AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -363,12 +359,8 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
-; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
+; AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper
@@ -692,10 +684,6 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX1-NEXT: vxorpd %xmm4, %xmm0, %xmm2
; AVX1-NEXT: vxorpd %xmm4, %xmm1, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vzeroupper