[llvm] r325417 - [X86] Turn selects with constant condition into vector shuffles during DAG combine
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 16 16:30:30 PST 2018
Author: ctopper
Date: Fri Feb 16 16:30:30 2018
New Revision: 325417
URL: http://llvm.org/viewvc/llvm-project?rev=325417&view=rev
Log:
[X86] Turn selects with constant condition into vector shuffles during DAG combine
Summary:
Currently we convert these selects to shuffles during lowering. This moves the conversion to DAG combine so hopefully we can get it done before type legalization has to extend the condition.
I believe in some cases we're creating SHRUNKBLENDs that end up with constant conditions because we see the extend on the condition and think it's a dynamic select before DAG combine gets a chance to constant fold the extend. We could add combines to turn SHRUNKBLENDs with constant conditions back into vselects, but it seemed better to just send them to shuffles as early as possible so they never get a chance to become SHRUNKBLENDs. This is the reason some tests went from blends controlled by a constant pool load to just moves.
Some of the constant pool entries changed because the sign_extend introduced by type legalization turned undef elements in the select condition into 0s, while the select->shuffle conversion used -1 in the shuffle mask, so the shuffle lowering can do what it wants with them.
I'll remove the lowering code as a follow-up. We might be able to simplify some of the pre-checks for SHRUNKBLEND as the FIXME there says.
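As a rough illustration (not taken from the commit itself; the function and operand names are hypothetical), the combine turns a constant-condition select into a shuffle where true lanes pick from the first operand (indices 0..3 here) and false lanes from the second (indices 4..7):

define <4 x i64> @select_to_shuffle(<4 x i64> %A, <4 x i64> %B) {
  ; select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
  ; is, in DAG terms, equivalent to:
  %r = shufflevector <4 x i64> %A, <4 x i64> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i64> %r
}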
Reviewers: spatel, RKSimon, efriedma, zvi, andreadb
Reviewed By: spatel
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D43367
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash.ll
llvm/trunk/test/CodeGen/X86/pr34592.ll
llvm/trunk/test/CodeGen/X86/vector-blend.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=325417&r1=325416&r2=325417&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 16 16:30:30 2018
@@ -31543,6 +31543,23 @@ static SDValue combineSelect(SDNode *N,
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // Convert vselects with constant condition into shuffles.
+ if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) &&
+ DCI.isBeforeLegalizeOps()) {
+ SmallVector<int, 64> Mask(VT.getVectorNumElements(), -1);
+ for (int i = 0, Size = Mask.size(); i != Size; ++i) {
+ SDValue CondElt = Cond->getOperand(i);
+ Mask[i] = i;
+ // Arbitrarily choose from the 2nd operand if the select condition element
+ // is undef.
+ // TODO: Can we do better by matching patterns such as even/odd?
+ if (CondElt.isUndef() || isNullConstant(CondElt))
+ Mask[i] += Size;
+ }
+
+ return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
+ }
+
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
// instructions match the semantics of the common C idiom x<y?x:y but not
// x<=y?x:y, because of how they handle negative zero (which can be
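Note that the hunk above sends undef condition lanes to the second operand (Mask[i] += Size). As an illustrative sketch of that effect (not part of the commit; names are made up), a condition of <true, undef, false, false> produces the mask <0, 5, 6, 7>:

define <4 x i64> @undef_cond_lane(<4 x i64> %a, <4 x i64> %b) {
  ; select <4 x i1> <i1 true, i1 undef, i1 false, i1 false>, %a, %b is combined to:
  %r = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %r
}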
Modified: llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash.ll?rev=325417&r1=325416&r2=325417&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-vector-sext-crash.ll Fri Feb 16 16:30:30 2018
@@ -9,9 +9,7 @@
define <4 x i64> @foo(<4 x i64> %A) {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295,0,0,0,0]
-; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vblendvpd %ymm1, %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vmovaps %xmm0, %xmm0
; CHECK-NEXT: retl
%1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i64> %A, <4 x i64><i64 undef, i64 undef, i64 0, i64 0>
ret <4 x i64> %1
Modified: llvm/trunk/test/CodeGen/X86/pr34592.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34592.ll?rev=325417&r1=325416&r2=325417&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34592.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34592.ll Fri Feb 16 16:30:30 2018
@@ -10,7 +10,7 @@ define <16 x i64> @pluto(<16 x i64> %arg
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $288, %rsp # imm = 0x120
+; CHECK-NEXT: subq $320, %rsp # imm = 0x140
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
@@ -19,43 +19,43 @@ define <16 x i64> @pluto(<16 x i64> %arg
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
+; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm6[0,1],ymm8[2,3,4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
+; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
+; CHECK-NEXT: vmovdqa %xmm9, %xmm11
+; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
+; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,18446744071562067968,18446744071562067968]
-; CHECK-NEXT: vblendvpd %ymm0, %ymm2, %ymm6, %ymm0
-; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm2[0,1],ymm13[2,3],ymm2[4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm8[2,3,4,5,6,7]
-; CHECK-NEXT: vmovaps {{.*#+}} ymm8 = [18446744071562067968,18446744071562067968,0,0]
-; CHECK-NEXT: vblendvpd %ymm8, %ymm9, %ymm6, %ymm6
-; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm15[0,1],ymm11[2,3,4,5,6,7]
-; CHECK-NEXT: vpalignr {{.*#+}} ymm8 = ymm0[8,9,10,11,12,13,14,15],ymm8[0,1,2,3,4,5,6,7],ymm0[24,25,26,27,28,29,30,31],ymm8[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,3,2,0]
-; CHECK-NEXT: vmovaps %xmm6, %xmm9
-; CHECK-NEXT: # implicit-def: $ymm11
-; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm11, %ymm11
-; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm8[0,1,2,3],ymm11[4,5],ymm8[6,7]
-; CHECK-NEXT: vmovaps %xmm0, %xmm9
; CHECK-NEXT: # implicit-def: $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm11 = ymm7[0],ymm2[0],ymm7[2],ymm2[2]
-; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[2,1,2,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm11[0,1,2,3],ymm0[4,5,6,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm7[0,1],ymm2[2,3],ymm7[4,5,6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,1,1,3]
-; CHECK-NEXT: vpshufd {{.*#+}} ymm11 = ymm5[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3,4,5],ymm11[6,7]
-; CHECK-NEXT: vpalignr {{.*#+}} ymm5 = ymm6[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm6[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpermq {{.*#+}} ymm5 = ymm5[0,1,0,3]
-; CHECK-NEXT: vpslldq {{.*#+}} ymm6 = zero,zero,zero,zero,zero,zero,zero,zero,ymm7[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm7[16,17,18,19,20,21,22,23]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm6[0,1,2,3],ymm5[4,5,6,7]
-; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm8, %ymm0
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
+; CHECK-NEXT: vmovaps %xmm2, %xmm9
+; CHECK-NEXT: # implicit-def: $ymm2
+; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm8[0],ymm7[2],ymm8[2]
+; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT: vmovaps %xmm7, %xmm9
+; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
+; CHECK-NEXT: # implicit-def: $ymm6
+; CHECK-NEXT: vmovaps %xmm9, %xmm6
+; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
+; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
+; CHECK-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
+; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm2, %ymm1
-; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %ymm2 # 32-byte Reload
+; CHECK-NEXT: vmovaps %ymm5, %ymm1
; CHECK-NEXT: vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm5, %ymm3
+; CHECK-NEXT: vmovaps %ymm6, %ymm3
+; CHECK-NEXT: vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm13, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
Modified: llvm/trunk/test/CodeGen/X86/vector-blend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-blend.ll?rev=325417&r1=325416&r2=325417&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-blend.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-blend.ll Fri Feb 16 16:30:30 2018
@@ -253,15 +253,15 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1,
; SSE41-LABEL: vsel_i8:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1
-; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: vsel_i8:
; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
entry:
%vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
@@ -621,11 +621,10 @@ define <32 x i8> @constant_pblendvb_avx2
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41: # %bb.0: # %entry
; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
-; SSE41-NEXT: pblendvb %xmm0, %xmm4, %xmm2
-; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm3
-; SSE41-NEXT: movdqa %xmm2, %xmm0
-; SSE41-NEXT: movdqa %xmm3, %xmm1
+; SSE41-NEXT: movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm4
+; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
@@ -638,8 +637,8 @@ define <32 x i8> @constant_pblendvb_avx2
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
entry:
%select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd