[llvm] r357258 - [DAGCombiner] simplify shuffle of shuffle
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 29 07:20:38 PDT 2019
Author: spatel
Date: Fri Mar 29 07:20:38 2019
New Revision: 357258
URL: http://llvm.org/viewvc/llvm-project?rev=357258&view=rev
Log:
[DAGCombiner] simplify shuffle of shuffle
After investigating the examples from D59777 targeting an SSE4.1 machine,
this looks like a very different problem caused by how we map illegal types (256-bit in these cases).
We're missing a shuffle simplification that maps elements of a vector back to a shuffled operand.
We have a more general version of this transform in DAGCombiner::visitVECTOR_SHUFFLE(), but that
generality comes with a one-use constraint on the pattern, and the examples here have
2 uses. A pure simplification needs no use-count or legality limitations because no new value is
created.
It looks like we miss this pattern in IR too.
In one of the zext examples here, we have shuffle masks like this:
Shuf0 = vector_shuffle<0,u,3,7,0,u,3,7>
Shuf = vector_shuffle<4,u,6,7,u,u,u,u>
...so the outer shuffle moves the high half of the 1st vector into the low half. But the high half
of the 1st vector is already identical to its low half, which makes the outer shuffle redundant.
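To make that concrete, here is a minimal standalone C++ sketch (not part of the patch; the
function name outerShuffleIsRedundant and the driver are hypothetical) of the per-element check
the patch performs, using -1 to encode an undef mask element the way SelectionDAG masks do:

  #include <cstdio>
  #include <vector>

  // The outer shuffle is redundant if, for every non-undef lane i, the source
  // element it pulls out of the inner shuffle's result (Mask0[Mask[i]]) is the
  // element the inner shuffle already placed at lane i (Mask0[i]).
  static bool outerShuffleIsRedundant(const std::vector<int> &Mask0,
                                      const std::vector<int> &Mask) {
    for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
      if (Mask[i] == -1) // undef lane in the outer mask
        continue;
      if (Mask0[Mask[i]] != Mask0[i])
        return false;
    }
    return true;
  }

  int main() {
    // The masks from the zext example above.
    std::vector<int> Mask0 = {0, -1, 3, 7, 0, -1, 3, 7};    // Shuf0
    std::vector<int> Mask  = {4, -1, 6, 7, -1, -1, -1, -1}; // Shuf
    // Lane 0: Mask0[4] == 0 == Mask0[0]; lane 2: Mask0[6] == 3 == Mask0[2];
    // lane 3: Mask0[7] == 7 == Mask0[3]; every other lane is undef.
    std::printf("redundant: %s\n",
                outerShuffleIsRedundant(Mask0, Mask) ? "yes" : "no");
    return 0;
  }

Every defined lane of Shuf already matches what Shuf0 produces, so Shuf can be replaced by its
first operand.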
Differential Revision: https://reviews.llvm.org/D59961
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/vector-zext.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=357258&r1=357257&r2=357258&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Mar 29 07:20:38 2019
@@ -17976,6 +17976,34 @@ static SDValue replaceShuffleOfInsert(Sh
Op1, Op0.getOperand(1), NewInsIndex);
}
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Is the element of the shuffle operand chosen by this shuffle the same as
+ // the element chosen by the shuffle operand itself?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -18086,6 +18114,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
Modified: llvm/trunk/test/CodeGen/X86/vector-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-zext.ll?rev=357258&r1=357257&r2=357258&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-zext.ll Fri Mar 29 07:20:38 2019
@@ -2617,32 +2617,23 @@ define <4 x i64> @splatshuf_zext_v4i64(<
define <8 x i32> @splatshuf_zext_v8i32_matching_undefs(<8 x i16> %x) {
; SSE2-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSE2: # %bb.0:
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,0,3,4,5,6,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,1,3]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,5,7]
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
+; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSSE3: # %bb.0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[u,u],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: splatshuf_zext_v8i32_matching_undefs:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,6,7,6,7,14,15,8,9,10,11,12,13,14,15]
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatshuf_zext_v8i32_matching_undefs:
@@ -2671,31 +2662,25 @@ define <8 x i32> @splatshuf_zext_v8i32_u
; SSE2-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,5,7]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,0]
-; SSE2-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,6,5,5,4]
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSSE3: # %bb.0:
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[2,3],zero,zero,xmm0[6,7],zero,zero,xmm0[14,15],zero,zero
; SSSE3-NEXT: movdqa %xmm0, %xmm1
-; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm0
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: splatshuf_zext_v8i32_unmatched_undef:
; SSE41: # %bb.0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,0,1,6,7,6,7,14,15]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,14,15,14,15,6,7,12,13,14,15]
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: splatshuf_zext_v8i32_unmatched_undef: