[llvm] r274097 - [DAGCombine] Teach DAG combine to handle ORs of shuffles involving zero vectors where the zero vector is the first operand to the shuffle instead of the second.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 28 20:29:12 PDT 2016
Author: ctopper
Date: Tue Jun 28 22:29:12 2016
New Revision: 274097
URL: http://llvm.org/viewvc/llvm-project?rev=274097&view=rev
Log:
[DAGCombine] Teach DAG combine to handle ORs of shuffles involving zero vectors where the zero vector is the first operand to the shuffle instead of the second.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-or.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=274097&r1=274096&r2=274097&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jun 28 22:29:12 2016
@@ -3751,59 +3751,66 @@ SDValue DAGCombiner::visitOR(SDNode *N)
N1.getValueType().getScalarType().getSizeInBits()),
SDLoc(N), N1.getValueType());
- // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
- // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
+ // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
if (isa<ShuffleVectorSDNode>(N0) &&
isa<ShuffleVectorSDNode>(N1) &&
// Avoid folding a node with illegal type.
- TLI.isTypeLegal(VT) &&
- N0->getOperand(1) == N1->getOperand(1) &&
- ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
- bool CanFold = true;
- unsigned NumElts = VT.getVectorNumElements();
- const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
- const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
- // We construct two shuffle masks:
- // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
- // and N1 as the second operand.
- // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
- // and N0 as the second operand.
- // We do this because OR is commutable and therefore there might be
- // two ways to fold this node into a shuffle.
- SmallVector<int,4> Mask1;
- SmallVector<int,4> Mask2;
-
- for (unsigned i = 0; i != NumElts && CanFold; ++i) {
- int M0 = SV0->getMaskElt(i);
- int M1 = SV1->getMaskElt(i);
-
- // Both shuffle indexes are undef. Propagate Undef.
- if (M0 < 0 && M1 < 0) {
- Mask1.push_back(M0);
- Mask2.push_back(M0);
- continue;
+ TLI.isTypeLegal(VT)) {
+ bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
+ bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
+ bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
+ bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
+ // Ensure both shuffles have a zero input.
+ if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
+ assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
+ assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
+ const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
+ const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
+ bool CanFold = true;
+ int NumElts = VT.getVectorNumElements();
+ SmallVector<int, 4> Mask(NumElts);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M0 = SV0->getMaskElt(i);
+ int M1 = SV1->getMaskElt(i);
+
+ // Both shuffle indexes are undef. Propagate Undef.
+ if (M0 < 0 && M1 < 0) {
+ Mask[i] = -1;
+ continue;
+ }
+
+ // Determine if either index is pointing to a zero vector.
+ bool M0Zero = M0 >= 0 && (ZeroN00 == (M0 < NumElts));
+ bool M1Zero = M1 >= 0 && (ZeroN10 == (M1 < NumElts));
+ if (M0Zero == M1Zero) {
+ CanFold = false;
+ break;
+ }
+
+ // We have a zero and non-zero element. If the non-zero came from
+ // SV0 make the index a LHS index. If it came from SV1, make it
+ // a RHS index. We need to mod by NumElts because we don't care
+ // which operand it came from in the original shuffles.
+ Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
}
- if (M0 < 0 || M1 < 0 ||
- (M0 < (int)NumElts && M1 < (int)NumElts) ||
- (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
- CanFold = false;
- break;
+ if (CanFold) {
+ SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
+ SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
+
+ bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(NewLHS, NewRHS);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (LegalMask)
+ return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS,
+ NewRHS, &Mask[0]);
}
-
- Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
- Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
- }
-
- if (CanFold) {
- // Fold this sequence only if the resulting shuffle is 'legal'.
- if (TLI.isShuffleMaskLegal(Mask1, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
- N1->getOperand(0), &Mask1[0]);
- if (TLI.isShuffleMaskLegal(Mask2, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
- N0->getOperand(0), &Mask2[0]);
}
}
}
Modified: llvm/trunk/test/CodeGen/X86/combine-or.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-or.ll?rev=274097&r1=274096&r2=274097&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-or.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-or.ll Tue Jun 28 22:29:12 2016
@@ -361,10 +361,7 @@ define <4 x i8> @test_crash(<4 x i8> %a,
define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2b:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm2, %xmm2
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
@@ -375,10 +372,7 @@ define <4 x i32> @test2b(<4 x i32> %a, <
define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2c:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm2, %xmm2
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32><i32 0, i32 0, i32 6, i32 7>
%shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
@@ -390,10 +384,7 @@ define <4 x i32> @test2c(<4 x i32> %a, <
define <4 x i32> @test2d(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2d:
; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm2, %xmm2
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
-; CHECK-NEXT: movq {{.*#+}} xmm1 = xmm1[0],zero
-; CHECK-NEXT: por %xmm1, %xmm0
+; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %b, <4 x i32><i32 4, i32 5, i32 0, i32 0>
More information about the llvm-commits
mailing list