[llvm] r215797 - [DAGCombiner] Improve the folding of target-independent shuffles to Undef.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Fri Aug 15 17:29:45 PDT 2014
Author: adibiagio
Date: Fri Aug 15 19:29:44 2014
New Revision: 215797
URL: http://llvm.org/viewvc/llvm-project?rev=215797&view=rev
Log:
[DAGCombiner] Improve the folding of target-independent shuffles to Undef.
When combining a pair of shuffle nodes, check if the combined shuffle mask is
trivially Undef. If it is, immediately fold that pair of shuffles to Undef.
The lack of checks for undef masks was the root cause of a poor-codegen bug
in the DAG combiner.
Example:
%1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
%2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6>
%3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
Before this patch, on x86 (with -mcpu=corei7) we failed to fold the entire
sequence to an Undef value and therefore we generated:
shufps $-123, %xmm1, %xmm0
pshufd $-46, %xmm0, %xmm0
With this patch, the entire shuffle sequence is folded to Undef and no
shuffles are generated in the output assembly.
Added new test cases to test 'combine-vec-shuffle-5.ll'.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/combine-vec-shuffle-5.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=215797&r1=215796&r2=215797&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Aug 15 19:29:44 2014
@@ -10787,6 +10787,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
Idx = OtherSV->getMaskElt(Idx);
Mask.push_back(Idx);
}
+
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
bool CommuteOperands = false;
if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
@@ -10932,6 +10940,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
Mask.push_back(Idx);
}
+ // Check if all indices in Mask are Undef. In case, propagate Undef.
+ bool isUndefMask = true;
+ for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
+ isUndefMask &= Mask[i] < 0;
+
+ if (isUndefMask)
+ return DAG.getUNDEF(VT);
+
// Avoid introducing shuffles with illegal mask.
if (TLI.isShuffleMaskLegal(Mask, VT)) {
if (IsSV1Undef)
Modified: llvm/trunk/test/CodeGen/X86/combine-vec-shuffle-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-vec-shuffle-5.ll?rev=215797&r1=215796&r2=215797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-vec-shuffle-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-vec-shuffle-5.ll Fri Aug 15 19:29:44 2014
@@ -255,3 +255,194 @@ define <4 x i8> @test4c(<4 x i8>* %a, <4
; CHECK: blendps $13
; CHECK: ret
+
+; Verify that the dag combiner correctly folds the following shuffle pairs to Undef.
+
+define <4 x i32> @test1b(<4 x i32> %A) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: test1b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test2b(<4 x i32> %A) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: test2b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test3b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: test3b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test4b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+ ret <4 x i32> %2
+}
+; CHECK-LABEL: test4b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test5b(<4 x i32> %A) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 5, i32 7>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test5b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test6b(<4 x i32> %A) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 1, i32 6>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 6, i32 7>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test6b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test7b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test7b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test8b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 1, i32 6>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 6>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 3, i32 3>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test8b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test9b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 1, i32 undef, i32 7>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 2, i32 1>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 1, i32 2>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test9b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test10b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 undef, i32 1, i32 6>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %A, <4 x i32> <i32 0, i32 6, i32 1, i32 0>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 2>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test10b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test11b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 1, i32 3>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test11b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <4 x i32> @test12b(<4 x i32> %A, <4 x i32> %B) {
+ %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 undef, i32 1, i32 1, i32 undef>
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %B, <4 x i32> <i32 0, i32 3, i32 3, i32 0>
+ %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 5, i32 1, i32 4>
+ ret <4 x i32> %3
+}
+; CHECK-LABEL: test12b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test13b(<8 x i32> %A, <8 x i32> %B) {
+ %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 0, i32 undef, i32 1, i32 undef>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 1, i32 3, i32 1, i32 3, i32 1, i32 3, i32 1, i32 3>
+ %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 0, i32 9, i32 1, i32 10, i32 0, i32 9, i32 1, i32 10>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test13b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test14b(<8 x i32> %A, <8 x i32> %B) {
+ %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 1, i32 1, i32 undef, i32 undef, i32 1, i32 1, i32 undef>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> %B, <8 x i32> <i32 0, i32 3, i32 3, i32 0, i32 0, i32 3, i32 3, i32 0>
+ %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 9, i32 1, i32 8, i32 1, i32 9, i32 1, i32 8>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test14b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test15b(<8 x i32> %A, <8 x i32> %B) {
+ %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 0, i32 1, i32 undef, i32 11, i32 0, i32 1, i32 undef, i32 11>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <8 x i32> <i32 8, i32 9, i32 2, i32 11, i32 8, i32 9, i32 2, i32 11>
+ %3 = shufflevector <8 x i32> %2, <8 x i32> %A, <8 x i32> <i32 2, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 undef, i32 2>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test15b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
+define <8 x i32> @test16b(<8 x i32> %A, <8 x i32> %B) {
+ %1 = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 undef, i32 undef, i32 1, i32 10, i32 undef, i32 undef, i32 1, i32 10>
+ %2 = shufflevector <8 x i32> %1, <8 x i32> %A, <8 x i32> <i32 0, i32 10, i32 2, i32 11, i32 0, i32 10, i32 2, i32 11>
+ %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 4, i32 9, i32 undef, i32 0, i32 4, i32 9, i32 undef, i32 0>
+ ret <8 x i32> %3
+}
+; CHECK-LABEL: test16b
+; CHECK-NOT: blendps
+; CHECK-NOT: pshufd
+; CHECK-NOT: movhlps
+; CHECK: ret
+
More information about the llvm-commits
mailing list