[PATCH] D33994: [DAGCombiner] Add another combine from build vector to shuffle
Guy Blank via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 7 08:24:16 PDT 2017
guyblank created this revision.
Herald added subscribers: kristof.beyls, javed.absar, aemerson.
This patch adds support for combining a build vector into a shuffle.
It handles the case where the build vector is made of elements extracted from 2 vectors (vec1, vec2), where vec1 is the same size as the result and vec2 is half that size. Without the patch this would generate a bunch of extract/insert operations.
I'm not sure whether the changed AArch64 test output is better or worse, though.
https://reviews.llvm.org/D33994
Files:
lib/CodeGen/SelectionDAG/DAGCombiner.cpp
test/CodeGen/AArch64/arm64-neon-copy.ll
test/CodeGen/X86/vector-shuffle-v48.ll
Index: test/CodeGen/X86/vector-shuffle-v48.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-v48.ll
+++ test/CodeGen/X86/vector-shuffle-v48.ll
@@ -3,26 +3,18 @@
define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: foo:
; CHECK: # BB#0:
-; CHECK-NEXT: vmovdqu (%rdi), %ymm4
-; CHECK-NEXT: vmovdqu 32(%rdi), %xmm5
-; CHECK-NEXT: vpextrb $13, %xmm5, %eax
-; CHECK-NEXT: vpextrb $10, %xmm5, %ecx
-; CHECK-NEXT: vpextrb $7, %xmm5, %edx
-; CHECK-NEXT: vpextrb $4, %xmm5, %esi
-; CHECK-NEXT: vpextrb $1, %xmm5, %edi
-; CHECK-NEXT: vextracti128 $1, %ymm4, %xmm5
-; CHECK-NEXT: vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT: vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
-; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0]
-; CHECK-NEXT: vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
-; CHECK-NEXT: vinserti128 $1, %xmm6, %ymm4, %ymm4
+; CHECK-NEXT: vmovdqu 32(%rdi), %xmm8
+; CHECK-NEXT: vmovdqu (%rdi), %ymm5
+; CHECK-NEXT: vextracti128 $1, %ymm5, %xmm6
+; CHECK-NEXT: vpshufb {{.*#+}} xmm7 = xmm6[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm4 = xmm5[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
+; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
+; CHECK-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
; CHECK-NEXT: vpand {{.*}}(%rip), %ymm4, %ymm4
-; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vpinsrb $3, %edi, %xmm5, %xmm5
-; CHECK-NEXT: vpinsrb $4, %esi, %xmm5, %xmm5
-; CHECK-NEXT: vpinsrb $5, %edx, %xmm5, %xmm5
-; CHECK-NEXT: vpinsrb $6, %ecx, %xmm5, %xmm5
-; CHECK-NEXT: vpinsrb $7, %eax, %xmm5, %xmm5
+; CHECK-NEXT: vpshufb {{.*#+}} xmm5 = zero,zero,zero,xmm8[1,4,7,10,13,u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm6 = xmm6[8,11,14],zero,zero,zero,zero,zero,xmm6[u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vpor %xmm6, %xmm5, %xmm5
; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero
; CHECK-NEXT: vpmulld %ymm0, %ymm4, %ymm0
; CHECK-NEXT: vpmulld %ymm1, %ymm5, %ymm1
Index: test/CodeGen/AArch64/arm64-neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-copy.ll
+++ test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1378,7 +1378,7 @@
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
entry:
%vecext = extractelement <2 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14007,6 +14007,11 @@
// when we start sorting the vectors by type.
return SDValue();
}
+ } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+ InVT1.getSizeInBits() == VT.getSizeInBits()) {
+ SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+ ConcatOps[0] = VecIn2;
+ VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
} else {
// TODO: Support cases where the length mismatch isn't exactly by a
// factor of 2.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D33994.101739.patch
Type: text/x-patch
Size: 3842 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170607/e0475302/attachment.bin>
More information about the llvm-commits
mailing list