[llvm] r222090 - [DAG] Improved target independent vector shuffle folding logic.
Andrea Di Biagio
Andrea_DiBiagio at sn.scee.net
Sat Nov 15 14:56:26 PST 2014
Author: adibiagio
Date: Sat Nov 15 16:56:25 2014
New Revision: 222090
URL: http://llvm.org/viewvc/llvm-project?rev=222090&view=rev
Log:
[DAG] Improved target independent vector shuffle folding logic.
This patch teaches the DAGCombiner how to combine shuffles according to rules:
shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=222090&r1=222089&r2=222090&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sat Nov 15 16:56:25 2014
@@ -11239,6 +11239,26 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]);
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
}
+
+ // Compute the commuted shuffle mask.
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
+ continue;
+ else if (idx < (int)NumElts)
+ Mask[i] = idx + NumElts;
+ else
+ Mask[i] = idx - NumElts;
+ }
+
+ if (TLI.isShuffleMaskLegal(Mask, VT)) {
+ if (IsSV1Undef)
+ // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N1, SV0, &Mask[0]);
+ // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(B, A, M2)
+ return DAG.getVectorShuffle(VT, SDLoc(N), SV1, SV0, &Mask[0]);
+ }
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll?rev=222090&r1=222089&r2=222090&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining.ll Sat Nov 15 16:56:25 2014
@@ -1146,18 +1146,14 @@ define <4 x float> @combine_test1(<4 x f
define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test2:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test2:
; SSSE3: # BB#0:
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test2:
@@ -1268,18 +1264,14 @@ define <4 x i32> @combine_test6(<4 x i32
define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test7:
; SSE2: # BB#0:
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: movss %xmm0, %xmm1
+; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test7:
; SSSE3: # BB#0:
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: movss %xmm0, %xmm1
+; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test7:
@@ -1385,14 +1377,12 @@ define <4 x float> @combine_test12(<4 x
; SSE2-LABEL: combine_test12:
; SSE2: # BB#0:
; SSE2-NEXT: movss %xmm0, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test12:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss %xmm0, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1486,14 +1476,12 @@ define <4 x i32> @combine_test17(<4 x i3
; SSE2-LABEL: combine_test17:
; SSE2: # BB#0:
; SSE2-NEXT: movss %xmm0, %xmm1
-; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test17:
; SSSE3: # BB#0:
; SSSE3-NEXT: movss %xmm0, %xmm1
-; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
@@ -1700,30 +1688,24 @@ define <4 x float> @combine_test4b(<4 x
define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
; SSE2-LABEL: combine_test1c:
; SSE2: # BB#0:
-; SSE2-NEXT: movd (%rdi), %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: movd (%rsi), %xmm1
+; SSE2-NEXT: movd (%rdi), %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT: movd (%rsi), %xmm0
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test1c:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movd (%rdi), %xmm0
-; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSSE3-NEXT: movd (%rsi), %xmm1
+; SSSE3-NEXT: movd (%rdi), %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT: movd (%rsi), %xmm0
+; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT: movss %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test1c:
@@ -1984,19 +1966,13 @@ define <4 x float> @combine_blend_02(<4
define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_blend_123:
; SSE2: # BB#0:
-; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
-; SSE2-NEXT: movsd %xmm2, %xmm1
+; SSE2-NEXT: movss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_blend_123:
; SSSE3: # BB#0:
-; SSSE3-NEXT: movaps %xmm1, %xmm2
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
-; SSSE3-NEXT: movsd %xmm2, %xmm1
+; SSSE3-NEXT: movss %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
More information about the llvm-commits
mailing list