[llvm] r218112 - [x86] Teach the new vector shuffle lowering to also use pmovzx for v4i32
Chandler Carruth
chandlerc at gmail.com
Fri Sep 19 01:37:44 PDT 2014
Author: chandlerc
Date: Fri Sep 19 03:37:44 2014
New Revision: 218112
URL: http://llvm.org/viewvc/llvm-project?rev=218112&view=rev
Log:
[x86] Teach the new vector shuffle lowering to also use pmovzx for v4i32
shuffles that are zext-ing.
Not a lot to see here; the undef lane variant is better handled with
pshufd, but this improves the actual zext pattern.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=218112&r1=218111&r2=218112&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 19 03:37:44 2014
@@ -7841,6 +7841,13 @@ static SDValue lowerV4I32VectorShuffle(S
getV4X86ShuffleImm8ForMask(Mask, DAG));
}
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative.
+ if (Subtarget->hasSSE41())
+ if (SDValue ZExt =
+ lowerVectorShuffleAsZeroExtend(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return ZExt;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
@@ -8517,7 +8524,6 @@ static SDValue lowerV8I16VectorShuffle(S
OrigMask, DAG))
return ZExt;
-
auto isV1 = [](int M) { return M >= 0 && M < 8; };
auto isV2 = [](int M) { return M >= 8; };
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=218112&r1=218111&r2=218112&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Fri Sep 19 03:37:44 2014
@@ -765,3 +765,47 @@ define <4 x i32> @shuffle_v4i32_3456(<4
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %shuffle
}
+
+define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: @shuffle_v4i32_0u1u
+; ALL: # BB#0:
+; ALL-NEXT: pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+ ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
+; SSE2-LABEL: @shuffle_v4i32_0z1z
+; SSE2: # BB#0:
+; SSE2-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE2-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT: retq
+;
+; SSE3-LABEL: @shuffle_v4i32_0z1z
+; SSE3: # BB#0:
+; SSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE3-NEXT: retq
+;
+; SSSE3-LABEL: @shuffle_v4i32_0z1z
+; SSSE3: # BB#0:
+; SSSE3-NEXT: xorps %[[X:xmm[0-9]+]], %[[X]]
+; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,1],[[X]][1,3]
+; SSSE3-NEXT: shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT: retq
+;
+; SSE41-LABEL: @shuffle_v4i32_0z1z
+; SSE41: # BB#0:
+; SSE41-NEXT: pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: @shuffle_v4i32_0z1z
+; AVX1: # BB#0:
+; AVX1-NEXT: vpmovzxdq %xmm0, %xmm0
+; AVX1-NEXT: retq
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+ ret <4 x i32> %shuffle
+}
More information about the llvm-commits mailing list.