[llvm] r307268 - [X86][SSE4A] Add support for shuffle combining to INSERTQI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 6 08:34:17 PDT 2017
Author: rksimon
Date: Thu Jul 6 08:34:17 2017
New Revision: 307268
URL: http://llvm.org/viewvc/llvm-project?rev=307268&view=rev
Log:
[X86][SSE4A] Add support for shuffle combining to INSERTQI.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307268&r1=307267&r2=307268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Jul 6 08:34:17 2017
@@ -27714,6 +27714,22 @@ static bool combineX86ShuffleChain(Array
/*AddTo*/ true);
return true;
}
+
+ if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+ if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
+ return false; // Nothing to do!
+ V1 = DAG.getBitcast(IntMaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(IntMaskVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
+ DAG.getConstant(BitLen, DL, MVT::i8),
+ DAG.getConstant(BitIdx, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
}
// Don't try to re-form single instruction chains under any circumstances now
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll?rev=307268&r1=307267&r2=307268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-sse4a.ll Thu Jul 6 08:34:17 2017
@@ -75,19 +75,10 @@ define <8 x i16> @combine_insertqi_pshuf
}
define <16 x i8> @combine_pshufb_insertqi_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
-; SSE-LABEL: combine_pshufb_insertqi_pshufb:
-; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; SSE-NEXT: retq
-;
-; AVX-LABEL: combine_pshufb_insertqi_pshufb:
-; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[7,1,2,4,3,u,u,0,u,u,u,u,u,u,u,u]
-; AVX-NEXT: retq
+; ALL-LABEL: combine_pshufb_insertqi_pshufb:
+; ALL: # BB#0:
+; ALL-NEXT: insertq {{.*#+}} xmm0 = xmm0[0],xmm1[0,1],xmm0[3,4,5,6,7,u,u,u,u,u,u,u,u]
+; ALL-NEXT: retq
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
%2 = shufflevector <16 x i8> %1, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 17, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 7, i8 1, i8 2, i8 4, i8 3, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>)
More information about the llvm-commits mailing list