[llvm] r344336 - [X86][SSE] Add extract_subvector(PSHUFB) -> PSHUFB(extract_subvector()) combine
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 12 05:10:34 PDT 2018
Author: rksimon
Date: Fri Oct 12 05:10:34 2018
New Revision: 344336
URL: http://llvm.org/viewvc/llvm-project?rev=344336&view=rev
Log:
[X86][SSE] Add extract_subvector(PSHUFB) -> PSHUFB(extract_subvector()) combine
Fixes PR32160 by reducing the size of the PSHUFB when only its lowest 128-bit lane is used.
This approach can probably be generalized to handle any target shuffle (and any subvector index), but we have no test coverage for that at the moment.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-trunc.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=344336&r1=344335&r2=344336&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Oct 12 05:10:34 2018
@@ -40306,6 +40306,18 @@ static SDValue combineExtractSubvector(S
: ISD::SIGN_EXTEND_VECTOR_INREG;
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
}
+ if (InOpcode == ISD::BITCAST) {
+ // TODO - do this for target shuffles in general.
+ SDValue InVecBC = peekThroughOneUseBitcasts(InVec);
+ if (InVecBC.getOpcode() == X86ISD::PSHUFB && OpVT.is128BitVector()) {
+ SDLoc DL(N);
+ SDValue SubPSHUFB =
+ DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8,
+ extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL),
+ extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL));
+ return DAG.getBitcast(OpVT, SubPSHUFB);
+ }
+ }
}
return SDValue();
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc.ll?rev=344336&r1=344335&r2=344336&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc.ll Fri Oct 12 05:10:34 2018
@@ -1922,16 +1922,14 @@ define <8 x i16> @PR32160(<8 x i32> %x)
;
; AVX2-SLOW-LABEL: PR32160:
; AVX2-SLOW: # %bb.0:
-; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,3,4,5,6,7]
+; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: PR32160:
; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
-; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,4,5,4,5,4,5,4,5]
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,8,9,8,9,8,9,8,9]
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
More information about the llvm-commits
mailing list