[llvm] r348910 - [X86] Combine vpmovdw+vpacksswb into vpmovdb.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 11 21:56:01 PST 2018
Author: ctopper
Date: Tue Dec 11 21:56:01 2018
New Revision: 348910
URL: http://llvm.org/viewvc/llvm-project?rev=348910&view=rev
Log:
[X86] Combine vpmovdw+vpacksswb into vpmovdb.
This is similar to the combine we already have for vpmovdw+vpackuswb.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348910&r1=348909&r2=348910&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 11 21:56:01 2018
@@ -35455,6 +35455,8 @@ static SDValue combineVectorPack(SDNode
N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
"Unexpected PACKSS/PACKUS input type");
+ bool IsSigned = (X86ISD::PACKSS == Opcode);
+
// Constant Folding.
APInt UndefElts0, UndefElts1;
SmallVector<APInt, 32> EltBits0, EltBits1;
@@ -35467,7 +35469,6 @@ static SDValue combineVectorPack(SDNode
unsigned NumSrcElts = NumDstElts / 2;
unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
- bool IsSigned = (X86ISD::PACKSS == Opcode);
APInt Undefs(NumDstElts, 0);
SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@@ -35511,15 +35512,14 @@ static SDValue combineVectorPack(SDNode
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
- // Try to combine a PACKUSWB implemented truncate with a regular truncate to
- // create a larger truncate.
- // TODO: Match PACKSSWB as well?
- if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS &&
+ // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
+ // truncate to create a larger truncate.
+ if (Subtarget.hasAVX512() &&
N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
N0.getOperand(0).getValueType() == MVT::v8i32) {
-
- APInt ZeroMask = APInt::getHighBitsSet(16, 8);
- if (DAG.MaskedValueIsZero(N0, ZeroMask)) {
+ if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
+ (!IsSigned &&
+ DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
if (Subtarget.hasVLX())
return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll?rev=348910&r1=348909&r2=348910&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll Tue Dec 11 21:56:01 2018
@@ -648,16 +648,14 @@ define <8 x i8> @f64to8sc(<8 x double> %
; NOVL-LABEL: f64to8sc:
; NOVL: # %bb.0:
; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; NOVL-NEXT: vpmovdw %zmm0, %ymm0
-; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; NOVL-NEXT: vpmovdb %zmm0, %xmm0
; NOVL-NEXT: vzeroupper
; NOVL-NEXT: retq
;
; VL-LABEL: f64to8sc:
; VL: # %bb.0:
; VL-NEXT: vcvttpd2dq %zmm0, %ymm0
-; VL-NEXT: vpmovdw %ymm0, %xmm0
-; VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; VL-NEXT: vpmovdb %ymm0, %xmm0
; VL-NEXT: vzeroupper
; VL-NEXT: retq
%res = fptosi <8 x double> %f to <8 x i8>
More information about the llvm-commits mailing list