[llvm] r348910 - [X86] Combine vpmovdw+vpacksswb into vpmovdb.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 11 21:56:01 PST 2018


Author: ctopper
Date: Tue Dec 11 21:56:01 2018
New Revision: 348910

URL: http://llvm.org/viewvc/llvm-project?rev=348910&view=rev
Log:
[X86] Combine vpmovdw+vpacksswb into vpmovdb.

This is similar to the combine we already have for vpmovdw+vpackuswb.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=348910&r1=348909&r2=348910&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Dec 11 21:56:01 2018
@@ -35455,6 +35455,8 @@ static SDValue combineVectorPack(SDNode
          N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
          "Unexpected PACKSS/PACKUS input type");
 
+  bool IsSigned = (X86ISD::PACKSS == Opcode);
+
   // Constant Folding.
   APInt UndefElts0, UndefElts1;
   SmallVector<APInt, 32> EltBits0, EltBits1;
@@ -35467,7 +35469,6 @@ static SDValue combineVectorPack(SDNode
     unsigned NumSrcElts = NumDstElts / 2;
     unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
     unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
-    bool IsSigned = (X86ISD::PACKSS == Opcode);
 
     APInt Undefs(NumDstElts, 0);
     SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
@@ -35511,15 +35512,14 @@ static SDValue combineVectorPack(SDNode
     return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
   }
 
-  // Try to combine a PACKUSWB implemented truncate with a regular truncate to
-  // create a larger truncate.
-  // TODO: Match PACKSSWB as well?
-  if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS &&
+  // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
+  // truncate to create a larger truncate.
+  if (Subtarget.hasAVX512() &&
       N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
       N0.getOperand(0).getValueType() == MVT::v8i32) {
-
-    APInt ZeroMask = APInt::getHighBitsSet(16, 8);
-    if (DAG.MaskedValueIsZero(N0, ZeroMask)) {
+    if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
+        (!IsSigned &&
+         DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
       if (Subtarget.hasVLX())
         return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll?rev=348910&r1=348909&r2=348910&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt-widen.ll Tue Dec 11 21:56:01 2018
@@ -648,16 +648,14 @@ define <8 x i8> @f64to8sc(<8 x double> %
 ; NOVL-LABEL: f64to8sc:
 ; NOVL:       # %bb.0:
 ; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
-; NOVL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpmovdb %zmm0, %xmm0
 ; NOVL-NEXT:    vzeroupper
 ; NOVL-NEXT:    retq
 ;
 ; VL-LABEL: f64to8sc:
 ; VL:       # %bb.0:
 ; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; VL-NEXT:    vpmovdw %ymm0, %xmm0
-; VL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; VL-NEXT:    vpmovdb %ymm0, %xmm0
 ; VL-NEXT:    vzeroupper
 ; VL-NEXT:    retq
   %res = fptosi <8 x double> %f to <8 x i8>




More information about the llvm-commits mailing list