[PATCH] D54836: [X86] Add DAG combine to combine a v8i32->v8i16 truncate with a packuswb that truncates v8i16->v8i8.

Mon Nov 26 14:16:59 PST 2018

craig.topper updated this revision to Diff 175340.
craig.topper added a comment.

Add a DAG combine to avoid this failing after r347593. Since we now have an AssertSExt and a stronger AssertZExt sandwiched around the truncate.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D54836/new/

https://reviews.llvm.org/D54836

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  lib/Target/X86/X86ISelLowering.cpp
  test/CodeGen/X86/avx512-cvt-widen.ll


Index: test/CodeGen/X86/avx512-cvt-widen.ll
===================================================================

--- test/CodeGen/X86/avx512-cvt-widen.ll
+++ test/CodeGen/X86/avx512-cvt-widen.ll
@@ -505,16 +505,14 @@
 ; NOVL-LABEL: f64to8uc:
 ; NOVL:       # %bb.0:
 ; NOVL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; NOVL-NEXT:    vpmovdw %zmm0, %ymm0
-; NOVL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
+; NOVL-NEXT:    vpmovdb %zmm0, %xmm0
 ; NOVL-NEXT:    vzeroupper
 ; NOVL-NEXT:    retq
 ;
 ; VL-LABEL: f64to8uc:
 ; VL:       # %bb.0:
 ; VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
-; VL-NEXT:    vpmovdw %ymm0, %xmm0
-; VL-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
+; VL-NEXT:    vpmovdb %ymm0, %xmm0
 ; VL-NEXT:    vzeroupper
 ; VL-NEXT:    retq
   %res = fptoui <8 x double> %f to <8 x i8>
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -35378,6 +35378,25 @@
     return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
   }
 
+  // Try to combine a PACKUSWB implemented truncate with a regular truncate to
+  // create a larger truncate.
+  if (Subtarget.hasAVX512() && Opcode == X86ISD::PACKUS &&
+      N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
+      N0.getOperand(0).getValueType() == MVT::v8i32) {
+
+    APInt ZeroMask = APInt::getHighBitsSet(16, 8);
+    if (DAG.MaskedValueIsZero(N0, ZeroMask)) {
+      if (Subtarget.hasVLX())
+        return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));
+
+      // Widen input to v16i32 so we can truncate that.
+      SDLoc dl(N);
+      SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
+                                   N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
+    }
+  }
+
   // Attempt to combine as shuffle.
   SDValue Op(N, 0);
   if (SDValue Res =
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9079,6 +9079,26 @@
     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
   }
 
+  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
+  // than X. Just move the AssertZext in front of the truncate and drop the
+  // AssertSExt.
+  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+      N0.getOperand(0).getOpcode() == ISD::AssertSext &&
+      Opcode == ISD::AssertZext) {
+    SDValue BigA = N0.getOperand(0);
+    EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+    assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+           "Asserting zero/sign-extended bits to a type larger than the "
+           "truncated destination does not provide information");
+
+    if (AssertVT.bitsLT(BigA_AssertVT)) {
+      SDLoc DL(N);
+      SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+                                      BigA.getOperand(0), N1);
+      return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
+    }
+  }
+
   return SDValue();
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D54836.175340.patch
Type: text/x-patch
Size: 3240 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181126/98557efb/attachment.bin>