[llvm] r374615 - [X86] Fold a VTRUNCS/VTRUNCUS+store into a saturating truncating store.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 11 17:01:08 PDT 2019


Author: ctopper
Date: Fri Oct 11 17:01:08 2019
New Revision: 374615

URL: http://llvm.org/viewvc/llvm-project?rev=374615&view=rev
Log:
[X86] Fold a VTRUNCS/VTRUNCUS+store into a saturating truncating store.

We already did this for VTRUNCUS with a specific combination of
types. This extends this to VTRUNCS and handles any types where
a truncating store is legal.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512-trunc.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=374615&r1=374614&r2=374615&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Oct 11 17:01:08 2019
@@ -40332,11 +40332,11 @@ static SDValue combineStore(SDNode *N, S
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
   StoreSDNode *St = cast<StoreSDNode>(N);
-  EVT VT = St->getValue().getValueType();
   EVT StVT = St->getMemoryVT();
   SDLoc dl(St);
   unsigned Alignment = St->getAlignment();
-  SDValue StoredVal = St->getOperand(1);
+  SDValue StoredVal = St->getValue();
+  EVT VT = StoredVal.getValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // Convert a store of vXi1 into a store of iX and a bitcast.
@@ -40453,17 +40453,15 @@ static SDValue combineStore(SDNode *N, S
                              MVT::v16i8, St->getMemOperand());
   }
 
-  // Try to fold a vpmovuswb 256->128 into a truncating store.
-  // FIXME: Generalize this to other types.
-  // FIXME: Do the same for signed saturation.
-  if (!St->isTruncatingStore() && VT == MVT::v16i8 &&
-      St->getValue().getOpcode() == X86ISD::VTRUNCUS &&
-      St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
-      TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) &&
-      St->getValue().hasOneUse()) {
-    return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
-                           dl, St->getValue().getOperand(0), St->getBasePtr(),
-                           MVT::v16i8, St->getMemOperand(), DAG);
+  // Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
+  if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
+      (StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
+       StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
+      TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
+    bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
+    return EmitTruncSStore(IsSigned, St->getChain(),
+                           dl, StoredVal.getOperand(0), St->getBasePtr(),
+                           VT, St->getMemOperand(), DAG);
   }
 
   // Optimize trunc store (of multiple scalars) to shuffle and store.

Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=374615&r1=374614&r2=374615&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Fri Oct 11 17:01:08 2019
@@ -690,10 +690,8 @@ define <32 x i8> @usat_trunc_db_1024(<32
 define void @usat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-LABEL: usat_trunc_db_1024_mem:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
-; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vpmovusdb %zmm1, 16(%rdi)
+; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x3 = icmp ult <32 x i32> %i, <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
@@ -957,12 +955,10 @@ define void @smax_usat_trunc_db_1024_mem
 ; ALL-LABEL: smax_usat_trunc_db_1024_mem:
 ; ALL:       ## %bb.0:
 ; ALL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
 ; ALL-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
-; ALL-NEXT:    vpmovusdb %zmm0, %xmm0
-; ALL-NEXT:    vpmovusdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
+; ALL-NEXT:    vpmovusdb %zmm1, 16(%rdi)
+; ALL-NEXT:    vpmovusdb %zmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -1048,10 +1044,8 @@ define void @negative_test2_smax_usat_tr
 define void @ssat_trunc_db_1024_mem(<32 x i32> %i, <32 x i8>* %p) {
 ; ALL-LABEL: ssat_trunc_db_1024_mem:
 ; ALL:       ## %bb.0:
-; ALL-NEXT:    vpmovsdb %zmm0, %xmm0
-; ALL-NEXT:    vpmovsdb %zmm1, %xmm1
-; ALL-NEXT:    vmovdqu %xmm1, 16(%rdi)
-; ALL-NEXT:    vmovdqu %xmm0, (%rdi)
+; ALL-NEXT:    vpmovsdb %zmm1, 16(%rdi)
+; ALL-NEXT:    vpmovsdb %zmm0, (%rdi)
 ; ALL-NEXT:    vzeroupper
 ; ALL-NEXT:    retq
   %x1 = icmp sgt <32 x i32> %i, <i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32 -128, i32




More information about the llvm-commits mailing list