[llvm] r374509 - [X86] Add a DAG combine to turn v16i16->v16i8 VTRUNCUS+store into a saturating truncating store.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 21:16:49 PDT 2019
Author: ctopper
Date: Thu Oct 10 21:16:49 2019
New Revision: 374509
URL: http://llvm.org/viewvc/llvm-project?rev=374509&view=rev
Log:
[X86] Add a DAG combine to turn v16i16->v16i8 VTRUNCUS+store into a saturating truncating store.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=374509&r1=374508&r2=374509&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Oct 10 21:16:49 2019
@@ -40448,6 +40448,19 @@ static SDValue combineStore(SDNode *N, S
MVT::v16i8, St->getMemOperand());
}
+ // Try to fold a vpmovuswb 256->128 into a truncating store.
+ // FIXME: Generalize this to other types.
+ // FIXME: Do the same for signed saturation.
+ if (!St->isTruncatingStore() && VT == MVT::v16i8 &&
+ St->getValue().getOpcode() == X86ISD::VTRUNCUS &&
+ St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
+ TLI.isTruncStoreLegal(MVT::v16i16, MVT::v16i8) &&
+ St->getValue().hasOneUse()) {
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, St->getValue().getOperand(0), St->getBasePtr(),
+ MVT::v16i8, St->getMemOperand(), DAG);
+ }
+
// Optimize trunc store (of multiple scalars) to shuffle and store.
// First, pack all of the elements in one place. Next, store to memory
// in fewer chunks.
Modified: llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll?rev=374509&r1=374508&r2=374509&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll (original)
+++ llvm/trunk/test/CodeGen/X86/min-legal-vector-width.ll Thu Oct 10 21:16:49 2019
@@ -1104,8 +1104,7 @@ define void @trunc_packus_v16i32_v16i8_s
; CHECK-NEXT: vmovdqa (%rdi), %ymm0
; CHECK-NEXT: vpackusdw 32(%rdi), %ymm0, %ymm0
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; CHECK-NEXT: vpmovuswb %ymm0, %xmm0
-; CHECK-NEXT: vmovdqa %xmm0, (%rsi)
+; CHECK-NEXT: vpmovuswb %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%a = load <16 x i32>, <16 x i32>* %p
More information about the llvm-commits mailing list