[llvm] r258645 - [X86][SSE] Generalised TRUNC -> PACKSS/PACKUS code. NFC.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 23 14:02:48 PST 2016
Author: rksimon
Date: Sat Jan 23 16:02:48 2016
New Revision: 258645
URL: http://llvm.org/viewvc/llvm-project?rev=258645&view=rev
Log:
[X86][SSE] Generalised TRUNC -> PACKSS/PACKUS code. NFC.
Generalised mask generation and subvector extraction to derive their types directly from the input/output types, instead of using an if/else chain over all the currently accepted types.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=258645&r1=258644&r2=258645&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Jan 23 16:02:48 2016
@@ -27190,13 +27190,11 @@ combineVectorTruncationWithPACKUS(SDNode
// First, use mask to unset all bits that won't appear in the result.
assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) &&
"OutSVT can only be either i8 or i16.");
- SDValue MaskVal =
- DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT);
- SDValue MaskVec = DAG.getNode(
- ISD::BUILD_VECTOR, DL, InVT,
- SmallVector<SDValue, 8>(InVT.getVectorNumElements(), MaskVal));
+ APInt Mask =
+ APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
+ SDValue MaskVal = DAG.getConstant(Mask, DL, InVT);
for (auto &Reg : Regs)
- Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg);
+ Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVal, Reg);
MVT UnpackedVT, PackedVT;
if (OutSVT == MVT::i8) {
@@ -27300,17 +27298,14 @@ static SDValue combineVectorTruncation(S
// Split a long vector into vectors of legal type.
unsigned RegNum = InVT.getSizeInBits() / 128;
SmallVector<SDValue, 8> SubVec(RegNum);
- if (InSVT == MVT::i32) {
- for (unsigned i = 0; i < RegNum; i++)
- SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(i * 4, DL));
- } else {
- for (unsigned i = 0; i < RegNum; i++)
- SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
- DAG.getIntPtrConstant(i * 2, DL));
- }
+ unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
+ EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
+
+ for (unsigned i = 0; i < RegNum; i++)
+ SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
+ DAG.getIntPtrConstant(i * NumSubRegElts, DL));
- // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PAKCUS
+ // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
// for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
// truncate 2 x v4i32 to v8i16.
if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
More information about the llvm-commits mailing list