[llvm] r259995 - [X86][SSE] Refactored PMOVZX shuffle decoding to use scalar input types
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 6 08:33:42 PST 2016
Author: rksimon
Date: Sat Feb 6 10:33:42 2016
New Revision: 259995
URL: http://llvm.org/viewvc/llvm-project?rev=259995&view=rev
Log:
[X86][SSE] Refactored PMOVZX shuffle decoding to use scalar input types
First step towards being able to decode AVX512 PMOVZX instructions without massive bloat in the shuffle decode switch statement.
This should also make it easier to decode X86ISD::VZEXT target shuffles in the future.
Modified:
llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original)
+++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Sat Feb 6 10:33:42 2016
@@ -41,89 +41,53 @@ static MVT getRegOperandVectorVT(const M
getVectorRegSize(OpReg)/ScalarVT.getSizeInBits());
}
-/// \brief Extracts the src/dst types for a given zero extension instruction.
-/// \note While the number of elements in DstVT type correct, the
-/// number in the SrcVT type is expanded to fill the src xmm register and the
-/// upper elements may not be included in the dst xmm/ymm register.
-static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
+/// \brief Extracts the dst type for a given zero extension instruction.
+static MVT getZeroExtensionResultType(const MCInst *MI) {
switch (MI->getOpcode()) {
default:
llvm_unreachable("Unknown zero extension instruction");
- // i8 zero extension
+ // zero extension to i16
case X86::PMOVZXBWrm:
case X86::PMOVZXBWrr:
case X86::VPMOVZXBWrm:
case X86::VPMOVZXBWrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v8i16;
- break;
case X86::VPMOVZXBWYrm:
case X86::VPMOVZXBWYrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v16i16;
- break;
+ return getRegOperandVectorVT(MI, MVT::i16, 0);
+ // zero extension to i32
case X86::PMOVZXBDrm:
case X86::PMOVZXBDrr:
case X86::VPMOVZXBDrm:
case X86::VPMOVZXBDrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v4i32;
- break;
case X86::VPMOVZXBDYrm:
case X86::VPMOVZXBDYrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v8i32;
- break;
- case X86::PMOVZXBQrm:
- case X86::PMOVZXBQrr:
- case X86::VPMOVZXBQrm:
- case X86::VPMOVZXBQrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v2i64;
- break;
- case X86::VPMOVZXBQYrm:
- case X86::VPMOVZXBQYrr:
- SrcVT = MVT::v16i8;
- DstVT = MVT::v4i64;
- break;
- // i16 zero extension
case X86::PMOVZXWDrm:
case X86::PMOVZXWDrr:
case X86::VPMOVZXWDrm:
case X86::VPMOVZXWDrr:
- SrcVT = MVT::v8i16;
- DstVT = MVT::v4i32;
- break;
case X86::VPMOVZXWDYrm:
case X86::VPMOVZXWDYrr:
- SrcVT = MVT::v8i16;
- DstVT = MVT::v8i32;
- break;
+ return getRegOperandVectorVT(MI, MVT::i32, 0);
+ // zero extension to i64
+ case X86::PMOVZXBQrm:
+ case X86::PMOVZXBQrr:
+ case X86::VPMOVZXBQrm:
+ case X86::VPMOVZXBQrr:
+ case X86::VPMOVZXBQYrm:
+ case X86::VPMOVZXBQYrr:
case X86::PMOVZXWQrm:
case X86::PMOVZXWQrr:
case X86::VPMOVZXWQrm:
case X86::VPMOVZXWQrr:
- SrcVT = MVT::v8i16;
- DstVT = MVT::v2i64;
- break;
case X86::VPMOVZXWQYrm:
case X86::VPMOVZXWQYrr:
- SrcVT = MVT::v8i16;
- DstVT = MVT::v4i64;
- break;
- // i32 zero extension
case X86::PMOVZXDQrm:
case X86::PMOVZXDQrr:
case X86::VPMOVZXDQrm:
case X86::VPMOVZXDQrr:
- SrcVT = MVT::v4i32;
- DstVT = MVT::v2i64;
- break;
case X86::VPMOVZXDQYrm:
case X86::VPMOVZXDQYrr:
- SrcVT = MVT::v4i32;
- DstVT = MVT::v4i64;
- break;
+ return getRegOperandVectorVT(MI, MVT::i64, 0);
}
}
@@ -728,46 +692,56 @@ bool llvm::EmitAnyX86InstComments(const
case X86::PMOVZXBWrr:
case X86::PMOVZXBDrr:
case X86::PMOVZXBQrr:
- case X86::PMOVZXWDrr:
- case X86::PMOVZXWQrr:
- case X86::PMOVZXDQrr:
case X86::VPMOVZXBWrr:
case X86::VPMOVZXBDrr:
case X86::VPMOVZXBQrr:
- case X86::VPMOVZXWDrr:
- case X86::VPMOVZXWQrr:
- case X86::VPMOVZXDQrr:
case X86::VPMOVZXBWYrr:
case X86::VPMOVZXBDYrr:
case X86::VPMOVZXBQYrr:
- case X86::VPMOVZXWDYrr:
- case X86::VPMOVZXWQYrr:
- case X86::VPMOVZXDQYrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PMOVZXBWrm:
case X86::PMOVZXBDrm:
case X86::PMOVZXBQrm:
- case X86::PMOVZXWDrm:
- case X86::PMOVZXWQrm:
- case X86::PMOVZXDQrm:
case X86::VPMOVZXBWrm:
case X86::VPMOVZXBDrm:
case X86::VPMOVZXBQrm:
- case X86::VPMOVZXWDrm:
- case X86::VPMOVZXWQrm:
- case X86::VPMOVZXDQrm:
case X86::VPMOVZXBWYrm:
case X86::VPMOVZXBDYrm:
case X86::VPMOVZXBQYrm:
+ DecodeZeroExtendMask(MVT::i8, getZeroExtensionResultType(MI), ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PMOVZXWDrr:
+ case X86::PMOVZXWQrr:
+ case X86::VPMOVZXWDrr:
+ case X86::VPMOVZXWQrr:
+ case X86::VPMOVZXWDYrr:
+ case X86::VPMOVZXWQYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PMOVZXWDrm:
+ case X86::PMOVZXWQrm:
+ case X86::VPMOVZXWDrm:
+ case X86::VPMOVZXWQrm:
case X86::VPMOVZXWDYrm:
case X86::VPMOVZXWQYrm:
- case X86::VPMOVZXDQYrm: {
- MVT SrcVT, DstVT;
- getZeroExtensionTypes(MI, SrcVT, DstVT);
- DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
+ DecodeZeroExtendMask(MVT::i16, getZeroExtensionResultType(MI), ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::PMOVZXDQrr:
+ case X86::VPMOVZXDQrr:
+ case X86::VPMOVZXDQYrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PMOVZXDQrm:
+ case X86::VPMOVZXDQrm:
+ case X86::VPMOVZXDQYrm:
+ DecodeZeroExtendMask(MVT::i32, getZeroExtensionResultType(MI), ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
- } break;
+ break;
}
// The only comments we decode are shuffles, so give up if we were unable to
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Sat Feb 6 10:33:42 2016
@@ -339,15 +339,13 @@ void DecodeVPERMMask(unsigned Imm, Small
}
}
-void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
unsigned NumDstElts = DstVT.getVectorNumElements();
- unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+ unsigned SrcScalarBits = SrcScalarVT.getSizeInBits();
unsigned DstScalarBits = DstVT.getScalarSizeInBits();
unsigned Scale = DstScalarBits / SrcScalarBits;
assert(SrcScalarBits < DstScalarBits &&
"Expected zero extension mask to increase scalar size");
- assert(SrcVT.getVectorNumElements() >= NumDstElts &&
- "Too many zero extension lanes");
for (unsigned i = 0; i != NumDstElts; i++) {
Mask.push_back(i);
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Sat Feb 6 10:33:42 2016
@@ -92,7 +92,7 @@ void decodeVSHUF64x2FamilyMask(MVT VT, u
void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a zero extension instruction as a shuffle mask.
-void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
+void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT,
SmallVectorImpl<int> &ShuffleMask);
/// \brief Decode a move lower and zero upper instruction as a shuffle mask.
More information about the llvm-commits
mailing list