[llvm] r259995 - [X86][SSE] Refactored PMOVZX shuffle decoding to use scalar input types

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 6 08:33:42 PST 2016


Author: rksimon
Date: Sat Feb  6 10:33:42 2016
New Revision: 259995

URL: http://llvm.org/viewvc/llvm-project?rev=259995&view=rev
Log:
[X86][SSE] Refactored PMOVZX shuffle decoding to use scalar input types

First step towards being able to decode AVX512 PMOVZX instructions without massively bloating the shuffle decode switch statement.

This should also make it easier to decode X86ISD::VZEXT target shuffles in the future.

Modified:
    llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
    llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h

Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original)
+++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Sat Feb  6 10:33:42 2016
@@ -41,89 +41,53 @@ static MVT getRegOperandVectorVT(const M
                           getVectorRegSize(OpReg)/ScalarVT.getSizeInBits());
 }
 
-/// \brief Extracts the src/dst types for a given zero extension instruction.
-/// \note While the number of elements in DstVT type correct, the
-/// number in the SrcVT type is expanded to fill the src xmm register and the
-/// upper elements may not be included in the dst xmm/ymm register.
-static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) {
+/// \brief Extracts the dst type for a given zero extension instruction.
+static MVT getZeroExtensionResultType(const MCInst *MI) {
   switch (MI->getOpcode()) {
   default:
     llvm_unreachable("Unknown zero extension instruction");
-  // i8 zero extension
+  // zero extension to i16
   case X86::PMOVZXBWrm:
   case X86::PMOVZXBWrr:
   case X86::VPMOVZXBWrm:
   case X86::VPMOVZXBWrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v8i16;
-    break;
   case X86::VPMOVZXBWYrm:
   case X86::VPMOVZXBWYrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v16i16;
-    break;
+    return getRegOperandVectorVT(MI, MVT::i16, 0);
+  // zero extension to i32
   case X86::PMOVZXBDrm:
   case X86::PMOVZXBDrr:
   case X86::VPMOVZXBDrm:
   case X86::VPMOVZXBDrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v4i32;
-    break;
   case X86::VPMOVZXBDYrm:
   case X86::VPMOVZXBDYrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v8i32;
-    break;
-  case X86::PMOVZXBQrm:
-  case X86::PMOVZXBQrr:
-  case X86::VPMOVZXBQrm:
-  case X86::VPMOVZXBQrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v2i64;
-    break;
-  case X86::VPMOVZXBQYrm:
-  case X86::VPMOVZXBQYrr:
-    SrcVT = MVT::v16i8;
-    DstVT = MVT::v4i64;
-    break;
-  // i16 zero extension
   case X86::PMOVZXWDrm:
   case X86::PMOVZXWDrr:
   case X86::VPMOVZXWDrm:
   case X86::VPMOVZXWDrr:
-    SrcVT = MVT::v8i16;
-    DstVT = MVT::v4i32;
-    break;
   case X86::VPMOVZXWDYrm:
   case X86::VPMOVZXWDYrr:
-    SrcVT = MVT::v8i16;
-    DstVT = MVT::v8i32;
-    break;
+    return getRegOperandVectorVT(MI, MVT::i32, 0);
+  // zero extension to i64
+  case X86::PMOVZXBQrm:
+  case X86::PMOVZXBQrr:
+  case X86::VPMOVZXBQrm:
+  case X86::VPMOVZXBQrr:
+  case X86::VPMOVZXBQYrm:
+  case X86::VPMOVZXBQYrr:
   case X86::PMOVZXWQrm:
   case X86::PMOVZXWQrr:
   case X86::VPMOVZXWQrm:
   case X86::VPMOVZXWQrr:
-    SrcVT = MVT::v8i16;
-    DstVT = MVT::v2i64;
-    break;
   case X86::VPMOVZXWQYrm:
   case X86::VPMOVZXWQYrr:
-    SrcVT = MVT::v8i16;
-    DstVT = MVT::v4i64;
-    break;
-  // i32 zero extension
   case X86::PMOVZXDQrm:
   case X86::PMOVZXDQrr:
   case X86::VPMOVZXDQrm:
   case X86::VPMOVZXDQrr:
-    SrcVT = MVT::v4i32;
-    DstVT = MVT::v2i64;
-    break;
   case X86::VPMOVZXDQYrm:
   case X86::VPMOVZXDQYrr:
-    SrcVT = MVT::v4i32;
-    DstVT = MVT::v4i64;
-    break;
+    return getRegOperandVectorVT(MI, MVT::i64, 0);
   }
 }
 
@@ -728,46 +692,56 @@ bool llvm::EmitAnyX86InstComments(const
   case X86::PMOVZXBWrr:
   case X86::PMOVZXBDrr:
   case X86::PMOVZXBQrr:
-  case X86::PMOVZXWDrr:
-  case X86::PMOVZXWQrr:
-  case X86::PMOVZXDQrr:
   case X86::VPMOVZXBWrr:
   case X86::VPMOVZXBDrr:
   case X86::VPMOVZXBQrr:
-  case X86::VPMOVZXWDrr:
-  case X86::VPMOVZXWQrr:
-  case X86::VPMOVZXDQrr:
   case X86::VPMOVZXBWYrr:
   case X86::VPMOVZXBDYrr:
   case X86::VPMOVZXBQYrr:
-  case X86::VPMOVZXWDYrr:
-  case X86::VPMOVZXWQYrr:
-  case X86::VPMOVZXDQYrr:
     Src1Name = getRegName(MI->getOperand(1).getReg());
   // FALL THROUGH.
   case X86::PMOVZXBWrm:
   case X86::PMOVZXBDrm:
   case X86::PMOVZXBQrm:
-  case X86::PMOVZXWDrm:
-  case X86::PMOVZXWQrm:
-  case X86::PMOVZXDQrm:
   case X86::VPMOVZXBWrm:
   case X86::VPMOVZXBDrm:
   case X86::VPMOVZXBQrm:
-  case X86::VPMOVZXWDrm:
-  case X86::VPMOVZXWQrm:
-  case X86::VPMOVZXDQrm:
   case X86::VPMOVZXBWYrm:
   case X86::VPMOVZXBDYrm:
   case X86::VPMOVZXBQYrm:
+    DecodeZeroExtendMask(MVT::i8, getZeroExtensionResultType(MI), ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::PMOVZXWDrr:
+  case X86::PMOVZXWQrr:
+  case X86::VPMOVZXWDrr:
+  case X86::VPMOVZXWQrr:
+  case X86::VPMOVZXWDYrr:
+  case X86::VPMOVZXWQYrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+  // FALL THROUGH.
+  case X86::PMOVZXWDrm:
+  case X86::PMOVZXWQrm:
+  case X86::VPMOVZXWDrm:
+  case X86::VPMOVZXWQrm:
   case X86::VPMOVZXWDYrm:
   case X86::VPMOVZXWQYrm:
-  case X86::VPMOVZXDQYrm: {
-    MVT SrcVT, DstVT;
-    getZeroExtensionTypes(MI, SrcVT, DstVT);
-    DecodeZeroExtendMask(SrcVT, DstVT, ShuffleMask);
+    DecodeZeroExtendMask(MVT::i16, getZeroExtensionResultType(MI), ShuffleMask);
+    DestName = getRegName(MI->getOperand(0).getReg());
+    break;
+
+  case X86::PMOVZXDQrr:
+  case X86::VPMOVZXDQrr:
+  case X86::VPMOVZXDQYrr:
+    Src1Name = getRegName(MI->getOperand(1).getReg());
+  // FALL THROUGH.
+  case X86::PMOVZXDQrm:
+  case X86::VPMOVZXDQrm:
+  case X86::VPMOVZXDQYrm:
+    DecodeZeroExtendMask(MVT::i32, getZeroExtensionResultType(MI), ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
-  } break;
+    break;
   }
 
   // The only comments we decode are shuffles, so give up if we were unable to

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Sat Feb  6 10:33:42 2016
@@ -339,15 +339,13 @@ void DecodeVPERMMask(unsigned Imm, Small
   }
 }
 
-void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
+void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
   unsigned NumDstElts = DstVT.getVectorNumElements();
-  unsigned SrcScalarBits = SrcVT.getScalarSizeInBits();
+  unsigned SrcScalarBits = SrcScalarVT.getSizeInBits();
   unsigned DstScalarBits = DstVT.getScalarSizeInBits();
   unsigned Scale = DstScalarBits / SrcScalarBits;
   assert(SrcScalarBits < DstScalarBits &&
          "Expected zero extension mask to increase scalar size");
-  assert(SrcVT.getVectorNumElements() >= NumDstElts &&
-         "Too many zero extension lanes");
 
   for (unsigned i = 0; i != NumDstElts; i++) {
     Mask.push_back(i);

Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=259995&r1=259994&r2=259995&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Sat Feb  6 10:33:42 2016
@@ -92,7 +92,7 @@ void decodeVSHUF64x2FamilyMask(MVT VT, u
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a zero extension instruction as a shuffle mask.
-void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
+void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT,
                           SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a move lower and zero upper instruction as a shuffle mask.




More information about the llvm-commits mailing list