[llvm] r265874 - [X86][XOP] Support for VPPERM 2-input shuffle mask decoding

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 9 07:51:26 PDT 2016


Author: rksimon
Date: Sat Apr  9 09:51:26 2016
New Revision: 265874

URL: http://llvm.org/viewvc/llvm-project?rev=265874&view=rev
Log:
[X86][XOP] Support for VPPERM 2-input shuffle mask decoding

This patch adds support for decoding XOP VPPERM instruction when it represents a basic shuffle.

The mask decoding required the existing MCInstrLowering code to be updated to support binary shuffles - the implementation now matches what is done in X86InstrComments.cpp.

Differential Revision: http://reviews.llvm.org/D18441

Modified:
    llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
    llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
    llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Sat Apr  9 09:51:26 2016
@@ -1018,7 +1018,8 @@ static const Constant *getConstantFromPo
 }
 
 static std::string getShuffleComment(const MachineOperand &DstOp,
-                                     const MachineOperand &SrcOp,
+                                     const MachineOperand &SrcOp1,
+                                     const MachineOperand &SrcOp2,
                                      ArrayRef<int> Mask) {
   std::string Comment;
 
@@ -1032,39 +1033,49 @@ static std::string getShuffleComment(con
   };
 
   StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
-  StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+  StringRef Src1Name =
+      SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
+  StringRef Src2Name =
+      SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
+
+  // One source operand, fix the mask to print all elements in one span.
+  SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
+  if (Src1Name == Src2Name)
+    for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
+      if (ShuffleMask[i] >= e)
+        ShuffleMask[i] -= e;
 
   raw_string_ostream CS(Comment);
   CS << DstName << " = ";
-  bool NeedComma = false;
-  bool InSrc = false;
-  for (int M : Mask) {
-    // Wrap up any prior entry...
-    if (M == SM_SentinelZero && InSrc) {
-      InSrc = false;
-      CS << "]";
-    }
-    if (NeedComma)
+  for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
+    if (i != 0)
       CS << ",";
-    else
-      NeedComma = true;
-
-    // Print this shuffle...
-    if (M == SM_SentinelZero) {
+    if (ShuffleMask[i] == SM_SentinelZero) {
       CS << "zero";
-    } else {
-      if (!InSrc) {
-        InSrc = true;
-        CS << SrcName << "[";
-      }
-      if (M == SM_SentinelUndef)
+      continue;
+    }
+
+    // Otherwise, it must come from src1 or src2.  Print the span of elements
+    // that comes from this src.
+    bool isSrc1 = ShuffleMask[i] < (int)e;
+    CS << (isSrc1 ? Src1Name : Src2Name) << '[';
+
+    bool IsFirst = true;
+    while (i != e && ShuffleMask[i] != SM_SentinelZero &&
+           (ShuffleMask[i] < (int)e) == isSrc1) {
+      if (!IsFirst)
+        CS << ',';
+      else
+        IsFirst = false;
+      if (ShuffleMask[i] == SM_SentinelUndef)
         CS << "u";
       else
-        CS << M;
+        CS << ShuffleMask[i] % (int)e;
+      ++i;
     }
+    CS << ']';
+    --i; // For loop increments element #.
   }
-  if (InSrc)
-    CS << "]";
   CS.flush();
 
   return Comment;
@@ -1313,7 +1324,7 @@ void X86AsmPrinter::EmitInstruction(cons
       SmallVector<int, 16> Mask;
       DecodePSHUFBMask(C, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
     }
     break;
   }
@@ -1340,7 +1351,25 @@ void X86AsmPrinter::EmitInstruction(cons
       SmallVector<int, 16> Mask;
       DecodeVPERMILPMask(C, ElSize, Mask);
       if (!Mask.empty())
-        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+    }
+    break;
+  }
+  case X86::VPPERMrrm: {
+    if (!OutStreamer->isVerboseAsm())
+      break;
+    assert(MI->getNumOperands() > 6 &&
+           "We should always have at least 6 operands!");
+    const MachineOperand &DstOp = MI->getOperand(0);
+    const MachineOperand &SrcOp1 = MI->getOperand(1);
+    const MachineOperand &SrcOp2 = MI->getOperand(2);
+    const MachineOperand &MaskOp = MI->getOperand(6);
+
+    if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+      SmallVector<int, 16> Mask;
+      DecodeVPPERMMask(C, Mask);
+      if (!Mask.empty())
+        OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
     }
     break;
   }

Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp Sat Apr  9 09:51:26 2016
@@ -153,6 +153,74 @@ void DecodeVPERMILPMask(const Constant *
   // TODO: Handle funny-looking vectors too.
 }
 
+void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+  Type *MaskTy = C->getType();
+  assert(MaskTy->getPrimitiveSizeInBits() == 128);
+
+  // Only support vector types.
+  if (!MaskTy->isVectorTy())
+    return;
+
+  // Make sure its an integer type.
+  Type *VecEltTy = MaskTy->getVectorElementType();
+  if (!VecEltTy->isIntegerTy())
+    return;
+
+  // The shuffle mask requires a byte vector - decode cases with
+  // wider elements as well.
+  unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth();
+  if ((BitWidth % 8) != 0)
+    return;
+
+  int NumElts = MaskTy->getVectorNumElements();
+  int Scale = BitWidth / 8;
+  int NumBytes = NumElts * Scale;
+  ShuffleMask.reserve(NumBytes);
+
+  for (int i = 0; i != NumElts; ++i) {
+    Constant *COp = C->getAggregateElement(i);
+    if (!COp) {
+      ShuffleMask.clear();
+      return;
+    } else if (isa<UndefValue>(COp)) {
+      ShuffleMask.append(Scale, SM_SentinelUndef);
+      continue;
+    }
+
+    // VPPERM Operation
+    // Bits[4:0] - Byte Index (0 - 31)
+    // Bits[7:5] - Permute Operation
+    //
+    // Permute Operation:
+    // 0 - Source byte (no logical operation).
+    // 1 - Invert source byte.
+    // 2 - Bit reverse of source byte.
+    // 3 - Bit reverse of inverted source byte.
+    // 4 - 00h (zero - fill).
+    // 5 - FFh (ones - fill).
+    // 6 - Most significant bit of source byte replicated in all bit positions.
+    // 7 - Invert most significant bit of source byte and replicate in all bit positions.
+    APInt MaskElt = cast<ConstantInt>(COp)->getValue();
+    for (int j = 0; j != Scale; ++j) {
+      APInt Index = MaskElt.getLoBits(5);
+      APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
+      MaskElt = MaskElt.lshr(8);
+
+      if (PermuteOp == 4) {
+        ShuffleMask.push_back(SM_SentinelZero);
+        continue;
+      }
+      if (PermuteOp != 0) {
+        ShuffleMask.clear();
+        return;
+      }
+      ShuffleMask.push_back((int)Index.getZExtValue());
+    }
+  }
+
+  assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
+}
+
 void DecodeVPERMVMask(const Constant *C, MVT VT,
                       SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();

Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h Sat Apr  9 09:51:26 2016
@@ -32,6 +32,9 @@ void DecodePSHUFBMask(const Constant *C,
 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
                         SmallVectorImpl<int> &ShuffleMask);
 
+/// Decode a VPPERM variable mask from an IR-level vector constant.
+void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
 /// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
 void DecodeVPERMVMask(const Constant *C, MVT VT,
                       SmallVectorImpl<int> &ShuffleMask);

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sat Apr  9 09:51:26 2016
@@ -13,8 +13,8 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<
 define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_identity:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
   %res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
@@ -24,7 +24,7 @@ define <16 x i8> @combine_vpperm_identit
 define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
   ret <16 x i8> %res0
@@ -33,7 +33,7 @@ define <16 x i8> @combine_vpperm_as_unar
 define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK-LABEL: combine_vpperm_as_unpckhwd:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
 ; CHECK-NEXT:    retq
   %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
   ret <16 x i8> %res0




More information about the llvm-commits mailing list