[llvm] r265874 - [X86][XOP] Support for VPPERM 2-input shuffle mask decoding
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 9 07:51:26 PDT 2016
Author: rksimon
Date: Sat Apr 9 09:51:26 2016
New Revision: 265874
URL: http://llvm.org/viewvc/llvm-project?rev=265874&view=rev
Log:
[X86][XOP] Support for VPPERM 2-input shuffle mask decoding
This patch adds support for decoding XOP VPPERM instruction when it represents a basic shuffle.
The mask decoding required the existing MCInstrLowering code to be updated to support binary shuffles - the implementation now matches what is done in X86InstrComments.cpp.
Differential Revision: http://reviews.llvm.org/D18441
Modified:
llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
Modified: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86MCInstLower.cpp?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp Sat Apr 9 09:51:26 2016
@@ -1018,7 +1018,8 @@ static const Constant *getConstantFromPo
}
static std::string getShuffleComment(const MachineOperand &DstOp,
- const MachineOperand &SrcOp,
+ const MachineOperand &SrcOp1,
+ const MachineOperand &SrcOp2,
ArrayRef<int> Mask) {
std::string Comment;
@@ -1032,39 +1033,49 @@ static std::string getShuffleComment(con
};
StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
- StringRef SrcName = SrcOp.isReg() ? GetRegisterName(SrcOp.getReg()) : "mem";
+ StringRef Src1Name =
+ SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
+ StringRef Src2Name =
+ SrcOp2.isReg() ? GetRegisterName(SrcOp2.getReg()) : "mem";
+
+ // One source operand, fix the mask to print all elements in one span.
+ SmallVector<int, 8> ShuffleMask(Mask.begin(), Mask.end());
+ if (Src1Name == Src2Name)
+ for (int i = 0, e = ShuffleMask.size(); i != e; ++i)
+ if (ShuffleMask[i] >= e)
+ ShuffleMask[i] -= e;
raw_string_ostream CS(Comment);
CS << DstName << " = ";
- bool NeedComma = false;
- bool InSrc = false;
- for (int M : Mask) {
- // Wrap up any prior entry...
- if (M == SM_SentinelZero && InSrc) {
- InSrc = false;
- CS << "]";
- }
- if (NeedComma)
+ for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
CS << ",";
- else
- NeedComma = true;
-
- // Print this shuffle...
- if (M == SM_SentinelZero) {
+ if (ShuffleMask[i] == SM_SentinelZero) {
CS << "zero";
- } else {
- if (!InSrc) {
- InSrc = true;
- CS << SrcName << "[";
- }
- if (M == SM_SentinelUndef)
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < (int)e;
+ CS << (isSrc1 ? Src1Name : Src2Name) << '[';
+
+ bool IsFirst = true;
+ while (i != e && ShuffleMask[i] != SM_SentinelZero &&
+ (ShuffleMask[i] < (int)e) == isSrc1) {
+ if (!IsFirst)
+ CS << ',';
+ else
+ IsFirst = false;
+ if (ShuffleMask[i] == SM_SentinelUndef)
CS << "u";
else
- CS << M;
+ CS << ShuffleMask[i] % (int)e;
+ ++i;
}
+ CS << ']';
+ --i; // For loop increments element #.
}
- if (InSrc)
- CS << "]";
CS.flush();
return Comment;
@@ -1313,7 +1324,7 @@ void X86AsmPrinter::EmitInstruction(cons
SmallVector<int, 16> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
}
break;
}
@@ -1340,7 +1351,25 @@ void X86AsmPrinter::EmitInstruction(cons
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ }
+ break;
+ }
+ case X86::VPPERMrrm: {
+ if (!OutStreamer->isVerboseAsm())
+ break;
+ assert(MI->getNumOperands() > 6 &&
+ "We should always have at least 6 operands!");
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp1 = MI->getOperand(1);
+ const MachineOperand &SrcOp2 = MI->getOperand(2);
+ const MachineOperand &MaskOp = MI->getOperand(6);
+
+ if (auto *C = getConstantFromPool(*MI, MaskOp)) {
+ SmallVector<int, 16> Mask;
+ DecodeVPPERMMask(C, Mask);
+ if (!Mask.empty())
+ OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
}
break;
}
Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp Sat Apr 9 09:51:26 2016
@@ -153,6 +153,74 @@ void DecodeVPERMILPMask(const Constant *
// TODO: Handle funny-looking vectors too.
}
+void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ assert(MaskTy->getPrimitiveSizeInBits() == 128);
+
+ // Only support vector types.
+ if (!MaskTy->isVectorTy())
+ return;
+
+ // Make sure its an integer type.
+ Type *VecEltTy = MaskTy->getVectorElementType();
+ if (!VecEltTy->isIntegerTy())
+ return;
+
+ // The shuffle mask requires a byte vector - decode cases with
+ // wider elements as well.
+ unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth();
+ if ((BitWidth % 8) != 0)
+ return;
+
+ int NumElts = MaskTy->getVectorNumElements();
+ int Scale = BitWidth / 8;
+ int NumBytes = NumElts * Scale;
+ ShuffleMask.reserve(NumBytes);
+
+ for (int i = 0; i != NumElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp) {
+ ShuffleMask.clear();
+ return;
+ } else if (isa<UndefValue>(COp)) {
+ ShuffleMask.append(Scale, SM_SentinelUndef);
+ continue;
+ }
+
+ // VPPERM Operation
+ // Bits[4:0] - Byte Index (0 - 31)
+ // Bits[7:5] - Permute Operation
+ //
+ // Permute Operation:
+ // 0 - Source byte (no logical operation).
+ // 1 - Invert source byte.
+ // 2 - Bit reverse of source byte.
+ // 3 - Bit reverse of inverted source byte.
+ // 4 - 00h (zero - fill).
+ // 5 - FFh (ones - fill).
+ // 6 - Most significant bit of source byte replicated in all bit positions.
+ // 7 - Invert most significant bit of source byte and replicate in all bit positions.
+ APInt MaskElt = cast<ConstantInt>(COp)->getValue();
+ for (int j = 0; j != Scale; ++j) {
+ APInt Index = MaskElt.getLoBits(5);
+ APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
+ MaskElt = MaskElt.lshr(8);
+
+ if (PermuteOp == 4) {
+ ShuffleMask.push_back(SM_SentinelZero);
+ continue;
+ }
+ if (PermuteOp != 0) {
+ ShuffleMask.clear();
+ return;
+ }
+ ShuffleMask.push_back((int)Index.getZExtValue());
+ }
+ }
+
+ assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
+}
+
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
Modified: llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h (original)
+++ llvm/trunk/lib/Target/X86/X86ShuffleDecodeConstantPool.h Sat Apr 9 09:51:26 2016
@@ -32,6 +32,9 @@ void DecodePSHUFBMask(const Constant *C,
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask);
+/// Decode a VPPERM variable mask from an IR-level vector constant.
+void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
void DecodeVPERMVMask(const Constant *C, MVT VT,
SmallVectorImpl<int> &ShuffleMask);
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=265874&r1=265873&r2=265874&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sat Apr 9 09:51:26 2016
@@ -13,8 +13,8 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<
define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_identity:
; CHECK: # BB#0:
-; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm1[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
+; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16>)
%res1 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %res0, <16 x i8> undef, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
@@ -24,7 +24,7 @@ define <16 x i8> @combine_vpperm_identit
define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
; CHECK: # BB#0:
-; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
ret <16 x i8> %res0
@@ -33,7 +33,7 @@ define <16 x i8> @combine_vpperm_as_unar
define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: combine_vpperm_as_unpckhwd:
; CHECK: # BB#0:
-; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpperm {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
; CHECK-NEXT: retq
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
ret <16 x i8> %res0
More information about the llvm-commits
mailing list