[llvm] r307095 - [X86][SSE4A] Generalized EXTRQI/INSERTQI shuffle decodes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 4 09:53:12 PDT 2017
Author: rksimon
Date: Tue Jul 4 09:53:12 2017
New Revision: 307095
URL: http://llvm.org/viewvc/llvm-project?rev=307095&view=rev
Log:
[X86][SSE4A] Generalized EXTRQI/INSERTQI shuffle decodes
The existing decodes only worked for v16i8 vectors; this adds support for any 128-bit vector.
Modified:
llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp?rev=307095&r1=307094&r2=307095&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp (original)
+++ llvm/trunk/lib/Target/X86/InstPrinter/X86InstComments.cpp Tue Jul 4 09:53:12 2017
@@ -1038,7 +1038,7 @@ bool llvm::EmitAnyX86InstComments(const
case X86::EXTRQI:
if (MI->getOperand(2).isImm() &&
MI->getOperand(3).isImm())
- DecodeEXTRQIMask(MI->getOperand(2).getImm(),
+ DecodeEXTRQIMask(MVT::v16i8, MI->getOperand(2).getImm(),
MI->getOperand(3).getImm(),
ShuffleMask);
@@ -1049,7 +1049,7 @@ bool llvm::EmitAnyX86InstComments(const
case X86::INSERTQI:
if (MI->getOperand(3).isImm() &&
MI->getOperand(4).isImm())
- DecodeINSERTQIMask(MI->getOperand(3).getImm(),
+ DecodeINSERTQIMask(MVT::v16i8, MI->getOperand(3).getImm(),
MI->getOperand(4).getImm(),
ShuffleMask);
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=307095&r1=307094&r2=307095&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Tue Jul 4 09:53:12 2017
@@ -452,15 +452,20 @@ void DecodeScalarMoveMask(MVT VT, bool I
Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}
-void DecodeEXTRQIMask(int Len, int Idx,
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit extraction instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -469,33 +474,38 @@ void DecodeEXTRQIMask(int Len, int Idx,
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
- // of the lower 64-bits. The upper 64-bits are undefined.
+ // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
+ // elements of the lower 64-bits. The upper 64-bits are undefined.
for (int i = 0; i != Len; ++i)
ShuffleMask.push_back(i + Idx);
- for (int i = Len; i != 8; ++i)
+ for (int i = Len; i != HalfElts; ++i)
ShuffleMask.push_back(SM_SentinelZero);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
-void DecodeINSERTQIMask(int Len, int Idx,
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask) {
+ assert(VT.is128BitVector() && "Expected 128-bit vector");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned HalfElts = NumElts / 2;
+
// Only the bottom 6 bits are valid for each immediate.
Len &= 0x3F;
Idx &= 0x3F;
// We can only decode this bit insertion instruction as a shuffle if both the
- // length and index work with whole bytes.
- if (0 != (Len % 8) || 0 != (Idx % 8))
+ // length and index work with whole elements.
+ if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
return;
// A length of zero is equivalent to a bit length of 64.
@@ -504,24 +514,24 @@ void DecodeINSERTQIMask(int Len, int Idx
// If the length + index exceeds the bottom 64 bits the result is undefined.
if ((Len + Idx) > 64) {
- ShuffleMask.append(16, SM_SentinelUndef);
+ ShuffleMask.append(NumElts, SM_SentinelUndef);
return;
}
- // Convert index and index to work with bytes.
- Len /= 8;
- Idx /= 8;
+ // Convert length and index to work with elements.
+ Len /= EltSize;
+ Idx /= EltSize;
- // INSERTQ: Extract lowest Len bytes from lower half of second source and
- // insert over first source starting at Idx byte. The upper 64-bits are
+ // INSERTQ: Extract lowest Len elements from lower half of second source and
+ // insert over first source starting at Idx element. The upper 64-bits are
// undefined.
for (int i = 0; i != Idx; ++i)
ShuffleMask.push_back(i);
for (int i = 0; i != Len; ++i)
- ShuffleMask.push_back(i + 16);
- for (int i = Idx + Len; i != 8; ++i)
+ ShuffleMask.push_back(i + NumElts);
+ for (int i = Idx + Len; i != HalfElts; ++i)
ShuffleMask.push_back(i);
- for (int i = 8; i != 16; ++i)
+ for (int i = HalfElts; i != NumElts; ++i)
ShuffleMask.push_back(SM_SentinelUndef);
}
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=307095&r1=307094&r2=307095&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Tue Jul 4 09:53:12 2017
@@ -134,12 +134,12 @@ void DecodeZeroMoveLowMask(MVT VT, Small
void DecodeScalarMoveMask(MVT VT, bool IsLoad,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A EXTRQ instruction as a v16i8 shuffle mask.
-void DecodeEXTRQIMask(int Len, int Idx,
+/// Decode a SSE4A EXTRQ instruction as a shuffle mask.
+void DecodeEXTRQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
-/// Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
-void DecodeINSERTQIMask(int Len, int Idx,
+/// Decode a SSE4A INSERTQ instruction as a shuffle mask.
+void DecodeINSERTQIMask(MVT VT, int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMILPD/VPERMILPS variable mask from a raw array of constants.
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=307095&r1=307094&r2=307095&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 4 09:53:12 2017
@@ -5561,7 +5561,7 @@ static bool getTargetShuffleMask(SDNode
isa<ConstantSDNode>(N->getOperand(2))) {
int BitLen = N->getConstantOperandVal(1);
int BitIdx = N->getConstantOperandVal(2);
- DecodeEXTRQIMask(BitLen, BitIdx, Mask);
+ DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = true;
}
break;
@@ -5570,7 +5570,7 @@ static bool getTargetShuffleMask(SDNode
isa<ConstantSDNode>(N->getOperand(3))) {
int BitLen = N->getConstantOperandVal(2);
int BitIdx = N->getConstantOperandVal(3);
- DecodeINSERTQIMask(BitLen, BitIdx, Mask);
+ DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
}
break;
More information about the llvm-commits
mailing list