[llvm] r262784 - [X86][AVX] Improved VPERMILPS variable shuffle mask decoding.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 5 14:53:32 PST 2016
Author: rksimon
Date: Sat Mar 5 16:53:31 2016
New Revision: 262784
URL: http://llvm.org/viewvc/llvm-project?rev=262784&view=rev
Log:
[X86][AVX] Improved VPERMILPS variable shuffle mask decoding.
Added support for decoding VPERMILPS variable shuffle masks that aren't in the constant pool.
Added target shuffle mask decoding for SCALAR_TO_VECTOR+VZEXT_MOVL cases — these can happen for v2i64 constant rematerialization
Follow-up to D17681
Modified:
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp?rev=262784&r1=262783&r2=262784&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.cpp Sat Mar 5 16:53:31 2016
@@ -454,6 +454,24 @@ void DecodeINSERTQIMask(int Len, int Idx
ShuffleMask.push_back(SM_SentinelUndef);
}
+void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask) {
+ unsigned VecSize = VT.getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned NumLanes = VecSize / 128;
+ unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
+ assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
+ "Unexpected vector size");
+ assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
+
+ for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
+ uint64_t M = RawMask[i];
+ M = (EltSize == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
+ unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
+ ShuffleMask.push_back((int)(LaneOffset + M));
+ }
+}
+
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask) {
for (int i = 0, e = RawMask.size(); i < e; ++i) {
Modified: llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h?rev=262784&r1=262783&r2=262784&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h (original)
+++ llvm/trunk/lib/Target/X86/Utils/X86ShuffleDecode.h Sat Mar 5 16:53:31 2016
@@ -115,6 +115,11 @@ void DecodeEXTRQIMask(int Len, int Idx,
void DecodeINSERTQIMask(int Len, int Idx,
SmallVectorImpl<int> &ShuffleMask);
+/// \brief Decode a VPERMILPD/VPERMILPS variable mask from a raw
+/// array of constants.
+void DecodeVPERMILPMask(MVT VT, ArrayRef<uint64_t> RawMask,
+ SmallVectorImpl<int> &ShuffleMask);
+
/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
SmallVectorImpl<int> &ShuffleMask);
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=262784&r1=262783&r2=262784&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Mar 5 16:53:31 2016
@@ -4880,6 +4880,20 @@ static bool getTargetShuffleMaskIndices(
return false;
}
+ if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
+ MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
+ return false;
+ SDValue MaskElement = MaskNode.getOperand(0).getOperand(0);
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskElement)) {
+ APInt RawElt = CN->getAPIntValue().getLoBits(MaskEltSizeInBits);
+ RawMask.push_back(RawElt.getZExtValue());
+ RawMask.append(VT.getVectorNumElements() - 1, 0);
+ return true;
+ }
+ return false;
+ }
+
if (MaskNode.getOpcode() != ISD::BUILD_VECTOR)
return false;
@@ -5012,8 +5026,13 @@ static bool getTargetShuffleMask(SDNode
case X86ISD::VPERMILPV: {
IsUnary = true;
SDValue MaskNode = N->getOperand(1);
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ SmallVector<uint64_t, 32> RawMask;
+ if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
+ DecodeVPERMILPMask(VT, RawMask, Mask);
+ break;
+ }
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
- unsigned MaskEltSize = VT.getScalarSizeInBits();
DecodeVPERMILPMask(C, MaskEltSize, Mask);
break;
}
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll?rev=262784&r1=262783&r2=262784&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx.ll Sat Mar 5 16:53:31 2016
@@ -40,10 +40,6 @@ define <8 x float> @combine_vpermilvar_8
define <2 x double> @combine_vpermilvar_2f64(<2 x double> %a0) {
; ALL-LABEL: combine_vpermilvar_2f64:
; ALL: # BB#0:
-; ALL-NEXT: movl $2, %eax
-; ALL-NEXT: vmovq %rax, %xmm1
-; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
-; ALL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
%1 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> <i64 2, i64 0>)
%2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> <i64 2, i64 0>)
More information about the llvm-commits
mailing list