[llvm] r277814 - [X86][SSE] Consistently use the target shuffle root value type for vector size calculations. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 5 06:02:53 PDT 2016
Author: rksimon
Date: Fri Aug 5 08:02:53 2016
New Revision: 277814
URL: http://llvm.org/viewvc/llvm-project?rev=277814&view=rev
Log:
[X86][SSE] Consistently use the target shuffle root value type for vector size calculations. NFCI.
Preparation for adding 2-input shuffle support: we want to avoid unnecessary references to the input value type.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=277814&r1=277813&r2=277814&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Aug 5 08:02:53 2016
@@ -25092,8 +25092,10 @@ static bool combineX86ShuffleChain(SDVal
MVT VT = Input.getSimpleValueType();
MVT RootVT = Root.getSimpleValueType();
- SDLoc DL(Root);
+ assert(VT.getSizeInBits() == RootVT.getSizeInBits() &&
+ "Vector size mismatch");
+ SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
@@ -25106,6 +25108,8 @@ static bool combineX86ShuffleChain(SDVal
unsigned RootSizeInBits = RootVT.getSizeInBits();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
+ bool FloatDomain = VT.isFloatingPoint() ||
+ (RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are a AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
@@ -25122,12 +25126,11 @@ static bool combineX86ShuffleChain(SDVal
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
// Handle 128-bit lane shuffles of 256-bit vectors.
- if (VT.is256BitVector() && NumBaseMaskElts == 2 &&
+ if (RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
return false; // Nothing to do!
- MVT ShuffleVT = (VT.isFloatingPoint() || !Subtarget.hasAVX2() ? MVT::v4f64
- : MVT::v4i64);
+ MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
@@ -25158,9 +25161,7 @@ static bool combineX86ShuffleChain(SDVal
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
// Determine the effective mask value type.
- bool FloatDomain =
- (VT.isFloatingPoint() || (VT.is256BitVector() && !Subtarget.hasAVX2())) &&
- (32 <= MaskEltSizeInBits);
+ FloatDomain &= (32 <= MaskEltSizeInBits);
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
: MVT::getIntegerVT(MaskEltSizeInBits);
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
@@ -25265,11 +25266,11 @@ static bool combineX86ShuffleChain(SDVal
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
if ((Depth >= 3 || HasVariableMask) &&
- ((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (VT.is256BitVector() && Subtarget.hasAVX2()) ||
- (VT.is512BitVector() && Subtarget.hasBWI()))) {
+ ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (RootVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<SDValue, 16> PSHUFBMask;
- int NumBytes = VT.getSizeInBits() / 8;
+ int NumBytes = RootVT.getSizeInBits() / 8;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
More information about the llvm-commits mailing list