[llvm] r295604 - [X86][SSE] Add domain crossing support for target shuffle combines.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 19 06:12:26 PST 2017
Author: rksimon
Date: Sun Feb 19 08:12:25 2017
New Revision: 295604
URL: http://llvm.org/viewvc/llvm-project?rev=295604&view=rev
Log:
[X86][SSE] Add domain crossing support for target shuffle combines.
Add the infrastructure to flag whether float and/or int domains are permissible.
A future patch will enable domain crossing based on shuffle depth and the value types of the source vectors.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=295604&r1=295603&r2=295604&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Feb 19 08:12:25 2017
@@ -26369,8 +26369,8 @@ bool X86TargetLowering::isGAPlusOffset(S
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain, SDValue &V1, SDLoc &DL,
- SelectionDAG &DAG,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDLoc &DL, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
unsigned NumMaskElts = Mask.size();
@@ -26387,8 +26387,8 @@ static bool matchUnaryVectorShuffle(MVT
// Match against a VZEXT instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
- (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool Match = true;
@@ -26413,7 +26413,7 @@ static bool matchUnaryVectorShuffle(MVT
// Check if we have SSE3 which will let us use MOVDDUP etc. The
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
- if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
+ if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
SrcVT = DstVT = MVT::v2f64;
@@ -26431,7 +26431,7 @@ static bool matchUnaryVectorShuffle(MVT
}
}
- if (MaskVT.is256BitVector() && FloatDomain) {
+ if (MaskVT.is256BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
@@ -26450,7 +26450,7 @@ static bool matchUnaryVectorShuffle(MVT
}
}
- if (MaskVT.is512BitVector() && FloatDomain) {
+ if (MaskVT.is512BitVector() && AllowFloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
@@ -26489,7 +26489,8 @@ static bool matchUnaryVectorShuffle(MVT
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
+ bool AllowFloatDomain,
+ bool AllowIntDomain,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
@@ -26505,8 +26506,8 @@ static bool matchUnaryPermuteVectorShuff
// Attempt to match against byte/bit shifts.
// FIXME: Add 512-bit support.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
MaskVT.getScalarSizeInBits(), Mask,
0, Zeroable, Subtarget);
@@ -26569,19 +26570,21 @@ static bool matchUnaryPermuteVectorShuff
// AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if (FloatDomain && !Subtarget.hasAVX())
+ if ((AllowFloatDomain && !AllowIntDomain) && !Subtarget.hasAVX())
return false;
// Pre-AVX2 we must use float shuffles on 256-bit vectors.
- if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
- FloatDomain = true;
+ if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) {
+ AllowFloatDomain = true;
+ AllowIntDomain = false;
+ }
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
@@ -26589,7 +26592,7 @@ static bool matchUnaryPermuteVectorShuff
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
- ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
+ ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
PermuteImm = getV4X86ShuffleImm(RepeatedMask);
return true;
}
@@ -26598,7 +26601,7 @@ static bool matchUnaryPermuteVectorShuff
}
// VPERMILPD can permute with a non-repeating shuffle.
- if (FloatDomain && MaskScalarSizeInBits == 64) {
+ if (AllowFloatDomain && MaskScalarSizeInBits == 64) {
Shuffle = X86ISD::VPERMILPI;
ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
PermuteImm = 0;
@@ -26622,8 +26625,8 @@ static bool matchUnaryPermuteVectorShuff
if (MaskScalarSizeInBits == 64)
scaleShuffleMask(2, RepeatedMask, WordMask);
- Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
- ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
+ Shuffle = (AllowFloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
+ ShuffleVT = (AllowFloatDomain ? MVT::f32 : MVT::i32);
ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
@@ -26633,35 +26636,36 @@ static bool matchUnaryPermuteVectorShuff
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain, SDValue &V1, SDValue &V2,
- SDLoc &DL, SelectionDAG &DAG,
+ bool AllowFloatDomain, bool AllowIntDomain,
+ SDValue &V1, SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
bool IsUnary) {
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
if (MaskVT.is128BitVector()) {
- if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {0, 0}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {1, 1}) && FloatDomain) {
+ if (isTargetShuffleEquivalent(Mask, {1, 1}) && AllowFloatDomain) {
V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
std::swap(V1, V2);
Shuffle = X86ISD::MOVSD;
ShuffleVT = MaskVT;
return true;
}
if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
- (FloatDomain || !Subtarget.hasSSE41())) {
+ (AllowFloatDomain || !Subtarget.hasSSE41())) {
Shuffle = X86ISD::MOVSS;
ShuffleVT = MaskVT;
return true;
@@ -26687,17 +26691,17 @@ static bool matchBinaryVectorShuffle(MVT
}
static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
- bool FloatDomain,
- SDValue &V1, SDValue &V2,
- SDLoc &DL, SelectionDAG &DAG,
+ bool AllowIntDomain, SDValue &V1,
+ SDValue &V2, SDLoc &DL,
+ SelectionDAG &DAG,
const X86Subtarget &Subtarget,
unsigned &Shuffle, MVT &ShuffleVT,
unsigned &PermuteImm) {
unsigned NumMaskElts = Mask.size();
// Attempt to match against PALIGNR byte rotate.
- if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
if (0 < ByteRotation) {
Shuffle = X86ISD::PALIGNR;
@@ -26958,6 +26962,11 @@ static bool combineX86ShuffleChain(Array
MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
+ // Which shuffle domains are permitted?
+ // TODO - Allow either domain after a threshold depth.
+ bool AllowFloatDomain = FloatDomain;
+ bool AllowIntDomain = !FloatDomain;
+
if (UnaryShuffle) {
// If we are shuffling a X86ISD::VZEXT_LOAD then we can use the load
// directly if we don't shuffle the lower element and we shuffle the upper
@@ -26974,8 +26983,9 @@ static bool combineX86ShuffleChain(Array
}
}
- if (matchUnaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, DL, DAG,
- Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT)) {
+ if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -26989,8 +26999,9 @@ static bool combineX86ShuffleChain(Array
return true;
}
- if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, Subtarget,
- Shuffle, ShuffleVT, PermuteImm)) {
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, AllowFloatDomain,
+ AllowIntDomain, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -27006,8 +27017,9 @@ static bool combineX86ShuffleChain(Array
}
}
- if (matchBinaryVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL, DAG,
- Subtarget, Shuffle, ShuffleVT, UnaryShuffle)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
+ V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleVT,
+ UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
@@ -27023,7 +27035,7 @@ static bool combineX86ShuffleChain(Array
return true;
}
- if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, FloatDomain, V1, V2, DL,
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, AllowIntDomain, V1, V2, DL,
DAG, Subtarget, Shuffle, ShuffleVT,
PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
More information about the llvm-commits
mailing list