[llvm] 35c0848 - [NFC][X86] combineX86ShuffleChain(): hoist Mask variable higher up
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 4 07:15:41 PDT 2021
Author: Roman Lebedev
Date: 2021-08-04T17:15:12+03:00
New Revision: 35c0848b570214ed2b2d96cca4dd62bb7ae725cd
URL: https://github.com/llvm/llvm-project/commit/35c0848b570214ed2b2d96cca4dd62bb7ae725cd
DIFF: https://github.com/llvm/llvm-project/commit/35c0848b570214ed2b2d96cca4dd62bb7ae725cd.diff
LOG: [NFC][X86] combineX86ShuffleChain(): hoist Mask variable higher up
Declaring `NewMask` outside of the `if` and rebinding the `BaseMask` `ArrayRef`
to it is confusing. Instead, just move the `Mask` vector higher up,
and change the code that previously had no access to it (but now does)
to use `Mask` instead of `BaseMask`.
There are no other intentional changes.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a8355c986909..2689212795ec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35836,14 +35836,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return CanonicalizeShuffleInput(RootVT, V1);
}
+ SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
+
// Adjust mask elements that pick from a splat input to be identity mask elts,
// i.e. to pick from the same lane of the input as the mask element is in.
// This may allow to simplify the shuffle into a blend.
- SmallVector<int> NewMask;
if (InputIsSplat[0] || InputIsSplat[1]) {
- NewMask.assign(BaseMask.begin(), BaseMask.end());
for (unsigned i = 0; i != NumBaseMaskElts; ++i) {
- int &M = NewMask[i];
+ int &M = Mask[i];
assert(isUndefOrZeroOrInRange(M, 0, 2 * NumBaseMaskElts) &&
"OOB mask element?");
if (M < 0)
@@ -35853,7 +35853,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (InputIsSplat[InputIdx] && i < InputNumElts[InputIdx])
M = i + InputIdx * NumBaseMaskElts; // Pick from the same lane of input.
}
- BaseMask = std::move(NewMask);
}
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
@@ -35861,8 +35860,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
SmallVector<int> ScaledMask, IdentityMask;
unsigned NumElts = VT1.getVectorNumElements();
- if (BaseMask.size() <= NumElts &&
- scaleShuffleElements(BaseMask, NumElts, ScaledMask)) {
+ if (Mask.size() <= NumElts &&
+ scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
@@ -35876,14 +35875,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper subvectors are zeroable, then an extract+insert is more
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
// to zero the upper subvectors.
- if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
+ if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
+ assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
"Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
- bool UseZero = isAnyZero(BaseMask);
+ unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
+ bool UseZero = isAnyZero(Mask);
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
}
@@ -35891,7 +35890,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Narrow shuffle mask to v4x128.
SmallVector<int, 4> ScaledMask;
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
- narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, ScaledMask);
+ narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
// Try to lower to vshuf64x2/vshuf32x4.
auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
@@ -35950,20 +35949,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper half is zeroable, then an extract+insert is more optimal
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
// zero the upper half.
- if (isUndefOrZero(BaseMask[1])) {
+ if (isUndefOrZero(Mask[1])) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
+ assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
- return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
- DL, 256);
+ Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
+ return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
+ 256);
}
// If we're splatting the low subvector, an insert-subvector 'concat'
// pattern is quicker than VPERM2X128.
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
- if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) {
+ if (Mask[0] == 0 && Mask[1] == 0 && !Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
Res = CanonicalizeShuffleInput(RootVT, V1);
@@ -35978,11 +35977,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// we need to use the zeroing feature.
// Prefer blends for sequential shuffles unless we are optimizing for size.
if (UnaryShuffle &&
- !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
- (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
+ !(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
+ (OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
- PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
+ PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
+ PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
return DAG.getNode(
X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
@@ -35993,16 +35992,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
if (!UnaryShuffle && !IsMaskedShuffle) {
- assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
+ assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
"Unexpected shuffle sentinel value");
// Prefer blends to X86ISD::VPERM2X128.
- if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
- (BaseMask[0] == 2 && BaseMask[1] == 1))) {
+ if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] & 3) << 0);
- PermMask |= ((BaseMask[1] & 3) << 4);
- SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
- SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
+ PermMask |= ((Mask[0] & 3) << 0);
+ PermMask |= ((Mask[1] & 3) << 4);
+ SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
+ SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
CanonicalizeShuffleInput(RootVT, LHS),
CanonicalizeShuffleInput(RootVT, RHS),
@@ -36013,13 +36011,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
- SmallVector<int, 64> Mask;
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
- narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
- } else {
- Mask.assign(BaseMask.begin(), BaseMask.end());
+ SmallVector<int, 64> ScaledMask;
+ narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
}
// For masked shuffles, we're trying to match the root width for better
More information about the llvm-commits
mailing list