[llvm] 057bdd6 - [X86][AVX] lowerShuffleWithVPMOV - minor refactor to more closely match lowerShuffleAsVTRUNC
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 06:39:38 PDT 2020
Author: Simon Pilgrim
Date: 2020-08-19T14:34:32+01:00
New Revision: 057bdd63a49a37924d1c88473d6c298caf2bcbec
URL: https://github.com/llvm/llvm-project/commit/057bdd63a49a37924d1c88473d6c298caf2bcbec
DIFF: https://github.com/llvm/llvm-project/commit/057bdd63a49a37924d1c88473d6c298caf2bcbec.diff
LOG: [X86][AVX] lowerShuffleWithVPMOV - minor refactor to more closely match lowerShuffleAsVTRUNC
Replace the isBuildVectorAllZeros check with a test of the Zeroable bitmask.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7ad56f03891b..32d8f3d96dae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11249,7 +11249,6 @@ static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
// Check if the mask can be mapped to a TRUNCATE or VTRUNC, truncating the
// source into the lower elements and zeroing the upper elements.
-// TODO: Merge with matchShuffleAsVPMOV.
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
ArrayRef<int> Mask, const APInt &Zeroable,
const X86Subtarget &Subtarget) {
@@ -11333,21 +11332,6 @@ static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
return Trunc;
}
-static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, int Delta) {
- int Size = (int)Mask.size();
- int Split = Size / Delta;
-
- // Match for mask starting with e.g.: <8, 10, 12, 14,... or <0, 2, 4, 6,...
- if (!isSequentialOrUndefInRange(Mask, 0, Split, 0, Delta))
- return false;
-
- // The rest of the mask should not refer to the truncated vector's elements.
- if (isAnyInRange(Mask.slice(Split, Size - Split), 0, Size))
- return false;
-
- return true;
-}
-
// Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction.
//
// An example is the following:
@@ -11366,40 +11350,44 @@ static bool matchShuffleAsVPMOV(ArrayRef<int> Mask, int Delta) {
// TODO: Merge with lowerShuffleAsVTRUNC.
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const APInt &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
-
- // TODO: Convert to use Zeroable bitmask.
- if (!ISD::isBuildVectorAllZeros(V2.getNode()))
+ if (!Subtarget.hasAVX512())
return SDValue();
- // Look for:
- //
- // bitcast (truncate <8 x i32> %vec to <8 x i16>) to <16 x i8>
- // bitcast (truncate <4 x i64> %vec to <4 x i32>) to <8 x i16>
- //
- // and similar ones.
- if (V1.getOpcode() != ISD::BITCAST)
- return SDValue();
- if (V1.getOperand(0).getOpcode() != ISD::TRUNCATE)
- return SDValue();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ unsigned MaxScale = 64 / EltSizeInBits;
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
+ unsigned NumSrcElts = NumElts / Scale;
+ unsigned UpperElts = NumElts - NumSrcElts;
+ if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ continue;
- SDValue Src = V1.getOperand(0).getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
+ SDValue Src = V1;
+ if (!Src.hasOneUse())
+ return SDValue();
- // Down Convert Word to Byte is only available with avx512bw. The case with
- // 256-bit output doesn't contain a shuffle and is therefore not handled here.
- if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
- !Subtarget.hasBWI())
- return SDValue();
+ Src = peekThroughOneUseBitcasts(Src);
+ if (Src.getOpcode() != ISD::TRUNCATE ||
+ Src.getScalarValueSizeInBits() != (EltSizeInBits * Scale))
+ return SDValue();
+ Src = Src.getOperand(0);
- // The first half/quarter of the mask should refer to every second/fourth
- // element of the vector truncated and bitcasted.
- if (!matchShuffleAsVPMOV(Mask, 2) && !matchShuffleAsVPMOV(Mask, 4))
- return SDValue();
+ // VPMOVWB is only available with avx512bw.
+ MVT SrcVT = Src.getSimpleValueType();
+ if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
+ !Subtarget.hasBWI())
+ return SDValue();
- return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, true);
+ bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
+ return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
+ }
+
+ return SDValue();
}
// Attempt to match binary shuffle patterns as a truncate.
@@ -14806,8 +14794,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return ZExt;
// Try to use lower using a truncation.
- if (SDValue V =
- lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return V;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
@@ -15018,8 +15006,8 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return ZExt;
// Try to use lower using a truncation.
- if (SDValue V =
- lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
+ Subtarget, DAG))
return V;
if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
More information about the llvm-commits mailing list