[llvm] r364924 - [X86][AVX] combineX86ShuffleChain - pull out CombineShuffleWithExtract lambda. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 2 06:30:05 PDT 2019
Author: rksimon
Date: Tue Jul 2 06:30:04 2019
New Revision: 364924
URL: http://llvm.org/viewvc/llvm-project?rev=364924&view=rev
Log:
[X86][AVX] combineX86ShuffleChain - pull out CombineShuffleWithExtract lambda. NFCI.
Pull out CombineShuffleWithExtract lambda to new combineX86ShuffleChainWithExtract wrapper and refactored it to handle more than 2 shuffle inputs - this will allow combineX86ShufflesRecursively to call this in a future patch.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=364924&r1=364923&r2=364924&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul 2 06:30:04 2019
@@ -31810,6 +31810,11 @@ static bool matchBinaryPermuteShuffle(
return false;
}
+static SDValue combineX86ShuffleChainWithExtract(
+ ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget);
+
/// Combine an arbitrary chain of shuffles into a single instruction if
/// possible.
///
@@ -32083,87 +32088,6 @@ static SDValue combineX86ShuffleChain(Ar
bool MaskContainsZeros =
any_of(Mask, [](int M) { return M == SM_SentinelZero; });
- // Unwrap shuffle(extract_subvector(x,c1),extract_subvector(y,c2),m1) ->
- // shuffle(x,y,m2)
- auto CombineShuffleWithExtract =
- [&](SDValue &NewRoot, SmallVectorImpl<int> &NewMask,
- SmallVectorImpl<SDValue> &NewInputs) -> bool {
- assert(NewMask.empty() && NewInputs.empty() && "Non-empty shuffle mask");
- if (UnaryShuffle)
- return false;
-
- SDValue Src1 = V1, Src2 = V2;
- unsigned Offset1 = 0, Offset2 = 0;
- while (Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isa<ConstantSDNode>(Src1.getOperand(1))) {
- Offset1 += Src1.getConstantOperandVal(1);
- Src1 = Src1.getOperand(0);
- }
- while (Src2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- isa<ConstantSDNode>(Src2.getOperand(1))) {
- Offset2 += Src2.getConstantOperandVal(1);
- Src2 = Src2.getOperand(0);
- }
- if (Offset1 == 0 && Offset2 == 0)
- return false;
-
- // If the src vector types aren't the same, see if we can extend
- // one to match the other.
- if ((Src1.getValueType().getScalarType() !=
- Src2.getValueType().getScalarType()) ||
- !DAG.getTargetLoweringInfo().isTypeLegal(Src1.getValueType()) ||
- !DAG.getTargetLoweringInfo().isTypeLegal(Src2.getValueType()))
- return false;
-
- unsigned Src1SizeInBits = Src1.getValueSizeInBits();
- unsigned Src2SizeInBits = Src2.getValueSizeInBits();
- assert(((Src1SizeInBits % Src2SizeInBits) == 0 ||
- (Src2SizeInBits % Src1SizeInBits) == 0) &&
- "Shuffle vector size mismatch");
- if (Src1SizeInBits != Src2SizeInBits) {
- if (Src1SizeInBits > Src2SizeInBits) {
- Src2 = widenSubVector(Src2, false, Subtarget, DAG, DL, Src1SizeInBits);
- Src2SizeInBits = Src1SizeInBits;
- } else {
- Src1 = widenSubVector(Src1, false, Subtarget, DAG, DL, Src2SizeInBits);
- Src1SizeInBits = Src2SizeInBits;
- }
- }
-
- assert(((Offset1 % VT1.getVectorNumElements()) == 0 &&
- (Offset2 % VT2.getVectorNumElements()) == 0 &&
- (Src1SizeInBits % RootSizeInBits) == 0 &&
- Src1SizeInBits == Src2SizeInBits) &&
- "Unexpected subvector extraction");
- unsigned Scale = Src1SizeInBits / RootSizeInBits;
-
- // Convert extraction indices to mask size.
- Offset1 /= VT1.getVectorNumElements();
- Offset2 /= VT2.getVectorNumElements();
- Offset1 *= NumMaskElts;
- Offset2 *= NumMaskElts;
-
- NewInputs.push_back(Src1);
- if (Src1 != Src2) {
- NewInputs.push_back(Src2);
- Offset2 += Scale * NumMaskElts;
- }
-
- // Create new mask for larger type.
- NewMask.append(Mask.begin(), Mask.end());
- for (int &M : NewMask) {
- if (M < 0)
- continue;
- if (M < (int)NumMaskElts)
- M += Offset1;
- else
- M = (M - NumMaskElts) + Offset2;
- }
- NewMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
- NewRoot = Src1;
- return true;
- };
-
if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
// If we have a single input lane-crossing shuffle then lower to VPERMV.
if (UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
@@ -32209,18 +32133,10 @@ static SDValue combineX86ShuffleChain(Ar
// If that failed and either input is extracted then try to combine as a
// shuffle with the larger type.
- SDValue NewRoot;
- SmallVector<int, 64> NewMask;
- SmallVector<SDValue, 2> NewInputs;
- if (CombineShuffleWithExtract(NewRoot, NewMask, NewInputs)) {
- if (SDValue Res = combineX86ShuffleChain(
- NewInputs, NewRoot, NewMask, Depth, HasVariableMask,
- AllowVariableMask, DAG, Subtarget)) {
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT1, Res,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getBitcast(RootVT, Res);
- }
- }
+ if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+ Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+ DAG, Subtarget))
+ return WideShuffle;
// If we have a dual input lane-crossing shuffle then lower to VPERMV3.
if (AllowVariableMask && !MaskContainsZeros &&
@@ -32389,18 +32305,10 @@ static SDValue combineX86ShuffleChain(Ar
// If that failed and either input is extracted then try to combine as a
// shuffle with the larger type.
- SDValue NewRoot;
- SmallVector<int, 64> NewMask;
- SmallVector<SDValue, 2> NewInputs;
- if (CombineShuffleWithExtract(NewRoot, NewMask, NewInputs)) {
- if (SDValue Res = combineX86ShuffleChain(NewInputs, NewRoot, NewMask, Depth,
- HasVariableMask, AllowVariableMask,
- DAG, Subtarget)) {
- Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT1, Res,
- DAG.getIntPtrConstant(0, DL));
- return DAG.getBitcast(RootVT, Res);
- }
- }
+ if (SDValue WideShuffle = combineX86ShuffleChainWithExtract(
+ Inputs, Root, BaseMask, Depth, HasVariableMask, AllowVariableMask,
+ DAG, Subtarget))
+ return WideShuffle;
// If we have a dual input shuffle then lower to VPERMV3.
if (!UnaryShuffle && AllowVariableMask && !MaskContainsZeros &&
@@ -32428,6 +32336,109 @@ static SDValue combineX86ShuffleChain(Ar
return SDValue();
}
+// Combine an arbitrary chain of shuffles + extract_subvectors into a single
+// instruction if possible.
+//
+// Wrapper for combineX86ShuffleChain that extends the shuffle mask to a larger
+// type size to attempt to combine:
+// shuffle(extract_subvector(x,c1),extract_subvector(y,c2),m1)
+// -->
+// extract_subvector(shuffle(x,y,m2),0)
+static SDValue combineX86ShuffleChainWithExtract(
+ ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
+ bool HasVariableMask, bool AllowVariableMask, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ unsigned NumMaskElts = BaseMask.size();
+ unsigned NumInputs = Inputs.size();
+ if (NumInputs == 0)
+ return SDValue();
+
+ SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
+ SmallVector<unsigned, 4> Offsets(NumInputs, 0);
+
+ // Peek through subvectors.
+ // TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
+ unsigned WideSizeInBits = WideInputs[0].getValueSizeInBits();
+ for (unsigned i = 0; i != NumInputs; ++i) {
+ SDValue &Src = WideInputs[i];
+ unsigned &Offset = Offsets[i];
+ Src = peekThroughBitcasts(Src);
+ EVT BaseVT = Src.getValueType();
+ while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isa<ConstantSDNode>(Src.getOperand(1))) {
+ Offset += Src.getConstantOperandVal(1);
+ Src = Src.getOperand(0);
+ }
+ WideSizeInBits = std::max(WideSizeInBits, Src.getValueSizeInBits());
+ assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
+ "Unexpected subvector extraction");
+ Offset /= BaseVT.getVectorNumElements();
+ Offset *= NumMaskElts;
+ }
+
+ // Bail if we're always extracting from the lowest subvectors,
+ // combineX86ShuffleChain should match this for the current width.
+ if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
+ return SDValue();
+
+ EVT RootVT = Root.getValueType();
+ unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned Scale = WideSizeInBits / RootSizeInBits;
+ assert((WideSizeInBits % RootSizeInBits) == 0 &&
+ "Unexpected subvector extraction");
+
+ // If the src vector types aren't the same, see if we can extend
+ // them to match each other.
+ // TODO: Support different scalar types?
+ EVT WideSVT = WideInputs[0].getValueType().getScalarType();
+ if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
+ return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
+ Op.getValueType().getScalarType() != WideSVT;
+ }))
+ return SDValue();
+
+ for (SDValue &NewInput : WideInputs) {
+ assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
+ "Shuffle vector size mismatch");
+ if (WideSizeInBits > NewInput.getValueSizeInBits())
+ NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
+ SDLoc(NewInput), WideSizeInBits);
+ assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
+ "Unexpected subvector extraction");
+ }
+
+ // Create new mask for larger type.
+ for (unsigned i = 1; i != NumInputs; ++i)
+ Offsets[i] += i * Scale * NumMaskElts;
+
+ SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end());
+ for (int &M : WideMask) {
+ if (M < 0)
+ continue;
+ M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
+ }
+ WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
+
+ // Remove unused/repeated shuffle source ops.
+ resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
+ assert(!WideInputs.empty() && "Shuffle with no inputs detected");
+
+ if (WideInputs.size() > 2)
+ return SDValue();
+
+ // Attempt to combine wider chain.
+ // TODO: Can we use a better Root?
+ SDValue WideRoot = WideInputs[0];
+ if (SDValue WideShuffle = combineX86ShuffleChain(
+ WideInputs, WideRoot, WideMask, Depth, HasVariableMask,
+ AllowVariableMask, DAG, Subtarget)) {
+ WideShuffle =
+ extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
+ return DAG.getBitcast(RootVT, WideShuffle);
+ }
+ return SDValue();
+}
+
// Attempt to constant fold all of the constant source ops.
// Returns true if the entire shuffle is folded to a constant.
// TODO: Extend this to merge multiple constant Ops and update the mask.
More information about the llvm-commits
mailing list