[llvm] 800fb68 - [X86][SSE] Pull out PACK(SHUFFLE(),SHUFFLE()) folds into its own function. NFC.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 8 09:47:20 PDT 2020
Author: Simon Pilgrim
Date: 2020-07-08T17:42:42+01:00
New Revision: 800fb68420681d14f7690b76421c57ff474349a1
URL: https://github.com/llvm/llvm-project/commit/800fb68420681d14f7690b76421c57ff474349a1
DIFF: https://github.com/llvm/llvm-project/commit/800fb68420681d14f7690b76421c57ff474349a1.diff
LOG: [X86][SSE] Pull out PACK(SHUFFLE(),SHUFFLE()) folds into its own function. NFC.
Future patches will extend this so declutter combineVectorPack before we start.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cd52684fc263..017bfba94b61 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41882,6 +41882,50 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineVectorPackWithShuffle(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
+ "Unexpected pack opcode");
+
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned NumDstElts = VT.getVectorNumElements();
+
+ // Attempt to fold PACK(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
+ // to SHUFFLE(PACK(LOSUBVECTOR(X),HISUBVECTOR(X))), this is mainly for
+ // truncation trees that help us avoid lane crossing shuffles.
+ // TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
+ if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N0.getConstantOperandAPInt(1) == 0 &&
+ N1.getConstantOperandAPInt(1) == (NumDstElts / 2) &&
+ N0.getOperand(0) == N1.getOperand(0) && VT.is128BitVector() &&
+ N0.getOperand(0).getValueType().is256BitVector()) {
+ // TODO - support target/faux shuffles.
+ SDValue Vec = peekThroughBitcasts(N0.getOperand(0));
+ if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec)) {
+ // To keep the PACK LHS/RHS coherency, we must be able to scale the unary
+ // shuffle to a vXi64 width - we can probably relax this in the future.
+ SmallVector<int, 4> ShuffleMask;
+ if (SVN->getOperand(1).isUndef() &&
+ scaleShuffleElements(SVN->getMask(), 4, ShuffleMask)) {
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(SVN->getOperand(0), DL);
+ Lo = DAG.getBitcast(N0.getValueType(), Lo);
+ Hi = DAG.getBitcast(N1.getValueType(), Hi);
+ SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
+ Res = DAG.getBitcast(MVT::v4i32, Res);
+ Res = DAG.getVectorShuffle(MVT::v4i32, DL, Res, Res, ShuffleMask);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -41955,36 +41999,9 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
}
- // Attempt to fold PACK(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X)))
- // to SHUFFLE(PACK(LOSUBVECTOR(X),HISUBVECTOR(X))), this is mainly for
- // truncation trees that help us avoid lane crossing shuffles.
- // TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
- if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
- N0.getConstantOperandAPInt(1) == 0 &&
- N1.getConstantOperandAPInt(1) == (NumDstElts / 2) &&
- N0.getOperand(0) == N1.getOperand(0) && VT.is128BitVector() &&
- N0.getOperand(0).getValueType().is256BitVector()) {
- // TODO - support target/faux shuffles.
- SDValue Vec = peekThroughBitcasts(N0.getOperand(0));
- if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec)) {
- // To keep the PACK LHS/RHS coherency, we must be able to scale the unary
- // shuffle to a vXi64 width - we can probably relax this in the future.
- SmallVector<int, 4> ShuffleMask;
- if (SVN->getOperand(1).isUndef() &&
- scaleShuffleElements(SVN->getMask(), 4, ShuffleMask)) {
- SDLoc DL(N);
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = DAG.SplitVector(SVN->getOperand(0), DL);
- Lo = DAG.getBitcast(N0.getValueType(), Lo);
- Hi = DAG.getBitcast(N1.getValueType(), Hi);
- SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
- Res = DAG.getBitcast(MVT::v4i32, Res);
- Res = DAG.getVectorShuffle(MVT::v4i32, DL, Res, Res, ShuffleMask);
- return DAG.getBitcast(VT, Res);
- }
- }
- }
+ // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()).
+ if (SDValue V = combineVectorPackWithShuffle(N, DAG))
+ return V;
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
More information about the llvm-commits mailing list