[llvm] [X86] Fold BLEND(PERMUTE(X), PERMUTE(Y)) -> PERMUTE(BLEND(X, Y)) (PR #90219)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 4 07:23:59 PDT 2024
================
@@ -40019,6 +40029,93 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
return SDValue();
}
+// Attempt to fold BLEND(PERMUTE(X),PERMUTE(Y)) -> PERMUTE(BLEND(X,Y))
+// iff we don't demand the same element index for both X and Y.
+static SDValue combineBlendOfPermutes(MVT VT, SDValue N0, SDValue N1,
+ ArrayRef<int> BlendMask,
+ const APInt &DemandedElts,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ assert(isBlendOrUndef(BlendMask) && "Blend shuffle expected");
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue BC0 = peekThroughOneUseBitcasts(N0);
+ SDValue BC1 = peekThroughOneUseBitcasts(N1);
+
+ // See if both operands are shuffles, and that we can scale the shuffle masks
+ // to the same width as the blend mask.
+ // TODO: Support SM_SentinelZero?
+ SmallVector<SDValue, 2> Ops0, Ops1;
+ SmallVector<int, 32> Mask0, Mask1, ScaledMask0, ScaledMask1;
+ if (!getTargetShuffleMask(BC0, /*AllowSentinelZero=*/false, Ops0, Mask0) ||
+ !getTargetShuffleMask(BC1, /*AllowSentinelZero=*/false, Ops1, Mask1) ||
+ !scaleShuffleElements(Mask0, NumElts, ScaledMask0) ||
+ !scaleShuffleElements(Mask1, NumElts, ScaledMask1))
+ return SDValue();
+
+ // Determine the demanded elts from both permutes.
+ APInt Demanded0, DemandedLHS0, DemandedRHS0;
+ APInt Demanded1, DemandedLHS1, DemandedRHS1;
+ if (!getShuffleDemandedElts(NumElts, BlendMask, DemandedElts, Demanded0,
+ Demanded1,
+ /*AllowUndefElts=*/true) ||
+ !getShuffleDemandedElts(NumElts, ScaledMask0, Demanded0, DemandedLHS0,
+ DemandedRHS0, /*AllowUndefElts=*/true) ||
+ !getShuffleDemandedElts(NumElts, ScaledMask1, Demanded1, DemandedLHS1,
+ DemandedRHS1, /*AllowUndefElts=*/true))
+ return SDValue();
+
+ // Confirm that we only use a single operand from both permutes and that we
+ // don't demand the same index from both.
+ if (!(DemandedRHS0.isZero() && DemandedRHS1.isZero() &&
+ !DemandedLHS0.intersects(DemandedLHS1)))
----------------
goldsteinn wrote:
I think this condition would be cleaner with the outer negation distributed (De Morgan), i.e. written as: `!DemandedRHS0.isZero() || !DemandedRHS1.isZero() || DemandedLHS0.intersects(DemandedLHS1)`
https://github.com/llvm/llvm-project/pull/90219
More information about the llvm-commits
mailing list