[llvm] [X86][AVX] Match v4f64 blend from shuffle of scalar values. (PR #135753)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri May 2 04:43:44 PDT 2025
================
@@ -8743,6 +8745,52 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
return LowerShift(Res, Subtarget, DAG);
}
+static bool isShuffleFoldableLoad(SDValue);
+
+/// Attempt to lower a BUILD_VECTOR of scalar values to a shuffle of splats
+/// representing a blend.
+static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
+ X86Subtarget const &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = BVOp->getSimpleValueType(0u);
+
+ if (VT != MVT::v4f64)
+ return SDValue();
+
+ // Collect unique operands.
+ auto UniqueOps = SmallSet<SDValue, 16u>();
+ for (SDValue Op : BVOp->ops()) {
+ if (isIntOrFPConstant(Op) || Op.isUndef())
+ return SDValue();
+ UniqueOps.insert(Op);
+ }
+
+ // Candidate BUILD_VECTOR must have 2 unique operands.
+ if (UniqueOps.size() != 2u)
+ return SDValue();
+
+ SDValue Op0 = BVOp->getOperand(0u);
+ UniqueOps.erase(Op0);
+ SDValue Op1 = *UniqueOps.begin();
+
+ if (isShuffleFoldableLoad(Op0) || isShuffleFoldableLoad(Op1) ||
+ Subtarget.hasAVX2()) {
----------------
RKSimon wrote:
Check for hasAVX2 first - it's a lot cheaper than the isShuffleFoldableLoad checks.
https://github.com/llvm/llvm-project/pull/135753
More information about the llvm-commits
mailing list