[PATCH] D83789: [X86][SSE] Attempt to match OP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y))
Sanjay Patel via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 14 15:55:57 PDT 2020
spatel added a comment.
We need to add more tests to make sure the integer and 256-bit cases work as expected.
What do you think about consolidating the h-op creation into one helper as a preliminary step, to avoid duplicating the code in each caller? That would also make it easier to assert properties of the shuffle mask (e.g., that it only chooses from operand 0?).
I had this:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 450927aaf5c..3ed354d483f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44507,21 +44507,66 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
return true;
}
+/// Try to synthesize horizontal add/sub from adds/subs of shuffles.
+static SDValue getHorizontalBinop(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = N->getOpcode();
+ unsigned HorizOpcode;
+ switch (Opcode) {
+ case ISD::ADD: HorizOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HorizOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HorizOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HorizOpcode = X86ISD::FHSUB; break;
+ default:
+ llvm_unreachable("Unexpected opcode for horizontal op");
+ }
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isSimple())
+ return SDValue();
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ // 256-bit vectors without AVX2 are handled by splitting below.
+ if (!Subtarget.hasSSSE3())
+ return SDValue();
+ break;
+ case MVT::v4f32:
+ case MVT::v2f64:
+ if (!Subtarget.hasSSE3())
+ return SDValue();
+ break;
+ case MVT::v8f32:
+ case MVT::v4f64:
+ if (!Subtarget.hasAVX())
+ return SDValue();
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue Op0 = N->getOperand(0), Op1 = N->getOperand(1);
+ bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
+ if (!isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsCommutable))
+ return SDValue();
+
+ if (VT.getScalarType().isFloatingPoint())
+ return DAG.getNode(HorizOpcode, SDLoc(N), VT, Op0, Op1);
+
+ auto HopBuilder = [&](SelectionDAG &DAG, const SDLoc &DL,
+ ArrayRef<SDValue> Ops) {
+ return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
+ };
+ return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HopBuilder);
+}
+
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D83789/new/
https://reviews.llvm.org/D83789
More information about the llvm-commits
mailing list