[PATCH] D83789: [X86][SSE] Attempt to match OP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y))

Tue Jul 14 15:55:57 PDT 2020

spatel added a comment.

We need to add more tests to make sure the integer and 256-bit cases work as expected.

What do you think about consolidating the h-op creation into one helper as a preliminary patch, to avoid the code duplication in the callers? That would also make it easier to assert properties of the shuffle mask (e.g., that it only chooses from operand 0?).

I had this:

  diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
  index 450927aaf5c..3ed354d483f 100644
  --- a/llvm/lib/Target/X86/X86ISelLowering.cpp
  +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
  @@ -44507,21 +44507,66 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, SelectionDAG &DAG,
     return true;
   }

  +/// Try to synthesize horizontal add/sub from adds/subs of shuffles; returns an empty SDValue when no match is made.
  +static SDValue getHorizontalBinop(SDNode *N, SelectionDAG &DAG,
  +                                  const X86Subtarget &Subtarget) {
  +  unsigned Opcode = N->getOpcode();
  +  unsigned HorizOpcode; // X86ISD horizontal counterpart of Opcode; assigned by the switch below.
  +  switch (Opcode) {
  +  case ISD::ADD: HorizOpcode = X86ISD::HADD; break;
  +  case ISD::SUB: HorizOpcode = X86ISD::HSUB; break;
  +  case ISD::FADD: HorizOpcode = X86ISD::FHADD; break;
  +  case ISD::FSUB: HorizOpcode = X86ISD::FHSUB; break;
  +  default:
  +    llvm_unreachable("Unexpected opcode for horizontal op");
  +  }
  +
  +  EVT VT = N->getValueType(0);
  +  if (!VT.isSimple())
  +    return SDValue(); // Only simple (MVT) types are matched by the switch below.
  +  switch (VT.getSimpleVT().SimpleTy) {
  +  case MVT::v8i16:
  +  case MVT::v4i32:
  +  case MVT::v16i16:
  +  case MVT::v8i32:
  +    // 256-bit vectors without AVX2 are handled by splitting below.
  +    if (!Subtarget.hasSSSE3())
  +      return SDValue();
  +    break;
  +  case MVT::v4f32:
  +  case MVT::v2f64:
  +    if (!Subtarget.hasSSE3())
  +      return SDValue();
  +    break;
  +  case MVT::v8f32:
  +  case MVT::v4f64:
  +    if (!Subtarget.hasAVX())
  +      return SDValue();
  +    break;
  +  default:
  +    return SDValue(); // Any other vector type is not handled here.
  +  }
  +
  +  SDValue Op0 = N->getOperand(0), Op1 = N->getOperand(1);
  +  bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); // SUB/FSUB are not commutable.
  +  if (!isHorizontalBinOp(Op0, Op1, DAG, Subtarget, IsCommutable)) // Op0/Op1 are passed by reference and may be rewritten by the call.
  +    return SDValue();
  +
  +  if (VT.getScalarType().isFloatingPoint())
  +    return DAG.getNode(HorizOpcode, SDLoc(N), VT, Op0, Op1); // FP types emit a single node of the full type VT.
  +
  +  auto HopBuilder = [&](SelectionDAG &DAG, const SDLoc &DL,
  +                        ArrayRef<SDValue> Ops) {
  +    return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops); // Result type follows the operands handed to the builder, not VT.
  +  };
  +  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1}, HopBuilder); // Integer types go through SplitOpsAndApply (presumably splitting too-wide vectors; confirm against the helper).
  +}
  +
   /// Do target-specific dag combines on floating-point adds/subs.
   static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83789/new/

https://reviews.llvm.org/D83789