[llvm] r369872 - [X86][DAGCombiner] Teach narrowShuffle to use concat_vectors instead of inserting into undef

Sun Aug 25 10:59:49 PDT 2019

Author: ctopper
Date: Sun Aug 25 10:59:49 2019
New Revision: 369872

URL: http://llvm.org/viewvc/llvm-project?rev=369872&view=rev
Log:
[X86][DAGCombiner] Teach narrowShuffle to use concat_vectors instead of inserting into undef

Summary:
Concat_vectors is more canonical during early DAG combine. For example, its what's used by SelectionDAGBuilder when converting IR shuffles into SelectionDAG shuffles when element counts between inputs and mask don't match. We also have combines in DAGCombiner than can pull concat_vectors through a shuffle. See partitionShuffleOfConcats. So it seems like concat_vectors is a better operation to use here. I had to teach DAGCombiner's SimplifyVBinOp to also handle concat_vectors with undef. I haven't checked yet if we can remove the INSERT_SUBVECTOR version in there or not.

I didn't want to mess with the other caller of getShuffleHalfVectors that's used during shuffle lowering where insert_subvector probably is what we want to produce so I've enabled this via a boolean passed to the function.

Reviewers: spatel, RKSimon

Reviewed By: RKSimon

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66504

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=369872&r1=369871&r2=369872&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Aug 25 10:59:49 2019
@@ -19537,6 +19537,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNo
     }
   }
 
+  // Make sure all but the first op are undef.
+  auto ConcatWithUndef = [](SDValue Concat) {
+    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+           std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+                       [](const SDValue &Op) {
+                         return Op.isUndef();
+                       });
+  };
+
+  // The following pattern is likely to emerge with vector reduction ops. Moving
+  // the binary operation ahead of the concat may allow using a narrower vector
+  // instruction that has better performance than the wide version of the op:
+  // VBinOp (concat X, undef), (concat Y, undef) --> concat (VBinOp X, Y), VecC
+  if (ConcatWithUndef(LHS) && ConcatWithUndef(RHS) &&
+      (LHS.hasOneUse() || RHS.hasOneUse())) {
+    SDValue X = LHS.getOperand(0);
+    SDValue Y = RHS.getOperand(0);
+    EVT NarrowVT = X.getValueType();
+    if (NarrowVT == Y.getValueType() &&
+        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+      // (binop undef, undef) may not return undef, so compute that result.
+      SDLoc DL(N);
+      SDValue VecC =
+          DAG.getNode(Opcode, DL, NarrowVT, DAG.getUNDEF(NarrowVT),
+                      DAG.getUNDEF(NarrowVT));
+      SmallVector<SDValue, 4> Ops(LHS.getNumOperands(), VecC);
+      Ops[0] = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
+    }
+  }
+
   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
     return V;
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=369872&r1=369871&r2=369872&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 25 10:59:49 2019
@@ -15013,7 +15013,7 @@ getHalfShuffleMask(ArrayRef<int> Mask, M
 static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
                                      ArrayRef<int> HalfMask, int HalfIdx1,
                                      int HalfIdx2, bool UndefLower,
-                                     SelectionDAG &DAG) {
+                                     SelectionDAG &DAG, bool UseConcat = false) {
   assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
   assert(V1.getValueType().isSimple() && "Expecting only simple types");
 
@@ -15034,6 +15034,14 @@ static SDValue getShuffleHalfVectors(con
   SDValue Half1 = getHalfVector(HalfIdx1);
   SDValue Half2 = getHalfVector(HalfIdx2);
   SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  if (UseConcat) {
+    SDValue Op0 = V;
+    SDValue Op1 = DAG.getUNDEF(HalfVT);
+    if (UndefLower)
+      std::swap(Op0, Op1);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1);
+  }
+
   unsigned Offset = UndefLower ? HalfNumElts : 0;
   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
                      DAG.getIntPtrConstant(Offset, DL));
@@ -33974,7 +33982,7 @@ static SDValue narrowShuffle(ShuffleVect
   // the wide shuffle that we started with.
   return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
                                Shuf->getOperand(1), HalfMask, HalfIdx1,
-                               HalfIdx2, false, DAG);
+                               HalfIdx2, false, DAG, /*UseConcat*/true);
 }
 
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,