[PATCH] D38696: [DAGCombine] Permit combining of shuffle of equivalent splat BUILD_VECTORs

Simon Pilgrim via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 17 10:58:44 PDT 2017


RKSimon updated this revision to Diff 119352.
RKSimon added a reviewer: andreadb.
RKSimon removed a subscriber: andreadb.
RKSimon added a comment.

Updated to reuse more of the existing BUILD_VECTOR code.


Repository:
  rL LLVM

https://reviews.llvm.org/D38696

Files:
  lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  test/CodeGen/X86/avx-vbroadcast.ll


Index: test/CodeGen/X86/avx-vbroadcast.ll
===================================================================
--- test/CodeGen/X86/avx-vbroadcast.ll
+++ test/CodeGen/X86/avx-vbroadcast.ll
@@ -853,14 +853,12 @@
 ; X32-LABEL: broadcast_shuffle1032:
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastsd (%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64:       ## BB#0:
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT:    retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 1
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15451,7 +15451,7 @@
 //
 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
 // We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
 // non-constant operands. This seems to work out reasonably well in practice.
 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
                                        SelectionDAG &DAG,
@@ -15463,6 +15463,7 @@
 
   if (!N0->hasOneUse() || !N1->hasOneUse())
     return SDValue();
+
   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
   // discussed above.
   if (!N1.isUndef()) {
@@ -15474,6 +15475,15 @@
       return SDValue();
   }
 
+  // If both inputs are splats of the same value then we can safely merge this
+  // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
+  bool IsSplat = false;
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (BV0 && BV1)
+    if (SDValue Splat0 = BV0->getSplatValue())
+      IsSplat = (Splat0 == BV1->getSplatValue());
+
   SmallVector<SDValue, 8> Ops;
   SmallSet<SDValue, 16> DuplicateOps;
   for (int M : SVN->getMask()) {
@@ -15492,15 +15502,17 @@
       }
     }
 
-    // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
-    // fine, but it's likely to generate low-quality code if the target can't
-    // reconstruct an appropriate shuffle.
+    // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+    // generating a splat; semantically, this is fine, but it's likely to
+    // generate low-quality code if the target can't reconstruct an appropriate
+    // shuffle.
     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
-      if (!DuplicateOps.insert(Op).second)
+      if (!IsSplat && !DuplicateOps.insert(Op).second)
         return SDValue();
 
     Ops.push_back(Op);
   }
+
   // BUILD_VECTOR requires all inputs to be of the same type, find the
   // maximum type and extend them all.
   EVT SVT = VT.getScalarType();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38696.119352.patch
Type: text/x-patch
Size: 3276 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171017/d687aaed/attachment.bin>


More information about the llvm-commits mailing list