[llvm] 5cce97d - [DAG] isSplatValue - improve ISD::VECTOR_SHUFFLE splat detection

Wed Mar 2 07:32:39 PST 2022

Author: Simon Pilgrim
Date: 2022-03-02T15:32:24Z
New Revision: 5cce97d61e18e49033954793f4bc28906c75a305

URL: https://github.com/llvm/llvm-project/commit/5cce97d61e18e49033954793f4bc28906c75a305
DIFF: https://github.com/llvm/llvm-project/commit/5cce97d61e18e49033954793f4bc28906c75a305.diff

LOG: [DAG] isSplatValue - improve ISD::VECTOR_SHUFFLE splat detection

Currently we only check for splat shuffles; this extends the check to see whether the source operand is a splat across the demanded elts, based upon the shuffle mask.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/X86/vector-shift-ashr-256.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 77aaf74338321..2f8fd513e586e 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2587,9 +2587,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
     return true;
   }
   case ISD::VECTOR_SHUFFLE: {
-    // Check if this is a shuffle node doing a splat.
-    // TODO: Do we need to handle shuffle(splat, undef, mask)?
-    int SplatIndex = -1;
+    // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+    APInt DemandedLHS = APInt::getNullValue(NumElts);
+    APInt DemandedRHS = APInt::getNullValue(NumElts);
     ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
     for (int i = 0; i != (int)NumElts; ++i) {
       int M = Mask[i];
@@ -2599,11 +2599,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
       }
       if (!DemandedElts[i])
         continue;
-      if (0 <= SplatIndex && SplatIndex != M)
-        return false;
-      SplatIndex = M;
+      if (M < (int)NumElts)
+        DemandedLHS.setBit(M);
+      else
+        DemandedRHS.setBit(M - NumElts);
     }
-    return true;
+
+    // If we aren't demanding either op, assume there's no splat.
+    // If we are demanding both ops, assume there's no splat.
+    if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+        (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+      return false;
+
+    // See if the demanded elts of the source op is a splat or we only demand
+    // one element, which should always be a splat.
+    // TODO: Handle source ops splats with undefs.
+    auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+      APInt SrcUndefs;
+      return (SrcElts.countPopulation() == 1) ||
+             (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+              (SrcElts & SrcUndefs).isZero());
+    };
+    if (!DemandedLHS.isZero())
+      return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+    return CheckSplatSrc(V.getOperand(1), DemandedRHS);
   }
   case ISD::EXTRACT_SUBVECTOR: {
     // Offset the demanded elts by the subvector index.

diff  --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 322bc7efe4b41..902547a7b2892 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2193,24 +2193,19 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
 ; X86-AVX1-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm1
 ; X86-AVX1-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; X86-AVX1-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX1-NEXT:    vpsrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; X86-AVX1-NEXT:    # xmm4 = mem[0,0]
-; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm4, %xmm5
-; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm4, %xmm6
-; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm5 = xmm6[0,1,2,3],xmm5[4,5,6,7]
-; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
-; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm6, %xmm1
-; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm6, %xmm2
-; X86-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vpsubq %xmm5, %xmm1, %xmm1
-; X86-AVX1-NEXT:    vpsrlq %xmm3, %xmm4, %xmm2
-; X86-AVX1-NEXT:    vpsrlq %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vpsubq %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT:    vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT:    # xmm3 = mem[0,0]
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm5
+; X86-AVX1-NEXT:    vpsrlq %xmm2, %xmm5, %xmm2
+; X86-AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vpsubq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; X86-AVX1-NEXT:    retl
 ;
 ; X86-AVX2-LABEL: PR52719: