[llvm] 5cce97d - [DAG] isSplatValue - improve ISD::VECTOR_SHUFFLE splat detection
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 2 07:32:39 PST 2022
Author: Simon Pilgrim
Date: 2022-03-02T15:32:24Z
New Revision: 5cce97d61e18e49033954793f4bc28906c75a305
URL: https://github.com/llvm/llvm-project/commit/5cce97d61e18e49033954793f4bc28906c75a305
DIFF: https://github.com/llvm/llvm-project/commit/5cce97d61e18e49033954793f4bc28906c75a305.diff
LOG: [DAG] isSplatValue - improve ISD::VECTOR_SHUFFLE splat detection
Currently we only check for splat shuffles; this extends the check to see whether the source operand is a splat across the demanded elts, based upon the shuffle mask.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 77aaf74338321..2f8fd513e586e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2587,9 +2587,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
return true;
}
case ISD::VECTOR_SHUFFLE: {
- // Check if this is a shuffle node doing a splat.
- // TODO: Do we need to handle shuffle(splat, undef, mask)?
- int SplatIndex = -1;
+ // Check if this is a shuffle node doing a splat or a shuffle of a splat.
+ APInt DemandedLHS = APInt::getNullValue(NumElts);
+ APInt DemandedRHS = APInt::getNullValue(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2599,11 +2599,30 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
if (!DemandedElts[i])
continue;
- if (0 <= SplatIndex && SplatIndex != M)
- return false;
- SplatIndex = M;
+ if (M < (int)NumElts)
+ DemandedLHS.setBit(M);
+ else
+ DemandedRHS.setBit(M - NumElts);
}
- return true;
+
+ // If we aren't demanding either op, assume there's no splat.
+ // If we are demanding both ops, assume there's no splat.
+ if ((DemandedLHS.isZero() && DemandedRHS.isZero()) ||
+ (!DemandedLHS.isZero() && !DemandedRHS.isZero()))
+ return false;
+
+ // See if the demanded elts of the source op is a splat or we only demand
+ // one element, which should always be a splat.
+ // TODO: Handle source ops splats with undefs.
+ auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
+ APInt SrcUndefs;
+ return (SrcElts.countPopulation() == 1) ||
+ (isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
+ (SrcElts & SrcUndefs).isZero());
+ };
+ if (!DemandedLHS.isZero())
+ return CheckSplatSrc(V.getOperand(0), DemandedLHS);
+ return CheckSplatSrc(V.getOperand(1), DemandedRHS);
}
case ISD::EXTRACT_SUBVECTOR: {
// Offset the demanded elts by the subvector index.
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
index 322bc7efe4b41..902547a7b2892 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -2193,24 +2193,19 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) {
; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
-; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
-; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
-; X86-AVX1-NEXT: # xmm4 = mem[0,0]
-; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm5
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm6
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1,2,3],xmm5[4,5,6,7]
-; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm6, %xmm1
-; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2
-; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
-; X86-AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm1
-; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm2
-; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0
-; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
+; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
+; X86-AVX1-NEXT: # xmm3 = mem[0,0]
+; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
+; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
+; X86-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
+; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
+; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X86-AVX1-NEXT: retl
;
; X86-AVX2-LABEL: PR52719:
More information about the llvm-commits
mailing list