[llvm] ac8be21 - [DAG] isSplatValue - don't attempt to merge any BITCAST sub elements if they contain UNDEFs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 23 10:39:01 PDT 2022
Author: Simon Pilgrim
Date: 2022-07-23T18:38:48+01:00
New Revision: ac8be21365bc4a8897303d13161a0f5c7a511215
URL: https://github.com/llvm/llvm-project/commit/ac8be21365bc4a8897303d13161a0f5c7a511215
DIFF: https://github.com/llvm/llvm-project/commit/ac8be21365bc4a8897303d13161a0f5c7a511215.diff
LOG: [DAG] isSplatValue - don't attempt to merge any BITCAST sub elements if they contain UNDEFs
We still haven't found a solution that correctly handles 'don't care' sub elements properly - given how close it is to the next release branch, I'm making this fail-safe change and we can revisit this later if we can't find alternatives.
NOTE: This isn't a reversion of D128570 - it's the removal of undef handling across bitcasts entirely
Fixes #56520
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/fshl-splat-undef.ll
llvm/test/CodeGen/X86/vector-shuffle-combining.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 78bd711db49b..441437351852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2712,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
SubDemandedElts &= ScaledDemandedElts;
if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
return false;
-
- // Here we can't do "MatchAnyBits" operation merge for undef bits.
- // Because some operation only use part value of the source.
- // Take llvm.fshl.* for example:
- // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
- // t2: v2i64 = bitcast t1
- // t5: v2i64 = fshl t3, t4, t2
- // We can not convert t2 to {i64 undef, i64 undef}
- UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
- /*MatchAllBits=*/true);
+ // TODO: Add support for merging sub undef elements.
+ if (!SubUndefElts.isZero())
+ return false;
}
return true;
}
diff --git a/llvm/test/CodeGen/X86/fshl-splat-undef.ll b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
index 365c3e32e0a0..dcbd3a56a2ba 100644
--- a/llvm/test/CodeGen/X86/fshl-splat-undef.ll
+++ b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
@@ -20,16 +20,14 @@
define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
; CHECK-LABEL: test_fshl:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl $63, %eax
-; CHECK-NEXT: vmovd %eax, %xmm2
-; CHECK-NEXT: movl $12, %eax
-; CHECK-NEXT: vmovd %eax, %xmm3
-; CHECK-NEXT: vpand %xmm2, %xmm3, %xmm2
-; CHECK-NEXT: vpsllq %xmm2, %zmm1, %zmm1
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: vpsrlq $52, %zmm0, %zmm0
-; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
-; CHECK-NEXT: vmovdqa64 %zmm0, (%eax)
+; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
+; CHECK-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm2, %zmm2
+; CHECK-NEXT: vpsrlq $1, %zmm0, %zmm0
+; CHECK-NEXT: vpsrlvq %zmm2, %zmm0, %zmm0
+; CHECK-NEXT: vpsllq $12, %zmm1, %zmm1
+; CHECK-NEXT: vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
+; CHECK-NEXT: vmovdqa64 %zmm1, (%eax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retl
entry:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 33bafa3c7cf2..f587720e477c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3353,7 +3353,7 @@ define <2 x i64> @PR55157(ptr %0) {
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
-; FIXME: SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
+; SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
define <2 x i64> @PR56520(<16 x i8> %0) {
; SSE-LABEL: PR56520:
; SSE: # %bb.0:
@@ -3362,16 +3362,38 @@ define <2 x i64> @PR56520(<16 x i8> %0) {
; SSE-NEXT: movd %xmm1, %eax
; SSE-NEXT: movsbl %al, %eax
; SSE-NEXT: movd %eax, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
-; AVX-LABEL: PR56520:
-; AVX: # %bb.0:
-; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: movsbl %al, %eax
-; AVX-NEXT: vmovd %eax, %xmm0
-; AVX-NEXT: retq
+; AVX1-LABEL: PR56520:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: movsbl %al, %eax
+; AVX1-NEXT: vmovd %eax, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT: retq
+;
+; AVX2-SLOW-LABEL: PR56520:
+; AVX2-SLOW: # %bb.0:
+; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT: vmovd %xmm0, %eax
+; AVX2-SLOW-NEXT: movsbl %al, %eax
+; AVX2-SLOW-NEXT: vmovd %eax, %xmm0
+; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2-SLOW-NEXT: retq
+;
+; AVX2-FAST-LABEL: PR56520:
+; AVX2-FAST: # %bb.0:
+; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2-FAST-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT: vmovd %xmm0, %eax
+; AVX2-FAST-NEXT: movsbl %al, %eax
+; AVX2-FAST-NEXT: vmovd %eax, %xmm0
+; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
+; AVX2-FAST-NEXT: retq
%2 = icmp eq <16 x i8> zeroinitializer, %0
%3 = extractelement <16 x i1> %2, i64 0
%4 = sext i1 %3 to i32
More information about the llvm-commits
mailing list