[llvm] ac8be21 - [DAG] isSplatValue - don't attempt to merge any BITCAST sub elements if they contain UNDEFs

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 23 10:39:01 PDT 2022


Author: Simon Pilgrim
Date: 2022-07-23T18:38:48+01:00
New Revision: ac8be21365bc4a8897303d13161a0f5c7a511215

URL: https://github.com/llvm/llvm-project/commit/ac8be21365bc4a8897303d13161a0f5c7a511215
DIFF: https://github.com/llvm/llvm-project/commit/ac8be21365bc4a8897303d13161a0f5c7a511215.diff

LOG: [DAG] isSplatValue - don't attempt to merge any BITCAST sub elements if they contain UNDEFs

We still haven't found a solution that correctly handles 'don't care' sub-elements. Given how close we are to the next release branch, I'm making this fail-safe change; we can revisit this later if we can't find alternatives.

NOTE: This isn't a reversion of D128570 - it's the removal of undef handling across bitcasts entirely

Fixes #56520

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/X86/fshl-splat-undef.ll
    llvm/test/CodeGen/X86/vector-shuffle-combining.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 78bd711db49b..441437351852 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2712,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
         SubDemandedElts &= ScaledDemandedElts;
         if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1))
           return false;
-
-        // Here we can't do "MatchAnyBits" operation merge for undef bits.
-        // Because some operation only use part value of the source.
-        // Take llvm.fshl.* for example:
-        // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32
-        // t2: v2i64 = bitcast t1
-        // t5: v2i64 = fshl t3, t4, t2
-        // We can not convert t2 to {i64 undef, i64 undef}
-        UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts,
-                                            /*MatchAllBits=*/true);
+        // TODO: Add support for merging sub undef elements.
+        if (!SubUndefElts.isZero())
+          return false;
       }
       return true;
     }

diff  --git a/llvm/test/CodeGen/X86/fshl-splat-undef.ll b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
index 365c3e32e0a0..dcbd3a56a2ba 100644
--- a/llvm/test/CodeGen/X86/fshl-splat-undef.ll
+++ b/llvm/test/CodeGen/X86/fshl-splat-undef.ll
@@ -20,16 +20,14 @@
 define void @test_fshl(<8 x i64> %lo, <8 x i64> %hi, <8 x i64>* %arr) {
 ; CHECK-LABEL: test_fshl:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl $63, %eax
-; CHECK-NEXT:    vmovd %eax, %xmm2
-; CHECK-NEXT:    movl $12, %eax
-; CHECK-NEXT:    vmovd %eax, %xmm3
-; CHECK-NEXT:    vpand %xmm2, %xmm3, %xmm2
-; CHECK-NEXT:    vpsllq %xmm2, %zmm1, %zmm1
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    vpsrlq $52, %zmm0, %zmm0
-; CHECK-NEXT:    vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
-; CHECK-NEXT:    vmovdqa64 %zmm0, (%eax)
+; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [12,12,12,12,12,12,12,12,12,12,12,12,12,12,12,12]
+; CHECK-NEXT:    vpandnq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm2, %zmm2
+; CHECK-NEXT:    vpsrlq $1, %zmm0, %zmm0
+; CHECK-NEXT:    vpsrlvq %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    vpsllq $12, %zmm1, %zmm1
+; CHECK-NEXT:    vpternlogq $168, {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm1
+; CHECK-NEXT:    vmovdqa64 %zmm1, (%eax)
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retl
 entry:

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 33bafa3c7cf2..f587720e477c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -3353,7 +3353,7 @@ define <2 x i64> @PR55157(ptr %0) {
 }
 declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>)
 
-; FIXME: SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
+; SelectionDAG::isSplatValue - incorrect handling of undef sub-elements
 define <2 x i64> @PR56520(<16 x i8> %0) {
 ; SSE-LABEL: PR56520:
 ; SSE:       # %bb.0:
@@ -3362,16 +3362,38 @@ define <2 x i64> @PR56520(<16 x i8> %0) {
 ; SSE-NEXT:    movd %xmm1, %eax
 ; SSE-NEXT:    movsbl %al, %eax
 ; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: PR56520:
-; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovd %xmm0, %eax
-; AVX-NEXT:    movsbl %al, %eax
-; AVX-NEXT:    vmovd %eax, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: PR56520:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vmovd %xmm0, %eax
+; AVX1-NEXT:    movsbl %al, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm0
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-SLOW-LABEL: PR56520:
+; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-SLOW-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    vmovd %xmm0, %eax
+; AVX2-SLOW-NEXT:    movsbl %al, %eax
+; AVX2-SLOW-NEXT:    vmovd %eax, %xmm0
+; AVX2-SLOW-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-SLOW-NEXT:    retq
+;
+; AVX2-FAST-LABEL: PR56520:
+; AVX2-FAST:       # %bb.0:
+; AVX2-FAST-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-FAST-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX2-FAST-NEXT:    vmovd %xmm0, %eax
+; AVX2-FAST-NEXT:    movsbl %al, %eax
+; AVX2-FAST-NEXT:    vmovd %eax, %xmm0
+; AVX2-FAST-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
+; AVX2-FAST-NEXT:    retq
   %2 = icmp eq <16 x i8> zeroinitializer, %0
   %3 = extractelement <16 x i1> %2, i64 0
   %4 = sext i1 %3 to i32


        


More information about the llvm-commits mailing list