[llvm] b43d7aa - [DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 1 03:32:46 PDT 2022


Author: Simon Pilgrim
Date: 2022-08-01T11:32:33+01:00
New Revision: b43d7aacf80f5604a022a0907b65d07ed323fcf7

URL: https://github.com/llvm/llvm-project/commit/b43d7aacf80f5604a022a0907b65d07ed323fcf7
DIFF: https://github.com/llvm/llvm-project/commit/b43d7aacf80f5604a022a0907b65d07ed323fcf7.diff

LOG: [DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/X86/insertelement-zero.ll
    llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
    llvm/test/CodeGen/X86/vector-shuffle-avx512.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d261d8b5123e3..82b89ab4b1097 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19632,6 +19632,23 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
       // Failed to find a match in the chain - bail.
       break;
     }
+
+    // See if we can fill in the missing constant elements as zeros.
+    // TODO: Should we do this for any constant?
+    APInt DemandedZeroElts = APInt::getZero(NumElts);
+    for (int I = 0; I != NumElts; ++I)
+      if (!Ops[I])
+        DemandedZeroElts.setBit(I);
+
+    if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+      SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+                                    : DAG.getConstantFP(0, DL, MaxEltVT);
+      for (int I = 0; I != NumElts; ++I)
+        if (!Ops[I])
+          Ops[I] = Zero;
+
+      return CanonicalizeBuildVector(Ops);
+    }
   }
 
   return SDValue();

diff  --git a/llvm/test/CodeGen/X86/insertelement-zero.ll b/llvm/test/CodeGen/X86/insertelement-zero.ll
index 9479b3107b367..6a2174626aced 100644
--- a/llvm/test/CodeGen/X86/insertelement-zero.ll
+++ b/llvm/test/CodeGen/X86/insertelement-zero.ll
@@ -562,8 +562,8 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) {
 ; AVX-LABEL: PR41512_v8f32:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT:    retq
   %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0

diff  --git a/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
index 7ddc0f1b2e4ac..854a36489dfab 100644
--- a/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
+++ b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
@@ -12,9 +12,7 @@
 define i1 @dont_hit_assert(i24 signext %d) {
 ; CHECK-LABEL: dont_hit_assert:
 ; CHECK:       # %bb.0: # %for.cond
-; CHECK-NEXT:    movb $-1, %al
-; CHECK-NEXT:    negb %al
-; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    retq
 for.cond:
   %t0 = insertelement <8 x i24> zeroinitializer, i24 1, i32 0

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 0c3f5207eda15..0b39677215cbe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -534,8 +534,8 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) {
 define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; SKX64:       # %bb.0:
-; SKX64-NEXT:    vmovdqa 32(%rdi), %xmm0
-; SKX64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX64-NEXT:    vpbroadcastd 44(%rdi), %xmm0
+; SKX64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; SKX64-NEXT:    vmovdqa %ymm0, 672(%rsi)
 ; SKX64-NEXT:    vmovdqa 208(%rdi), %xmm0
 ; SKX64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -545,11 +545,11 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ;
 ; KNL64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; KNL64:       # %bb.0:
-; KNL64-NEXT:    vmovdqa 32(%rdi), %xmm0
-; KNL64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT:    vpbroadcastd 44(%rdi), %xmm0
+; KNL64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; KNL64-NEXT:    vmovdqa %ymm0, 672(%rsi)
-; KNL64-NEXT:    vmovdqa 208(%rdi), %xmm0
-; KNL64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; KNL64-NEXT:    vmovdqa %ymm0, 832(%rsi)
 ; KNL64-NEXT:    retq
 ;
@@ -557,8 +557,8 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; SKX32:       # %bb.0:
 ; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; SKX32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; SKX32-NEXT:    vmovdqa 32(%ecx), %xmm0
-; SKX32-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX32-NEXT:    vpbroadcastd 44(%ecx), %xmm0
+; SKX32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; SKX32-NEXT:    vmovdqa %ymm0, 672(%eax)
 ; SKX32-NEXT:    vmovdqa 208(%ecx), %xmm0
 ; SKX32-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -569,13 +569,13 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
 ; KNL32-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
 ; KNL32:       # %bb.0:
 ; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL32-NEXT:    vmovdqa 32(%eax), %xmm0
-; KNL32-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
 ; KNL32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; KNL32-NEXT:    vmovdqa %ymm0, 672(%ecx)
-; KNL32-NEXT:    vmovdqa 208(%eax), %xmm0
-; KNL32-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
-; KNL32-NEXT:    vmovdqa %ymm0, 832(%ecx)
+; KNL32-NEXT:    vpbroadcastd 44(%ecx), %xmm0
+; KNL32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT:    vmovdqa %ymm0, 672(%eax)
+; KNL32-NEXT:    vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL32-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT:    vmovdqa %ymm0, 832(%eax)
 ; KNL32-NEXT:    retl
   %t87 = load <16 x i32>, ptr %src, align 64
   %t88 = extractelement <16 x i32> %t87, i64 11


        


More information about the llvm-commits mailing list