[llvm] b43d7aa - [DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 1 03:32:46 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-01T11:32:33+01:00
New Revision: b43d7aacf80f5604a022a0907b65d07ed323fcf7
URL: https://github.com/llvm/llvm-project/commit/b43d7aacf80f5604a022a0907b65d07ed323fcf7
DIFF: https://github.com/llvm/llvm-project/commit/b43d7aacf80f5604a022a0907b65d07ed323fcf7.diff
LOG: [DAG] visitINSERT_VECTOR_ELT - extend folding to BUILD_VECTOR if all missing elements from an insertion chain are known zero
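For illustration only (this snippet is not part of the commit, and the function name is hypothetical): a chain of insertions into a base vector whose remaining lanes are known zero can now be folded into a single BUILD_VECTOR during DAG combine, roughly as in:

  define <4 x float> @insert_chain_into_zero(float %x, float %y) {
    ; The untouched lanes of the zeroinitializer base are known zero, so the
    ; two-element insertion chain should now fold to a BUILD_VECTOR node.
    %v0 = insertelement <4 x float> zeroinitializer, float %x, i32 0
    %v1 = insertelement <4 x float> %v0, float %y, i32 1
    ret <4 x float> %v1
  }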
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/X86/insertelement-zero.ll
llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d261d8b5123e3..82b89ab4b1097 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19632,6 +19632,23 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
// Failed to find a match in the chain - bail.
break;
}
+
+ // See if we can fill in the missing constant elements as zeros.
+ // TODO: Should we do this for any constant?
+ APInt DemandedZeroElts = APInt::getZero(NumElts);
+ for (int I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ DemandedZeroElts.setBit(I);
+
+ if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
+ SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
+ : DAG.getConstantFP(0, DL, MaxEltVT);
+ for (int I = 0; I != NumElts; ++I)
+ if (!Ops[I])
+ Ops[I] = Zero;
+
+ return CanonicalizeBuildVector(Ops);
+ }
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/insertelement-zero.ll b/llvm/test/CodeGen/X86/insertelement-zero.ll
index 9479b3107b367..6a2174626aced 100644
--- a/llvm/test/CodeGen/X86/insertelement-zero.ll
+++ b/llvm/test/CodeGen/X86/insertelement-zero.ll
@@ -562,8 +562,8 @@ define <8 x float> @PR41512_v8f32(float %x, float %y) {
; AVX-LABEL: PR41512_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
+; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
%ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
diff --git a/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
index 7ddc0f1b2e4ac..854a36489dfab 100644
--- a/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
+++ b/llvm/test/CodeGen/X86/shuffle-combine-crash-3.ll
@@ -12,9 +12,7 @@
define i1 @dont_hit_assert(i24 signext %d) {
; CHECK-LABEL: dont_hit_assert:
; CHECK: # %bb.0: # %for.cond
-; CHECK-NEXT: movb $-1, %al
-; CHECK-NEXT: negb %al
-; CHECK-NEXT: sete %al
+; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: retq
for.cond:
%t0 = insertelement <8 x i24> zeroinitializer, i24 1, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 0c3f5207eda15..0b39677215cbe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -534,8 +534,8 @@ define <16 x float> @test_masked_permps_v16f32(ptr %vp, <16 x float> %vec2) {
define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
; SKX64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
; SKX64: # %bb.0:
-; SKX64-NEXT: vmovdqa 32(%rdi), %xmm0
-; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX64-NEXT: vpbroadcastd 44(%rdi), %xmm0
+; SKX64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; SKX64-NEXT: vmovdqa %ymm0, 672(%rsi)
; SKX64-NEXT: vmovdqa 208(%rdi), %xmm0
; SKX64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -545,11 +545,11 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
;
; KNL64-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
; KNL64: # %bb.0:
-; KNL64-NEXT: vmovdqa 32(%rdi), %xmm0
-; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT: vpbroadcastd 44(%rdi), %xmm0
+; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; KNL64-NEXT: vmovdqa %ymm0, 672(%rsi)
-; KNL64-NEXT: vmovdqa 208(%rdi), %xmm0
-; KNL64-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
+; KNL64-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; KNL64-NEXT: vmovdqa %ymm0, 832(%rsi)
; KNL64-NEXT: retq
;
@@ -557,8 +557,8 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
; SKX32: # %bb.0:
; SKX32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SKX32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; SKX32-NEXT: vmovdqa 32(%ecx), %xmm0
-; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SKX32-NEXT: vpbroadcastd 44(%ecx), %xmm0
+; SKX32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; SKX32-NEXT: vmovdqa %ymm0, 672(%eax)
; SKX32-NEXT: vmovdqa 208(%ecx), %xmm0
; SKX32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
@@ -569,13 +569,13 @@ define void @test_demandedelts_pshufb_v32i8_v16i8(ptr %src, ptr %dst) {
; KNL32-LABEL: test_demandedelts_pshufb_v32i8_v16i8:
; KNL32: # %bb.0:
; KNL32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL32-NEXT: vmovdqa 32(%eax), %xmm0
-; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,14,15,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
; KNL32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; KNL32-NEXT: vmovdqa %ymm0, 672(%ecx)
-; KNL32-NEXT: vmovdqa 208(%eax), %xmm0
-; KNL32-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero
-; KNL32-NEXT: vmovdqa %ymm0, 832(%ecx)
+; KNL32-NEXT: vpbroadcastd 44(%ecx), %xmm0
+; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT: vmovdqa %ymm0, 672(%eax)
+; KNL32-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,0,2,3]
+; KNL32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; KNL32-NEXT: vmovdqa %ymm0, 832(%eax)
; KNL32-NEXT: retl
%t87 = load <16 x i32>, ptr %src, align 64
%t88 = extractelement <16 x i32> %t87, i64 11