[llvm] 426db6b - [X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef)) -> freeze(undef)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Apr 22 07:30:55 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-22T15:30:50+01:00
New Revision: 426db6b4eb2e9298598d5c09d27f0cdb27917751
URL: https://github.com/llvm/llvm-project/commit/426db6b4eb2e9298598d5c09d27f0cdb27917751
DIFF: https://github.com/llvm/llvm-project/commit/426db6b4eb2e9298598d5c09d27f0cdb27917751.diff
LOG: [X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef)) -> freeze(undef)
Noticed while triaging #62286
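
For context: the new early-out fires when every build_vector operand is either undef or freeze(undef), in which case the whole node can be replaced by a single freeze(undef) of the vector type. Below is a minimal standalone C++ sketch of that mask test (illustrative only, not the LLVM API; the Elt enum and foldsToFreezeUndef helper are hypothetical names):

// Standalone model of the mask test the patch adds (not LLVM code).
// A lane is foldable if it is undef or freeze(undef); when every lane
// is foldable, the build_vector collapses to a single freeze(undef).
#include <bitset>
#include <cassert>
#include <cstddef>

enum class Elt { Value, Undef, FrozenUndef };

template <std::size_t N>
bool foldsToFreezeUndef(const Elt (&Ops)[N]) {
  std::bitset<N> UndefMask, FrozenUndefMask;
  for (std::size_t I = 0; I != N; ++I) {
    UndefMask[I] = Ops[I] == Elt::Undef;
    FrozenUndefMask[I] = Ops[I] == Elt::FrozenUndef;
  }
  // Mirrors (UndefMask | FrozenUndefMask).isAllOnes() in the patch.
  return (UndefMask | FrozenUndefMask).all();
}

int main() {
  Elt AllFoldable[] = {Elt::Undef, Elt::FrozenUndef};
  Elt Mixed[] = {Elt::Undef, Elt::Value};
  assert(foldsToFreezeUndef(AllFoldable)); // -> freeze(undef)
  assert(!foldsToFreezeUndef(Mixed));      // must keep the build_vector
}

The real lowering code tracks the same two per-operand masks and, as in the hunk below, tests (UndefMask | FrozenUndefMask).isAllOnes().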
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/build-vector-128.ll
llvm/test/CodeGen/X86/freeze-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 05c8d6cc3ea12..cda001d623e8e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11259,6 +11259,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);

+ // All undef/freeze(undef) vector. Return a FREEZE UNDEF.
+ if ((UndefMask | FrozenUndefMask).isAllOnes())
+ return DAG.getFreeze(DAG.getUNDEF(VT));
+
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index a14884a476bee..df664a92425a0 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -556,30 +556,28 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-32: # %bb.0:
; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2-32-NEXT: movd %eax, %xmm1
-; SSE2-32-NEXT: xorps %xmm2, %xmm2
-; SSE2-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
+; SSE2-32-NEXT: xorpd %xmm1, %xmm1
+; SSE2-32-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-32-NEXT: paddd %xmm0, %xmm0
-; SSE2-32-NEXT: psubd %xmm0, %xmm2
-; SSE2-32-NEXT: movdqa %xmm2, %xmm0
+; SSE2-32-NEXT: psubd %xmm0, %xmm1
+; SSE2-32-NEXT: movdqa %xmm1, %xmm0
; SSE2-32-NEXT: psrad $31, %xmm0
-; SSE2-32-NEXT: pxor %xmm0, %xmm2
-; SSE2-32-NEXT: psubd %xmm0, %xmm2
-; SSE2-32-NEXT: movq %xmm2, (%eax)
+; SSE2-32-NEXT: pxor %xmm0, %xmm1
+; SSE2-32-NEXT: psubd %xmm0, %xmm1
+; SSE2-32-NEXT: movq %xmm1, (%eax)
; SSE2-32-NEXT: retl
;
; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-64: # %bb.0:
-; SSE2-64-NEXT: movd %eax, %xmm1
-; SSE2-64-NEXT: xorps %xmm2, %xmm2
-; SSE2-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
+; SSE2-64-NEXT: xorpd %xmm1, %xmm1
+; SSE2-64-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-64-NEXT: paddd %xmm0, %xmm0
-; SSE2-64-NEXT: psubd %xmm0, %xmm2
-; SSE2-64-NEXT: movdqa %xmm2, %xmm0
+; SSE2-64-NEXT: psubd %xmm0, %xmm1
+; SSE2-64-NEXT: movdqa %xmm1, %xmm0
; SSE2-64-NEXT: psrad $31, %xmm0
-; SSE2-64-NEXT: pxor %xmm0, %xmm2
-; SSE2-64-NEXT: psubd %xmm0, %xmm2
-; SSE2-64-NEXT: movq %xmm2, (%rdi)
+; SSE2-64-NEXT: pxor %xmm0, %xmm1
+; SSE2-64-NEXT: psubd %xmm0, %xmm1
+; SSE2-64-NEXT: movq %xmm1, (%rdi)
; SSE2-64-NEXT: retq
;
; SSE41-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index 4139d974d8308..5f060fb0d4d93 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -353,11 +353,9 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X86-NEXT: vmovdqa {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%ecx)
-; X86-NEXT: vmovd %eax, %xmm0
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT: vmovd %edx, %xmm2
-; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
+; X86-NEXT: vmovd %edx, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
@@ -372,9 +370,7 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X64-NEXT: vmovdqa %xmm0, (%rdx)
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
-; X64-NEXT: vmovd %eax, %xmm2
-; X64-NEXT: vpbroadcastd %xmm2, %xmm2
-; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm2[2],xmm0[3]
+; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2,3]
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
@@ -401,15 +397,13 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
-; X86-NEXT: vmovd %eax, %xmm0
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X86-NEXT: vmovd %edx, %xmm1
-; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
-; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
+; X86-NEXT: vmovd %edx, %xmm0
+; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X86-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7]
-; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X86-NEXT: vmovdqa %xmm0, (%ecx)
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-NEXT: vmovdqa %xmm1, (%ecx)
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
@@ -420,13 +414,11 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X64-NEXT: andl $15, %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastd %xmm1, %xmm1
-; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; X64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1,2,3]
; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
+; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X64-NEXT: vmovdqa %xmm1, (%rdx)
; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X64-NEXT: vmovdqa %xmm0, (%rdx)
-; X64-NEXT: vpand %xmm2, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
%i0.src = load i32, ptr %origin0
@@ -451,14 +443,13 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
-; X86-NEXT: vmovd %eax, %xmm0
-; X86-NEXT: vmovd %edx, %xmm1
-; X86-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X86-NEXT: vmovd %edx, %xmm0
+; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vmovddup {{.*#+}} xmm2 = [7,7]
; X86-NEXT: # xmm2 = mem[0,0]
-; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
-; X86-NEXT: vmovdqa %xmm0, (%ecx)
-; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
+; X86-NEXT: vmovdqa %xmm1, (%ecx)
+; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl