[llvm] ae0f41d - [X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef),zero) -> zero vector
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 26 02:45:27 PDT 2023
Author: Simon Pilgrim
Date: 2023-04-26T10:44:58+01:00
New Revision: ae0f41d186e6e851161fe0bad37a1c207b845859
URL: https://github.com/llvm/llvm-project/commit/ae0f41d186e6e851161fe0bad37a1c207b845859
DIFF: https://github.com/llvm/llvm-project/commit/ae0f41d186e6e851161fe0bad37a1c207b845859.diff
LOG: [X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef),zero) -> zero vector
426db6b4eb2e9298598 added the build_vector(undef,freeze(undef)) -> freeze(undef) fold, but failed to account for cases where the scalar freeze(undef) had multiple uses; in those cases we can only safely fold to a zero vector
https://alive2.llvm.org/ce/z/87jG8K
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/build-vector-128.ll
llvm/test/CodeGen/X86/freeze-vector.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d5fdd233d82b0..90b3f79e81fb1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11231,6 +11231,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
APInt ZeroMask = APInt::getZero(NumElems);
APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
+ bool OneUseFrozenUndefs = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
for (unsigned i = 0; i < NumElems; ++i) {
@@ -11239,7 +11240,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
UndefMask.setBit(i);
continue;
}
- if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef()) {
+ if (ISD::isFreezeUndef(Elt.getNode())) {
+ OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();
FrozenUndefMask.setBit(i);
continue;
}
@@ -11259,10 +11261,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);
- // All undef/freeze(undef) vector. Return an FREEZE UNDEF.
- if ((UndefMask | FrozenUndefMask).isAllOnes())
+ // All undef/freeze(undef) vector. Return a FREEZE UNDEF.
+ if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
return DAG.getFreeze(DAG.getUNDEF(VT));
+ // All undef/freeze(undef)/zero vector. Return a zero vector.
+ if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index df664a92425a0..30f55a1d41922 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -556,9 +556,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-32: # %bb.0:
; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE2-32-NEXT: xorpd %xmm1, %xmm1
-; SSE2-32-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-32-NEXT: paddd %xmm0, %xmm0
+; SSE2-32-NEXT: pxor %xmm1, %xmm1
; SSE2-32-NEXT: psubd %xmm0, %xmm1
; SSE2-32-NEXT: movdqa %xmm1, %xmm0
; SSE2-32-NEXT: psrad $31, %xmm0
@@ -569,9 +568,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
;
; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-64: # %bb.0:
-; SSE2-64-NEXT: xorpd %xmm1, %xmm1
-; SSE2-64-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-64-NEXT: paddd %xmm0, %xmm0
+; SSE2-64-NEXT: pxor %xmm1, %xmm1
; SSE2-64-NEXT: psubd %xmm0, %xmm1
; SSE2-64-NEXT: movdqa %xmm1, %xmm0
; SSE2-64-NEXT: psrad $31, %xmm0
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index 5f060fb0d4d93..45587f1f33ece 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -355,7 +355,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X86-NEXT: vmovdqa %xmm0, (%ecx)
; X86-NEXT: vmovd %edx, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
@@ -370,7 +371,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X64-NEXT: vmovdqa %xmm0, (%rdx)
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
-; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2,3]
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
@@ -397,13 +399,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
-; X86-NEXT: vmovd %edx, %xmm0
-; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; X86-NEXT: vmovd %edx, %xmm1
+; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
+; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7]
-; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-NEXT: vmovdqa %xmm1, (%ecx)
-; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
+; X86-NEXT: vmovdqa %xmm0, (%ecx)
+; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
@@ -412,13 +415,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: andl $15, %eax
-; X64-NEXT: vmovd %eax, %xmm0
-; X64-NEXT: vpbroadcastd %xmm0, %xmm0
-; X64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1,2,3]
+; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovd %eax, %xmm1
+; X64-NEXT: vpbroadcastd %xmm1, %xmm1
+; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
-; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X64-NEXT: vmovdqa %xmm1, (%rdx)
; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
+; X64-NEXT: vmovdqa %xmm0, (%rdx)
+; X64-NEXT: vpand %xmm2, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
%i0.src = load i32, ptr %origin0
@@ -443,14 +447,13 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
-; X86-NEXT: vmovd %edx, %xmm0
-; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6,7]
-; X86-NEXT: vmovddup {{.*#+}} xmm2 = [7,7]
-; X86-NEXT: # xmm2 = mem[0,0]
-; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
-; X86-NEXT: vmovdqa %xmm1, (%ecx)
-; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
+; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,7]
+; X86-NEXT: # xmm0 = mem[0,0]
+; X86-NEXT: vmovd %edx, %xmm1
+; X86-NEXT: vpand %xmm0, %xmm1, %xmm2
+; X86-NEXT: vmovdqa %xmm2, (%ecx)
+; X86-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
;
More information about the llvm-commits
mailing list