[llvm] 1eecf03 - [X86] `LowerBUILD_VECTOR()`: fix all-UNDEF detection
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 20 09:22:27 PST 2023
Author: Roman Lebedev
Date: 2023-01-20T20:21:26+03:00
New Revision: 1eecf03919cdded5853b4aace67650e8c03a271b
URL: https://github.com/llvm/llvm-project/commit/1eecf03919cdded5853b4aace67650e8c03a271b
DIFF: https://github.com/llvm/llvm-project/commit/1eecf03919cdded5853b4aace67650e8c03a271b.diff
LOG: [X86] `LowerBUILD_VECTOR()`: fix all-UNDEF detection
The original check was trying to avoid querying UndefMask itself,
deducing full undef-ness via simpler means instead, but checking
`NonZeroMask` alone does not account for, e.g., `ZeroMask`.
Fixes https://github.com/llvm/llvm-project/issues/60168
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/build-vector-128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c88c66d8b2edc..411462f8ff07b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11166,19 +11166,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- // All undef vector. Return an UNDEF. All zero vectors were handled above.
- unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
- if (NonZeroMask == 0 && NumFrozenUndefElts != NumElems) {
- assert(UndefMask.isAllOnes() && "Fully undef mask expected");
+ // All undef vector. Return an UNDEF.
+ if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);
- }
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
// and blend the FREEZE-UNDEF operands back in.
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
- if (NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
+ if (unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
+ NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
SmallVector<int, 16> BlendMask(NumElems, -1);
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
for (unsigned i = 0; i < NumElems; ++i) {
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index 7b03005f08056..a14884a476bee 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -552,3 +552,76 @@ define <4 x float> @PR37502(float %x, float %y) {
ret <4 x float> %i3
}
+define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
+; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; SSE2-32: # %bb.0:
+; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE2-32-NEXT: movd %eax, %xmm1
+; SSE2-32-NEXT: xorps %xmm2, %xmm2
+; SSE2-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
+; SSE2-32-NEXT: paddd %xmm0, %xmm0
+; SSE2-32-NEXT: psubd %xmm0, %xmm2
+; SSE2-32-NEXT: movdqa %xmm2, %xmm0
+; SSE2-32-NEXT: psrad $31, %xmm0
+; SSE2-32-NEXT: pxor %xmm0, %xmm2
+; SSE2-32-NEXT: psubd %xmm0, %xmm2
+; SSE2-32-NEXT: movq %xmm2, (%eax)
+; SSE2-32-NEXT: retl
+;
+; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; SSE2-64: # %bb.0:
+; SSE2-64-NEXT: movd %eax, %xmm1
+; SSE2-64-NEXT: xorps %xmm2, %xmm2
+; SSE2-64-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,0]
+; SSE2-64-NEXT: paddd %xmm0, %xmm0
+; SSE2-64-NEXT: psubd %xmm0, %xmm2
+; SSE2-64-NEXT: movdqa %xmm2, %xmm0
+; SSE2-64-NEXT: psrad $31, %xmm0
+; SSE2-64-NEXT: pxor %xmm0, %xmm2
+; SSE2-64-NEXT: psubd %xmm0, %xmm2
+; SSE2-64-NEXT: movq %xmm2, (%rdi)
+; SSE2-64-NEXT: retq
+;
+; SSE41-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; SSE41-32: # %bb.0:
+; SSE41-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE41-32-NEXT: paddd %xmm0, %xmm0
+; SSE41-32-NEXT: pxor %xmm1, %xmm1
+; SSE41-32-NEXT: psubd %xmm0, %xmm1
+; SSE41-32-NEXT: pabsd %xmm1, %xmm0
+; SSE41-32-NEXT: movq %xmm0, (%eax)
+; SSE41-32-NEXT: retl
+;
+; SSE41-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; SSE41-64: # %bb.0:
+; SSE41-64-NEXT: paddd %xmm0, %xmm0
+; SSE41-64-NEXT: pxor %xmm1, %xmm1
+; SSE41-64-NEXT: psubd %xmm0, %xmm1
+; SSE41-64-NEXT: pabsd %xmm1, %xmm0
+; SSE41-64-NEXT: movq %xmm0, (%rdi)
+; SSE41-64-NEXT: retq
+;
+; AVX-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; AVX-32: # %bb.0:
+; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX-32-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-32-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-32-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX-32-NEXT: vpabsd %xmm0, %xmm0
+; AVX-32-NEXT: vmovq %xmm0, (%eax)
+; AVX-32-NEXT: retl
+;
+; AVX-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
+; AVX-64: # %bb.0:
+; AVX-64-NEXT: vpaddd %xmm0, %xmm0, %xmm0
+; AVX-64-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX-64-NEXT: vpsubd %xmm0, %xmm1, %xmm0
+; AVX-64-NEXT: vpabsd %xmm0, %xmm0
+; AVX-64-NEXT: vmovq %xmm0, (%rdi)
+; AVX-64-NEXT: retq
+ %i2 = mul <2 x i32> %x, <i32 -2, i32 -2>
+ %i3 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %i2, i1 false)
+ store <2 x i32> %i3, ptr %out
+ ret void
+}
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg)
More information about the llvm-commits
mailing list