[llvm] 46a929f - [SelectionDAG] Fix isKnownNeverZeroFloat for vectors (#78308)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 16 21:56:00 PST 2024
Author: Danila Malyutin
Date: 2024-01-17T12:55:57+07:00
New Revision: 46a929f0a077186034d5692b013730578a2a2d36
URL: https://github.com/llvm/llvm-project/commit/46a929f0a077186034d5692b013730578a2a2d36
DIFF: https://github.com/llvm/llvm-project/commit/46a929f0a077186034d5692b013730578a2a2d36.diff
LOG: [SelectionDAG] Fix isKnownNeverZeroFloat for vectors (#78308)
Return true iff all of the vector elements are constant AND non-zero.
Fixes #77805
Previously, it would return `true` (i.e., the value is known to never be
zero) for any build_vector/splat_vector with non-constant elements, as long
as no element was undef or a constant zero.
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
llvm/test/CodeGen/X86/fminimum-fmaximum.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 01d31806c8442fe..2123f3d3b688567 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5233,22 +5233,8 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
"Floating point type expected");
// If the value is a constant, we can obviously see if it is a zero or not.
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
- return !C->isZero();
-
- // Return false if we find any zero in a vector.
- if (Op->getOpcode() == ISD::BUILD_VECTOR ||
- Op->getOpcode() == ISD::SPLAT_VECTOR) {
- for (const SDValue &OpVal : Op->op_values()) {
- if (OpVal.isUndef())
- return false;
- if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
- if (C->isZero())
- return false;
- }
- return true;
- }
- return false;
+ return ISD::matchUnaryFpPredicate(
+ Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
}
bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index a279f1e937e6955..b927f92897a20bc 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -1328,20 +1328,31 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
; SSE2-LABEL: test_fmaximum_v4f32_splat:
; SSE2: # %bb.0:
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE2-NEXT: movaps %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: movaps %xmm1, %xmm4
+; SSE2-NEXT: andps %xmm2, %xmm4
+; SSE2-NEXT: orps %xmm3, %xmm4
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: maxps %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: cmpunordps %xmm0, %xmm2
-; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: andps %xmm2, %xmm3
-; SSE2-NEXT: maxps %xmm1, %xmm0
-; SSE2-NEXT: andnps %xmm0, %xmm2
-; SSE2-NEXT: orps %xmm3, %xmm2
-; SSE2-NEXT: movaps %xmm2, %xmm0
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; AVX1-LABEL: test_fmaximum_v4f32_splat:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; AVX1-NEXT: vmaxps %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; AVX1-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
@@ -1349,7 +1360,9 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
; AVX512-LABEL: test_fmaximum_v4f32_splat:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcastss %xmm1, %xmm1
-; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; AVX512-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
@@ -1357,7 +1370,9 @@ define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
; X86-LABEL: test_fmaximum_v4f32_splat:
; X86: # %bb.0:
; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
-; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm1
+; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; X86-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmaxps %xmm2, %xmm0, %xmm1
; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; X86-NEXT: retl
More information about the llvm-commits
mailing list