[llvm] [SelectionDAG] Fix isKnownNeverZeroFloat for vectors (PR #78308)
Danila Malyutin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 16 08:26:35 PST 2024
https://github.com/danilaml created https://github.com/llvm/llvm-project/pull/78308
Return true iff all of vector elements are constant AND not zero
Fixes #77805
Previously, it'd return `true` (as in - the value is known to be never zero) for any build_vector/splat_vector with non-constant elements.
>From 3874940fdf4774b4b29828da85835d1bd0339f3d Mon Sep 17 00:00:00 2001
From: Danila Malyutin <dmalyutin at azul.com>
Date: Tue, 16 Jan 2024 20:05:56 +0400
Subject: [PATCH] [SelectionDAG] Fix isKnownNeverZeroFloat for vectors
Return true iff all of vector elements are constant AND not zero
Fixes #77805
---
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 ++--
llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 60 +++++++++++++++++++
2 files changed, 64 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 01d31806c8442f..8bd39db82e1b0b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5239,14 +5239,11 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
// Return false if we find any zero in a vector.
if (Op->getOpcode() == ISD::BUILD_VECTOR ||
Op->getOpcode() == ISD::SPLAT_VECTOR) {
- for (const SDValue &OpVal : Op->op_values()) {
- if (OpVal.isUndef())
- return false;
+ return llvm::all_of(Op->op_values(), [](const SDValue &OpVal){
if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
- if (C->isZero())
- return false;
- }
- return true;
+ return !C->isZero();
+ return false;
+ });
}
return false;
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index 8905d2bce5e92a..442d1d29590807 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -1322,3 +1322,63 @@ define <4 x float> @test_fmaximum_vector_zero(<4 x float> %x) {
%r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> <float 0., float 0., float 0., float 0.>)
ret <4 x float> %r
}
+
+; Check that signed zeroes are handled correctly in this case
+define <4 x float> @test_fmaximum_v4f32_splat(<4 x float> %x, float %y) {
+; SSE2-LABEL: test_fmaximum_v4f32_splat:
+; SSE2: # %bb.0:
+; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; SSE2-NEXT: pxor %xmm2, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm3
+; SSE2-NEXT: pandn %xmm0, %xmm3
+; SSE2-NEXT: movaps %xmm1, %xmm4
+; SSE2-NEXT: andps %xmm2, %xmm4
+; SSE2-NEXT: orps %xmm3, %xmm4
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: maxps %xmm4, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: cmpunordps %xmm0, %xmm2
+; SSE2-NEXT: andps %xmm2, %xmm0
+; SSE2-NEXT: andnps %xmm1, %xmm2
+; SSE2-NEXT: orps %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; AVX1-LABEL: test_fmaximum_v4f32_splat:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; AVX1-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vmaxps %xmm2, %xmm0, %xmm1
+; AVX1-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
+; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX512-LABEL: test_fmaximum_v4f32_splat:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss %xmm1, %xmm1
+; AVX512-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; AVX512-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vmaxps %xmm2, %xmm0, %xmm1
+; AVX512-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
+; AVX512-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
+;
+; X86-LABEL: test_fmaximum_v4f32_splat:
+; X86: # %bb.0:
+; X86-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1
+; X86-NEXT: vblendvps %xmm0, %xmm1, %xmm0, %xmm2
+; X86-NEXT: vblendvps %xmm0, %xmm0, %xmm1, %xmm0
+; X86-NEXT: vmaxps %xmm2, %xmm0, %xmm1
+; X86-NEXT: vcmpunordps %xmm0, %xmm0, %xmm2
+; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; X86-NEXT: retl
+ %splatinsert = insertelement <4 x float> poison, float %y, i64 0
+ %vec = shufflevector <4 x float> %splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+ %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %vec) readnone
+ ret <4 x float> %r
+}
+
More information about the llvm-commits
mailing list