[llvm] APFloat: Fix signed zero handling in minnum/maxnum (PR #83376)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 29 02:51:33 PST 2024
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/83376
>From 29f6732b4f99b4bf891a56d5484a4b173febf419 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 10:02:51 +0530
Subject: [PATCH 1/3] APFloat: Fix signed zero handling in minnum/maxnum
Follow the 2019 rules and order -0 as less than +0 and +0 as greater than
-0. As currently defined this isn't required for the intrinsics, but is
a better QoI.
This will avoid the workaround in libc added by #83158
---
llvm/include/llvm/ADT/APFloat.h | 4 ++
llvm/test/CodeGen/AMDGPU/fmaxnum.ll | 2 +-
llvm/test/CodeGen/AMDGPU/fminnum.ll | 2 +-
llvm/test/Transforms/InstCombine/maxnum.ll | 2 +-
llvm/test/Transforms/InstCombine/minnum.ll | 4 +-
.../InstSimplify/ConstProp/min-max.ll | 68 ++++++++++++++++++-
llvm/unittests/ADT/APFloatTest.cpp | 10 +++
7 files changed, 85 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8c247bbcec90a2..598d00f592f8c0 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1397,6 +1397,8 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
return B;
if (B.isNaN())
return A;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? A : B;
return B < A ? B : A;
}
@@ -1408,6 +1410,8 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
return B;
if (B.isNaN())
return A;
+ if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+ return A.isNegative() ? B : A;
return A < B ? B : A;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 09898f1442fb82..38640a18b5aee6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -152,7 +152,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
; GCN-NOT: v_max_f32_e32
-; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index 844d26a6225b40..65b311845a6b77 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -150,7 +150,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #
; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
; GCN-NOT: v_min_f32_e32
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
; GCN: buffer_store_dword [[REG]]
define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
%val = call float @llvm.minnum.f32(float 0.0, float -0.0)
diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll
index 87288b18cbcd9f..e140a5b405ea84 100644
--- a/llvm/test/Transforms/InstCombine/maxnum.ll
+++ b/llvm/test/Transforms/InstCombine/maxnum.ll
@@ -66,7 +66,7 @@ define float @constant_fold_maxnum_f32_p0_n0() {
define float @constant_fold_maxnum_f32_n0_p0() {
; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0(
-; CHECK-NEXT: ret float -0.000000e+00
+; CHECK-NEXT: ret float 0.000000e+00
;
%x = call float @llvm.maxnum.f32(float -0.0, float 0.0)
ret float %x
diff --git a/llvm/test/Transforms/InstCombine/minnum.ll b/llvm/test/Transforms/InstCombine/minnum.ll
index 8050f075595272..cc6171b9d8e6cb 100644
--- a/llvm/test/Transforms/InstCombine/minnum.ll
+++ b/llvm/test/Transforms/InstCombine/minnum.ll
@@ -60,7 +60,7 @@ define float @constant_fold_minnum_f32_p0_p0() {
define float @constant_fold_minnum_f32_p0_n0() {
; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0(
-; CHECK-NEXT: ret float 0.000000e+00
+; CHECK-NEXT: ret float -0.000000e+00
;
%x = call float @llvm.minnum.f32(float 0.0, float -0.0)
ret float %x
@@ -199,7 +199,7 @@ define float @minnum_f32_1_minnum_p0_val_fmf3(float %x) {
define float @minnum_f32_p0_minnum_val_n0(float %x) {
; CHECK-LABEL: @minnum_f32_p0_minnum_val_n0(
-; CHECK-NEXT: [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0.000000e+00)
+; CHECK-NEXT: [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float -0.000000e+00)
; CHECK-NEXT: ret float [[Z]]
;
%y = call float @llvm.minnum.f32(float %x, float -0.0)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
index a5f5d4e12ed842..9120649eb5c4f1 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
@@ -49,6 +49,38 @@ define float @minnum_float() {
ret float %1
}
+define float @minnum_float_p0_n0() {
+; CHECK-LABEL: @minnum_float_p0_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0.0, float -0.0)
+ ret float %min
+}
+
+define float @minnum_float_n0_p0() {
+; CHECK-LABEL: @minnum_float_n0_p0(
+; CHECK-NEXT: ret float -0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float -0.0, float 0.0)
+ ret float %min
+}
+
+define float @minnum_float_p0_qnan() {
+; CHECK-LABEL: @minnum_float_p0_qnan(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0.0, float 0x7FF8000000000000)
+ ret float %min
+}
+
+define float @minnum_float_qnan_p0() {
+; CHECK-LABEL: @minnum_float_qnan_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %min = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0.0)
+ ret float %min
+}
+
define bfloat @minnum_bfloat() {
; CHECK-LABEL: @minnum_bfloat(
; CHECK-NEXT: ret bfloat 0xR40A0
@@ -95,7 +127,7 @@ define <4 x half> @minnum_half_vec() {
define <4 x float> @minnum_float_zeros_vec() {
; CHECK-LABEL: @minnum_float_zeros_vec(
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
;
%1 = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
ret <4 x float> %1
@@ -109,6 +141,38 @@ define float @maxnum_float() {
ret float %1
}
+define float @maxnum_float_p0_n0() {
+; CHECK-LABEL: @maxnum_float_p0_n0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0.0, float -0.0)
+ ret float %max
+}
+
+define float @maxnum_float_n0_p0() {
+; CHECK-LABEL: @maxnum_float_n0_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float -0.0, float 0.0)
+ ret float %max
+}
+
+define float @maxnum_float_p0_qnan() {
+; CHECK-LABEL: @maxnum_float_p0_qnan(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0.0, float 0x7FF8000000000000)
+ ret float %max
+}
+
+define float @maxnum_float_qnan_p0() {
+; CHECK-LABEL: @maxnum_float_qnan_p0(
+; CHECK-NEXT: ret float 0.000000e+00
+;
+ %max = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0.0)
+ ret float %max
+}
+
define bfloat @maxnum_bfloat() {
; CHECK-LABEL: @maxnum_bfloat(
; CHECK-NEXT: ret bfloat 0xR4228
@@ -155,7 +219,7 @@ define <4 x half> @maxnum_half_vec() {
define <4 x float> @maxnum_float_zeros_vec() {
; CHECK-LABEL: @maxnum_float_zeros_vec(
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float -0.000000e+00>
;
%1 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
ret <4 x float> %1
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index baf055e503b7e7..6e4dda8351a1b1 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -578,6 +578,11 @@ TEST(APFloatTest, MinNum) {
EXPECT_EQ(1.0, minnum(f2, f1).convertToDouble());
EXPECT_EQ(1.0, minnum(f1, nan).convertToDouble());
EXPECT_EQ(1.0, minnum(nan, f1).convertToDouble());
+
+ APFloat zp(0.0);
+ APFloat zn(-0.0);
+ EXPECT_EQ(-0.0, minnum(zp, zn).convertToDouble());
+ EXPECT_EQ(-0.0, minnum(zn, zp).convertToDouble());
}
TEST(APFloatTest, MaxNum) {
@@ -589,6 +594,11 @@ TEST(APFloatTest, MaxNum) {
EXPECT_EQ(2.0, maxnum(f2, f1).convertToDouble());
EXPECT_EQ(1.0, maxnum(f1, nan).convertToDouble());
EXPECT_EQ(1.0, maxnum(nan, f1).convertToDouble());
+
+ APFloat zp(0.0);
+ APFloat zn(-0.0);
+ EXPECT_EQ(0.0, maxnum(zp, zn).convertToDouble());
+ EXPECT_EQ(0.0, maxnum(zn, zp).convertToDouble());
}
TEST(APFloatTest, Minimum) {
>From dfb55521761139bd5942f6b158bc9098c8e63b0c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 15:31:44 +0530
Subject: [PATCH 2/3] Update IEEE spec reference
---
llvm/include/llvm/ADT/APFloat.h | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8630f3104b71f2..7e672a6103b23c 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1389,8 +1389,9 @@ inline APFloat neg(APFloat X) {
return X;
}
-/// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
-/// both are not NaN. If either argument is a NaN, returns the other argument.
+/// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the
+/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
+/// other argument. -0 is treated as ordered
LLVM_READONLY
inline APFloat minnum(const APFloat &A, const APFloat &B) {
if (A.isNaN())
@@ -1402,8 +1403,9 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
return B < A ? B : A;
}
-/// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if
-/// both are not NaN. If either argument is a NaN, returns the other argument.
+/// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the
+/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
+/// other argument.
LLVM_READONLY
inline APFloat maxnum(const APFloat &A, const APFloat &B) {
if (A.isNaN())
>From 68300ddd209e5f4995fede5ab593f8a4cb4b7391 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 16:18:54 +0530
Subject: [PATCH 3/3] Complete signed zero documentation
---
llvm/include/llvm/ADT/APFloat.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 7e672a6103b23c..deb74cb2fdeb1e 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1391,7 +1391,7 @@ inline APFloat neg(APFloat X) {
/// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the
/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
-/// other argument. -0 is treated as ordered
+/// other argument. -0 is treated as ordered less than +0.
LLVM_READONLY
inline APFloat minnum(const APFloat &A, const APFloat &B) {
if (A.isNaN())
@@ -1405,7 +1405,7 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
/// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the
/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
-/// other argument.
+/// other argument. +0 is treated as ordered greater than -0.
LLVM_READONLY
inline APFloat maxnum(const APFloat &A, const APFloat &B) {
if (A.isNaN())
More information about the llvm-commits
mailing list