[llvm] APFloat: Fix signed zero handling in minnum/maxnum (PR #83376)

Thu Feb 29 02:51:33 PST 2024

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/83376

>From 29f6732b4f99b4bf891a56d5484a4b173febf419 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 10:02:51 +0530
Subject: [PATCH 1/3] APFloat: Fix signed zero handling in minnum/maxnum

Follow the 2019 rules and order -0 as less than +0 and +0 as greater than
-0. As currently defined this isn't required for the intrinsics, but is
a better QoI.

This will avoid the workaround in libc added by #83158
---
 llvm/include/llvm/ADT/APFloat.h               |  4 ++
 llvm/test/CodeGen/AMDGPU/fmaxnum.ll           |  2 +-
 llvm/test/CodeGen/AMDGPU/fminnum.ll           |  2 +-
 llvm/test/Transforms/InstCombine/maxnum.ll    |  2 +-
 llvm/test/Transforms/InstCombine/minnum.ll    |  4 +-
 .../InstSimplify/ConstProp/min-max.ll         | 68 ++++++++++++++++++-
 llvm/unittests/ADT/APFloatTest.cpp            | 10 +++
 7 files changed, 85 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8c247bbcec90a2..598d00f592f8c0 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1397,6 +1397,8 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
     return B;
   if (B.isNaN())
     return A;
+  if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+    return A.isNegative() ? A : B;
   return B < A ? B : A;
 }
 
@@ -1408,6 +1410,8 @@ inline APFloat maxnum(const APFloat &A, const APFloat &B) {
     return B;
   if (B.isNaN())
     return A;
+  if (A.isZero() && B.isZero() && (A.isNegative() != B.isNegative()))
+    return A.isNegative() ? B : A;
   return A < B ? B : A;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 09898f1442fb82..38640a18b5aee6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -152,7 +152,7 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #
 
 ; GCN-LABEL: {{^}}constant_fold_fmax_f32_n0_p0:
 ; GCN-NOT: v_max_f32_e32
-; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index 844d26a6225b40..65b311845a6b77 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -150,7 +150,7 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #
 
 ; GCN-LABEL: {{^}}constant_fold_fmin_f32_p0_n0:
 ; GCN-NOT: v_min_f32_e32
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; GCN: v_bfrev_b32_e32 [[REG:v[0-9]+]], 1{{$}}
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll
index 87288b18cbcd9f..e140a5b405ea84 100644
--- a/llvm/test/Transforms/InstCombine/maxnum.ll
+++ b/llvm/test/Transforms/InstCombine/maxnum.ll
@@ -66,7 +66,7 @@ define float @constant_fold_maxnum_f32_p0_n0() {
 
 define float @constant_fold_maxnum_f32_n0_p0() {
 ; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0(
-; CHECK-NEXT:    ret float -0.000000e+00
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %x = call float @llvm.maxnum.f32(float -0.0, float 0.0)
   ret float %x
diff --git a/llvm/test/Transforms/InstCombine/minnum.ll b/llvm/test/Transforms/InstCombine/minnum.ll
index 8050f075595272..cc6171b9d8e6cb 100644
--- a/llvm/test/Transforms/InstCombine/minnum.ll
+++ b/llvm/test/Transforms/InstCombine/minnum.ll
@@ -60,7 +60,7 @@ define float @constant_fold_minnum_f32_p0_p0() {
 
 define float @constant_fold_minnum_f32_p0_n0() {
 ; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0(
-; CHECK-NEXT:    ret float 0.000000e+00
+; CHECK-NEXT:    ret float -0.000000e+00
 ;
   %x = call float @llvm.minnum.f32(float 0.0, float -0.0)
   ret float %x
@@ -199,7 +199,7 @@ define float @minnum_f32_1_minnum_p0_val_fmf3(float %x) {
 
 define float @minnum_f32_p0_minnum_val_n0(float %x) {
 ; CHECK-LABEL: @minnum_f32_p0_minnum_val_n0(
-; CHECK-NEXT:    [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float 0.000000e+00)
+; CHECK-NEXT:    [[Z:%.*]] = call float @llvm.minnum.f32(float [[X:%.*]], float -0.000000e+00)
 ; CHECK-NEXT:    ret float [[Z]]
 ;
   %y = call float @llvm.minnum.f32(float %x, float -0.0)
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
index a5f5d4e12ed842..9120649eb5c4f1 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll
@@ -49,6 +49,38 @@ define float @minnum_float() {
   ret float %1
 }
 
+define float @minnum_float_p0_n0() {
+; CHECK-LABEL: @minnum_float_p0_n0(
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %min = call float @llvm.minnum.f32(float 0.0, float -0.0)
+  ret float %min
+}
+
+define float @minnum_float_n0_p0() {
+; CHECK-LABEL: @minnum_float_n0_p0(
+; CHECK-NEXT:    ret float -0.000000e+00
+;
+  %min = call float @llvm.minnum.f32(float -0.0, float 0.0)
+  ret float %min
+}
+
+define float @minnum_float_p0_qnan() {
+; CHECK-LABEL: @minnum_float_p0_qnan(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %min = call float @llvm.minnum.f32(float 0.0, float 0x7FF8000000000000)
+  ret float %min
+}
+
+define float @minnum_float_qnan_p0() {
+; CHECK-LABEL: @minnum_float_qnan_p0(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %min = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0.0)
+  ret float %min
+}
+
 define bfloat @minnum_bfloat() {
 ; CHECK-LABEL: @minnum_bfloat(
 ; CHECK-NEXT:    ret bfloat 0xR40A0
@@ -95,7 +127,7 @@ define <4 x half> @minnum_half_vec() {
 
 define <4 x float> @minnum_float_zeros_vec() {
 ; CHECK-LABEL: @minnum_float_zeros_vec(
-; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
 ;
   %1 = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
   ret <4 x float> %1
@@ -109,6 +141,38 @@ define float @maxnum_float() {
   ret float %1
 }
 
+define float @maxnum_float_p0_n0() {
+; CHECK-LABEL: @maxnum_float_p0_n0(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %max = call float @llvm.maxnum.f32(float 0.0, float -0.0)
+  ret float %max
+}
+
+define float @maxnum_float_n0_p0() {
+; CHECK-LABEL: @maxnum_float_n0_p0(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %max = call float @llvm.maxnum.f32(float -0.0, float 0.0)
+  ret float %max
+}
+
+define float @maxnum_float_p0_qnan() {
+; CHECK-LABEL: @maxnum_float_p0_qnan(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %max = call float @llvm.maxnum.f32(float 0.0, float 0x7FF8000000000000)
+  ret float %max
+}
+
+define float @maxnum_float_qnan_p0() {
+; CHECK-LABEL: @maxnum_float_qnan_p0(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %max = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0.0)
+  ret float %max
+}
+
 define bfloat @maxnum_bfloat() {
 ; CHECK-LABEL: @maxnum_bfloat(
 ; CHECK-NEXT:    ret bfloat 0xR4228
@@ -155,7 +219,7 @@ define <4 x half> @maxnum_half_vec() {
 
 define <4 x float> @maxnum_float_zeros_vec() {
 ; CHECK-LABEL: @maxnum_float_zeros_vec(
-; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float -0.000000e+00, float 0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float -0.000000e+00>
 ;
   %1 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 0.0, float -0.0, float 0.0, float -0.0>, <4 x float> <float 0.0, float 0.0, float -0.0, float -0.0>)
   ret <4 x float> %1
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index baf055e503b7e7..6e4dda8351a1b1 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -578,6 +578,11 @@ TEST(APFloatTest, MinNum) {
   EXPECT_EQ(1.0, minnum(f2, f1).convertToDouble());
   EXPECT_EQ(1.0, minnum(f1, nan).convertToDouble());
   EXPECT_EQ(1.0, minnum(nan, f1).convertToDouble());
+
+  APFloat zp(0.0);
+  APFloat zn(-0.0);
+  EXPECT_EQ(-0.0, minnum(zp, zn).convertToDouble());
+  EXPECT_EQ(-0.0, minnum(zn, zp).convertToDouble());
 }
 
 TEST(APFloatTest, MaxNum) {
@@ -589,6 +594,11 @@ TEST(APFloatTest, MaxNum) {
   EXPECT_EQ(2.0, maxnum(f2, f1).convertToDouble());
   EXPECT_EQ(1.0, maxnum(f1, nan).convertToDouble());
   EXPECT_EQ(1.0, maxnum(nan, f1).convertToDouble());
+
+  APFloat zp(0.0);
+  APFloat zn(-0.0);
+  EXPECT_EQ(0.0, maxnum(zp, zn).convertToDouble());
+  EXPECT_EQ(0.0, maxnum(zn, zp).convertToDouble());
 }
 
 TEST(APFloatTest, Minimum) {

>From dfb55521761139bd5942f6b158bc9098c8e63b0c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 15:31:44 +0530
Subject: [PATCH 2/3] Update IEEE spec reference

---
 llvm/include/llvm/ADT/APFloat.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 8630f3104b71f2..7e672a6103b23c 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1389,8 +1389,9 @@ inline APFloat neg(APFloat X) {
   return X;
 }
 
-/// Implements IEEE minNum semantics. Returns the smaller of the 2 arguments if
-/// both are not NaN. If either argument is a NaN, returns the other argument.
+/// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the
+/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
+/// other argument. -0 is treated as ordered
 LLVM_READONLY
 inline APFloat minnum(const APFloat &A, const APFloat &B) {
   if (A.isNaN())
@@ -1402,8 +1403,9 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
   return B < A ? B : A;
 }
 
-/// Implements IEEE maxNum semantics. Returns the larger of the 2 arguments if
-/// both are not NaN. If either argument is a NaN, returns the other argument.
+/// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the
+/// 2 arguments if both are not NaN. If either argument is a NaN, returns the
+/// other argument.
 LLVM_READONLY
 inline APFloat maxnum(const APFloat &A, const APFloat &B) {
   if (A.isNaN())

>From 68300ddd209e5f4995fede5ab593f8a4cb4b7391 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 29 Feb 2024 16:18:54 +0530
Subject: [PATCH 3/3] Complete signed zero documentation

---
 llvm/include/llvm/ADT/APFloat.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index 7e672a6103b23c..deb74cb2fdeb1e 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -1391,7 +1391,7 @@ inline APFloat neg(APFloat X) {
 
 /// Implements IEEE-754 2019 minimumNumber semantics. Returns the smaller of the
 /// 2 arguments if both are not NaN. If either argument is a NaN, returns the
-/// other argument. -0 is treated as ordered
+/// other argument. -0 is treated as ordered less than +0.
 LLVM_READONLY
 inline APFloat minnum(const APFloat &A, const APFloat &B) {
   if (A.isNaN())
@@ -1405,7 +1405,7 @@ inline APFloat minnum(const APFloat &A, const APFloat &B) {
 
 /// Implements IEEE-754 2019 maximumNumber semantics. Returns the larger of the
 /// 2 arguments if both are not NaN. If either argument is a NaN, returns the
-/// other argument.
+/// other argument. +0 is treated as ordered greater than -0.
 LLVM_READONLY
 inline APFloat maxnum(const APFloat &A, const APFloat &B) {
   if (A.isNaN())