[compiler-rt] Unbreak *tf builtins for hexfloat (PR #82208)

Sun Feb 18 21:32:18 PST 2024

https://github.com/arichardson created https://github.com/llvm/llvm-project/pull/82208

This is https://github.com/llvm/llvm-project/pull/77981 with an additional fix on top that should unbreak PPC64. It took me a while to get back to this but I believe this change fixes it.

I am still getting failures for PPC32 but it's likely that this is due to my QEMU-user setup since those tests also failed without this patchset.


@perry-ca could you please test if this fixes the issue you reported?

>From 1a0e2dd8e75eab687e707642a31ffd057639ca0c Mon Sep 17 00:00:00 2001
From: Sean Perry <perry at ca.ibm.com>
Date: Fri, 12 Jan 2024 14:49:41 -0600
Subject: [PATCH 1/4] support QUAD with native long double

---
 compiler-rt/lib/builtins/divtc3.c    | 2 +-
 compiler-rt/lib/builtins/fp_lib.h    | 6 +++++-
 compiler-rt/lib/builtins/int_types.h | 8 ++++++--
 compiler-rt/lib/builtins/multc3.c    | 2 +-
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/compiler-rt/lib/builtins/divtc3.c b/compiler-rt/lib/builtins/divtc3.c
index e970cef574b21d..099de5802daf0e 100644
--- a/compiler-rt/lib/builtins/divtc3.c
+++ b/compiler-rt/lib/builtins/divtc3.c
@@ -13,7 +13,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"
 
-#if defined(CRT_HAS_TF_MODE)
+#if defined(CRT_HAS_F128)
 
 // Returns: the quotient of (a + ib) / (c + id)
 
diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h
index af406e760497a4..4fca0183e54817 100644
--- a/compiler-rt/lib/builtins/fp_lib.h
+++ b/compiler-rt/lib/builtins/fp_lib.h
@@ -92,7 +92,7 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 
 COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
 
-#elif defined QUAD_PRECISION
+#elif defined QUAD_PRECISION && defined(CRT_HAS_F128)
 #if defined(CRT_HAS_TF_MODE)
 typedef uint64_t half_rep_t;
 typedef __uint128_t rep_t;
@@ -188,6 +188,8 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 #undef Word_HiMask
 #undef Word_LoMask
 #undef Word_FullMask
+#else
+typedef long double fp_t;
 #endif // defined(CRT_HAS_TF_MODE)
 #else
 #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
@@ -405,6 +407,8 @@ static __inline tf_float __compiler_rt_fmaxtf(tf_float x, tf_float y) {
 #define __compiler_rt_logbl crt_logbl
 #define __compiler_rt_scalbnl crt_scalbnl
 #define __compiler_rt_fmaxl crt_fmaxl
+#define crt_fabstf crt_fabsl
+#define crt_copysigntf crt_copysignl
 #else
 #error Unsupported TF mode type
 #endif
diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h
index 7624c728061518..219948850a97e5 100644
--- a/compiler-rt/lib/builtins/int_types.h
+++ b/compiler-rt/lib/builtins/int_types.h
@@ -189,12 +189,16 @@ typedef long double tf_float;
 #define CRT_LDBL_IEEE_F128
 #endif
 #define TF_C(x) x##L
-#elif __LDBL_MANT_DIG__ == 113
-// Use long double instead of __float128 if it matches the IEEE 128-bit format.
+#elif __LDBL_MANT_DIG__ == 113 ||
+    (__FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28)
+// Use long double instead of __float128 if it matches the IEEE 128-bit format
+// or the IBM hexadecimal format.
 #define CRT_LDBL_128BIT
 #define CRT_HAS_F128
+#if __LDBL_MANT_DIG__ == 113
 #define CRT_HAS_IEEE_TF
 #define CRT_LDBL_IEEE_F128
+#endif
 typedef long double tf_float;
 #define TF_C(x) x##L
 #elif defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)
diff --git a/compiler-rt/lib/builtins/multc3.c b/compiler-rt/lib/builtins/multc3.c
index f20e53ccbf233b..61a3f45e47279c 100644
--- a/compiler-rt/lib/builtins/multc3.c
+++ b/compiler-rt/lib/builtins/multc3.c
@@ -15,7 +15,7 @@
 #include "int_lib.h"
 #include "int_math.h"
 
-#if defined(CRT_HAS_TF_MODE)
+#if defined(CRT_HAS_F128)
 
 // Returns: the product of a + ib and c + id
 

>From f84d834ecfa3ec7be9df4697a866508b182b2ee1 Mon Sep 17 00:00:00 2001
From: Sean Perry <perry at ca.ibm.com>
Date: Fri, 12 Jan 2024 15:05:41 -0600
Subject: [PATCH 2/4] missing line continue

---
 compiler-rt/lib/builtins/int_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h
index 219948850a97e5..226db1041596d3 100644
--- a/compiler-rt/lib/builtins/int_types.h
+++ b/compiler-rt/lib/builtins/int_types.h
@@ -189,7 +189,7 @@ typedef long double tf_float;
 #define CRT_LDBL_IEEE_F128
 #endif
 #define TF_C(x) x##L
-#elif __LDBL_MANT_DIG__ == 113 ||
+#elif __LDBL_MANT_DIG__ == 113 || \
     (__FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28)
 // Use long double instead of __float128 if it matches the IEEE 128-bit format
 // or the IBM hexadecimal format.

>From 9fde72825f764661aa643fcbd86e1d3e9e2781cb Mon Sep 17 00:00:00 2001
From: Sean Perry <perry at ca.ibm.com>
Date: Fri, 12 Jan 2024 15:09:57 -0600
Subject: [PATCH 3/4] add space

---
 compiler-rt/lib/builtins/int_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/int_types.h b/compiler-rt/lib/builtins/int_types.h
index 226db1041596d3..ca97391fc28466 100644
--- a/compiler-rt/lib/builtins/int_types.h
+++ b/compiler-rt/lib/builtins/int_types.h
@@ -189,7 +189,7 @@ typedef long double tf_float;
 #define CRT_LDBL_IEEE_F128
 #endif
 #define TF_C(x) x##L
-#elif __LDBL_MANT_DIG__ == 113 || \
+#elif __LDBL_MANT_DIG__ == 113 ||                                              \
     (__FLT_RADIX__ == 16 && __LDBL_MANT_DIG__ == 28)
 // Use long double instead of __float128 if it matches the IEEE 128-bit format
 // or the IBM hexadecimal format.

>From 87d15c683aeecc7c3331434abe9db95de03d16a0 Mon Sep 17 00:00:00 2001
From: Alex Richardson <alexrichardson at google.com>
Date: Sun, 18 Feb 2024 21:28:14 -0800
Subject: [PATCH 4/4] Attempt to fix the PPC64 build

---
 compiler-rt/lib/builtins/fp_lib.h | 39 ++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/builtins/fp_lib.h b/compiler-rt/lib/builtins/fp_lib.h
index 4fca0183e54817..c4f0a5b9587f77 100644
--- a/compiler-rt/lib/builtins/fp_lib.h
+++ b/compiler-rt/lib/builtins/fp_lib.h
@@ -22,6 +22,7 @@
 
 #include "int_lib.h"
 #include "int_math.h"
+#include "int_types.h"
 #include <limits.h>
 #include <stdbool.h>
 #include <stdint.h>
@@ -92,14 +93,15 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 
 COMPILER_RT_ABI fp_t __adddf3(fp_t a, fp_t b);
 
-#elif defined QUAD_PRECISION && defined(CRT_HAS_F128)
-#if defined(CRT_HAS_TF_MODE)
+#elif defined QUAD_PRECISION
+#if defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT)
 typedef uint64_t half_rep_t;
 typedef __uint128_t rep_t;
 typedef __int128_t srep_t;
 typedef tf_float fp_t;
 #define HALF_REP_C UINT64_C
 #define REP_C (__uint128_t)
+#if defined(CRT_HAS_IEEE_TF)
 // Note: Since there is no explicit way to tell compiler the constant is a
 // 128-bit integer, we let the constant be casted to 128-bit integer
 #define significandBits 112
@@ -188,9 +190,10 @@ static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
 #undef Word_HiMask
 #undef Word_LoMask
 #undef Word_FullMask
+#endif // defined(CRT_HAS_IEEE_TF)
 #else
 typedef long double fp_t;
-#endif // defined(CRT_HAS_TF_MODE)
+#endif // defined(CRT_HAS_F128) && defined(CRT_HAS_128BIT)
 #else
 #error SINGLE_PRECISION, DOUBLE_PRECISION or QUAD_PRECISION must be defined.
 #endif
@@ -198,19 +201,6 @@ typedef long double fp_t;
 #if defined(SINGLE_PRECISION) || defined(DOUBLE_PRECISION) ||                  \
     (defined(QUAD_PRECISION) && defined(CRT_HAS_TF_MODE))
 #define typeWidth (sizeof(rep_t) * CHAR_BIT)
-#define exponentBits (typeWidth - significandBits - 1)
-#define maxExponent ((1 << exponentBits) - 1)
-#define exponentBias (maxExponent >> 1)
-
-#define implicitBit (REP_C(1) << significandBits)
-#define significandMask (implicitBit - 1U)
-#define signBit (REP_C(1) << (significandBits + exponentBits))
-#define absMask (signBit - 1U)
-#define exponentMask (absMask ^ significandMask)
-#define oneRep ((rep_t)exponentBias << significandBits)
-#define infRep exponentMask
-#define quietBit (implicitBit >> 1)
-#define qnanRep (exponentMask | quietBit)
 
 static __inline rep_t toRep(fp_t x) {
   const union {
@@ -228,6 +218,21 @@ static __inline fp_t fromRep(rep_t x) {
   return rep.f;
 }
 
+#if !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF)
+#define exponentBits (typeWidth - significandBits - 1)
+#define maxExponent ((1 << exponentBits) - 1)
+#define exponentBias (maxExponent >> 1)
+
+#define implicitBit (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit (REP_C(1) << (significandBits + exponentBits))
+#define absMask (signBit - 1U)
+#define exponentMask (absMask ^ significandMask)
+#define oneRep ((rep_t)exponentBias << significandBits)
+#define infRep exponentMask
+#define quietBit (implicitBit >> 1)
+#define qnanRep (exponentMask | quietBit)
+
 static __inline int normalize(rep_t *significand) {
   const int shift = rep_clz(*significand) - rep_clz(implicitBit);
   *significand <<= shift;
@@ -330,6 +335,8 @@ static __inline fp_t __compiler_rt_scalbnX(fp_t x, int y) {
     return fromRep(sign | ((rep_t)exp << significandBits) | sig);
 }
 
+#endif // !defined(QUAD_PRECISION) || defined(CRT_HAS_IEEE_TF)
+
 // Avoid using fmax from libm.
 static __inline fp_t __compiler_rt_fmaxX(fp_t x, fp_t y) {
   // If either argument is NaN, return the other argument. If both are NaN,