[compiler-rt] [compiler-rt] Implement extendxftf2 and trunctfxf2 for x86_64 (PR #66918)

Thu Sep 28 14:52:50 PDT 2023

https://github.com/alexander-shaposhnikov updated https://github.com/llvm/llvm-project/pull/66918

>From 9574611c390daa20a752ed33da6acd5bc58d774a Mon Sep 17 00:00:00 2001
From: Alexander Shaposhnikov <ashaposhnikov at google.com>
Date: Wed, 27 Sep 2023 01:05:55 +0000
Subject: [PATCH] Implement __extendxftf2 and __trunctfxf2 for x86_64

---
 compiler-rt/lib/builtins/CMakeLists.txt       |  2 +
 compiler-rt/lib/builtins/extendxftf2.c        | 23 +++++
 compiler-rt/lib/builtins/fp_extend.h          | 25 +++++
 compiler-rt/lib/builtins/fp_extend_impl.inc   | 59 +++++++++---
 compiler-rt/lib/builtins/fp_trunc.h           | 26 ++++++
 compiler-rt/lib/builtins/fp_trunc_impl.inc    | 80 +++++++++++++---
 compiler-rt/lib/builtins/trunctfxf2.c         | 24 +++++
 compiler-rt/test/builtins/Unit/addtf3_test.c  |  2 +-
 compiler-rt/test/builtins/Unit/divtf3_test.c  |  2 +-
 .../test/builtins/Unit/extenddftf2_test.c     |  2 +-
 .../test/builtins/Unit/extendhftf2_test.c     |  2 +-
 .../test/builtins/Unit/extendsftf2_test.c     |  2 +-
 .../test/builtins/Unit/extendxftf2_test.c     | 74 +++++++++++++++
 .../test/builtins/Unit/floatditf_test.c       |  2 +-
 .../test/builtins/Unit/floatsitf_test.c       |  2 +-
 .../test/builtins/Unit/floatunditf_test.c     |  2 +-
 .../test/builtins/Unit/floatunsitf_test.c     |  2 +-
 compiler-rt/test/builtins/Unit/fp_test.h      | 92 +++++++++++++------
 compiler-rt/test/builtins/Unit/multf3_test.c  |  2 +-
 compiler-rt/test/builtins/Unit/subtf3_test.c  |  2 +-
 .../test/builtins/Unit/trunctfxf2_test.c      | 72 +++++++++++++++
 21 files changed, 433 insertions(+), 66 deletions(-)
 create mode 100644 compiler-rt/lib/builtins/extendxftf2.c
 create mode 100644 compiler-rt/lib/builtins/trunctfxf2.c
 create mode 100644 compiler-rt/test/builtins/Unit/extendxftf2_test.c
 create mode 100644 compiler-rt/test/builtins/Unit/trunctfxf2_test.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 1afceddc62d846f..298b87dda5a9744 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -280,6 +280,7 @@ endif ()
 # long double is not 80 bits on Android or MSVC.
 set(x86_80_BIT_SOURCES
   divxc3.c
+  extendxftf2.c
   fixxfdi.c
   fixxfti.c
   fixunsxfdi.c
@@ -291,6 +292,7 @@ set(x86_80_BIT_SOURCES
   floatuntixf.c
   mulxc3.c
   powixf2.c
+  trunctfxf2.c
 )
 
 if (NOT MSVC)
diff --git a/compiler-rt/lib/builtins/extendxftf2.c b/compiler-rt/lib/builtins/extendxftf2.c
new file mode 100644
index 000000000000000..20911fe7cf2a000
--- /dev/null
+++ b/compiler-rt/lib/builtins/extendxftf2.c
@@ -0,0 +1,23 @@
+//===-- lib/extendxftf2.c - long double -> quad conversion --------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Assumption: long double is a IEEE 80 bit floating point type padded to 128
+// bits.
+
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+#define SRC_80
+#define DST_QUAD
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI __float128 __extendxftf2(long double a) {
+  return __extendXfYf2__(a);
+}
+
+#endif
diff --git a/compiler-rt/lib/builtins/fp_extend.h b/compiler-rt/lib/builtins/fp_extend.h
index eee4722bf90e69f..ad73a49c5e93327 100644
--- a/compiler-rt/lib/builtins/fp_extend.h
+++ b/compiler-rt/lib/builtins/fp_extend.h
@@ -20,14 +20,18 @@
 typedef float src_t;
 typedef uint32_t src_rep_t;
 #define SRC_REP_C UINT32_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
 static const int srcSigBits = 23;
+static const int srcSigFracBits = 23;
 #define src_rep_t_clz clzsi
 
 #elif defined SRC_DOUBLE
 typedef double src_t;
 typedef uint64_t src_rep_t;
 #define SRC_REP_C UINT64_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
 static const int srcSigBits = 52;
+static const int srcSigFracBits = 52;
 static __inline int src_rep_t_clz(src_rep_t a) {
 #if defined __LP64__
   return __builtin_clzl(a);
@@ -39,6 +43,16 @@ static __inline int src_rep_t_clz(src_rep_t a) {
 #endif
 }
 
+#elif defined SRC_80
+typedef long double src_t;
+typedef __uint128_t src_rep_t;
+#define SRC_REP_C (__uint128_t)
+// sign bit, exponent and significand occupy the lower 80 bits.
+static const int srcBits = 80;
+// significand stores the integer bit.
+static const int srcSigBits = 64;
+static const int srcSigFracBits = 63;
+
 #elif defined SRC_HALF
 #ifdef COMPILER_RT_HAS_FLOAT16
 typedef _Float16 src_t;
@@ -47,7 +61,9 @@ typedef uint16_t src_t;
 #endif
 typedef uint16_t src_rep_t;
 #define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
 static const int srcSigBits = 10;
+static const int srcSigFracBits = 10;
 #define src_rep_t_clz __builtin_clz
 
 #else
@@ -59,18 +75,27 @@ typedef float dst_t;
 typedef uint32_t dst_rep_t;
 #define DST_REP_C UINT32_C
 static const int dstSigBits = 23;
+static const int dstSigFracBits = 23;
 
 #elif defined DST_DOUBLE
 typedef double dst_t;
 typedef uint64_t dst_rep_t;
 #define DST_REP_C UINT64_C
 static const int dstSigBits = 52;
+static const int dstSigFracBits = 52;
 
 #elif defined DST_QUAD
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 113
 typedef long double dst_t;
+#elif defined(__x86_64__) &&                                                   \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+typedef __float128 dst_t;
+#endif
 typedef __uint128_t dst_rep_t;
 #define DST_REP_C (__uint128_t)
 static const int dstSigBits = 112;
+static const int dstSigFracBits = 112;
 
 #else
 #error Destination should be single, double, or quad precision!
diff --git a/compiler-rt/lib/builtins/fp_extend_impl.inc b/compiler-rt/lib/builtins/fp_extend_impl.inc
index d1c9c02a00c5314..4c58ec7aa32ba2e 100644
--- a/compiler-rt/lib/builtins/fp_extend_impl.inc
+++ b/compiler-rt/lib/builtins/fp_extend_impl.inc
@@ -37,19 +37,27 @@
 
 #include "fp_extend.h"
 
+// The source type may use a usual IEEE-754 interchange format or Intel 80-bit
+// format. In particular, for the source type srcSigFracBits may be not equal to
+// srcSigBits. The destination type is assumed to be one of IEEE-754 standard
+// types, i.e. dstSigFracBits is assumed to be equal to dstSigBits.
 static __inline dst_t __extendXfYf2__(src_t a) {
   // Various constants whose values follow from the type parameters.
   // Any reasonable optimizer will fold and propagate all of these.
-  const int srcBits = sizeof(src_t) * CHAR_BIT;
+
   const int srcExpBits = srcBits - srcSigBits - 1;
   const int srcInfExp = (1 << srcExpBits) - 1;
   const int srcExpBias = srcInfExp >> 1;
 
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
   const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
   const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+
   const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
   const src_rep_t srcAbsMask = srcSignMask - 1;
-  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+
+  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
   const src_rep_t srcNaNCode = srcQNaN - 1;
 
   const int dstBits = sizeof(dst_t) * CHAR_BIT;
@@ -69,9 +77,14 @@ static __inline dst_t __extendXfYf2__(src_t a) {
   // to (signed) int.  To avoid that, explicitly cast to src_rep_t.
   if ((src_rep_t)(aAbs - srcMinNormal) < srcInfinity - srcMinNormal) {
     // a is a normal number.
-    // Extend to the destination type by shifting the significand and
-    // exponent into the proper position and rebiasing the exponent.
-    absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
+    // Extend to the destination type by shifting the significand into the
+    // proper position.
+    absResult = ((dst_rep_t)aAbs & srcSigFracMask)
+                << (dstSigFracBits - srcSigFracBits);
+    // Extend to the destination type by shifting the exponent into the proper
+    // position.
+    absResult |= ((dst_rep_t)aAbs & srcExpMask) << (dstSigBits - srcSigBits);
+    // Rebias the expoenent.
     absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
   }
 
@@ -81,19 +94,37 @@ static __inline dst_t __extendXfYf2__(src_t a) {
     // bit (if needed) and right-aligning the rest of the trailing NaN
     // payload field.
     absResult = (dst_rep_t)dstInfExp << dstSigBits;
-    absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
-    absResult |= (dst_rep_t)(aAbs & srcNaNCode) << (dstSigBits - srcSigBits);
+    absResult |= (dst_rep_t)(aAbs & srcQNaN)
+                 << (dstSigFracBits - srcSigFracBits);
+    absResult |= (dst_rep_t)(aAbs & srcNaNCode)
+                 << (dstSigFracBits - srcSigFracBits);
   }
 
   else if (aAbs) {
     // a is denormal.
-    // renormalize the significand and clear the leading bit, then insert
-    // the correct adjusted exponent in the destination type.
-    const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
-    absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
-    absResult ^= dstMinNormal;
-    const int resultExponent = dstExpBias - srcExpBias - scale + 1;
-    absResult |= (dst_rep_t)resultExponent << dstSigBits;
+    if (srcExpBits == dstExpBits) {
+      // When we extend a denormal F80 to F128 this branch is always taken.
+      // In particular, we rely on the fact that both formats use 15-bit
+      // exponent. Therefore, the other branch is meaningless and marked with
+      // __builtin_unreachable.
+      // Insert the significand into the correct location in the destination
+      // type.
+      absResult = ((dst_rep_t)aAbs & srcSigFracMask)
+                  << (dstSigFracBits - srcSigFracBits);
+    } else {
+#ifndef src_rep_t_clz
+      // If src_rep_t_clz is not defined this branch must be unreachable.
+      __builtin_unreachable();
+#define src_rep_t_clz (int)
+#endif
+      // Renormalize the significand and clear the leading bit, then insert
+      // the correct adjusted exponent in the destination type.
+      const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
+      absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
+      absResult ^= dstMinNormal;
+      const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+      absResult |= (dst_rep_t)resultExponent << dstSigBits;
+    }
   }
 
   else {
diff --git a/compiler-rt/lib/builtins/fp_trunc.h b/compiler-rt/lib/builtins/fp_trunc.h
index 91f614528ab3f42..b05cfd329778dd7 100644
--- a/compiler-rt/lib/builtins/fp_trunc.h
+++ b/compiler-rt/lib/builtins/fp_trunc.h
@@ -20,18 +20,27 @@ typedef float src_t;
 typedef uint32_t src_rep_t;
 #define SRC_REP_C UINT32_C
 static const int srcSigBits = 23;
+static const int srcSigFracBits = 23;
 
 #elif defined SRC_DOUBLE
 typedef double src_t;
 typedef uint64_t src_rep_t;
 #define SRC_REP_C UINT64_C
 static const int srcSigBits = 52;
+static const int srcSigFracBits = 52;
 
 #elif defined SRC_QUAD
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 113
 typedef long double src_t;
+#elif defined(__x86_64__) &&                                                   \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+typedef __float128 src_t;
+#endif
 typedef __uint128_t src_rep_t;
 #define SRC_REP_C (__uint128_t)
 static const int srcSigBits = 112;
+static const int srcSigFracBits = 112;
 
 #else
 #error Source should be double precision or quad precision!
@@ -41,13 +50,26 @@ static const int srcSigBits = 112;
 typedef double dst_t;
 typedef uint64_t dst_rep_t;
 #define DST_REP_C UINT64_C
+static const int dstBits = 64;
 static const int dstSigBits = 52;
+static const int dstSigFracBits = 52;
+
+#elif defined DST_80
+typedef long double dst_t;
+typedef __uint128_t dst_rep_t;
+#define DST_REP_C (__uint128_t)
+static const int dstBits = 80;
+// significand stores the integer bit.
+static const int dstSigBits = 64;
+static const int dstSigFracBits = 63;
 
 #elif defined DST_SINGLE
 typedef float dst_t;
 typedef uint32_t dst_rep_t;
 #define DST_REP_C UINT32_C
+static const int dstBits = 32;
 static const int dstSigBits = 23;
+static const int dstSigFracBits = 23;
 
 #elif defined DST_HALF
 #ifdef COMPILER_RT_HAS_FLOAT16
@@ -57,13 +79,17 @@ typedef uint16_t dst_t;
 #endif
 typedef uint16_t dst_rep_t;
 #define DST_REP_C UINT16_C
+static const int dstBits = 16;
 static const int dstSigBits = 10;
+static const int dstSigFracBits = 10;
 
 #elif defined DST_BFLOAT
 typedef __bf16 dst_t;
 typedef uint16_t dst_rep_t;
 #define DST_REP_C UINT16_C
+static const int dstBits = 16;
 static const int dstSigBits = 7;
+static const int dstSigFracBits = 7;
 
 #else
 #error Destination should be single precision or double precision!
diff --git a/compiler-rt/lib/builtins/fp_trunc_impl.inc b/compiler-rt/lib/builtins/fp_trunc_impl.inc
index e235f45965a7276..1f79a069c0d2af8 100644
--- a/compiler-rt/lib/builtins/fp_trunc_impl.inc
+++ b/compiler-rt/lib/builtins/fp_trunc_impl.inc
@@ -38,6 +38,10 @@
 
 #include "fp_trunc.h"
 
+// The destination type may use a usual IEEE-754 interchange format or Intel
+// 80-bit format. In particular, for the destination type dstSigFracBits may be
+// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
+// standard types, i.e. srcSigFracBits is assumed to be equal to srcSigBits.
 static __inline dst_t __truncXfYf2__(src_t a) {
   // Various constants whose values follow from the type parameters.
   // Any reasonable optimizer will fold and propagate all of these.
@@ -48,15 +52,18 @@ static __inline dst_t __truncXfYf2__(src_t a) {
 
   const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
   const src_rep_t srcSignificandMask = srcMinNormal - 1;
+  const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1;
+  const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits;
   const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
   const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
   const src_rep_t srcAbsMask = srcSignMask - 1;
-  const src_rep_t roundMask = (SRC_REP_C(1) << (srcSigBits - dstSigBits)) - 1;
-  const src_rep_t halfway = SRC_REP_C(1) << (srcSigBits - dstSigBits - 1);
-  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+  const src_rep_t roundMask =
+      (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
+  const src_rep_t halfway = SRC_REP_C(1)
+                            << (srcSigFracBits - dstSigFracBits - 1);
+  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
   const src_rep_t srcNaNCode = srcQNaN - 1;
 
-  const int dstBits = sizeof(dst_t) * CHAR_BIT;
   const int dstExpBits = dstBits - dstSigBits - 1;
   const int dstInfExp = (1 << dstExpBits) - 1;
   const int dstExpBias = dstInfExp >> 1;
@@ -66,7 +73,7 @@ static __inline dst_t __truncXfYf2__(src_t a) {
   const src_rep_t underflow = (src_rep_t)underflowExponent << srcSigBits;
   const src_rep_t overflow = (src_rep_t)overflowExponent << srcSigBits;
 
-  const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigBits - 1);
+  const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
   const dst_rep_t dstNaNCode = dstQNaN - 1;
 
   // Break a into a sign and representation of the absolute value.
@@ -75,10 +82,27 @@ static __inline dst_t __truncXfYf2__(src_t a) {
   const src_rep_t sign = aRep & srcSignMask;
   dst_rep_t absResult;
 
-  const int tailBits = srcBits - dstBits;
-  if (srcExpBits == dstExpBits && ((aRep >> tailBits) << tailBits) == aRep) {
+  const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
+  if (srcExpBits == dstExpBits &&
+      ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
+    dst_rep_t result;
     // Same size exponents and a's significand tail is 0. Remove tail.
-    dst_rep_t result = aRep >> tailBits;
+    if (dstSigBits == dstSigFracBits) {
+      result = aRep >> sigFracTailBits;
+    } else {
+      // Copy sign and exponent into dst representation type.
+      result = (aRep >> srcSigBits) << dstSigBits;
+
+      const src_rep_t significand = (aRep & srcSignificandMask);
+      // Copy significand (without zero tail) into dst representation type.
+      result |= (significand >> sigFracTailBits);
+
+      const src_rep_t aExp = aAbs >> srcSigBits;
+      // Set the explicit integer bit if present.
+      if (dstSigBits != dstSigFracBits && aExp) {
+        result |= (DST_REP_C(1) << (dstSigBits - 1));
+      }
+    }
     return dstFromRep(result);
   }
 
@@ -86,8 +110,20 @@ static __inline dst_t __truncXfYf2__(src_t a) {
     // The exponent of a is within the range of normal numbers in the
     // destination format.  We can convert by simply right-shifting with
     // rounding and adjusting the exponent.
-    absResult = aAbs >> (srcSigBits - dstSigBits);
-    absResult -= (dst_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+    // Extend to the destination type by shifting the significand into the
+    // proper position.
+    absResult =
+        ((src_rep_t)aAbs & srcSigFracMask) >> (srcSigFracBits - dstSigFracBits);
+    // Extend to the destination type by shifting the exponent into the proper
+    // position.
+    absResult |= ((src_rep_t)aAbs & srcExpMask) >> (srcSigBits - dstSigBits);
+    absResult -= (src_rep_t)(srcExpBias - dstExpBias) << dstSigBits;
+
+    // Set the explicit integer bit.
+    if (dstSigBits != dstSigFracBits) {
+      absResult |= (DST_REP_C(1) << (dstSigBits - 1));
+    }
 
     const src_rep_t roundBits = aAbs & roundMask;
     // Round to nearest.
@@ -103,18 +139,32 @@ static __inline dst_t __truncXfYf2__(src_t a) {
     absResult = (dst_rep_t)dstInfExp << dstSigBits;
     absResult |= dstQNaN;
     absResult |=
-        ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode;
+        ((aAbs & srcNaNCode) >> (srcSigFracBits - dstSigFracBits)) & dstNaNCode;
+    // Set the explicit integer bit.
+    if (dstSigBits != dstSigFracBits) {
+      absResult |= (DST_REP_C(1) << (dstSigBits - 1));
+    }
   } else if (aAbs >= overflow) {
     // a overflows to infinity.
     absResult = (dst_rep_t)dstInfExp << dstSigBits;
+    // Set the explicit integer bit.
+    if (dstSigBits != dstSigFracBits) {
+      absResult |= (DST_REP_C(1) << (dstSigBits - 1));
+    }
   } else {
     // a underflows on conversion to the destination type or is an exact
     // zero.  The result may be a denormal or zero.  Extract the exponent
     // to get the shift amount for the denormalization.
-    const int aExp = aAbs >> srcSigBits;
-    const int shift = srcExpBias - dstExpBias - aExp + 1;
+    const src_rep_t aExp = aAbs >> srcSigBits;
 
-    const src_rep_t significand = (aRep & srcSignificandMask) | srcMinNormal;
+    src_rep_t significand = (aRep & srcSignificandMask);
+    int shift = srcExpBias - dstExpBias - aExp;
+
+    if (aExp) {
+      // Set the implicit integer bit if the source is a normal number.
+      significand |= srcMinNormal;
+      shift += 1;
+    }
 
     // Right shift by the denormalization amount with sticky.
     if (shift > srcSigBits) {
@@ -122,7 +172,7 @@ static __inline dst_t __truncXfYf2__(src_t a) {
     } else {
       const bool sticky = (significand << (srcBits - shift)) != 0;
       src_rep_t denormalizedSignificand = significand >> shift | sticky;
-      absResult = denormalizedSignificand >> (srcSigBits - dstSigBits);
+      absResult = denormalizedSignificand >> (srcSigFracBits - dstSigFracBits);
       const src_rep_t roundBits = denormalizedSignificand & roundMask;
       // Round to nearest
       if (roundBits > halfway)
diff --git a/compiler-rt/lib/builtins/trunctfxf2.c b/compiler-rt/lib/builtins/trunctfxf2.c
new file mode 100644
index 000000000000000..4a22a602b38173f
--- /dev/null
+++ b/compiler-rt/lib/builtins/trunctfxf2.c
@@ -0,0 +1,24 @@
+//===-- lib/trunctfsf2.c - long double -> quad conversion ---------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Assumption: long double is a IEEE 80 bit floating point type padded to 128
+// bits.
+
+// TODO: use fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+
+#define SRC_QUAD
+#define DST_80
+#include "fp_trunc_impl.inc"
+
+COMPILER_RT_ABI long double __trunctfxf2(__float128 a) {
+  return __truncXfYf2__(a);
+}
+
+#endif
diff --git a/compiler-rt/test/builtins/Unit/addtf3_test.c b/compiler-rt/test/builtins/Unit/addtf3_test.c
index fe2e2c80f655b7a..e6986c236a64f5e 100644
--- a/compiler-rt/test/builtins/Unit/addtf3_test.c
+++ b/compiler-rt/test/builtins/Unit/addtf3_test.c
@@ -16,7 +16,7 @@ int test__addtf3(long double a, long double b,
                  uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __addtf3(a, b);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__addtf3(%.20Lf, %.20Lf) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/divtf3_test.c b/compiler-rt/test/builtins/Unit/divtf3_test.c
index 927d0b826f8f57c..da6465636e92326 100644
--- a/compiler-rt/test/builtins/Unit/divtf3_test.c
+++ b/compiler-rt/test/builtins/Unit/divtf3_test.c
@@ -15,7 +15,7 @@ int test__divtf3(long double a, long double b,
                  uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __divtf3(a, b);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__divtf3(%.20Le, %.20Le) = %.20Le, "
diff --git a/compiler-rt/test/builtins/Unit/extenddftf2_test.c b/compiler-rt/test/builtins/Unit/extenddftf2_test.c
index 04a346887661bf8..fcc030ca92202e9 100644
--- a/compiler-rt/test/builtins/Unit/extenddftf2_test.c
+++ b/compiler-rt/test/builtins/Unit/extenddftf2_test.c
@@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __extenddftf2(double a);
 int test__extenddftf2(double a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __extenddftf2(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__extenddftf2(%f) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/extendhftf2_test.c b/compiler-rt/test/builtins/Unit/extendhftf2_test.c
index 7d3ea3049e8a195..5de17379093af18 100644
--- a/compiler-rt/test/builtins/Unit/extendhftf2_test.c
+++ b/compiler-rt/test/builtins/Unit/extendhftf2_test.c
@@ -12,7 +12,7 @@ COMPILER_RT_ABI long double __extendhftf2(TYPE_FP16 a);
 
 int test__extendhftf2(TYPE_FP16 a, uint64_t expectedHi, uint64_t expectedLo) {
   long double x = __extendhftf2(a);
-  int ret = compareResultLD(x, expectedHi, expectedLo);
+  int ret = compareResultF128(x, expectedHi, expectedLo);
 
   if (ret) {
     printf("error in test__extendhftf2(%#.4x) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/extendsftf2_test.c b/compiler-rt/test/builtins/Unit/extendsftf2_test.c
index 19dd5b02c07bd26..6ce9bd81a3dd919 100644
--- a/compiler-rt/test/builtins/Unit/extendsftf2_test.c
+++ b/compiler-rt/test/builtins/Unit/extendsftf2_test.c
@@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __extendsftf2(float a);
 int test__extendsftf2(float a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __extendsftf2(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret)
     {
diff --git a/compiler-rt/test/builtins/Unit/extendxftf2_test.c b/compiler-rt/test/builtins/Unit/extendxftf2_test.c
new file mode 100644
index 000000000000000..f5211875438c732
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/extendxftf2_test.c
@@ -0,0 +1,74 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_extendxftf2
+
+#include "int_lib.h"
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+
+#include "fp_test.h"
+
+COMPILER_RT_ABI __float128 __extendxftf2(long double a);
+
+int test__extendxftf2(long double a, uint64_t expectedHi, uint64_t expectedLo) {
+  __float128 x = __extendxftf2(a);
+  int ret = compareResultF128(x, expectedHi, expectedLo);
+
+  if (ret) {
+    printf("error in __extendxftf2(%.20Lf) = %.20Lf, "
+           "expected %.20Lf\n",
+           a, x, fromRep128(expectedHi, expectedLo));
+  }
+  return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main() {
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+  // qNaN
+  if (test__extendxftf2(makeQNaN80(), UINT64_C(0x7fff800000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // NaN
+  if (test__extendxftf2(makeNaN80(UINT64_C(0x3fffffffffffffff)),
+                        UINT64_C(0x7fff7fffffffffff),
+                        UINT64_C(0xfffe000000000000)))
+    return 1;
+  // inf
+  if (test__extendxftf2(makeInf80(), UINT64_C(0x7fff000000000000),
+                        UINT64_C(0x0)))
+    return 1;
+  // zero
+  if (test__extendxftf2(0.0, UINT64_C(0x0), UINT64_C(0x0)))
+    return 1;
+  if (test__extendxftf2(0x1.23456789abcdefp+5, UINT64_C(0x400423456789abcd),
+                        UINT64_C(0xf000000000000000)))
+    return 1;
+  if (test__extendxftf2(0x1.edcba987654321fp-9, UINT64_C(0x3ff6edcba9876543),
+                        UINT64_C(0x2000000000000000)))
+    return 1;
+  if (test__extendxftf2(0x1.23456789abcdefp+45, UINT64_C(0x402c23456789abcd),
+                        UINT64_C(0xf000000000000000)))
+    return 1;
+  if (test__extendxftf2(0x1.edcba987654321fp-45, UINT64_C(0x3fd2edcba9876543),
+                        UINT64_C(0x2000000000000000)))
+    return 1;
+  // denormal number
+  if (test__extendxftf2(1e-4932L, UINT64_C(0x00004c248f91e526),
+                        UINT64_C(0xafe0000000000000)))
+    return 1;
+  // denormal number
+  if (test__extendxftf2(2e-4932L, UINT64_C(0x000098491f23ca4d),
+                        UINT64_C(0x5fc0000000000000)))
+    return 1;
+#else
+  printf("skipped\n");
+
+#endif
+  return 0;
+}
diff --git a/compiler-rt/test/builtins/Unit/floatditf_test.c b/compiler-rt/test/builtins/Unit/floatditf_test.c
index 4d5da32ec25d42f..fe7a5fd86ae8423 100644
--- a/compiler-rt/test/builtins/Unit/floatditf_test.c
+++ b/compiler-rt/test/builtins/Unit/floatditf_test.c
@@ -17,7 +17,7 @@ COMPILER_RT_ABI long double __floatditf(di_int a);
 int test__floatditf(di_int a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __floatditf(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret)
         printf("error in __floatditf(%Ld) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/floatsitf_test.c b/compiler-rt/test/builtins/Unit/floatsitf_test.c
index 751a4a9b9207afb..b6571b9ba223d9b 100644
--- a/compiler-rt/test/builtins/Unit/floatsitf_test.c
+++ b/compiler-rt/test/builtins/Unit/floatsitf_test.c
@@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __floatsitf(si_int a);
 int test__floatsitf(si_int a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __floatsitf(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret)
     {
diff --git a/compiler-rt/test/builtins/Unit/floatunditf_test.c b/compiler-rt/test/builtins/Unit/floatunditf_test.c
index d44ae7934145a6d..8da78da9760293a 100644
--- a/compiler-rt/test/builtins/Unit/floatunditf_test.c
+++ b/compiler-rt/test/builtins/Unit/floatunditf_test.c
@@ -17,7 +17,7 @@ COMPILER_RT_ABI long double __floatunditf(du_int a);
 int test__floatunditf(du_int a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __floatunditf(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret)
         printf("error in __floatunditf(%Lu) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/floatunsitf_test.c b/compiler-rt/test/builtins/Unit/floatunsitf_test.c
index f0a6c63eb83799d..b6b1ba045739900 100644
--- a/compiler-rt/test/builtins/Unit/floatunsitf_test.c
+++ b/compiler-rt/test/builtins/Unit/floatunsitf_test.c
@@ -13,7 +13,7 @@ COMPILER_RT_ABI long double __floatunsitf(su_int a);
 int test__floatunsitf(su_int a, uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __floatunsitf(a);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__floatunsitf(%u) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/fp_test.h b/compiler-rt/test/builtins/Unit/fp_test.h
index e54dfc108e71887..922b600409fc5ee 100644
--- a/compiler-rt/test/builtins/Unit/fp_test.h
+++ b/compiler-rt/test/builtins/Unit/fp_test.h
@@ -9,6 +9,18 @@
 #define TYPE_FP16 uint16_t
 #endif
 
+// TODO: Switch to using fp_lib.h once QUAD_PRECISION is available on x86_64.
+#if __LDBL_MANT_DIG__ == 113 ||                                                \
+    ((__LDBL_MANT_DIG__ == 64) && defined(__x86_64__) &&                       \
+     (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__)))
+#if __LDBL_MANT_DIG__ == 113
+#define TYPE_FP128 long double
+#else
+#define TYPE_FP128 __float128
+#endif
+#define TEST_COMPILER_RT_HAS_FLOAT128
+#endif
+
 enum EXPECTED_RESULT {
     LESS_0, LESS_EQUAL_0, EQUAL_0, GREATER_0, GREATER_EQUAL_0, NEQUAL_0
 };
@@ -38,11 +50,10 @@ static inline double fromRep64(uint64_t x)
     return ret;
 }
 
-#if __LDBL_MANT_DIG__ == 113
-static inline long double fromRep128(uint64_t hi, uint64_t lo)
-{
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
+static inline TYPE_FP128 fromRep128(uint64_t hi, uint64_t lo) {
     __uint128_t x = ((__uint128_t)hi << 64) + lo;
-    long double ret;
+    TYPE_FP128 ret;
     memcpy(&ret, &x, 16);
     return ret;
 }
@@ -73,9 +84,8 @@ static inline uint64_t toRep64(double x)
     return ret;
 }
 
-#if __LDBL_MANT_DIG__ == 113
-static inline __uint128_t toRep128(long double x)
-{
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
+static inline __uint128_t toRep128(TYPE_FP128 x) {
     __uint128_t ret;
     memcpy(&ret, &x, 16);
     return ret;
@@ -136,25 +146,23 @@ static inline int compareResultD(double result,
     return 1;
 }
 
-#if __LDBL_MANT_DIG__ == 113
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
 // return 0 if equal
 // use two 64-bit integers instead of one 128-bit integer
 // because 128-bit integer constant can't be assigned directly
-static inline int compareResultLD(long double result,
-                                  uint64_t expectedHi,
-                                  uint64_t expectedLo)
-{
+static inline int compareResultF128(TYPE_FP128 result, uint64_t expectedHi,
+                                    uint64_t expectedLo) {
     __uint128_t rep = toRep128(result);
     uint64_t hi = rep >> 64;
     uint64_t lo = rep;
 
-    if (hi == expectedHi && lo == expectedLo){
+    if (hi == expectedHi && lo == expectedLo) {
         return 0;
     }
     // test other possible NaN representation(signal NaN)
-    else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL){
+    else if (expectedHi == 0x7fff800000000000UL && expectedLo == 0x0UL) {
         if ((hi & 0x7fff000000000000UL) == 0x7fff000000000000UL &&
-            ((hi & 0xffffffffffffUL) > 0 || lo > 0)){
+            ((hi & 0xffffffffffffUL) > 0 || lo > 0)) {
             return 0;
         }
     }
@@ -232,9 +240,44 @@ static inline double makeQNaN64(void)
     return fromRep64(0x7ff8000000000000UL);
 }
 
-#if __LDBL_MANT_DIG__ == 113
-static inline long double makeQNaN128(void)
-{
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__)
+static inline long double F80FromRep128(uint64_t hi, uint64_t lo) {
+    __uint128_t x = ((__uint128_t)hi << 64) + lo;
+    long double ret;
+    memcpy(&ret, &x, 16);
+    return ret;
+}
+
+static inline __uint128_t F80ToRep128(long double x) {
+    __uint128_t ret;
+    memcpy(&ret, &x, 16);
+    return ret;
+}
+
+static inline int compareResultF80(long double result, uint64_t expectedHi,
+                                   uint64_t expectedLo) {
+    __uint128_t rep = F80ToRep128(result);
+    uint64_t hi = (rep >> 64) & ((1UL << 48) - 1);
+    uint64_t lo = rep;
+    return !(hi == expectedHi && lo == expectedLo);
+}
+
+static inline long double makeQNaN80(void) {
+    return F80FromRep128(0x7fffUL, 0xc000000000000000UL);
+}
+
+static inline long double makeNaN80(uint64_t rand) {
+    return F80FromRep128(0x7fffUL,
+                         0x8000000000000000 | (rand & 0x3fffffffffffffff));
+}
+
+static inline long double makeInf80(void) {
+    return F80FromRep128(0x7fffUL, 0x8000000000000000UL);
+}
+#endif
+
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
+static inline TYPE_FP128 makeQNaN128(void) {
     return fromRep128(0x7fff800000000000UL, 0x0UL);
 }
 #endif
@@ -254,9 +297,8 @@ static inline double makeNaN64(uint64_t rand)
     return fromRep64(0x7ff0000000000000UL | (rand & 0xfffffffffffffUL));
 }
 
-#if __LDBL_MANT_DIG__ == 113
-static inline long double makeNaN128(uint64_t rand)
-{
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
+static inline TYPE_FP128 makeNaN128(uint64_t rand) {
     return fromRep128(0x7fff000000000000UL | (rand & 0xffffffffffffUL), 0x0UL);
 }
 #endif
@@ -286,14 +328,12 @@ static inline double makeNegativeInf64(void)
     return fromRep64(0xfff0000000000000UL);
 }
 
-#if __LDBL_MANT_DIG__ == 113
-static inline long double makeInf128(void)
-{
+#ifdef TEST_COMPILER_RT_HAS_FLOAT128
+static inline TYPE_FP128 makeInf128(void) {
     return fromRep128(0x7fff000000000000UL, 0x0UL);
 }
 
-static inline long double makeNegativeInf128(void)
-{
+static inline TYPE_FP128 makeNegativeInf128(void) {
     return fromRep128(0xffff000000000000UL, 0x0UL);
 }
 #endif
diff --git a/compiler-rt/test/builtins/Unit/multf3_test.c b/compiler-rt/test/builtins/Unit/multf3_test.c
index 3bf6ab24cec0221..543b55899ce82a9 100644
--- a/compiler-rt/test/builtins/Unit/multf3_test.c
+++ b/compiler-rt/test/builtins/Unit/multf3_test.c
@@ -15,7 +15,7 @@ int test__multf3(long double a, long double b,
                  uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __multf3(a, b);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__multf3(%.20Lf, %.20Lf) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/subtf3_test.c b/compiler-rt/test/builtins/Unit/subtf3_test.c
index 377ae95a9a7d7bb..724fa4820d99d32 100644
--- a/compiler-rt/test/builtins/Unit/subtf3_test.c
+++ b/compiler-rt/test/builtins/Unit/subtf3_test.c
@@ -16,7 +16,7 @@ int test__subtf3(long double a, long double b,
                  uint64_t expectedHi, uint64_t expectedLo)
 {
     long double x = __subtf3(a, b);
-    int ret = compareResultLD(x, expectedHi, expectedLo);
+    int ret = compareResultF128(x, expectedHi, expectedLo);
 
     if (ret){
         printf("error in test__subtf3(%.20Lf, %.20Lf) = %.20Lf, "
diff --git a/compiler-rt/test/builtins/Unit/trunctfxf2_test.c b/compiler-rt/test/builtins/Unit/trunctfxf2_test.c
new file mode 100644
index 000000000000000..b863f7365f5466a
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/trunctfxf2_test.c
@@ -0,0 +1,72 @@
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_trunctfxf2
+
+#include "int_lib.h"
+#include <stdio.h>
+
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+
+#include "fp_test.h"
+
+COMPILER_RT_ABI long double __trunctfxf2(__float128 a);
+
+int test__trunctfxf2(__float128 a, uint64_t expectedHi, uint64_t expectedLo) {
+  long double x = __trunctfxf2(a);
+  int ret = compareResultF80(x, expectedHi, expectedLo);
+  ;
+  if (ret) {
+    printf("error in __trunctfxf2(%.20Lf) = %.20Lf, "
+           "expected %.20Lf\n",
+           a, x, fromRep128(expectedHi, expectedLo));
+  }
+  return ret;
+}
+
+char assumption_1[sizeof(long double) * CHAR_BIT == 128] = {0};
+
+#endif
+
+int main() {
+#if __LDBL_MANT_DIG__ == 64 && defined(__x86_64__) &&                          \
+    (defined(__FLOAT128__) || defined(__SIZEOF_FLOAT128__))
+  // qNaN
+  if (test__trunctfxf2(makeQNaN128(), UINT64_C(0x7FFF),
+                       UINT64_C(0xC000000000000000)))
+    return 1;
+  // NaN
+  if (test__trunctfxf2(makeNaN128(UINT64_C(0x810000000000)), UINT64_C(0x7FFF),
+                       UINT64_C(0xC080000000000000)))
+    return 1;
+  // inf
+  if (test__trunctfxf2(makeInf128(), UINT64_C(0x7FFF),
+                       UINT64_C(0x8000000000000000)))
+    return 1;
+  // zero
+  if (test__trunctfxf2(0.0Q, UINT64_C(0x0), UINT64_C(0x0)))
+    return 1;
+  if (test__trunctfxf2(0x1.af23456789bbaaab347645365cdep+5L, UINT64_C(0x4004),
+                       UINT64_C(0xd791a2b3c4ddd556)))
+    return 1;
+  if (test__trunctfxf2(0x1.dedafcff354b6ae9758763545432p-9L, UINT64_C(0x3ff6),
+                       UINT64_C(0xef6d7e7f9aa5b575)))
+    return 1;
+  if (test__trunctfxf2(0x1.2f34dd5f437e849b4baab754cdefp+4534L,
+                       UINT64_C(0x51b5), UINT64_C(0x979a6eafa1bf424e)))
+    return 1;
+  if (test__trunctfxf2(0x1.edcbff8ad76ab5bf46463233214fp-435L, UINT64_C(0x3e4c),
+                       UINT64_C(0xf6e5ffc56bb55ae0)))
+    return 1;
+  // denormal number
+  if (test__trunctfxf2(1e-4932Q, UINT64_C(0), UINT64_C(0x261247c8f29357f0)))
+    return 1;
+  // denormal number
+  if (test__trunctfxf2(2e-4932Q, UINT64_C(0), UINT64_C(0x4c248f91e526afe0)))
+    return 1;
+
+#else
+  printf("skipped\n");
+
+#endif
+  return 0;
+}



[compiler-rt] [compiler-rt] Implement __extendxftf2 and __trunctfxf2 for x86_64 (PR #66918)

[compiler-rt] [compiler-rt] Implement extendxftf2 and trunctfxf2 for x86_64 (PR #66918)