[libc-commits] [libc] 9b39737 - [libc] add modified Eisel-Lemire for long doubles

Wed Dec 22 16:45:26 PST 2021

Author: Michael Jones
Date: 2021-12-22T16:45:22-08:00
New Revision: 9b39737129f549ce3a17893c780bab96441ac921

URL: https://github.com/llvm/llvm-project/commit/9b39737129f549ce3a17893c780bab96441ac921
DIFF: https://github.com/llvm/llvm-project/commit/9b39737129f549ce3a17893c780bab96441ac921.diff

LOG: [libc] add modified Eisel-Lemire for long doubles

The Eisel-Lemire algorithm is an effecient way to handle converting to
floating point numbers from strings, but in its base form it only
supports up to 64 bit floating point numbers. This adds an
implementation to handle long doubles.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D115280

Added: 
    

Modified: 
    libc/src/__support/str_to_float.h
    libc/test/src/__support/str_to_float_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h
index a7ac3e2e9a681..8defeed122a9f 100644

--- a/libc/src/__support/str_to_float.h
+++ b/libc/src/__support/str_to_float.h
@@ -182,6 +182,114 @@ eisel_lemire(typename fputil::FPBits<T>::UIntType mantissa, int32_t exp10,
   return true;
 }
 
+#if !defined(LONG_DOUBLE_IS_DOUBLE)
+template <>
+inline bool eisel_lemire<long double>(
+    typename fputil::FPBits<long double>::UIntType mantissa, int32_t exp10,
+    typename fputil::FPBits<long double>::UIntType *outputMantissa,
+    uint32_t *outputExp2) {
+  using BitsType = typename fputil::FPBits<long double>::UIntType;
+  constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8;
+
+  // Exp10 Range
+  // This doesn't reach very far into the range for long doubles, since it's
+  // sized for doubles and their 11 exponent bits, and not for long doubles and
+  // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long
+  // double). This is a known tradeoff, and was made because a proper long
+  // double table would be approximately 16 times larger. This would have
+  // significant memory and storage costs all the time to speed up a relatively
+  // uncommon path. In addition the exp10_to_exp2 function only approximates
+  // multiplying by log(10)/log(2), and that approximation may not be accurate
+  // out to the full long double range.
+  if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 ||
+      exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) {
+    return false;
+  }
+
+  // Normalization
+  uint32_t clz = leading_zeroes<BitsType>(mantissa);
+  mantissa <<= clz;
+
+  uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA +
+                  fputil::FloatProperties<long double>::EXPONENT_BIAS - clz;
+
+  // Multiplication
+  const uint64_t *power_of_ten =
+      DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10];
+
+  // Since the input mantissa is more than 64 bits, we have to multiply with the
+  // full 128 bits of the power of ten to get an approximation with the same
+  // number of significant bits. This means that we only get the one
+  // approximation, and that approximation is 256 bits long.
+  __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) *
+                             static_cast<__uint128_t>(power_of_ten[1]);
+
+  __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) *
+                                  static_cast<__uint128_t>(power_of_ten[0]) +
+                              static_cast<__uint128_t>(low64(mantissa)) *
+                                  static_cast<__uint128_t>(power_of_ten[1]);
+
+  __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) *
+                             static_cast<__uint128_t>(power_of_ten[0]);
+
+  __uint128_t final_approx_lower =
+      approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64);
+  __uint128_t final_approx_upper = approx_upper + high64(approx_middle) +
+                                   (final_approx_lower < approx_lower ? 1 : 0);
+
+  // The halfway constant is used to check if the bits that will be shifted away
+  // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64
+  // (final mantissa size) - 3 (we shift away the last two bits separately for
+  // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
+  // it's 12 bits for 128 bit floats in this case.
+  constexpr __uint128_t HALFWAY_CONSTANT =
+      (__uint128_t(1) << (BITS_IN_MANTISSA -
+                          fputil::FloatProperties<long double>::MANTISSA_WIDTH -
+                          3)) -
+      1;
+
+  if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
+      final_approx_lower + mantissa < mantissa) {
+    return false;
+  }
+
+  // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
+  BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1);
+  BitsType final_mantissa =
+      final_approx_upper >>
+      (msb + BITS_IN_MANTISSA -
+       (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 3));
+  exp2 -= 1 ^ msb; // same as !msb
+
+  // Half-way ambiguity
+  if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 &&
+      (final_mantissa & 3) == 1) {
+    return false;
+  }
+
+  // From 65 to 64 bits for 80 bit floats and 113  to 112 bits for 128 bit
+  // floats
+  final_mantissa += final_mantissa & 1;
+  final_mantissa >>= 1;
+  if ((final_mantissa >>
+       (fputil::FloatProperties<long double>::MANTISSA_WIDTH + 1)) > 0) {
+    final_mantissa >>= 1;
+    ++exp2;
+  }
+
+  // The if block is equivalent to (but has fewer branches than):
+  //   if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
+  if (exp2 - 1 >=
+      (1 << fputil::FloatProperties<long double>::EXPONENT_WIDTH) - 2) {
+    return false;
+  }
+
+  *outputMantissa = final_mantissa;
+  *outputExp2 = exp2;
+  return true;
+}
+#endif
+
 // The nth item in POWERS_OF_TWO represents the greatest power of two less than
 // 10^n. This tells us how much we can safely shift without overshooting.
 constexpr uint8_t POWERS_OF_TWO[19] = {

diff  --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp
index 44f0cecec76ca..e75434621157e 100644
--- a/libc/test/src/__support/str_to_float_test.cpp
+++ b/libc/test/src/__support/str_to_float_test.cpp
@@ -175,13 +175,8 @@ TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) {
   // Check the fallback states for the algorithm:
   uint32_t float_output_mantissa = 0;
   uint64_t double_output_mantissa = 0;
-  __uint128_t too_long_mantissa = 0;
   uint32_t output_exp2 = 0;
 
-  // This Eisel-Lemire implementation doesn't support long doubles yet.
-  ASSERT_FALSE(__llvm_libc::internal::eisel_lemire<long double>(
-      too_long_mantissa, 0, &too_long_mantissa, &output_exp2));
-
   // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away
   // from both of its closest floating point approximations
   // (12345678901234548736 and 12345678901234550784)
@@ -272,3 +267,92 @@ TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) {
   // EXPECT_EQ(outputExp2, uint32_t(1089));
   // EXPECT_EQ(errno, 0);
 }
+
+#if defined(LONG_DOUBLE_IS_DOUBLE)
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) {
+  eisel_lemire_test<long double>(123, 0, 0x1EC00000000000, 1029);
+}
+#elif defined(SPECIAL_X86_LONG_DOUBLE)
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Simple) {
+  eisel_lemire_test<long double>(123, 0, 0xf600000000000000, 16389);
+  eisel_lemire_test<long double>(12345678901234568192u, 0, 0xab54a98ceb1f0c00,
+                                 16446);
+}
+
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80LongerMantissa) {
+  eisel_lemire_test<long double>((__uint128_t(0x1234567812345678) << 64) +
+                                     __uint128_t(0x1234567812345678),
+                                 0, 0x91a2b3c091a2b3c1, 16507);
+  eisel_lemire_test<long double>((__uint128_t(0x1234567812345678) << 64) +
+                                     __uint128_t(0x1234567812345678),
+                                 300, 0xd97757de56adb65c, 17503);
+  eisel_lemire_test<long double>((__uint128_t(0x1234567812345678) << 64) +
+                                     __uint128_t(0x1234567812345678),
+                                 -300, 0xc30feb9a7618457d, 15510);
+}
+
+// These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table.
+// This doesn't reach very far into the range for long doubles, since it's sized
+// for doubles and their 11 exponent bits, and not for long doubles and their
+// 15 exponent bits. This is a known tradeoff, and was made because a proper
+// long double table would be approximately 16 times longer (specifically the
+// maximum exponent would need to be about 5000, leading to a 10,000 entry
+// table). This would have significant memory and storage costs all the time to
+// speed up a relatively uncommon path.
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80TableLimits) {
+  eisel_lemire_test<long double>(1, 347, 0xd13eb46469447567, 17535);
+  eisel_lemire_test<long double>(1, -348, 0xfa8fd5a0081c0288, 15226);
+}
+
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) {
+  uint32_t outputExp2 = 0;
+  __uint128_t quadOutputMantissa = 0;
+
+  // This number is halfway between two possible results, and the algorithm
+  // can't determine which is correct.
+  ASSERT_FALSE(__llvm_libc::internal::eisel_lemire<long double>(
+      12345678901234567890u, 1, &quadOutputMantissa, &outputExp2));
+
+  // These numbers' exponents are out of range for the current powers of ten
+  // table.
+  ASSERT_FALSE(__llvm_libc::internal::eisel_lemire<long double>(
+      1, 1000, &quadOutputMantissa, &outputExp2));
+  ASSERT_FALSE(__llvm_libc::internal::eisel_lemire<long double>(
+      1, -1000, &quadOutputMantissa, &outputExp2));
+}
+#else
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) {
+  eisel_lemire_test<long double>(123, 0, (__uint128_t(0x1ec0000000000) << 64),
+                                 16389);
+  eisel_lemire_test<long double>(12345678901234568192u, 0,
+                                 (__uint128_t(0x156a95319d63e) << 64) +
+                                     __uint128_t(0x1800000000000000),
+                                 16446);
+}
+
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128LongerMantissa) {
+  eisel_lemire_test<long double>(
+      (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678),
+      0, (__uint128_t(0x1234567812345) << 64) + __uint128_t(0x6781234567812345),
+      16507);
+  eisel_lemire_test<long double>(
+      (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678),
+      300,
+      (__uint128_t(0x1b2eeafbcad5b) << 64) + __uint128_t(0x6cb8b4451dfcde19),
+      17503);
+  eisel_lemire_test<long double>(
+      (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678),
+      -300,
+      (__uint128_t(0x1861fd734ec30) << 64) + __uint128_t(0x8afa7189f0f7595f),
+      15510);
+}
+
+TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) {
+  uint32_t outputExp2 = 0;
+  __uint128_t quadOutputMantissa = 0;
+
+  ASSERT_FALSE(__llvm_libc::internal::eisel_lemire<long double>(
+      (__uint128_t(0x5ce0e9a56015fec5) << 64) + __uint128_t(0xaadfa328ae39b333),
+      1, &quadOutputMantissa, &outputExp2));
+}
+#endif