[libc-commits] [libc] bb8f258 - [libc] Add implementations of ldexp[f|l].

Siva Chandra Reddy via libc-commits libc-commits at lists.llvm.org
Tue Nov 17 15:06:05 PST 2020


Author: Siva Chandra Reddy
Date: 2020-11-17T15:05:42-08:00
New Revision: bb8f2585c6eab263916757435d71df16d92de4a8

URL: https://github.com/llvm/llvm-project/commit/bb8f2585c6eab263916757435d71df16d92de4a8
DIFF: https://github.com/llvm/llvm-project/commit/bb8f2585c6eab263916757435d71df16d92de4a8.diff

LOG: [libc] Add implementations of ldexp[f|l].

The rounding behavior of NormalFloat to float format has been changed
to round to nearest. Also, a bug in NormalFloat to subnormal number
conversion has been fixed.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D91591

Added: 
    libc/src/math/ldexp.cpp
    libc/src/math/ldexp.h
    libc/src/math/ldexpf.cpp
    libc/src/math/ldexpf.h
    libc/src/math/ldexpl.cpp
    libc/src/math/ldexpl.h
    libc/test/src/math/LdExpTest.h
    libc/test/src/math/ldexp_test.cpp
    libc/test/src/math/ldexpf_test.cpp
    libc/test/src/math/ldexpl_test.cpp

Modified: 
    libc/config/linux/aarch64/entrypoints.txt
    libc/config/linux/x86_64/entrypoints.txt
    libc/spec/stdc.td
    libc/src/math/CMakeLists.txt
    libc/test/src/math/CMakeLists.txt
    libc/utils/FPUtil/ManipulationFunctions.h
    libc/utils/FPUtil/NormalFloat.h

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 6ee0e98541c4..3a34d95b36e1 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -71,6 +71,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.ldexp
+    libc.src.math.ldexpf
+    libc.src.math.ldexpl
     libc.src.math.logb
     libc.src.math.logbf
     libc.src.math.logbl

diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index c1a80288a6a6..a0773ef0fce6 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -104,6 +104,9 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ilogb
     libc.src.math.ilogbf
     libc.src.math.ilogbl
+    libc.src.math.ldexp
+    libc.src.math.ldexpf
+    libc.src.math.ldexpl
     libc.src.math.logb
     libc.src.math.logbf
     libc.src.math.logbl

diff  --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index c587f5b73013..7f70e17cfd52 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -284,6 +284,10 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"ilogbf", RetValSpec<IntType>, [ArgSpec<FloatType>]>,
           FunctionSpec<"ilogbl", RetValSpec<IntType>, [ArgSpec<LongDoubleType>]>,
 
+          FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
+          FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
+          FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
+
           FunctionSpec<"logb", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"logbf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
           FunctionSpec<"logbl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,

diff  --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index b7614cfec612..fd75a3b48bcb 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -378,6 +378,42 @@ add_entrypoint_object(
     -O2
 )
 
+add_entrypoint_object(
+  ldexp
+  SRCS
+    ldexp.cpp
+  HDRS
+    ldexp.h
+  DEPENDS
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O2
+)
+
+add_entrypoint_object(
+  ldexpf
+  SRCS
+    ldexpf.cpp
+  HDRS
+    ldexpf.h
+  DEPENDS
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O2
+)
+
+add_entrypoint_object(
+  ldexpl
+  SRCS
+    ldexpl.cpp
+  HDRS
+    ldexpl.h
+  DEPENDS
+    libc.utils.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O2
+)
+
 add_entrypoint_object(
   logb
   SRCS

diff  --git a/libc/src/math/ldexp.cpp b/libc/src/math/ldexp.cpp
new file mode 100644
index 000000000000..1eefd1037cde
--- /dev/null
+++ b/libc/src/math/ldexp.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexp function ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+double LLVM_LIBC_ENTRYPOINT(ldexp)(double x, int exp) {
+  return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/ldexp.h b/libc/src/math/ldexp.h
new file mode 100644
index 000000000000..74f9a600666f
--- /dev/null
+++ b/libc/src/math/ldexp.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexp -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LDEXP_H
+#define LLVM_LIBC_SRC_MATH_LDEXP_H
+
+namespace __llvm_libc {
+
+double ldexp(double x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LDEXP_H

diff  --git a/libc/src/math/ldexpf.cpp b/libc/src/math/ldexpf.cpp
new file mode 100644
index 000000000000..5c4425450430
--- /dev/null
+++ b/libc/src/math/ldexpf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexpf function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+float LLVM_LIBC_ENTRYPOINT(ldexpf)(float x, int exp) {
+  return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/ldexpf.h b/libc/src/math/ldexpf.h
new file mode 100644
index 000000000000..f30d60155f1f
--- /dev/null
+++ b/libc/src/math/ldexpf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexpf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LDEXPF_H
+#define LLVM_LIBC_SRC_MATH_LDEXPF_H
+
+namespace __llvm_libc {
+
+float ldexpf(float x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LDEXPF_H

diff  --git a/libc/src/math/ldexpl.cpp b/libc/src/math/ldexpl.cpp
new file mode 100644
index 000000000000..10ec66024fc0
--- /dev/null
+++ b/libc/src/math/ldexpl.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexpl function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+long double LLVM_LIBC_ENTRYPOINT(ldexpl)(long double x, int exp) {
+  return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/ldexpl.h b/libc/src/math/ldexpl.h
new file mode 100644
index 000000000000..bf8435b1a21d
--- /dev/null
+++ b/libc/src/math/ldexpl.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexpl ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ldexpl_H
+#define LLVM_LIBC_SRC_MATH_ldexpl_H
+
+namespace __llvm_libc {
+
+long double ldexpl(long double x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_ldexpl_H

diff  --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 0f5405cb3a26..2220cef00791 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -412,6 +412,48 @@ add_fp_unittest(
     libc.utils.FPUtil.fputil
 )
 
+add_fp_unittest(
+  ldexp_test
+  SUITE
+    libc_math_unittests
+  SRCS
+    ldexp_test.cpp
+  HDRS
+    LdExpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.ldexp
+    libc.utils.FPUtil.fputil
+)
+
+add_fp_unittest(
+  ldexpf_test
+  SUITE
+    libc_math_unittests
+  SRCS
+    ldexpf_test.cpp
+  HDRS
+    LdExpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.ldexpf
+    libc.utils.FPUtil.fputil
+)
+
+add_fp_unittest(
+  ldexpl_test
+  SUITE
+    libc_math_unittests
+  SRCS
+    ldexpl_test.cpp
+  HDRS
+    LdExpTest.h
+  DEPENDS
+    libc.include.math
+    libc.src.math.ldexpl
+    libc.utils.FPUtil.fputil
+)
+
 add_fp_unittest(
   logb_test
   SUITE

diff  --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h
new file mode 100644
index 000000000000..e1976d8714f5
--- /dev/null
+++ b/libc/test/src/math/LdExpTest.h
@@ -0,0 +1,131 @@
+//===-- Utility class to test different flavors of ldexp --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
+#define LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
+
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/NormalFloat.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+#include <math.h>
+#include <stdint.h>
+
+template <typename T>
+class LdExpTestTemplate : public __llvm_libc::testing::Test {
+  using FPBits = __llvm_libc::fputil::FPBits<T>;
+  using NormalFloat = __llvm_libc::fputil::NormalFloat<T>;
+  using UIntType = typename FPBits::UIntType;
+  static constexpr UIntType mantissaWidth =
+      __llvm_libc::fputil::MantissaWidth<T>::value;
+  // A normalized mantissa to be used with tests.
+  static constexpr UIntType mantissa = NormalFloat::one + 0x1234;
+
+  const T zero = __llvm_libc::fputil::FPBits<T>::zero();
+  const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
+  const T inf = __llvm_libc::fputil::FPBits<T>::inf();
+  const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
+  const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
+
+public:
+  typedef T (*LdExpFunc)(T, int);
+
+  void testSpecialNumbers(LdExpFunc func) {
+    int expArray[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX};
+    for (int exp : expArray) {
+      ASSERT_FP_EQ(zero, func(zero, exp));
+      ASSERT_FP_EQ(negZero, func(negZero, exp));
+      ASSERT_FP_EQ(inf, func(inf, exp));
+      ASSERT_FP_EQ(negInf, func(negInf, exp));
+      ASSERT_NE(isnan(func(nan, exp)), 0);
+    }
+  }
+
+  void testPowersOfTwo(LdExpFunc func) {
+    int32_t expArray[5] = {1, 2, 3, 4, 5};
+    int32_t valArray[6] = {1, 2, 4, 8, 16, 32};
+    for (int32_t exp : expArray) {
+      for (int32_t val : valArray) {
+        ASSERT_FP_EQ(T(val << exp), func(T(val), exp));
+        ASSERT_FP_EQ(T(-1 * (val << exp)), func(T(-val), exp));
+      }
+    }
+  }
+
+  void testOverflow(LdExpFunc func) {
+    NormalFloat x(FPBits::maxExponent - 10, NormalFloat::one + 0xF00BA, 0);
+    for (int32_t exp = 10; exp < 100; ++exp) {
+      ASSERT_FP_EQ(inf, func(T(x), exp));
+      ASSERT_FP_EQ(negInf, func(-T(x), exp));
+    }
+  }
+
+  void testUnderflowToZeroOnNormal(LdExpFunc func) {
+    // In this test, we pass a normal number to func and expect zero
+    // to be returned due to underflow.
+    int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
+    int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
+                          baseExponent + 2, baseExponent + 1};
+    T x = NormalFloat(0, mantissa, 0);
+    for (int32_t exp : expArray) {
+      ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
+    }
+  }
+
+  void testUnderflowToZeroOnSubnormal(LdExpFunc func) {
+    // In this test, we pass a subnormal number to func and expect zero
+    // to be returned due to underflow.
+    int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
+    int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
+                          baseExponent + 2, baseExponent + 1};
+    T x = NormalFloat(-FPBits::exponentBias, mantissa, 0);
+    for (int32_t exp : expArray) {
+      ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
+    }
+  }
+
+  void testNormalOperation(LdExpFunc func) {
+    T valArray[] = {
+        // Normal numbers
+        NormalFloat(100, mantissa, 0), NormalFloat(-100, mantissa, 0),
+        NormalFloat(100, mantissa, 1), NormalFloat(-100, mantissa, 1),
+        // Subnormal numbers
+        NormalFloat(-FPBits::exponentBias, mantissa, 0),
+        NormalFloat(-FPBits::exponentBias, mantissa, 1)};
+    for (int32_t exp = 0; exp <= static_cast<int32_t>(mantissaWidth); ++exp) {
+      for (T x : valArray) {
+        // We compare the result of ldexp with the result
+        // of the native multiplication/division instruction.
+        ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp));
+        ASSERT_FP_EQ(func(x, -exp), x / (UIntType(1) << exp));
+      }
+    }
+
+    // A normal number which triggers mantissa overflow.
+    T x = NormalFloat(-FPBits::exponentBias + 1, 2 * NormalFloat::one - 1, 0);
+    ASSERT_FP_EQ(func(x, -1), x / 2);
+    ASSERT_FP_EQ(func(-x, -1), -x / 2);
+  }
+};
+
+#define LIST_LDEXP_TESTS(T, func)                                              \
+  using LdExpTest = LdExpTestTemplate<T>;                                      \
+  TEST_F(LdExpTest, SpecialNumbers) { testSpecialNumbers(&func); }             \
+  TEST_F(LdExpTest, PowersOfTwo) { testPowersOfTwo(&func); }                   \
+  TEST_F(LdExpTest, OverFlow) { testOverflow(&func); }                         \
+  TEST_F(LdExpTest, UnderflowToZeroOnNormal) {                                 \
+    testUnderflowToZeroOnNormal(&func);                                        \
+  }                                                                            \
+  TEST_F(LdExpTest, UnderflowToZeroOnSubnormal) {                              \
+    testUnderflowToZeroOnSubnormal(&func);                                     \
+  }                                                                            \
+  TEST_F(LdExpTest, NormalOperation) { testNormalOperation(&func); }
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H

diff  --git a/libc/test/src/math/ldexp_test.cpp b/libc/test/src/math/ldexp_test.cpp
new file mode 100644
index 000000000000..0f5974c018f7
--- /dev/null
+++ b/libc/test/src/math/ldexp_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexp.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(double, __llvm_libc::ldexp)

diff  --git a/libc/test/src/math/ldexpf_test.cpp b/libc/test/src/math/ldexpf_test.cpp
new file mode 100644
index 000000000000..d9a44b3f4125
--- /dev/null
+++ b/libc/test/src/math/ldexpf_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexpf ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexpf.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(float, __llvm_libc::ldexpf)

diff  --git a/libc/test/src/math/ldexpl_test.cpp b/libc/test/src/math/ldexpl_test.cpp
new file mode 100644
index 000000000000..69444b6edbaf
--- /dev/null
+++ b/libc/test/src/math/ldexpl_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexpl ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexpl.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(long double, __llvm_libc::ldexpl)

diff  --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h
index bfd24c2cc481..6ca33e859d65 100644
--- a/libc/utils/FPUtil/ManipulationFunctions.h
+++ b/libc/utils/FPUtil/ManipulationFunctions.h
@@ -116,6 +116,30 @@ static inline T logb(T x) {
   return normal.exponent;
 }
 
+template <typename T,
+          cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
+static inline T ldexp(T x, int exp) {
+  FPBits<T> bits(x);
+  if (bits.isZero() || bits.isInfOrNaN() || exp == 0)
+    return x;
+
+  // NormalFloat uses int32_t to store the true exponent value. We should ensure
+  // that adding |exp| to it does not lead to integer rollover. But, if |exp|
+  // is larger than the exponent range for type T, then we can return infinity
+  // early.
+  if (exp > FPBits<T>::maxExponent)
+    return bits.sign ? FPBits<T>::negInf() : FPBits<T>::inf();
+
+  // Similarly on the negative side.
+  if (exp < -FPBits<T>::maxExponent)
+    return bits.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
+
+  // For all other values, NormalFloat to T conversion handles it the right way.
+  NormalFloat<T> normal(bits);
+  normal.exponent += exp;
+  return normal;
+}
+
 } // namespace fputil
 } // namespace __llvm_libc
 

diff  --git a/libc/utils/FPUtil/NormalFloat.h b/libc/utils/FPUtil/NormalFloat.h
index e0e691125431..f71951941d81 100644
--- a/libc/utils/FPUtil/NormalFloat.h
+++ b/libc/utils/FPUtil/NormalFloat.h
@@ -93,30 +93,47 @@ template <typename T> struct NormalFloat {
     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
     constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
     if (biasedExponent > maxExponentValue) {
-      // TODO: Should infinity with the correct sign be returned?
-      return FPBits<T>::buildNaN(1);
+      return sign ? FPBits<T>::negInf() : FPBits<T>::inf();
     }
 
     FPBits<T> result(T(0.0));
+    result.sign = sign;
 
     constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
     if (exponent < subnormalExponent) {
       unsigned shift = subnormalExponent - exponent;
-      if (shift <= MantissaWidth<T>::value) {
+      // Since exponent < subnormalExponent, shift is strictly greater than
+      // zero.
+      if (shift <= MantissaWidth<T>::value + 1) {
         // Generate a subnormal number. Might lead to loss of precision.
+        // We round to nearest and round halfway cases to even.
+        const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
+        const UIntType shiftOutValue = mantissa & shiftOutMask;
+        const UIntType halfwayValue = UIntType(1) << (shift - 1);
         result.exponent = 0;
         result.mantissa = mantissa >> shift;
-        result.sign = sign;
+        UIntType newMantissa = result.mantissa;
+        if (shiftOutValue > halfwayValue) {
+          newMantissa += 1;
+        } else if (shiftOutValue == halfwayValue) {
+          // Round to even.
+          if (result.mantissa & 0x1)
+            newMantissa += 1;
+        }
+        result.mantissa = newMantissa;
+        // Adding 1 to mantissa can lead to overflow. This can only happen if
+        // mantissa was all ones (0b111..11). For such a case, we will carry
+        // the overflow into the exponent.
+        if (newMantissa == one)
+          result.exponent = 1;
         return result;
       } else {
-        // TODO: Should zero with the correct sign be returned?
-        return FPBits<T>::buildNaN(1);
+        return result;
       }
     }
 
     result.exponent = exponent + FPBits<T>::exponentBias;
     result.mantissa = mantissa;
-    result.sign = sign;
     return result;
   }
 
@@ -192,32 +209,50 @@ template <> inline NormalFloat<long double>::operator long double() const {
   // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
   constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2;
   if (biasedExponent > maxExponentValue) {
-    // TODO: Should infinity with the correct sign be returned?
-    return FPBits<long double>::buildNaN(1);
+    return sign ? FPBits<long double>::negInf() : FPBits<long double>::inf();
   }
 
   FPBits<long double> result(0.0l);
+  result.sign = sign;
 
   constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
   if (exponent < subnormalExponent) {
     unsigned shift = subnormalExponent - exponent;
-    if (shift <= MantissaWidth<long double>::value) {
+    if (shift <= MantissaWidth<long double>::value + 1) {
       // Generate a subnormal number. Might lead to loss of precision.
+      // We round to nearest and round halfway cases to even.
+      const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
+      const UIntType shiftOutValue = mantissa & shiftOutMask;
+      const UIntType halfwayValue = UIntType(1) << (shift - 1);
       result.exponent = 0;
       result.mantissa = mantissa >> shift;
-      result.implicitBit = 0;
-      result.sign = sign;
+      UIntType newMantissa = result.mantissa;
+      if (shiftOutValue > halfwayValue) {
+        newMantissa += 1;
+      } else if (shiftOutValue == halfwayValue) {
+        // Round to even.
+        if (result.mantissa & 0x1)
+          newMantissa += 1;
+      }
+      result.mantissa = newMantissa;
+      // Adding 1 to mantissa can lead to overflow. This can only happen if
+      // mantissa was all ones (0b111..11). For such a case, we will carry
+      // the overflow into the exponent and set the implicit bit to 1.
+      if (newMantissa == one) {
+        result.exponent = 1;
+        result.implicitBit = 1;
+      } else {
+        result.implicitBit = 0;
+      }
       return result;
     } else {
-      // TODO: Should zero with the correct sign be returned?
-      return FPBits<long double>::buildNaN(1);
+      return result;
     }
   }
 
   result.exponent = biasedExponent;
   result.mantissa = mantissa;
   result.implicitBit = 1;
-  result.sign = sign;
   return result;
 }
 #endif


        


More information about the libc-commits mailing list