[libc-commits] [libc] bb8f258 - [libc] Add implementations of ldexp[f|l].
Siva Chandra Reddy via libc-commits
libc-commits at lists.llvm.org
Tue Nov 17 15:06:05 PST 2020
Author: Siva Chandra Reddy
Date: 2020-11-17T15:05:42-08:00
New Revision: bb8f2585c6eab263916757435d71df16d92de4a8
URL: https://github.com/llvm/llvm-project/commit/bb8f2585c6eab263916757435d71df16d92de4a8
DIFF: https://github.com/llvm/llvm-project/commit/bb8f2585c6eab263916757435d71df16d92de4a8.diff
LOG: [libc] Add implementations of ldexp[f|l].
The rounding behavior of NormalFloat to float format has been changed
to round to nearest. Also, a bug in NormalFloat to subnormal number
conversion has been fixed.
Reviewed By: lntue
Differential Revision: https://reviews.llvm.org/D91591
Added:
libc/src/math/ldexp.cpp
libc/src/math/ldexp.h
libc/src/math/ldexpf.cpp
libc/src/math/ldexpf.h
libc/src/math/ldexpl.cpp
libc/src/math/ldexpl.h
libc/test/src/math/LdExpTest.h
libc/test/src/math/ldexp_test.cpp
libc/test/src/math/ldexpf_test.cpp
libc/test/src/math/ldexpl_test.cpp
Modified:
libc/config/linux/aarch64/entrypoints.txt
libc/config/linux/x86_64/entrypoints.txt
libc/spec/stdc.td
libc/src/math/CMakeLists.txt
libc/test/src/math/CMakeLists.txt
libc/utils/FPUtil/ManipulationFunctions.h
libc/utils/FPUtil/NormalFloat.h
Removed:
################################################################################
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 6ee0e98541c4..3a34d95b36e1 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -71,6 +71,9 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.ilogb
libc.src.math.ilogbf
libc.src.math.ilogbl
+ libc.src.math.ldexp
+ libc.src.math.ldexpf
+ libc.src.math.ldexpl
libc.src.math.logb
libc.src.math.logbf
libc.src.math.logbl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index c1a80288a6a6..a0773ef0fce6 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -104,6 +104,9 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.ilogb
libc.src.math.ilogbf
libc.src.math.ilogbl
+ libc.src.math.ldexp
+ libc.src.math.ldexpf
+ libc.src.math.ldexpl
libc.src.math.logb
libc.src.math.logbf
libc.src.math.logbl
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index c587f5b73013..7f70e17cfd52 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -284,6 +284,10 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"ilogbf", RetValSpec<IntType>, [ArgSpec<FloatType>]>,
FunctionSpec<"ilogbl", RetValSpec<IntType>, [ArgSpec<LongDoubleType>]>,
+ FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
+ FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
+ FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
+
FunctionSpec<"logb", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"logbf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
FunctionSpec<"logbl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index b7614cfec612..fd75a3b48bcb 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -378,6 +378,42 @@ add_entrypoint_object(
-O2
)
+add_entrypoint_object(
+ ldexp
+ SRCS
+ ldexp.cpp
+ HDRS
+ ldexp.h
+ DEPENDS
+ libc.utils.FPUtil.fputil
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ ldexpf
+ SRCS
+ ldexpf.cpp
+ HDRS
+ ldexpf.h
+ DEPENDS
+ libc.utils.FPUtil.fputil
+ COMPILE_OPTIONS
+ -O2
+)
+
+add_entrypoint_object(
+ ldexpl
+ SRCS
+ ldexpl.cpp
+ HDRS
+ ldexpl.h
+ DEPENDS
+ libc.utils.FPUtil.fputil
+ COMPILE_OPTIONS
+ -O2
+)
+
add_entrypoint_object(
logb
SRCS
diff --git a/libc/src/math/ldexp.cpp b/libc/src/math/ldexp.cpp
new file mode 100644
index 000000000000..1eefd1037cde
--- /dev/null
+++ b/libc/src/math/ldexp.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexp function ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+double LLVM_LIBC_ENTRYPOINT(ldexp)(double x, int exp) {
+ return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/ldexp.h b/libc/src/math/ldexp.h
new file mode 100644
index 000000000000..74f9a600666f
--- /dev/null
+++ b/libc/src/math/ldexp.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexp -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LDEXP_H
+#define LLVM_LIBC_SRC_MATH_LDEXP_H
+
+namespace __llvm_libc {
+
+double ldexp(double x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LDEXP_H
diff --git a/libc/src/math/ldexpf.cpp b/libc/src/math/ldexpf.cpp
new file mode 100644
index 000000000000..5c4425450430
--- /dev/null
+++ b/libc/src/math/ldexpf.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexpf function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+float LLVM_LIBC_ENTRYPOINT(ldexpf)(float x, int exp) {
+ return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/ldexpf.h b/libc/src/math/ldexpf.h
new file mode 100644
index 000000000000..f30d60155f1f
--- /dev/null
+++ b/libc/src/math/ldexpf.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexpf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LDEXPF_H
+#define LLVM_LIBC_SRC_MATH_LDEXPF_H
+
+namespace __llvm_libc {
+
+float ldexpf(float x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LDEXPF_H
diff --git a/libc/src/math/ldexpl.cpp b/libc/src/math/ldexpl.cpp
new file mode 100644
index 000000000000..10ec66024fc0
--- /dev/null
+++ b/libc/src/math/ldexpl.cpp
@@ -0,0 +1,18 @@
+//===-- Implementation of ldexpl function ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/common.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+
+namespace __llvm_libc {
+
+long double LLVM_LIBC_ENTRYPOINT(ldexpl)(long double x, int exp) {
+ return fputil::ldexp(x, exp);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/ldexpl.h b/libc/src/math/ldexpl.h
new file mode 100644
index 000000000000..bf8435b1a21d
--- /dev/null
+++ b/libc/src/math/ldexpl.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for ldexpl ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_ldexpl_H
+#define LLVM_LIBC_SRC_MATH_ldexpl_H
+
+namespace __llvm_libc {
+
+long double ldexpl(long double x, int exp);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_ldexpl_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 0f5405cb3a26..2220cef00791 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -412,6 +412,48 @@ add_fp_unittest(
libc.utils.FPUtil.fputil
)
+add_fp_unittest(
+ ldexp_test
+ SUITE
+ libc_math_unittests
+ SRCS
+ ldexp_test.cpp
+ HDRS
+ LdExpTest.h
+ DEPENDS
+ libc.include.math
+ libc.src.math.ldexp
+ libc.utils.FPUtil.fputil
+)
+
+add_fp_unittest(
+ ldexpf_test
+ SUITE
+ libc_math_unittests
+ SRCS
+ ldexpf_test.cpp
+ HDRS
+ LdExpTest.h
+ DEPENDS
+ libc.include.math
+ libc.src.math.ldexpf
+ libc.utils.FPUtil.fputil
+)
+
+add_fp_unittest(
+ ldexpl_test
+ SUITE
+ libc_math_unittests
+ SRCS
+ ldexpl_test.cpp
+ HDRS
+ LdExpTest.h
+ DEPENDS
+ libc.include.math
+ libc.src.math.ldexpl
+ libc.utils.FPUtil.fputil
+)
+
add_fp_unittest(
logb_test
SUITE
diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h
new file mode 100644
index 000000000000..e1976d8714f5
--- /dev/null
+++ b/libc/test/src/math/LdExpTest.h
@@ -0,0 +1,131 @@
+//===-- Utility class to test different flavors of ldexp --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
+#define LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
+
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/NormalFloat.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+#include <math.h>
+#include <stdint.h>
+
+template <typename T>
+class LdExpTestTemplate : public __llvm_libc::testing::Test {
+ using FPBits = __llvm_libc::fputil::FPBits<T>;
+ using NormalFloat = __llvm_libc::fputil::NormalFloat<T>;
+ using UIntType = typename FPBits::UIntType;
+ static constexpr UIntType mantissaWidth =
+ __llvm_libc::fputil::MantissaWidth<T>::value;
+ // A normalized mantissa to be used with tests.
+ static constexpr UIntType mantissa = NormalFloat::one + 0x1234;
+
+ const T zero = __llvm_libc::fputil::FPBits<T>::zero();
+ const T negZero = __llvm_libc::fputil::FPBits<T>::negZero();
+ const T inf = __llvm_libc::fputil::FPBits<T>::inf();
+ const T negInf = __llvm_libc::fputil::FPBits<T>::negInf();
+ const T nan = __llvm_libc::fputil::FPBits<T>::buildNaN(1);
+
+public:
+ typedef T (*LdExpFunc)(T, int);
+
+ void testSpecialNumbers(LdExpFunc func) {
+ int expArray[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX};
+ for (int exp : expArray) {
+ ASSERT_FP_EQ(zero, func(zero, exp));
+ ASSERT_FP_EQ(negZero, func(negZero, exp));
+ ASSERT_FP_EQ(inf, func(inf, exp));
+ ASSERT_FP_EQ(negInf, func(negInf, exp));
+ ASSERT_NE(isnan(func(nan, exp)), 0);
+ }
+ }
+
+ void testPowersOfTwo(LdExpFunc func) {
+ int32_t expArray[5] = {1, 2, 3, 4, 5};
+ int32_t valArray[6] = {1, 2, 4, 8, 16, 32};
+ for (int32_t exp : expArray) {
+ for (int32_t val : valArray) {
+ ASSERT_FP_EQ(T(val << exp), func(T(val), exp));
+ ASSERT_FP_EQ(T(-1 * (val << exp)), func(T(-val), exp));
+ }
+ }
+ }
+
+ void testOverflow(LdExpFunc func) {
+ NormalFloat x(FPBits::maxExponent - 10, NormalFloat::one + 0xF00BA, 0);
+ for (int32_t exp = 10; exp < 100; ++exp) {
+ ASSERT_FP_EQ(inf, func(T(x), exp));
+ ASSERT_FP_EQ(negInf, func(-T(x), exp));
+ }
+ }
+
+ void testUnderflowToZeroOnNormal(LdExpFunc func) {
+ // In this test, we pass a normal number to func and expect zero
+ // to be returned due to underflow.
+ int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
+ int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
+ baseExponent + 2, baseExponent + 1};
+ T x = NormalFloat(0, mantissa, 0);
+ for (int32_t exp : expArray) {
+ ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
+ }
+ }
+
+ void testUnderflowToZeroOnSubnormal(LdExpFunc func) {
+ // In this test, we pass a subnormal number to func and expect zero
+ // to be returned due to underflow.
+ int32_t baseExponent = FPBits::exponentBias + mantissaWidth;
+ int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3,
+ baseExponent + 2, baseExponent + 1};
+ T x = NormalFloat(-FPBits::exponentBias, mantissa, 0);
+ for (int32_t exp : expArray) {
+ ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : negZero);
+ }
+ }
+
+ void testNormalOperation(LdExpFunc func) {
+ T valArray[] = {
+ // Normal numbers
+ NormalFloat(100, mantissa, 0), NormalFloat(-100, mantissa, 0),
+ NormalFloat(100, mantissa, 1), NormalFloat(-100, mantissa, 1),
+ // Subnormal numbers
+ NormalFloat(-FPBits::exponentBias, mantissa, 0),
+ NormalFloat(-FPBits::exponentBias, mantissa, 1)};
+ for (int32_t exp = 0; exp <= static_cast<int32_t>(mantissaWidth); ++exp) {
+ for (T x : valArray) {
+ // We compare the result of ldexp with the result
+ // of the native multiplication/division instruction.
+ ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp));
+ ASSERT_FP_EQ(func(x, -exp), x / (UIntType(1) << exp));
+ }
+ }
+
+ // Normal which trigger mantissa overflow.
+ T x = NormalFloat(-FPBits::exponentBias + 1, 2 * NormalFloat::one - 1, 0);
+ ASSERT_FP_EQ(func(x, -1), x / 2);
+ ASSERT_FP_EQ(func(-x, -1), -x / 2);
+ }
+};
+
+#define LIST_LDEXP_TESTS(T, func) \
+ using LdExpTest = LdExpTestTemplate<T>; \
+ TEST_F(LdExpTest, SpecialNumbers) { testSpecialNumbers(&func); } \
+ TEST_F(LdExpTest, PowersOfTwo) { testPowersOfTwo(&func); } \
+ TEST_F(LdExpTest, OverFlow) { testOverflow(&func); } \
+ TEST_F(LdExpTest, UnderflowToZeroOnNormal) { \
+ testUnderflowToZeroOnNormal(&func); \
+ } \
+ TEST_F(LdExpTest, UnderflowToZeroOnSubnormal) { \
+ testUnderflowToZeroOnSubnormal(&func); \
+ } \
+ TEST_F(LdExpTest, NormalOperation) { testNormalOperation(&func); }
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_LDEXPTEST_H
diff --git a/libc/test/src/math/ldexp_test.cpp b/libc/test/src/math/ldexp_test.cpp
new file mode 100644
index 000000000000..0f5974c018f7
--- /dev/null
+++ b/libc/test/src/math/ldexp_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexp.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(double, __llvm_libc::ldexp)
diff --git a/libc/test/src/math/ldexpf_test.cpp b/libc/test/src/math/ldexpf_test.cpp
new file mode 100644
index 000000000000..d9a44b3f4125
--- /dev/null
+++ b/libc/test/src/math/ldexpf_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexpf ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexpf.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(float, __llvm_libc::ldexpf)
diff --git a/libc/test/src/math/ldexpl_test.cpp b/libc/test/src/math/ldexpl_test.cpp
new file mode 100644
index 000000000000..69444b6edbaf
--- /dev/null
+++ b/libc/test/src/math/ldexpl_test.cpp
@@ -0,0 +1,21 @@
+//===-- Unittests for ldexpl ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "LdExpTest.h"
+
+#include "include/math.h"
+#include "src/math/ldexpl.h"
+#include "utils/CPP/Functional.h"
+#include "utils/FPUtil/FPBits.h"
+#include "utils/FPUtil/ManipulationFunctions.h"
+#include "utils/FPUtil/TestHelpers.h"
+#include "utils/UnitTest/Test.h"
+
+#include <limits.h>
+
+LIST_LDEXP_TESTS(long double, __llvm_libc::ldexpl)
diff --git a/libc/utils/FPUtil/ManipulationFunctions.h b/libc/utils/FPUtil/ManipulationFunctions.h
index bfd24c2cc481..6ca33e859d65 100644
--- a/libc/utils/FPUtil/ManipulationFunctions.h
+++ b/libc/utils/FPUtil/ManipulationFunctions.h
@@ -116,6 +116,30 @@ static inline T logb(T x) {
return normal.exponent;
}
+// Returns x * 2^exp, rounded to T; overflow gives +/-inf and underflow +/-0.
+template <typename T,
+ cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, int> = 0>
+static inline T ldexp(T x, int exp) {
+ FPBits<T> bits(x);
+ if (bits.isZero() || bits.isInfOrNaN() || exp == 0)
+ return x;
+
+ // Bound |exp| so that adding it to NormalFloat's int32_t exponent cannot roll
+ // over; the limit spans the exponent range plus the subnormal shift range.
+ constexpr int expLimit = FPBits<T>::maxExponent + MantissaWidth<T>::value + 1;
+ if (exp > expLimit)
+ return bits.sign ? FPBits<T>::negInf() : FPBits<T>::inf();
+
+ // Similarly on the negative side.
+ if (exp < -expLimit)
+ return bits.sign ? FPBits<T>::negZero() : FPBits<T>::zero();
+
+ // For all other values, NormalFloat to T conversion handles it the right way.
+ NormalFloat<T> normal(bits);
+ normal.exponent += exp;
+ return normal;
+}
+
} // namespace fputil
} // namespace __llvm_libc
diff --git a/libc/utils/FPUtil/NormalFloat.h b/libc/utils/FPUtil/NormalFloat.h
index e0e691125431..f71951941d81 100644
--- a/libc/utils/FPUtil/NormalFloat.h
+++ b/libc/utils/FPUtil/NormalFloat.h
@@ -93,30 +93,47 @@ template <typename T> struct NormalFloat {
// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
if (biasedExponent > maxExponentValue) {
- // TODO: Should infinity with the correct sign be returned?
- return FPBits<T>::buildNaN(1);
+ return sign ? FPBits<T>::negInf() : FPBits<T>::inf();
}
FPBits<T> result(T(0.0));
+ result.sign = sign;
constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
if (exponent < subnormalExponent) {
unsigned shift = subnormalExponent - exponent;
- if (shift <= MantissaWidth<T>::value) {
+ // Since exponent < subnormalExponent, shift is strictly greater than
+ // zero.
+ if (shift <= MantissaWidth<T>::value + 1) {
// Generate a subnormal number. Might lead to loss of precision.
+ // We round to nearest and round halfway cases to even.
+ const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
+ const UIntType shiftOutValue = mantissa & shiftOutMask;
+ const UIntType halfwayValue = UIntType(1) << (shift - 1);
result.exponent = 0;
result.mantissa = mantissa >> shift;
- result.sign = sign;
+ UIntType newMantissa = result.mantissa;
+ if (shiftOutValue > halfwayValue) {
+ newMantissa += 1;
+ } else if (shiftOutValue == halfwayValue) {
+ // Round to even.
+ if (result.mantissa & 0x1)
+ newMantissa += 1;
+ }
+ result.mantissa = newMantissa;
+ // Adding 1 to mantissa can lead to overflow. This can only happen if
+ // mantissa was all ones (0b111..11). For such a case, we will carry
+ // the overflow into the exponent.
+ if (newMantissa == one)
+ result.exponent = 1;
return result;
} else {
- // TODO: Should zero with the correct sign be returned?
- return FPBits<T>::buildNaN(1);
+ return result;
}
}
result.exponent = exponent + FPBits<T>::exponentBias;
result.mantissa = mantissa;
- result.sign = sign;
return result;
}
@@ -192,32 +209,50 @@ template <> inline NormalFloat<long double>::operator long double() const {
// Max exponent is of the form 0xFF...E. That is why -2 and not -1.
constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2;
if (biasedExponent > maxExponentValue) {
- // TODO: Should infinity with the correct sign be returned?
- return FPBits<long double>::buildNaN(1);
+ return sign ? FPBits<long double>::negInf() : FPBits<long double>::inf();
}
FPBits<long double> result(0.0l);
+ result.sign = sign;
constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
if (exponent < subnormalExponent) {
unsigned shift = subnormalExponent - exponent;
- if (shift <= MantissaWidth<long double>::value) {
+ if (shift <= MantissaWidth<long double>::value + 1) {
// Generate a subnormal number. Might lead to loss of precision.
+ // We round to nearest and round halfway cases to even.
+ const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
+ const UIntType shiftOutValue = mantissa & shiftOutMask;
+ const UIntType halfwayValue = UIntType(1) << (shift - 1);
result.exponent = 0;
result.mantissa = mantissa >> shift;
- result.implicitBit = 0;
- result.sign = sign;
+ UIntType newMantissa = result.mantissa;
+ if (shiftOutValue > halfwayValue) {
+ newMantissa += 1;
+ } else if (shiftOutValue == halfwayValue) {
+ // Round to even.
+ if (result.mantissa & 0x1)
+ newMantissa += 1;
+ }
+ result.mantissa = newMantissa;
+ // Adding 1 to mantissa can lead to overflow. This can only happen if
+ // mantissa was all ones (0b111..11). For such a case, we will carry
+ // the overflow into the exponent and set the implicit bit to 1.
+ if (newMantissa == one) {
+ result.exponent = 1;
+ result.implicitBit = 1;
+ } else {
+ result.implicitBit = 0;
+ }
return result;
} else {
- // TODO: Should zero with the correct sign be returned?
- return FPBits<long double>::buildNaN(1);
+ return result;
}
}
result.exponent = biasedExponent;
result.mantissa = mantissa;
result.implicitBit = 1;
- result.sign = sign;
return result;
}
#endif
More information about the libc-commits
mailing list