[libc-commits] [libc] ee89927 - [libc] Implement double precision FMA for targets without FMA instructions.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Fri Jun 10 17:57:41 PDT 2022
Author: Tue Ly
Date: 2022-06-10T20:57:27-04:00
New Revision: ee8992770754d186c9217d35cccd339efc55a36b
URL: https://github.com/llvm/llvm-project/commit/ee8992770754d186c9217d35cccd339efc55a36b
DIFF: https://github.com/llvm/llvm-project/commit/ee8992770754d186c9217d35cccd339efc55a36b.diff
LOG: [libc] Implement double precision FMA for targets without FMA instructions.
Implement double precision FMA (Fused Multiply-Add) for targets without
FMA instructions using __uint128_t to store the intermediate results.
Reviewed By: michaelrj, sivachandra
Differential Revision: https://reviews.llvm.org/D124495
Added:
Modified:
libc/src/__support/FPUtil/FMA.h
libc/src/__support/FPUtil/generic/FMA.h
libc/src/math/CMakeLists.txt
libc/test/src/math/CMakeLists.txt
libc/test/src/math/FmaTest.h
libc/test/src/math/fma_test.cpp
libc/test/src/math/fmaf_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h
index 6823dd0f897b5..a3c1729dbe774 100644
--- a/libc/src/__support/FPUtil/FMA.h
+++ b/libc/src/__support/FPUtil/FMA.h
@@ -27,11 +27,7 @@
namespace __llvm_libc {
namespace fputil {
-// We have a generic implementation available only for single precision fma as
-// we restrict it to float values for now.
-template <typename T>
-static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
- T z) {
+template <typename T> static inline T fma(T x, T y, T z) {
return generic::fma(x, y, z);
}
diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h
index 78b640c2c1a13..3dad2adf10df9 100644
--- a/libc/src/__support/FPUtil/generic/FMA.h
+++ b/libc/src/__support/FPUtil/generic/FMA.h
@@ -9,16 +9,24 @@
#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMA_H
+#include "src/__support/CPP/Bit.h"
#include "src/__support/CPP/TypeTraits.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/FloatProperties.h"
+#include "src/__support/FPUtil/builtin_wrappers.h"
+#include "src/__support/common.h"
namespace __llvm_libc {
namespace fputil {
namespace generic {
-template <typename T>
-static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
- T z) {
+template <typename T> static inline T fma(T x, T y, T z);
+
+// TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes.
+// The implementation below only is only correct for the default rounding mode,
+// round-to-nearest tie-to-even.
+template <> inline float fma<float>(float x, float y, float z) {
// Product is exact.
double prod = static_cast<double>(x) * static_cast<double>(y);
double z_d = static_cast<double>(z);
@@ -66,6 +74,215 @@ static inline cpp::EnableIfType<cpp::IsSame<T, float>::Value, T> fma(T x, T y,
return static_cast<float>(static_cast<double>(bit_sum));
}
+namespace internal {
+
+// Extract the sticky bits and shift the `mantissa` to the right by
+// `shift_length`.
+static inline bool shift_mantissa(int shift_length, __uint128_t &mant) {
+ if (shift_length >= 128) {
+ mant = 0;
+ return true; // prod_mant is non-zero.
+ }
+ __uint128_t mask = (__uint128_t(1) << shift_length) - 1;
+ bool sticky_bits = (mant & mask) != 0;
+ mant >>= shift_length;
+ return sticky_bits;
+}
+
+} // namespace internal
+
+template <> inline double fma<double>(double x, double y, double z) {
+ using FPBits = fputil::FPBits<double>;
+ using FloatProp = fputil::FloatProperties<double>;
+
+ if (unlikely(x == 0 || y == 0 || z == 0)) {
+ return x * y + z;
+ }
+
+ int x_exp = 0;
+ int y_exp = 0;
+ int z_exp = 0;
+
+ // Normalize denormal inputs.
+ if (unlikely(FPBits(x).get_unbiased_exponent() == 0)) {
+ x_exp -= 52;
+ x *= 0x1.0p+52;
+ }
+ if (unlikely(FPBits(y).get_unbiased_exponent() == 0)) {
+ y_exp -= 52;
+ y *= 0x1.0p+52;
+ }
+ if (unlikely(FPBits(z).get_unbiased_exponent() == 0)) {
+ z_exp -= 52;
+ z *= 0x1.0p+52;
+ }
+
+ FPBits x_bits(x), y_bits(y), z_bits(z);
+ bool x_sign = x_bits.get_sign();
+ bool y_sign = y_bits.get_sign();
+ bool z_sign = z_bits.get_sign();
+ bool prod_sign = x_sign != y_sign;
+ x_exp += x_bits.get_unbiased_exponent();
+ y_exp += y_bits.get_unbiased_exponent();
+ z_exp += z_bits.get_unbiased_exponent();
+
+ if (unlikely(x_exp == FPBits::MAX_EXPONENT || y_exp == FPBits::MAX_EXPONENT ||
+ z_exp == FPBits::MAX_EXPONENT))
+ return x * y + z;
+
+ // Extract mantissa and append hidden leading bits.
+ __uint128_t x_mant = x_bits.get_mantissa() | FPBits::MIN_NORMAL;
+ __uint128_t y_mant = y_bits.get_mantissa() | FPBits::MIN_NORMAL;
+ __uint128_t z_mant = z_bits.get_mantissa() | FPBits::MIN_NORMAL;
+
+ // If the exponent of the product x*y > the exponent of z, then no extra
+ // precision beside the entire product x*y is needed. On the other hand, when
+ // the exponent of z >= the exponent of the product x*y, the worst-case that
+ // we need extra precision is when there is cancellation and the most
+ // significant bit of the product is aligned exactly with the second most
+ // significant bit of z:
+ // z : 10aa...a
+ // - prod : 1bb...bb....b
+ // In that case, in order to store the exact result, we need at least
+ // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54.
+ // Overall, before aligning the mantissas and exponents, we can simply left-
+ // shift the mantissa of z by at least 54, and left-shift the product of x*y
+ // by (that amount - 52). After that, it is enough to align the least
+ // significant bit, given that we keep track of the round and sticky bits
+ // after the least significant bit.
+ // We pick shifting z_mant by 64 bits so that technically we can simply use
+ // the original mantissa as high part when constructing 128-bit z_mant. So the
+ // mantissa of prod will be left-shifted by 64 - 54 = 10 initially.
+
+ __uint128_t prod_mant = x_mant * y_mant << 10;
+ int prod_lsb_exp =
+ x_exp + y_exp -
+ (FPBits::EXPONENT_BIAS + 2 * MantissaWidth<double>::VALUE + 10);
+
+ z_mant <<= 64;
+ int z_lsb_exp = z_exp - (MantissaWidth<double>::VALUE + 64);
+ bool round_bit = false;
+ bool sticky_bits = false;
+ bool z_shifted = false;
+
+ // Align exponents.
+ if (prod_lsb_exp < z_lsb_exp) {
+ sticky_bits = internal::shift_mantissa(z_lsb_exp - prod_lsb_exp, prod_mant);
+ prod_lsb_exp = z_lsb_exp;
+ } else if (z_lsb_exp < prod_lsb_exp) {
+ z_shifted = true;
+ sticky_bits = internal::shift_mantissa(prod_lsb_exp - z_lsb_exp, z_mant);
+ }
+
+ // Perform the addition:
+ // (-1)^prod_sign * prod_mant + (-1)^z_sign * z_mant.
+ // The final result will be stored in prod_sign and prod_mant.
+ if (prod_sign == z_sign) {
+ // Effectively an addition.
+ prod_mant += z_mant;
+ } else {
+ // Subtraction cases.
+ if (prod_mant >= z_mant) {
+ if (z_shifted && sticky_bits) {
+ // Add 1 more to the subtrahend so that the sticky bits remain
+ // positive. This would simplify the rounding logic.
+ ++z_mant;
+ }
+ prod_mant -= z_mant;
+ } else {
+ if (!z_shifted && sticky_bits) {
+ // Add 1 more to the subtrahend so that the sticky bits remain
+ // positive. This would simplify the rounding logic.
+ ++prod_mant;
+ }
+ prod_mant = z_mant - prod_mant;
+ prod_sign = z_sign;
+ }
+ }
+
+ uint64_t result = 0;
+ int r_exp = 0; // Unbiased exponent of the result
+
+ // Normalize the result.
+ if (prod_mant != 0) {
+ uint64_t prod_hi = static_cast<uint64_t>(prod_mant >> 64);
+ int lead_zeros =
+ prod_hi ? clz(prod_hi) : 64 + clz(static_cast<uint64_t>(prod_mant));
+ // Move the leading 1 to the most significant bit.
+ prod_mant <<= lead_zeros;
+ // The lower 64 bits are always sticky bits after moving the leading 1 to
+ // the most significant bit.
+ sticky_bits |= (static_cast<uint64_t>(prod_mant) != 0);
+ result = static_cast<uint64_t>(prod_mant >> 64);
+ // Change prod_lsb_exp the be the exponent of the least significant bit of
+ // the result.
+ prod_lsb_exp += 64 - lead_zeros;
+ r_exp = prod_lsb_exp + 63;
+
+ if (r_exp > 0) {
+ // The result is normal. We will shift the mantissa to the right by
+ // 63 - 52 = 11 bits (from the locations of the most significant bit).
+ // Then the rounding bit will correspond the the 11th bit, and the lowest
+ // 10 bits are merged into sticky bits.
+ round_bit = (result & 0x0400ULL) != 0;
+ sticky_bits |= (result & 0x03ffULL) != 0;
+ result >>= 11;
+ } else {
+ if (r_exp < -52) {
+ // The result is smaller than 1/2 of the smallest denormal number.
+ sticky_bits = true; // since the result is non-zero.
+ result = 0;
+ } else {
+ // The result is denormal.
+ uint64_t mask = 1ULL << (11 - r_exp);
+ round_bit = (result & mask) != 0;
+ sticky_bits |= (result & (mask - 1)) != 0;
+ if (r_exp > -52)
+ result >>= 12 - r_exp;
+ else
+ result = 0;
+ }
+
+ r_exp = 0;
+ }
+ } else {
+ // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly.
+ prod_sign = false;
+ }
+
+ // Finalize the result.
+ int round_mode = fputil::get_round();
+ if (unlikely(r_exp >= FPBits::MAX_EXPONENT)) {
+ if ((round_mode == FE_TOWARDZERO) ||
+ (round_mode == FE_UPWARD && prod_sign) ||
+ (round_mode == FE_DOWNWARD && !prod_sign)) {
+ result = FPBits::MAX_NORMAL;
+ return prod_sign ? -bit_cast<double>(result) : bit_cast<double>(result);
+ }
+ return prod_sign ? static_cast<double>(FPBits::neg_inf())
+ : static_cast<double>(FPBits::inf());
+ }
+
+ // Remove hidden bit and append the exponent field and sign bit.
+ result = (result & FloatProp::MANTISSA_MASK) |
+ (static_cast<uint64_t>(r_exp) << FloatProp::MANTISSA_WIDTH);
+ if (prod_sign) {
+ result |= FloatProp::SIGN_MASK;
+ }
+
+ // Rounding.
+ if (round_mode == FE_TONEAREST) {
+ if (round_bit && (sticky_bits || ((result & 1) != 0)))
+ ++result;
+ } else if ((round_mode == FE_UPWARD && !prod_sign) ||
+ (round_mode == FE_DOWNWARD && prod_sign)) {
+ if (round_bit || sticky_bits)
+ ++result;
+ }
+
+ return bit_cast<double>(result);
+}
+
} // namespace generic
} // namespace fputil
} // namespace __llvm_libc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index e33022c7e1512..60ccfb2159a1f 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -64,8 +64,6 @@ add_entrypoint_object(
libc.src.__support.FPUtil.fma
COMPILE_OPTIONS
-O3
- FLAGS
- FMA_OPT__ONLY
)
add_math_entrypoint_object(ceil)
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 68a9aed825a9f..ddc02b2fc7786 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1151,6 +1151,8 @@ add_fp_unittest(
libc.src.__support.FPUtil.fputil
)
+# TODO(lntue): The current implementation of fputil::general::fma<float> is only
+# correctly rounded for the default rounding mode round-to-nearest tie-to-even.
add_fp_unittest(
fmaf_test
NEED_MPFR
@@ -1162,6 +1164,8 @@ add_fp_unittest(
libc.include.math
libc.src.math.fmaf
libc.src.__support.FPUtil.fputil
+ FLAGS
+ FMA_OPT__ONLY
)
add_fp_unittest(
diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h
index 2d04989400f48..959c4e837a778 100644
--- a/libc/test/src/math/FmaTest.h
+++ b/libc/test/src/math/FmaTest.h
@@ -61,6 +61,9 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
// Test overflow.
T z = T(FPBits(FPBits::MAX_NORMAL));
EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z);
+ // Exact cancellation.
+ EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0));
+ EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0));
}
void test_subnormal_range(Func func) {
@@ -72,9 +75,9 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
v += STEP, w -= STEP) {
T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)),
z = T(FPBits(w));
- T result = func(x, y, z);
mpfr::TernaryInput<T> input{x, y, z};
- ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
+ 0.5);
}
}
@@ -86,9 +89,9 @@ class FmaTestTemplate : public __llvm_libc::testing::Test {
v += STEP, w -= STEP) {
T x = T(FPBits(v)), y = T(FPBits(w)),
z = T(FPBits(get_random_bit_pattern()));
- T result = func(x, y, z);
mpfr::TernaryInput<T> input{x, y, z};
- ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5);
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z),
+ 0.5);
}
}
};
diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp
index a9388cca72f79..be6b138342d5c 100644
--- a/libc/test/src/math/fma_test.cpp
+++ b/libc/test/src/math/fma_test.cpp
@@ -10,7 +10,271 @@
#include "src/math/fma.h"
-using LlvmLibcFmaTest = FmaTestTemplate<double>;
+struct Inputs {
+ double a, b, c;
+};
+
+struct LlvmLibcFmaTest : public FmaTestTemplate<double> {
+ void test_more_values() {
+ constexpr int N = 236;
+ constexpr Inputs INPUTS[N] = {
+ {0x1p+0, 0x2p+0, 0x3p+0},
+ {0x1.4p+0, 0xcp-4, 0x1p-4},
+ {0x0p+0, 0x0p+0, 0x0p+0},
+ {0x1p+0, 0x0p+0, 0x0p+0},
+ {0x0p+0, 0x1p+0, 0x0p+0},
+ {0x1p+0, 0x1p+0, 0x1p+0},
+ {0x0p+0, 0x0p+0, 0x1p+0},
+ {0x0p+0, 0x0p+0, 0x2p+0},
+ {0x0p+0, 0x0p+0, 0xf.fffffp+124},
+ {0x0p+0, 0x0p+0, 0xf.ffffffffffff8p+1020},
+ {0x0p+0, 0x1p+0, 0x1p+0},
+ {0x1p+0, 0x0p+0, 0x1p+0},
+ {0x0p+0, 0x1p+0, 0x2p+0},
+ {0x1p+0, 0x0p+0, 0x2p+0},
+ {0x0p+0, 0x1p+0, 0xf.fffffp+124},
+ {0x0p+0, 0x1p+0, 0xf.ffffffffffff8p+1020},
+ {0x1p+0, 0x0p+0, 0xf.fffffp+124},
+ {0x1p+0, 0x0p+0, 0xf.ffffffffffff8p+1020},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0x4p-128, 0x4p-128, 0x0p+0},
+ {0x4p-128, 0x4p-1024, 0x0p+0},
+ {0x4p-128, 0x8p-972, 0x0p+0},
+ {0x4p-1024, 0x4p-128, 0x0p+0},
+ {0x4p-1024, 0x4p-1024, 0x0p+0},
+ {0x4p-1024, 0x8p-972, 0x0p+0},
+ {0x8p-972, 0x4p-128, 0x0p+0},
+ {0x8p-972, 0x4p-1024, 0x0p+0},
+ {0x8p-972, 0x8p-972, 0x0p+0},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-128},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.fffffp+124, 0x8p-972},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.fffffp+124, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.fffffp+124, 0x8p-972},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-128},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x4p-1024},
+ {0xf.ffffffffffff8p+1020, 0xf.ffffffffffff8p+1020, 0x8p-972},
+ {0x2.fffp+12, 0x1.000002p+0, 0x1.ffffp-24},
+ {0x1.fffp+0, 0x1.00001p+0, 0x1.fffp+0},
+ {0xc.d5e6fp+124, 0x2.6af378p-128, 0x1.f08948p+0},
+ {0x1.9abcdep+100, 0x2.6af378p-128, 0x3.e1129p-28},
+ {0xf.fffffp+124, 0x1.001p+0, 0xf.fffffp+124},
+ {0xf.fffffp+124, 0x1.fffffep+0, 0xf.fffffp+124},
+ {0xf.fffffp+124, 0x2p+0, 0xf.fffffp+124},
+ {0x5p-128, 0x8.00002p-4, 0x1p-128},
+ {0x7.ffffep-128, 0x8.00001p-4, 0x8p-152},
+ {0x8p-152, 0x8p-4, 0x3.fffff8p-128},
+ {0x8p-152, 0x8.8p-4, 0x3.fffff8p-128},
+ {0x8p-152, 0x8p-152, 0x8p+124},
+ {0x8p-152, 0x8p-152, 0x4p-128},
+ {0x8p-152, 0x8p-152, 0x3.fffff8p-128},
+ {0x8p-152, 0x8p-152, 0x8p-152},
+ {0xf.ffp-4, 0xf.ffp-4, 0xf.fep-4},
+ {0x4.000008p-128, 0x4.000008p-28, 0x8p+124},
+ {0x4.000008p-128, 0x4.000008p-28, 0x8p+100},
+ {0x2.fep+12, 0x1.0000000000001p+0, 0x1.ffep-48},
+ {0x1.fffp+0, 0x1.0000000000001p+0, 0x1.fffp+0},
+ {0x1.0000002p+0, 0xf.fffffep-4, 0x1p-300},
+ {0xe.f56df7797f768p+1020, 0x3.7ab6fbbcbfbb4p-1024,
+ 0x3.40bf1803497f6p+0},
+ {0x1.deadbeef2feedp+900, 0x3.7ab6fbbcbfbb4p-1024,
+ 0x6.817e300692fecp-124},
+ {0xf.ffffffffffff8p+1020, 0x1.001p+0, 0xf.ffffffffffff8p+1020},
+ {0xf.ffffffffffff8p+1020, 0x1.fffffffffffffp+0,
+ 0xf.ffffffffffff8p+1020},
+ {0xf.ffffffffffff8p+1020, 0x2p+0, 0xf.ffffffffffff8p+1020},
+ {0x5.a827999fcef3p-540, 0x5.a827999fcef3p-540, 0x0p+0},
+ {0x3.bd5b7dde5fddap-496, 0x3.bd5b7dde5fddap-496, 0xd.fc352bc352bap-992},
+ {0x3.bd5b7dde5fddap-504, 0x3.bd5b7dde5fddap-504,
+ 0xd.fc352bc352bap-1008},
+ {0x8p-540, 0x4p-540, 0x4p-1076},
+ {0x1.7fffff8p-968, 0x4p-108, 0x4p-1048},
+ {0x2.8000008p-968, 0x4p-108, 0x4p-1048},
+ {0x2.8p-968, 0x4p-108, 0x4p-1048},
+ {0x2.33956cdae7c2ep-960, 0x3.8e211518bfea2p-108,
+ 0x2.02c2b59766d9p-1024},
+ {0x3.a5d5dadd1d3a6p-980, 0x2.9c0cd8c5593bap-64, 0x2.49179ac00d15p-1024},
+ {0x2.2a7aca1773e0cp-908, 0x9.6809186a42038p-128, 0x2.c9e356b3f0fp-1024},
+ {0x3.ffffffffffffep-712, 0x3.ffffffffffffep-276,
+ 0x3.fffffc0000ffep-984},
+ {0x5p-1024, 0x8.000000000001p-4, 0x1p-1024},
+ {0x7.ffffffffffffp-1024, 0x8.0000000000008p-4, 0x4p-1076},
+ {0x4p-1076, 0x8p-4, 0x3.ffffffffffffcp-1024},
+ {0x4p-1076, 0x8.8p-4, 0x3.ffffffffffffcp-1024},
+ {0x4p-1076, 0x4p-1076, 0x8p+1020},
+ {0x4p-1076, 0x4p-1076, 0x4p-1024},
+ {0x4p-1076, 0x4p-1076, 0x3.ffffffffffffcp-1024},
+ {0x4p-1076, 0x4p-1076, 0x4p-1076},
+ {0xf.ffffffffffff8p-4, 0xf.ffffffffffff8p-4, 0xf.ffffffffffffp-4},
+ {0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x8p+1020},
+ {0x4.0000000000004p-1024, 0x2.0000000000002p-56, 0x4p+968},
+ {0x7.fffff8p-128, 0x3.fffffcp+24, 0xf.fffffp+124},
+ {0x7.ffffffffffffcp-1024, 0x7.ffffffffffffcp+52,
+ 0xf.ffffffffffff8p+1020},
+ };
+
+ for (int i = 0; i < N; ++i) {
+ for (int signs = 0; signs < 7; ++signs) {
+ double a = (signs & 4) ? -INPUTS[i].a : INPUTS[i].a;
+ double b = (signs & 2) ? -INPUTS[i].b : INPUTS[i].b;
+ double c = (signs & 1) ? -INPUTS[i].c : INPUTS[i].c;
+ mpfr::TernaryInput<double> input{a, b, c};
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input,
+ __llvm_libc::fma(a, b, c), 0.5);
+ }
+ }
+ }
+};
TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
test_special_numbers(&__llvm_libc::fma);
@@ -21,3 +285,5 @@ TEST_F(LlvmLibcFmaTest, SubnormalRange) {
}
TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); }
+
+TEST_F(LlvmLibcFmaTest, ExtraValues) { test_more_values(); }
diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp
index 73d009499deb9..8d1cf254701c9 100644
--- a/libc/test/src/math/fmaf_test.cpp
+++ b/libc/test/src/math/fmaf_test.cpp
@@ -10,14 +10,14 @@
#include "src/math/fmaf.h"
-using LlvmLibcFmaTest = FmaTestTemplate<float>;
+using LlvmLibcFmafTest = FmaTestTemplate<float>;
-TEST_F(LlvmLibcFmaTest, SpecialNumbers) {
+TEST_F(LlvmLibcFmafTest, SpecialNumbers) {
test_special_numbers(&__llvm_libc::fmaf);
}
-TEST_F(LlvmLibcFmaTest, SubnormalRange) {
+TEST_F(LlvmLibcFmafTest, SubnormalRange) {
test_subnormal_range(&__llvm_libc::fmaf);
}
-TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); }
+TEST_F(LlvmLibcFmafTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); }
More information about the libc-commits
mailing list