[libc-commits] [libc] d883a4a - [libc] Implement sinf function that is correctly rounded to all rounding modes.
Tue Ly via libc-commits
libc-commits at lists.llvm.org
Fri Jul 22 07:07:48 PDT 2022
Author: Tue Ly
Date: 2022-07-22T10:07:31-04:00
New Revision: d883a4ad02d867e7037bd4ec342016c402484148
URL: https://github.com/llvm/llvm-project/commit/d883a4ad02d867e7037bd4ec342016c402484148
DIFF: https://github.com/llvm/llvm-project/commit/d883a4ad02d867e7037bd4ec342016c402484148.diff
LOG: [libc] Implement sinf function that is correctly rounded to all rounding modes.
Implement sinf function that is correctly rounded to all rounding modes.
- We use a simple range reduction for `pi/16 < |x|` :
Let `k = round(x / pi)` and `y = (x/pi) - k`.
So `k` is an integer and `-0.5 <= y <= 0.5`.
Then
```
sin(x) = sin(y*pi + k*pi)
= (-1)^(k & 1) * sin(y*pi)
~ (-1)^(k & 1) * y * P(y^2)
```
where `y*P(y^2)` is a degree-15 minimax polynomial generated by Sollya with:
```
> P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], [0, 0.5]);
```
- Performance benchmark using perf tool from CORE-MATH project
(https://gitlab.inria.fr/core-math/core-math/-/tree/master) on Ryzen 1700:
Before this patch (not correctly rounded):
```
$ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh sinf
CORE-MATH reciprocal throughput : 17.892
System LIBC reciprocal throughput : 25.559
LIBC reciprocal throughput : 29.381
```
After this patch (correctly rounded):
```
$ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh sinf
CORE-MATH reciprocal throughput : 17.896
System LIBC reciprocal throughput : 25.740
LIBC reciprocal throughput : 27.872
LIBC reciprocal throughput : 20.012 (with `-msse4.2` flag)
LIBC reciprocal throughput : 14.244 (with `-mfma` flag)
```
Reviewed By: zimmermann6
Differential Revision: https://reviews.llvm.org/D123154
Added:
libc/src/__support/FPUtil/except_value_utils.h
libc/src/math/generic/range_reduction.h
libc/src/math/generic/range_reduction_fma.h
Modified:
libc/cmake/modules/LLVMLibCObjectRules.cmake
libc/docs/math.rst
libc/src/__support/FPUtil/CMakeLists.txt
libc/src/math/generic/CMakeLists.txt
libc/src/math/generic/sinf.cpp
libc/test/src/math/exhaustive/CMakeLists.txt
libc/test/src/math/exhaustive/sinf_test.cpp
libc/test/src/math/sinf_test.cpp
utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Removed:
################################################################################
diff --git a/libc/cmake/modules/LLVMLibCObjectRules.cmake b/libc/cmake/modules/LLVMLibCObjectRules.cmake
index a824cad94b7c2..9e825144bfda2 100644
--- a/libc/cmake/modules/LLVMLibCObjectRules.cmake
+++ b/libc/cmake/modules/LLVMLibCObjectRules.cmake
@@ -515,7 +515,7 @@ function(add_entrypoint_object target_name)
list(SORT flags)
if(SHOW_INTERMEDIATE_OBJECTS AND flags)
- message(STATUS "Object library ${fq_target_name} has FLAGS: ${flags}")
+ message(STATUS "Entrypoint object ${fq_target_name} has FLAGS: ${flags}")
endif()
if(NOT ADD_TO_EXPAND_NAME)
diff --git a/libc/docs/math.rst b/libc/docs/math.rst
index b765428734f5e..6480a981acb02 100644
--- a/libc/docs/math.rst
+++ b/libc/docs/math.rst
@@ -164,7 +164,7 @@ log |check|
log10 |check|
log1p |check|
log2 |check|
-sin 0.561 ULPs large
+sin |check| large
sincos 0.776 ULPs large
sqrt |check| |check| |check|
============== ================ =============== ======================
@@ -205,13 +205,13 @@ Performance
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| expm1f | 14 | 53 | 59 | 146 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| fmodf (n) | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+| fmodf | 73 | 263 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
+| | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| fmodf (d) | 9 | 11 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
-+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| fmod (n) | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
-+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| fmod (d) | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+| fmod | 595 | 3297 | - | - | [MIN_NORMAL, MAX_NORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+| +-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
+| | 14 | 13 | - | - | [0, MAX_SUBNORMAL] | i5 mobile | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| hypotf | 25 | 15 | 64 | 49 | :math:`[-10, 10] \times [-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
@@ -223,7 +223,7 @@ Performance
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
| log2f | 13 | 10 | 57 | 46 | :math:`[e^{-1}, e]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
-| sinf | 36 | 31 | 72 | 71 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | |
+| sinf | 14 | 26 | 65 | 59 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
References
diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index a4247fba214bf..6aae1acf3a841 100644
--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -13,6 +13,7 @@ add_header_library(
NormalFloat.h
PlatformDefs.h
builtin_wrappers.h
+ except_value_utils.h
DEPENDS
libc.include.math
libc.include.errno
diff --git a/libc/src/__support/FPUtil/except_value_utils.h b/libc/src/__support/FPUtil/except_value_utils.h
new file mode 100644
index 0000000000000..a8a9ba865fb7e
--- /dev/null
+++ b/libc/src/__support/FPUtil/except_value_utils.h
@@ -0,0 +1,70 @@
+//===-- Common header for helpers to set exceptional values -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
+#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
+
+#include "FEnvImpl.h"
+#include "FPBits.h"
+
+namespace __llvm_libc {
+
+namespace fputil {
+
+template <typename T, int N> struct ExceptionalValues {
+ using UIntType = typename FPBits<T>::UIntType;
+ static constexpr int SIZE = N;
+ // Input bits.
+ UIntType inputs[SIZE];
+ // Output bits contains 4 values:
+ // output[i][0]: output bits corresponding to FE_TOWARDZERO
+ // output[i][1]: offset for FE_UPWARD
+ // output[i][2]: offset for FE_DOWNWARD
+ // output[i][3]: offset for FE_TONEAREST
+ UIntType outputs[SIZE][4];
+};
+
+template <typename T, int N> struct ExceptionChecker {
+ using UIntType = typename FPBits<T>::UIntType;
+ using FPBits = FPBits<T>;
+ using ExceptionalValues = ExceptionalValues<T, N>;
+
+ static bool check_odd_func(const ExceptionalValues &ExceptVals,
+ UIntType x_abs, bool sign, T &result) {
+ for (int i = 0; i < N; ++i) {
+ if (unlikely(x_abs == ExceptVals.inputs[i])) {
+ UIntType out_bits = ExceptVals.outputs[i][0]; // FE_TOWARDZERO
+ switch (fputil::get_round()) {
+ case FE_UPWARD:
+ out_bits +=
+ sign ? ExceptVals.outputs[i][2] : ExceptVals.outputs[i][1];
+ break;
+ case FE_DOWNWARD:
+ out_bits +=
+ sign ? ExceptVals.outputs[i][1] : ExceptVals.outputs[i][2];
+ break;
+ case FE_TONEAREST:
+ out_bits += ExceptVals.outputs[i][3];
+ break;
+ }
+ result = FPBits(out_bits).get_val();
+ if (sign)
+ result = -result;
+
+ return true;
+ }
+ }
+ return false;
+ }
+};
+
+} // namespace fputil
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 41588104cf62f..7b6a378b4616c 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -76,11 +76,16 @@ add_entrypoint_object(
sinf.cpp
HDRS
../sinf.h
+ range_reduction.h
+ range_reduction_fma.h
DEPENDS
- .sincosf_utils
libc.include.math
libc.src.errno.errno
libc.src.__support.FPUtil.fputil
+ libc.src.__support.FPUtil.fma
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
COMPILE_OPTIONS
-O3
)
diff --git a/libc/src/math/generic/range_reduction.h b/libc/src/math/generic/range_reduction.h
new file mode 100644
index 0000000000000..ac28c6f0435e9
--- /dev/null
+++ b/libc/src/math/generic/range_reduction.h
@@ -0,0 +1,131 @@
+//===-- Utilities for trigonometric functions -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+
+namespace __llvm_libc {
+
+namespace generic {
+
+static constexpr uint32_t FAST_PASS_BOUND = 0x4c80'0000U; // 2^26
+
+static constexpr int N_ENTRIES = 8;
+
+// We choose to split bits of 1/pi into 28-bit precision pieces, so that the
+// product of x * ONE_OVER_PI_28[i] is exact.
+// These are generated by Sollya with:
+// > a1 = D(round(1/pi, 28, RN)); a1;
+// > a2 = D(round(1/pi - a1, 28, RN)); a2;
+// > a3 = D(round(1/pi - a1 - a2, 28, RN)); a3;
+// > a4 = D(round(1/pi - a1 - a2 - a3, 28, RN)); a4;
+// ...
+static constexpr double ONE_OVER_PI_28[N_ENTRIES] = {
+ 0x1.45f306ep-2, -0x1.b1bbeaep-33, 0x1.3f84ebp-62, -0x1.7056592p-92,
+ 0x1.c0db62ap-121, -0x1.4cd8778p-150, -0x1.bef806cp-179, 0x1.63abdecp-209};
+
+// Exponents of the least significant bits of the corresponding entries in
+// ONE_OVER_PI_28.
+static constexpr int ONE_OVER_PI_28_LSB_EXP[N_ENTRIES] = {
+ -29, -60, -86, -119, -148, -175, -205, -235};
+
+// Return (k mod 2) and y, where
+// k = round(x / pi) and y = (x / pi) - k.
+static inline int64_t small_range_reduction(double x, double &y) {
+ double prod = x * ONE_OVER_PI_28[0];
+ double kd = fputil::nearest_integer(prod);
+ y = prod - kd;
+ y = fputil::multiply_add(x, ONE_OVER_PI_28[1], y);
+ y = fputil::multiply_add(x, ONE_OVER_PI_28[2], y);
+ return static_cast<int64_t>(kd);
+}
+
+// Return k and y, where
+// k = round(x / pi) and y = (x / pi) - k.
+// For large range, there are at most 2 parts of ONE_OVER_PI_28 contributing to
+// the unit binary digit (k & 1). If the least significant bit of x * the least
+// significant bit of ONE_OVER_PI_28[i] > 1, we can completely ignore
+// ONE_OVER_PI_28[i].
+static inline int64_t large_range_reduction(double x, int x_exp, double &y) {
+ int idx = 0;
+ y = 0;
+ int x_lsb_exp = x_exp - fputil::FloatProperties<float>::MANTISSA_WIDTH;
+
+ // Skipping the first parts of 1/pi such that:
+ // LSB of x * LSB of ONE_OVER_PI_28[i] > 1.
+ while (x_lsb_exp + ONE_OVER_PI_28_LSB_EXP[idx] > 0)
+ ++idx;
+
+ double prod_hi = x * ONE_OVER_PI_28[idx];
+ // Get the integral part of x * ONE_OVER_PI_28[idx]
+ double k_hi = fputil::nearest_integer(prod_hi);
+ // Get the fractional part of x * ONE_OVER_PI_28[idx]
+ double frac = prod_hi - k_hi;
+ double prod_lo = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 1], frac);
+ double k_lo = fputil::nearest_integer(prod_lo);
+
+ // Now y is the fractional parts.
+ y = prod_lo - k_lo;
+ y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 2], y);
+ y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 3], y);
+
+ return static_cast<int64_t>(k_hi + k_lo);
+}
+
+// Exceptional cases.
+static constexpr int N_EXCEPT_SMALL = 4;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_SMALL> SmallExcepts{
+ /* inputs */ {
+ 0x3fa7832a, // x = 0x1.4f0654p0
+ 0x46199998, // x = 0x1.33333p13
+ 0x4afdece4, // x = 0x1.fbd9c8p22
+ 0x4c2332e9, // x = 0x1.4665d2p25
+ },
+ /* outputs (RZ, RU offset, RD offset, RN offset) */
+ {
+ {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
+ {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
+ {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ)
+ {0xbf7fffff, 0, 1,
+ 1}, // x = 0x1.4665d2p25, sin(x) = -0x1.fffffep-1 (RZ)
+ }};
+
+static constexpr int N_EXCEPT_LARGE = 5;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_LARGE> LargeExcepts{
+ /* inputs */ {
+ 0x523947f6, // x = 0x1.728fecp37
+ 0x53b146a6, // x = 0x1.628d4cp40
+ 0x55cafb2a, // x = 0x1.95f654p44
+ 0x6a1976f1, // x = 0x1.32ede2p85
+ 0x77584625, // x = 0x1.b08c4ap111
+ },
+ /* outputs (RZ, RU offset, RD offset, RN offset) */
+ {
+ {0xbf12791d, 0, 1,
+ 1}, // x = 0x1.728fecp37, sin(x) = -0x1.24f23ap-1 (RZ)
+ {0xbf7fffff, 0, 1,
+ 1}, // x = 0x1.628d4cp40, sin(x) = -0x1.fffffep-1 (RZ)
+ {0xbf7e7a16, 0, 1,
+ 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
+ {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ)
+ {0xbf7fffff, 0, 1,
+ 1}, // x = 0x1.b08c4ap111, sin(x) = -0x1.fffffep-1 (RZ)
+ }};
+
+} // namespace generic
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H
diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
new file mode 100644
index 0000000000000..c2a35477bf458
--- /dev/null
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -0,0 +1,137 @@
+//===-- Utilities for trigonometric functions with FMA ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
+
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+
+namespace __llvm_libc {
+
+namespace fma {
+
+static constexpr uint32_t FAST_PASS_BOUND = 0x5880'0000U; // 2^50
+
+// Digits of 1/pi, generated by Sollya with:
+// > a0 = D(1/pi);
+// > a1 = D(1/pi - a0);
+// > a2 = D(1/pi - a0 - a1);
+// > a3 = D(1/pi - a0 - a1 - a2);
+static constexpr double ONE_OVER_PI[5] = {
+ 0x1.45f306dc9c883p-2, -0x1.6b01ec5417056p-56, -0x1.6447e493ad4cep-110,
+ 0x1.e21c820ff28b2p-164, -0x1.508510ea79237p-219};
+
+// Return k and y, where
+// k = round(x / pi) and y = (x / pi) - k.
+// Assume x is non-negative.
+static inline int64_t small_range_reduction(double x, double &y) {
+ double kd = fputil::nearest_integer(x * ONE_OVER_PI[0]);
+ y = fputil::fma(x, ONE_OVER_PI[0], -kd);
+ y = fputil::fma(x, ONE_OVER_PI[1], y);
+ return static_cast<int64_t>(kd);
+}
+
+// Return k and y, where
+// k = round(x / pi) and y = (x / pi) - k.
+static inline int64_t large_range_reduction(double x, int x_exp, double &y) {
+ // 2^50 <= |x| < 2^104
+ if (x_exp < 103) {
+ // - When x < 2^104, the unit bit is contained in the full exact product of
+ // x * ONE_OVER_PI[0].
+ // - When 2^50 <= |x| < 2^55, the unit bit is contained
+ // in the last 8 bits of double(x * ONE_OVER_PI[0]).
+ // - When |x| >= 2^55, the LSB of double(x * ONE_OVER_PI[0]) is at least 2.
+ fputil::FPBits<double> prod_hi(x * ONE_OVER_PI[0]);
+ prod_hi.bits &= (x_exp < 55) ? (~0xffULL) : (~0ULL); // |x| < 2^55
+ double k_hi = fputil::nearest_integer(static_cast<double>(prod_hi));
+ double truncated_prod = fputil::fma(x, ONE_OVER_PI[0], -k_hi);
+ double prod_lo = fputil::fma(x, ONE_OVER_PI[1], truncated_prod);
+ double k_lo = fputil::nearest_integer(prod_lo);
+ y = fputil::fma(x, ONE_OVER_PI[1], truncated_prod - k_lo);
+ y = fputil::fma(x, ONE_OVER_PI[2], y);
+ y = fputil::fma(x, ONE_OVER_PI[3], y);
+
+ return static_cast<int64_t>(k_lo);
+ }
+
+ // - When x >= 2^104, the full exact product of x * ONE_OVER_PI[0] does not
+ // contain the unit bit, so we can ignore it completely.
+ // - When 2^104 <= |x| < 2^109, the unit bit is contained
+ // in the last 8 bits of double(x * ONE_OVER_PI[1]).
+ // - When |x| >= 2^109, the LSB of double(x * ONE_OVER_PI[1]) is at least 2.
+ fputil::FPBits<double> prod_hi(x * ONE_OVER_PI[1]);
+ prod_hi.bits &= (x_exp < 109) ? (~0xffULL) : (~0ULL); // |x| < 2^55
+ double k_hi = fputil::nearest_integer(static_cast<double>(prod_hi));
+ double truncated_prod = fputil::fma(x, ONE_OVER_PI[1], -k_hi);
+ double prod_lo = fputil::fma(x, ONE_OVER_PI[2], truncated_prod);
+ double k_lo = fputil::nearest_integer(prod_lo);
+ y = fputil::fma(x, ONE_OVER_PI[2], truncated_prod - k_lo);
+ y = fputil::fma(x, ONE_OVER_PI[3], y);
+ y = fputil::fma(x, ONE_OVER_PI[4], y);
+
+ return static_cast<int64_t>(k_lo);
+}
+
+// Exceptional cases.
+static constexpr int N_EXCEPT_SMALL = 9;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_SMALL> SmallExcepts{
+ /* inputs */ {
+ 0x3fa7832a, // x = 0x1.4f0654p0
+ 0x40171973, // x = 0x1.2e32e6p1
+ 0x4096cbe4, // x = 0x1.2d97c8p2
+ 0x433b7490, // x = 0x1.76e92p7
+ 0x437ce5f1, // x = 0x1.f9cbe2p7
+ 0x46199998, // x = 0x1.33333p13
+ 0x474d246f, // x = 0x1.9a48dep15
+ 0x4afdece4, // x = 0x1.fbd9c8p22
+ 0x55cafb2a, // x = 0x1.95f654p44
+ },
+ /* outputs (RZ, RU offset, RD offset, RN offset) */
+ {
+ {0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ)
+ {0x3f34290f, 1, 0, 1}, // x = 0x1.2e32e6p1, sin(x) = 0x1.68521ep-1 (RZ)
+ {0xbf7fffff, 0, 1, 1}, // x = 0x1.2d97c8p2, sin(x) = -0x1.fffffep-1 (RZ)
+ {0xbf5cce62, 0, 1, 0}, // x = 0x1.76e92p7, sin(x) = -0x1.b99cc4p-1 (RZ)
+ {0x3f7fffff, 1, 0, 1}, // x = 0x1.f9cbe2p7, sin(x) = 0x1.fffffep-1 (RZ)
+ {0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ)
+ {0x3f7fffff, 1, 0, 1}, // x = 0x1.9a48dep15, sin(x) = 0x1.fffffep-1 (RZ)
+ {0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ)
+ {0xbf7e7a16, 0, 1,
+ 1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ)
+ }};
+
+static constexpr int N_EXCEPT_LARGE = 5;
+
+static constexpr fputil::ExceptionalValues<float, N_EXCEPT_LARGE> LargeExcepts{
+ /* inputs */ {
+ 0x5ebcfdde, // x = 0x1.79fbbcp62
+ 0x5fa6eba7, // x = 0x1.4dd74ep64
+ 0x6386134e, // x = 0x1.0c269cp72
+ 0x6a1976f1, // x = 0x1.32ede2p85
+ 0x727669d4, // x = 0x1.ecd3a8p101
+ },
+ /* outputs (RZ, RU offset, RD offset, RN offset) */
+ {
+ {0x3f50622d, 1, 0, 0}, // x = 0x1.79fbbcp62, sin(x) = 0x1.a0c45ap-1 (RZ)
+ {0xbe52464a, 0, 1,
+ 0}, // x = 0x1.4dd74ep64, sin(x) = -0x1.a48c94p-3 (RZ)
+ {0x3f7cb2e7, 1, 0, 0}, // x = 0x1.0c269cp72, sin(x) = 0x1.f965cep-1 (RZ)
+ {0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ)
+ {0xbf7a781d, 0, 1,
+ 0}, // x = 0x1.ecd3a8p101, sin(x) = -0x1.f4f038p-1 (RZ)
+ }};
+
+} // namespace fma
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_FMA_H
diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp
index 18e88cc9b72c3..3175088ec1751 100644
--- a/libc/src/math/generic/sinf.cpp
+++ b/libc/src/math/generic/sinf.cpp
@@ -7,63 +7,195 @@
//===----------------------------------------------------------------------===//
#include "src/math/sinf.h"
-#include "math_utils.h"
-#include "sincosf_utils.h"
-
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
-#include <math.h>
-#include <stdint.h>
+#include <errno.h>
+
+#if defined(LIBC_TARGET_HAS_FMA)
+#include "range_reduction_fma.h"
+// using namespace __llvm_libc::fma;
+using __llvm_libc::fma::FAST_PASS_BOUND;
+using __llvm_libc::fma::large_range_reduction;
+using __llvm_libc::fma::LargeExcepts;
+using __llvm_libc::fma::N_EXCEPT_LARGE;
+using __llvm_libc::fma::N_EXCEPT_SMALL;
+using __llvm_libc::fma::small_range_reduction;
+using __llvm_libc::fma::SmallExcepts;
+#else
+#include "range_reduction.h"
+// using namespace __llvm_libc::generic;
+using __llvm_libc::generic::FAST_PASS_BOUND;
+using __llvm_libc::generic::large_range_reduction;
+using __llvm_libc::generic::LargeExcepts;
+using __llvm_libc::generic::N_EXCEPT_LARGE;
+using __llvm_libc::generic::N_EXCEPT_SMALL;
+using __llvm_libc::generic::small_range_reduction;
+using __llvm_libc::generic::SmallExcepts;
+#endif
namespace __llvm_libc {
-// Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative
-// error is 0.5303 * 2^-23. A single-step range reduction is used for
-// small values. Large inputs have their range reduced using fast integer
-// arithmetic.
-LLVM_LIBC_FUNCTION(float, sinf, (float y)) {
- double x = y;
- double s;
- int n;
- const sincos_t *p = &SINCOSF_TABLE[0];
-
- if (abstop12(y) < abstop12(PIO4)) {
- s = x * x;
-
- if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) {
- if (unlikely(abstop12(y) < abstop12(as_float(0x800000))))
- // Force underflow for tiny y.
- force_eval<float>(s);
- return y;
+LLVM_LIBC_FUNCTION(float, sinf, (float x)) {
+ using FPBits = typename fputil::FPBits<float>;
+ FPBits xbits(x);
+
+ uint32_t x_u = xbits.uintval();
+ uint32_t x_abs = x_u & 0x7fff'ffffU;
+ double xd, y;
+
+ // Range reduction:
+ // For |x| > pi/16, we perform range reduction as follows:
+ // Find k and y such that:
+ // x = (k + y) * pi
+ // k is an integer
+ // |y| < 0.5
+ // For small range (|x| < 2^50 when FMA instructions are available, 2^26
+ // otherwise), this is done by performing:
+ // k = round(x * 1/pi)
+ // y = x * 1/pi - k
+ // For large range, we will omit all the higher parts of 1/pi such that the
+ // least significant bits of their full products with x are larger than 1,
+ // since sin(x + i * 2pi) = sin(x).
+ //
+ // When FMA instructions are not available, we store the digits of 1/pi in
+ // chunks of 28-bit precision. This will make sure that the products:
+ // x * ONE_OVER_PI_28[i] are all exact.
+ // When FMA instructions are available, we simply store the digits of 1/pi in
+ // chunks of doubles (53-bit of precision).
+ // So when multiplying by the largest values of single precision, the
+ // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80. By the
+ // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
+ // us more than 40 bits of accuracy. For the worst-case estimation of range
+ // reduction, see for instances:
+ // Elementary Functions by J-M. Muller, Chapter 11,
+ // Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
+ // Chapter 10.2.
+ //
+ // Once k and y are computed, we then deduce the answer by the sine of sum
+ // formula:
+ // sin(x) = sin((k + y)*pi)
+ // = sin(y*pi) * cos(k*pi) + cos(y*pi) * sin(k*pi)
+ // = (-1)^(k & 1) * sin(y*pi)
+ // ~ (-1)^(k & 1) * y * P(y^2)
+ // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya
+ // with: > Q = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
+ // [|D...|], [0, 0.5]);
+
+ // |x| <= pi/16
+ if (x_abs <= 0x3e49'0fdbU) {
+ xd = static_cast<double>(x);
+
+ // |x| < 0x1.d12ed2p-12f
+ if (x_abs < 0x39e8'9769U) {
+ if (unlikely(x_abs == 0U)) {
+ // For signed zeros.
+ return x;
+ }
+ // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x
+ // is:
+ // |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
+ // = x^2 / 6
+ // < 2^-25
+ // < epsilon(1)/2.
+ // So the correctly rounded values of sin(x) are:
+ // = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+ // or (rounding mode = FE_UPWARD and x is
+ // negative),
+ // = x otherwise.
+ // To simplify the rounding decision and make it more efficient, we use
+ // fma(x, -2^-25, x) instead.
+ // An exhaustive test shows that this formula work correctly for all
+ // rounding modes up to |x| < 0x1.c555dep-11f.
+ // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
+ // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
+ // |x| < 2^-125. For targets without FMA instructions, we simply use
+ // double for intermediate results as it is more efficient than using an
+ // emulated version of FMA.
+#if defined(LIBC_TARGET_HAS_FMA)
+ return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+ return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
+#endif // LIBC_TARGET_HAS_FMA
}
- return sinf_poly(x, s, p, 0);
- } else if (likely(abstop12(y) < abstop12(120.0f))) {
- x = reduce_fast(x, p, &n);
-
- // Setup the signs for sin and cos.
- s = p->sign[n & 3];
+ // |x| < pi/16.
+ double xsq = xd * xd;
+
+ // Degree-9 polynomial approximation:
+ // sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9
+ // = x (1 + a_3 x^2 + ... + a_9 x^8)
+ // = x * P(x^2)
+ // generated by Sollya with the following commands:
+ // > display = hexadecimal;
+ // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
+ double result =
+ fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7,
+ -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19);
+ return xd * result;
+ }
- if (n & 2)
- p = &SINCOSF_TABLE[1];
+ bool x_sign = xbits.get_sign();
- return sinf_poly(x * s, x * x, p, n);
- } else if (abstop12(y) < abstop12(INFINITY)) {
- uint32_t xi = as_uint32_bits(y);
- int sign = xi >> 31;
+ int64_t k;
+ xd = static_cast<double>(x);
- x = reduce_large(xi, &n);
+ if (x_abs < FAST_PASS_BOUND) {
+ using ExceptChecker =
+ typename fputil::ExceptionChecker<float, N_EXCEPT_SMALL>;
+ {
+ float result;
+ if (ExceptChecker::check_odd_func(SmallExcepts, x_abs, x_sign, result)) {
+ return result;
+ }
+ }
- // Setup signs for sin and cos - include original sign.
- s = p->sign[(n + sign) & 3];
+ k = small_range_reduction(xd, y);
+ } else {
+ // x is inf or nan.
+ if (unlikely(x_abs >= 0x7f80'0000U)) {
+ if (x_abs == 0x7f80'0000U)
+ errno = EDOM;
+ return x +
+ FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+ }
- if ((n + sign) & 2)
- p = &SINCOSF_TABLE[1];
+ using ExceptChecker =
+ typename fputil::ExceptionChecker<float, N_EXCEPT_LARGE>;
+ {
+ float result;
+ if (ExceptChecker::check_odd_func(LargeExcepts, x_abs, x_sign, result))
+ return result;
+ }
- return sinf_poly(x * s, x * x, p, n);
+ k = large_range_reduction(xd, xbits.get_exponent(), y);
}
- return invalid(y);
+ // After range reduction, k = round(x / pi) and y = (x/pi) - k.
+ // So k is an integer and -0.5 <= y <= 0.5.
+ // Then sin(x) = sin(y*pi + k*pi)
+ // = (-1)^(k & 1) * sin(y*pi)
+ // ~ (-1)^(k & 1) * y * P(y^2)
+ // where y*P(y^2) is a degree-15 minimax polynomial generated by Sollya
+ // with: > P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|],
+ // [|D...|], [0, 0.5]);
+
+ constexpr double SIGN[2] = {1.0, -1.0};
+
+ double ysq = y * y;
+ double result =
+ y * fputil::polyeval(ysq, 0x1.921fb54442d17p1, -0x1.4abbce625bd4bp2,
+ 0x1.466bc67750a3fp1, -0x1.32d2cce1612b5p-1,
+ 0x1.507832417bce6p-4, -0x1.e3062119b6071p-8,
+ 0x1.e89c7aa14122dp-12, -0x1.625b1709dece6p-16);
+
+ return SIGN[k & 1] * result;
+ // }
}
} // namespace __llvm_libc
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index a59724819ed55..a114267f27252 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -23,15 +23,19 @@ add_fp_unittest(
add_fp_unittest(
sinf_test
+ NO_RUN_POSTBUILD
NEED_MPFR
SUITE
libc_math_exhaustive_tests
SRCS
sinf_test.cpp
DEPENDS
+ .exhaustive_test
libc.include.math
libc.src.math.sinf
libc.src.__support.FPUtil.fputil
+ LINK_LIBRARIES
+ -lpthread
)
add_fp_unittest(
diff --git a/libc/test/src/math/exhaustive/sinf_test.cpp b/libc/test/src/math/exhaustive/sinf_test.cpp
index fe90252724dca..de1630a0826bd 100644
--- a/libc/test/src/math/exhaustive/sinf_test.cpp
+++ b/libc/test/src/math/exhaustive/sinf_test.cpp
@@ -6,20 +6,71 @@
//
//===----------------------------------------------------------------------===//
+#include "exhaustive_test.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/math/sinf.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
-#include <math.h>
+#include "utils/UnitTest/FPMatcher.h"
+
+#include <thread>
using FPBits = __llvm_libc::fputil::FPBits<float>;
namespace mpfr = __llvm_libc::testing::mpfr;
-TEST(LlvmLibcsinffExhaustiveTest, AllValues) {
- uint32_t bits = 0;
- do {
- FPBits xbits(bits);
- float x = float(xbits);
- ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
- } while (bits++ < 0xffff'ffffU);
+struct LlvmLibcSinfExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+ bool check(uint32_t start, uint32_t stop,
+ mpfr::RoundingMode rounding) override {
+ mpfr::ForceRoundingMode r(rounding);
+ uint32_t bits = start;
+ bool result = true;
+ do {
+ FPBits xbits(bits);
+ float x = float(xbits);
+ bool r = EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+ 0.5, rounding);
+ result &= r;
+ } while (++bits < stop);
+ return result;
+ }
+};
+
+// Range: [0, +Inf);
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundNearestTieToEven) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundUp) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundDown) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, PostiveRangeRoundTowardZero) {
+ test_full_range(POS_START, POS_STOP, mpfr::RoundingMode::TowardZero);
+}
+
+// Range: (-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundNearestTieToEven) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundUp) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundDown) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcSinfExhaustiveTest, NegativeRangeRoundTowardZero) {
+ test_full_range(NEG_START, NEG_STOP, mpfr::RoundingMode::TowardZero);
}
diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp
index 29c3f581ced72..40dcc4d4f615b 100644
--- a/libc/test/src/math/sinf_test.cpp
+++ b/libc/test/src/math/sinf_test.cpp
@@ -51,26 +51,70 @@ TEST(LlvmLibcSinfTest, InFloatRange) {
float x = float(FPBits(v));
if (isnan(x) || isinf(x))
continue;
- ASSERT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+ ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+ __llvm_libc::sinf(x), 0.5);
}
}
TEST(LlvmLibcSinfTest, SpecificBitPatterns) {
- float x = float(FPBits(uint32_t(0xc70d39a1)));
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+ constexpr int N = 36;
+ constexpr uint32_t INPUTS[N] = {
+ 0x3f06'0a92U, // x = pi/6
+ 0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+ 0x3f49'0fdbU, // x = pi/4
+ 0x3f86'0a92U, // x = pi/3
+ 0x3fa7'832aU, // x = 0x1.4f0654p+0f
+ 0x3fc9'0fdbU, // x = pi/2
+ 0x4017'1973U, // x = 0x1.2e32e6p+1f
+ 0x4049'0fdbU, // x = pi
+ 0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+ 0x40c9'0fdbU, // x = 2*pi
+ 0x433b'7490U, // x = 0x1.76e92p+7f
+ 0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+ 0x4619'9998U, // x = 0x1.33333p+13f
+ 0x474d'246fU, // x = 0x1.9a48dep+15f
+ 0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+ 0x4c23'32e9U, // x = 0x1.4665d2p+25f
+ 0x50a3'e87fU, // x = 0x1.47d0fep+34f
+ 0x5239'47f6U, // x = 0x1.728fecp+37f
+ 0x53b1'46a6U, // x = 0x1.628d4cp+40f
+ 0x55ca'fb2aU, // x = 0x1.95f654p+44f
+ 0x588e'f060U, // x = 0x1.1de0cp+50f
+ 0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+ 0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
+ 0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+ 0x61a4'0b40U, // x = 0x1.48168p+68f
+ 0x6386'134eU, // x = 0x1.0c269cp+72f
+ 0x6589'8498U, // x = 0x1.13093p+76f
+ 0x6600'0001U, // x = 0x1.000002p+77f
+ 0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+ 0x66b0'14aaU, // x = 0x1.602954p+78f
+ 0x67a9'242bU, // x = 0x1.524856p+80f
+ 0x6a19'76f1U, // x = 0x1.32ede2p+85f
+ 0x6c55'da58U, // x = 0x1.abb4bp+89f
+ 0x6f79'be45U, // x = 0x1.f37c8ap+95f
+ 0x7276'69d4U, // x = 0x1.ecd3a8p+101f
+ 0x7758'4625U, // x = 0x1.b08c4ap+111f
+ };
+
+ for (int i = 0; i < N; ++i) {
+ float x = float(FPBits(INPUTS[i]));
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+ __llvm_libc::sinf(x), 0.5);
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, -x,
+ __llvm_libc::sinf(-x), 0.5);
+ }
}
// For small values, sin(x) is x.
TEST(LlvmLibcSinfTest, SmallValues) {
- float x = float(FPBits(uint32_t(0x17800000)));
- float result = __llvm_libc::sinf(x);
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0);
- EXPECT_FP_EQ(x, result);
-
- x = float(FPBits(uint32_t(0x00400000)));
- result = __llvm_libc::sinf(x);
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, result, 1.0);
- EXPECT_FP_EQ(x, result);
+ float x = float(FPBits(0x1780'0000U));
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+ 0.5);
+
+ x = float(FPBits(0x0040'0000U));
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x, __llvm_libc::sinf(x),
+ 0.5);
}
// SDCOMP-26094: check sinf in the cases for which the range reducer
@@ -78,6 +122,7 @@ TEST(LlvmLibcSinfTest, SmallValues) {
TEST(LlvmLibcSinfTest, SDCOMP_26094) {
for (uint32_t v : SDCOMP26094_VALUES) {
float x = float(FPBits((v)));
- EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0);
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sin, x,
+ __llvm_libc::sinf(x), 0.5);
}
}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 003929d986de1..aaf1f2389c0f3 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -161,6 +161,7 @@ fputil_common_hdrs = [
"src/__support/FPUtil/NormalFloat.h",
"src/__support/FPUtil/PlatformDefs.h",
"src/__support/FPUtil/builtin_wrappers.h",
+ "src/__support/FPUtil/except_value_utils.h",
]
fputil_hdrs = selects.with_or({
@@ -460,6 +461,21 @@ cc_library(
],
)
+cc_library(
+ name = "range_reduction",
+ hdrs = [
+ "src/math/generic/range_reduction.h",
+ "src/math/generic/range_reduction_fma.h",
+ ],
+ deps = [
+ ":__support_fputil",
+ ":__support_fputil_fma",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_nearest_integer",
+ ":libc_root",
+ ],
+)
+
libc_math_function(
name = "expm1f",
additional_deps = [
@@ -635,8 +651,10 @@ libc_math_function(
libc_math_function(
name = "sinf",
additional_deps = [
- ":math_utils",
- ":sincosf_utils",
+ ":__support_fputil_fma",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_polyeval",
+ ":range_reduction",
],
)
More information about the libc-commits
mailing list