[libc-commits] [libc] [llvm] [libc][math][c23] Add rsqrtf16() function (PR #137545)
Anton Shepelev via libc-commits
libc-commits at lists.llvm.org
Fri Sep 12 17:09:05 PDT 2025
https://github.com/amemov updated https://github.com/llvm/llvm-project/pull/137545
>From 43fefedc6b8c644fb71aa5a12b17e9c76aac6cef Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 19:54:02 +0000
Subject: [PATCH 1/7] Initial implementation of rsqrtf16: defined and declared
rsqrtf16; added entrypoints to support rsqrt in float16 format; added tests,
both unit and exhaustive; added MPFR support for Rsqrt to verify the test
results
TODO:
- Write the computation as a polynomial at the end of rsqrtf16.cpp
- Check that errors are generated correctly (reference from C23: "The rsqrt functions compute the reciprocal of the nonnegative square root of the argument. A domain error occurs if the argument is less than zero. A pole error may occur if the argument equals zero.")
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/docs/headers/math/index.rst | 2 +-
libc/include/math.yaml | 7 +++
libc/src/math/CMakeLists.txt | 2 +
libc/src/math/generic/CMakeLists.txt | 19 ++++++
libc/src/math/generic/rsqrtf16.cpp | 67 ++++++++++++++++++++++
libc/src/math/rsqrtf16.h | 21 +++++++
libc/test/src/math/CMakeLists.txt | 11 ++++
libc/test/src/math/rsqrtf16_test.cpp | 42 ++++++++++++++
libc/test/src/math/smoke/CMakeLists.txt | 11 ++++
libc/test/src/math/smoke/rsqrtf16_test.cpp | 38 ++++++++++++
libc/utils/MPFRWrapper/MPCommon.cpp | 6 ++
libc/utils/MPFRWrapper/MPCommon.h | 1 +
libc/utils/MPFRWrapper/MPFRUtils.cpp | 2 +
libc/utils/MPFRWrapper/MPFRUtils.h | 1 +
15 files changed, 230 insertions(+), 1 deletion(-)
create mode 100644 libc/src/math/generic/rsqrtf16.cpp
create mode 100644 libc/src/math/rsqrtf16.h
create mode 100644 libc/test/src/math/rsqrtf16_test.cpp
create mode 100644 libc/test/src/math/smoke/rsqrtf16_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 0e4fbdd2f2f57..dc3120ca913a3 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -740,6 +740,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
libc.src.math.rintf16
libc.src.math.roundevenf16
libc.src.math.roundf16
+ libc.src.math.rsqrtf16
libc.src.math.scalblnf16
libc.src.math.scalbnf16
libc.src.math.setpayloadf16
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index cf88e6237d1e3..a29e70a7b625b 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -333,7 +333,7 @@ Higher Math Functions
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| rootn | | | | | | 7.12.7.8 | F.10.4.8 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 |
+| rsqrt | | | | |check| | | 7.12.7.9 | F.10.4.9 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
| sin | |check| | |check| | | |check| | | 7.12.4.6 | F.10.1.6 |
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index b725c33c0bb06..35aee5f54d8cd 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -2237,6 +2237,13 @@ functions:
return_type: long double
arguments:
- type: long double
+ - name: rsqrtf16
+ standards:
+ - stdc
+ return_type: _Float16
+ arguments:
+ - type: _Float16
+ guard: LIBC_TYPES_HAS_FLOAT16
- name: scalbln
standards:
- stdc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 5161b2b61aa94..689327b2f1249 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -467,6 +467,8 @@ add_math_entrypoint_object(roundevenl)
add_math_entrypoint_object(roundevenf16)
add_math_entrypoint_object(roundevenf128)
+add_math_entrypoint_object(rsqrtf16)
+
add_math_entrypoint_object(scalbln)
add_math_entrypoint_object(scalblnf)
add_math_entrypoint_object(scalblnl)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 25f583035fbc2..ae0977ce255c2 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -955,6 +955,25 @@ add_entrypoint_object(
libc.src.__support.FPUtil.nearest_integer_operations
)
+add_entrypoint_object(
+ rsqrtf16
+ SRCS
+ rsqrtf16.cpp
+ HDRS
+ ../rsqrtf16.h
+ DEPENDS
+ libc.hdr.errno_macros
+ libc.hdr.fenv_macros
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.sqrt
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.types
+)
+
add_entrypoint_object(
lround
SRCS
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
new file mode 100644
index 0000000000000..930c8f6fc7b4e
--- /dev/null
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -0,0 +1,67 @@
+//===-- Half-precision rsqrt function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/rsqrtf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
+ using FPBits = fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff;
+ uint16_t x_sign = x_u >> 15;
+
+ // x is NaN
+ if (LIBC_UNLIKELY(xbits.is_nan())) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ return x;
+ }
+
+ // |x| = 0
+ if (LIBC_UNLIKELY(x_abs == 0x0)) {
+ fputil::raise_except_if_required(FE_DIVBYZERO);
+ fputil::set_errno_if_required(ERANGE);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // -inf <= x < 0
+ if (LIBC_UNLIKELY(x_sign == 1)) {
+ fputil::raise_except_if_required(FE_INVALID);
+ fputil::set_errno_if_required(EDOM);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // x = +inf => rsqrt(x) = 0
+ if (LIBC_UNLIKELY(xbits.is_inf())) {
+ return fputil::cast<float16>(0.0f);
+ }
+
+ // x = 1 => rsqrt(x) = 1
+ if (LIBC_UNLIKELY(x_u == 0x1)) {
+ return fputil::cast<float16>(1.0f);
+ }
+
+ // x is valid, estimate the result - below is temporary solution for just testing
+ float xf = x;
+ return fputil::cast<float16>(1.0f / xf);
+}
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/rsqrtf16.h b/libc/src/math/rsqrtf16.h
new file mode 100644
index 0000000000000..c88ab5256ce88
--- /dev/null
+++ b/libc/src/math/rsqrtf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for rsqrtf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_RSQRTF16_H
+#define LLVM_LIBC_SRC_MATH_RSQRTF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 rsqrtf16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_RSQRTF16_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index ebf9f1c86cf15..8daf59032622c 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1560,6 +1560,17 @@ add_fp_unittest(
libc.src.math.sqrtl
)
+add_fp_unittest(
+ rsqrtf16_test
+ NEED_MPFR
+ SUITE
+ libc-math-unittests
+ SRCS
+ rsqrtf16_test.cpp
+ DEPENDS
+ libc.src.math.rsqrtf16
+)
+
add_fp_unittest(
sqrtf16_test
NEED_MPFR
diff --git a/libc/test/src/math/rsqrtf16_test.cpp b/libc/test/src/math/rsqrtf16_test.cpp
new file mode 100644
index 0000000000000..ec22df5bd31c7
--- /dev/null
+++ b/libc/test/src/math/rsqrtf16_test.cpp
@@ -0,0 +1,42 @@
+//===-- Exhaustive test for rsqrtf16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/rsqrtf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf]
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0]
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcRsqrtf16Test, PositiveRange) {
+ for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+ float16 x = FPBits(v).get_val();
+
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
+ LIBC_NAMESPACE::rsqrtf16(x), 0.5);
+ }
+}
+
+TEST_F(LlvmLibcRsqrtf16Test, NegativeRange) {
+ for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+ float16 x = FPBits(v).get_val();
+
+ EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
+ LIBC_NAMESPACE::rsqrtf16(x), 0.5);
+ }
+}
\ No newline at end of file
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 79b189159e9d8..fe5d8a6b3b864 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -2958,6 +2958,17 @@ add_fp_unittest(
libc.src.math.sqrtl
)
+add_fp_unittest(
+ rsqrtf16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ rsqrtf16_test.cpp
+ DEPENDS
+ libc.src.errno.errno
+ libc.src.math.rsqrtf16
+)
+
add_fp_unittest(
sqrtf16_test
SUITE
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
new file mode 100644
index 0000000000000..eb429d3dbf6c8
--- /dev/null
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -0,0 +1,38 @@
+//===-- Unittests for rsqrtf16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/errno/libc_errno.h"
+#include "src/math/rsqrtf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
+ LIBC_NAMESPACE::libc_errno = 0;
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(aNaN));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN),
+ FE_INVALID);
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));
+ EXPECT_MATH_ERRNO(ERANGE);
+
+ EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::rsqrtf16(1.0f));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::rsqrtf16(inf));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(neg_inf));
+ EXPECT_MATH_ERRNO(EDOM);
+
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(-2.0f));
+ EXPECT_MATH_ERRNO(EDOM);
+}
\ No newline at end of file
diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp
index ccd4d2d01a4e2..1a78ca5c24ba2 100644
--- a/libc/utils/MPFRWrapper/MPCommon.cpp
+++ b/libc/utils/MPFRWrapper/MPCommon.cpp
@@ -366,6 +366,12 @@ MPFRNumber MPFRNumber::rint(mpfr_rnd_t rnd) const {
return result;
}
+MPFRNumber MPFRNumber::rsqrt() const {
+ MPFRNumber result(*this);
+ mpfr_rec_sqrt(result.value, value, mpfr_rounding);
+ return result;
+}
+
MPFRNumber MPFRNumber::mod_2pi() const {
MPFRNumber result(0.0, 1280);
MPFRNumber _2pi(0.0, 1280);
diff --git a/libc/utils/MPFRWrapper/MPCommon.h b/libc/utils/MPFRWrapper/MPCommon.h
index 99cb7ec66a2ca..43218ee7662db 100644
--- a/libc/utils/MPFRWrapper/MPCommon.h
+++ b/libc/utils/MPFRWrapper/MPCommon.h
@@ -216,6 +216,7 @@ class MPFRNumber {
bool round_to_long(long &result) const;
bool round_to_long(mpfr_rnd_t rnd, long &result) const;
MPFRNumber rint(mpfr_rnd_t rnd) const;
+ MPFRNumber rsqrt() const;
MPFRNumber mod_2pi() const;
MPFRNumber mod_pi_over_2() const;
MPFRNumber mod_pi_over_4() const;
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 8853f96ef8f92..a68d21650f004 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -86,6 +86,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
return mpfrInput.round();
case Operation::RoundEven:
return mpfrInput.roundeven();
+ case Operation::Rsqrt:
+ return mpfrInput.rsqrt();
case Operation::Sin:
return mpfrInput.sin();
case Operation::Sinpi:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index c77a6aa3adeae..532b2c8d9d819 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,6 +55,7 @@ enum class Operation : int {
ModPIOver4,
Round,
RoundEven,
+ Rsqrt,
Sin,
Sinpi,
Sinh,
>From 10f24106d6947ce0c8408c527e81d9201589be10 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 20:07:27 +0000
Subject: [PATCH 2/7] Clang-formatted the files
---
libc/src/math/generic/rsqrtf16.cpp | 9 +++++----
libc/test/src/math/smoke/rsqrtf16_test.cpp | 3 +--
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 930c8f6fc7b4e..8e97ec896aa0c 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -35,7 +35,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
}
return x;
}
-
+
// |x| = 0
if (LIBC_UNLIKELY(x_abs == 0x0)) {
fputil::raise_except_if_required(FE_DIVBYZERO);
@@ -44,7 +44,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
}
// -inf <= x < 0
- if (LIBC_UNLIKELY(x_sign == 1)) {
+ if (LIBC_UNLIKELY(x_sign == 1)) {
fputil::raise_except_if_required(FE_INVALID);
fputil::set_errno_if_required(EDOM);
return FPBits::quiet_nan().get_val();
@@ -54,13 +54,14 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
if (LIBC_UNLIKELY(xbits.is_inf())) {
return fputil::cast<float16>(0.0f);
}
-
+
// x = 1 => rsqrt(x) = 1
if (LIBC_UNLIKELY(x_u == 0x1)) {
return fputil::cast<float16>(1.0f);
}
- // x is valid, estimate the result - below is temporary solution for just testing
+ // x is valid, estimate the result - below is temporary solution for just
+ // testing
float xf = x;
return fputil::cast<float16>(1.0f / xf);
}
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index eb429d3dbf6c8..d50d8ee0ad2ff 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -17,8 +17,7 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(aNaN));
EXPECT_MATH_ERRNO(0);
- EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN),
- FE_INVALID);
+ EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN), FE_INVALID);
EXPECT_MATH_ERRNO(0);
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));
>From 8d15cfac9a39c497c951b9fdd6b73583d63020f6 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 22:09:15 +0000
Subject: [PATCH 3/7] Replaced the computation for valid x with a polynomial
approximation
---
libc/src/math/generic/rsqrtf16.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 8e97ec896aa0c..2ae026499b89c 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -60,9 +60,12 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
return fputil::cast<float16>(1.0f);
}
- // x is valid, estimate the result - below is temporary solution for just
- // testing
+ // x is valid, estimate the result
+ // 3-degree polynomial generated using Sollya
+ // P = fpminimax(1/sqrt(x), [|1, 2, 3|], [|SG...|], [0.5, 1]);
float xf = x;
- return fputil::cast<float16>(1.0f / xf);
+ float result =
+ fputil::polyeval(xf, 0x1.d42408p2f, -0x1.7cc4fep3f, 0x1.66cb6ap2f);
+ return fputil::cast<float16>(result);
}
} // namespace LIBC_NAMESPACE_DECL
>From e49620105adb5eaab40a9c4a2cdade8469121bf3 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Tue, 29 Apr 2025 21:37:45 +0000
Subject: [PATCH 4/7] Added range reduction to the approximation
---
libc/src/math/generic/CMakeLists.txt | 2 +-
libc/src/math/generic/rsqrtf16.cpp | 39 ++++++++++++++++++----
libc/test/src/math/rsqrtf16_test.cpp | 2 +-
libc/test/src/math/smoke/rsqrtf16_test.cpp | 4 +--
4 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index ae0977ce255c2..9335e7cbcb270 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -968,8 +968,8 @@ add_entrypoint_object(
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.manipulation_functions
libc.src.__support.FPUtil.polyeval
- libc.src.__support.FPUtil.sqrt
libc.src.__support.macros.optimization
libc.src.__support.macros.properties.types
)
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 2ae026499b89c..5bf4ab3e46fda 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -11,10 +11,10 @@
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/FPUtil/multiply_add.h" // to remove
#include "src/__support/macros/optimization.h"
namespace LIBC_NAMESPACE_DECL {
@@ -40,7 +40,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
if (LIBC_UNLIKELY(x_abs == 0x0)) {
fputil::raise_except_if_required(FE_DIVBYZERO);
fputil::set_errno_if_required(ERANGE);
- return FPBits::quiet_nan().get_val();
+ return FPBits::inf(Sign::POS).get_val();
}
// -inf <= x < 0
@@ -61,11 +61,36 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
}
// x is valid, estimate the result
- // 3-degree polynomial generated using Sollya
- // P = fpminimax(1/sqrt(x), [|1, 2, 3|], [|SG...|], [0.5, 1]);
+ // Range reduction:
+ // x can be expressed as m*2^e, where e - int exponent and m - mantissa
+ // rsqrtf16(x) = rsqrtf16(m*2^e)
+ // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
+ // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
+
float xf = x;
- float result =
- fputil::polyeval(xf, 0x1.d42408p2f, -0x1.7cc4fep3f, 0x1.66cb6ap2f);
+ int exponent;
+ float mantissa = fputil::frexp(xf, exponent);
+
+ // 6-degree polynomial generated using Sollya
+ // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+ float interm =
+ fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
+ -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
+
+ // Round (-e/2)
+ int exp_floored = -(exponent >> 1);
+
+ // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
+ // rsqrt(x) = P(mantissa) * 2*(exp_floored)
+ float result = fputil::ldexp(interm, exp_floored);
+
+ // Handle the case where exponent is odd
+ if (exponent & 1) {
+ const float ONE_OVER_SQRT2 =
+ 0x1.6a09e667f3bcc908b2fb1366ea957d3e3adec1751p-1f;
+ result *= ONE_OVER_SQRT2;
+ }
+
return fputil::cast<float16>(result);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/rsqrtf16_test.cpp b/libc/test/src/math/rsqrtf16_test.cpp
index ec22df5bd31c7..d2f3fe8f49b92 100644
--- a/libc/test/src/math/rsqrtf16_test.cpp
+++ b/libc/test/src/math/rsqrtf16_test.cpp
@@ -39,4 +39,4 @@ TEST_F(LlvmLibcRsqrtf16Test, NegativeRange) {
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
LIBC_NAMESPACE::rsqrtf16(x), 0.5);
}
-}
\ No newline at end of file
+}
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index d50d8ee0ad2ff..8e69027e67e13 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -20,7 +20,7 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN), FE_INVALID);
EXPECT_MATH_ERRNO(0);
- EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));
+ EXPECT_FP_EQ(inf, LIBC_NAMESPACE::rsqrtf16(0.0f));
EXPECT_MATH_ERRNO(ERANGE);
EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::rsqrtf16(1.0f));
@@ -34,4 +34,4 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(-2.0f));
EXPECT_MATH_ERRNO(EDOM);
-}
\ No newline at end of file
+}
>From ba9bdd2681f48dd353398c22501c8fa5390e83af Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Tue, 20 May 2025 19:56:14 +0000
Subject: [PATCH 5/7] Added Newton-Raphson iterations - The accuracy improved
drastically, but it still fails
---
libc/src/math/generic/CMakeLists.txt | 2 +-
libc/src/math/generic/rsqrtf16.cpp | 35 +++++++++++++++++++---------
2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 9335e7cbcb270..11ffd8d69829f 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -967,7 +967,7 @@ add_entrypoint_object(
libc.src.__support.FPUtil.cast
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.fma
libc.src.__support.FPUtil.manipulation_functions
libc.src.__support.FPUtil.polyeval
libc.src.__support.macros.optimization
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 5bf4ab3e46fda..806328b60aca2 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -10,11 +10,11 @@
#include "hdr/errno_macros.h"
#include "hdr/fenv_macros.h"
#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FMA.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h" // to remove
#include "src/__support/macros/optimization.h"
namespace LIBC_NAMESPACE_DECL {
@@ -55,11 +55,6 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
return fputil::cast<float16>(0.0f);
}
- // x = 1 => rsqrt(x) = 1
- if (LIBC_UNLIKELY(x_u == 0x1)) {
- return fputil::cast<float16>(1.0f);
- }
-
// x is valid, estimate the result
// Range reduction:
// x can be expressed as m*2^e, where e - int exponent and m - mantissa
@@ -72,23 +67,41 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
float mantissa = fputil::frexp(xf, exponent);
// 6-degree polynomial generated using Sollya
- // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+ // bigger polynomial doesn't generate better results-> the current one
+ // produces the least number of errors but still errors are presents P =
+ // fpminimax(1/(sqrt(x)), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
float interm =
fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
-0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
+ // Apply one Newton-Raphson iteration to refine the approximation of
+ // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
+ // fputil::fma for potential precision benefits in the factor calculation
+ float interm_sq = interm * interm;
+ float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
+ float interm_refined = interm * factor; // Final multiplication
+
+ // Apply a second Newton-Raphson iteration
+ // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
+ // y_old is now interm_refined
+ float interm_refined_sq = interm_refined * interm_refined;
+ float factor2 = fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
+ float interm_refined2 = interm_refined * factor2;
+
// Round (-e/2)
int exp_floored = -(exponent >> 1);
// rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
// rsqrt(x) = P(mantissa) * 2*(exp_floored)
- float result = fputil::ldexp(interm, exp_floored);
+ // float result = fputil::ldexp(interm, exp_floored);
+ float result = fputil::ldexp(interm_refined2, exp_floored);
// Handle the case where exponent is odd
if (exponent & 1) {
- const float ONE_OVER_SQRT2 =
- 0x1.6a09e667f3bcc908b2fb1366ea957d3e3adec1751p-1f;
- result *= ONE_OVER_SQRT2;
+ const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
+ // result *= ONE_OVER_SQRT2;
+ result = fputil::fma<float>(result, ONE_OVER_SQRT2,
+ 0.0f); // Use FMA for multiplication
}
return fputil::cast<float16>(result);
>From 3b1548aaeacaba8fb540330ed6d964e203fdba00 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Wed, 21 May 2025 20:28:22 +0000
Subject: [PATCH 6/7] Added separate handling for mantissa == 0.5f. Resulted in
fewer errors
---
libc/src/math/generic/rsqrtf16.cpp | 79 +++++++++++++++++-------------
1 file changed, 45 insertions(+), 34 deletions(-)
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 806328b60aca2..6ad9f5f968772 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -66,42 +66,53 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
int exponent;
float mantissa = fputil::frexp(xf, exponent);
- // 6-degree polynomial generated using Sollya
- // bigger polynomial doesn't generate better results-> the current one
- // produces the least number of errors but still errors are presents P =
- // fpminimax(1/(sqrt(x)), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
- float interm =
- fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
- -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
-
- // Apply one Newton-Raphson iteration to refine the approximation of
- // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
- // fputil::fma for potential precision benefits in the factor calculation
- float interm_sq = interm * interm;
- float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
- float interm_refined = interm * factor; // Final multiplication
-
- // Apply a second Newton-Raphson iteration
- // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
- // y_old is now interm_refined
- float interm_refined_sq = interm_refined * interm_refined;
- float factor2 = fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
- float interm_refined2 = interm_refined * factor2;
-
- // Round (-e/2)
+ float result;
int exp_floored = -(exponent >> 1);
- // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
- // rsqrt(x) = P(mantissa) * 2*(exp_floored)
- // float result = fputil::ldexp(interm, exp_floored);
- float result = fputil::ldexp(interm_refined2, exp_floored);
-
- // Handle the case where exponent is odd
- if (exponent & 1) {
- const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
- // result *= ONE_OVER_SQRT2;
- result = fputil::fma<float>(result, ONE_OVER_SQRT2,
- 0.0f); // Use FMA for multiplication
+ if (mantissa == 0.5f) {
+ // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
+ // 1/sqrt(0.5f) = sqrt(2.0f) = 0x1.6a09e6p0f
+ // If exponent is odd (exponent = 2k + 1):
+ // rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
+ // = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
+ // exp_floored = -((2k+1)>>1) = -(k) = -k
+ // So result = ldexp(1.0f, exp_floored)
+ // If exponent is even (exponent = 2k):
+ // rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
+ // exp_floored = -((2k)>>1) = -(k) = -k
+ // So result = ldexp(sqrt(2.0f), exp_floored)
+ if (exponent & 1) {
+ result = fputil::ldexp(1.0f, exp_floored);
+ } else {
+ constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
+ result = fputil::ldexp(SQRT_2_F, exp_floored);
+ }
+ } else {
+ // 6-degree polynomial generated using Sollya
+ // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+ float interm = fputil::polyeval(
+ mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
+ -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
+
+ // Apply one Newton-Raphson iteration to refine the approximation of
+ // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
+ // fputil::fma for potential precision benefits in the factor calculation
+ float interm_sq = interm * interm;
+ float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
+ float interm_refined = interm * factor;
+
+ // Apply a second Newton-Raphson iteration
+ // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
+ float interm_refined_sq = interm_refined * interm_refined;
+ float factor2 =
+ fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
+ float interm_refined2 = interm_refined * factor2;
+
+ result = fputil::ldexp(interm_refined2, exp_floored);
+ if (exponent & 1) {
+ const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
+ result = fputil::fma<float>(result, ONE_OVER_SQRT2, 0.0f);
+ }
}
return fputil::cast<float16>(result);
>From d000dcc429c7f54ea584fd56699f1fbbabd28229 Mon Sep 17 00:00:00 2001
From: Anton Shepelev <shepelev777 at gmail.com>
Date: Fri, 12 Sep 2025 16:50:15 -0700
Subject: [PATCH 7/7] - Fixed ULP errors - Refactored the implementation to
match the proposal for constexpr - Added rsqrtf16 to the Bazel build
---
libc/shared/math.h | 2 +
libc/shared/math/rsqrtf16.h | 30 ++++
libc/src/__support/math/CMakeLists.txt | 16 ++
libc/src/__support/math/rsqrtf16.h | 137 ++++++++++++++++++
libc/src/math/generic/CMakeLists.txt | 12 +-
libc/src/math/generic/rsqrtf16.cpp | 107 +-------------
libc/test/shared/CMakeLists.txt | 2 +
libc/test/shared/shared_math_test.cpp | 2 +
libc/test/src/math/smoke/rsqrtf16_test.cpp | 2 +-
.../llvm-project-overlay/libc/BUILD.bazel | 25 ++++
10 files changed, 219 insertions(+), 116 deletions(-)
create mode 100644 libc/shared/math/rsqrtf16.h
create mode 100644 libc/src/__support/math/rsqrtf16.h
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 3714f380a27dc..0c564ee6f2dc8 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -45,4 +45,6 @@
#include "math/ldexpf128.h"
#include "math/ldexpf16.h"
+#include "math/rsqrtf16.h"
+
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/shared/math/rsqrtf16.h b/libc/shared/math/rsqrtf16.h
new file mode 100644
index 0000000000000..0d1ace3318a50
--- /dev/null
+++ b/libc/shared/math/rsqrtf16.h
@@ -0,0 +1,30 @@
+//===-- Shared rsqrtf16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+#define LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/rsqrtf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::rsqrtf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index e1076edf1e61c..8d91900a10b6e 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -109,6 +109,22 @@ add_header_library(
libc.src.__support.macros.properties.types
)
+
+add_header_library(
+ rsqrtf16
+ HDRS
+ rsqrtf16.h
+ DEPENDS
+ libc.src.__support.FPUtil.cast
+ libc.src.__support.FPUtil.fenv_impl
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.polyeval
+ libc.src.__support.FPUtil.manipulation_functions
+ libc.src.__support.macros.optimization
+ libc.src.__support.macros.properties.types
+)
+
add_header_library(
asin_utils
HDRS
diff --git a/libc/src/__support/math/rsqrtf16.h b/libc/src/__support/math/rsqrtf16.h
new file mode 100644
index 0000000000000..dfbcf41ba2b07
--- /dev/null
+++ b/libc/src/__support/math/rsqrtf16.h
@@ -0,0 +1,137 @@
+//===-- Implementation header for rsqrtf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+static constexpr float16 rsqrtf16(float16 x) { // Reciprocal square root: 1/sqrt(x), evaluated in float and cast back to float16.
+using FPBits = fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff; // x_u with bit 15 cleared
+ uint16_t x_sign = x_u >> 15; // bit 15 of x_u
+
+ // x is NaN: a signaling NaN requests FE_INVALID and yields a quiet NaN; any other NaN is returned as-is.
+ if (LIBC_UNLIKELY(xbits.is_nan())) {
+ if (xbits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+ return x;
+ }
+
+ // |x| = 0 (either sign bit): request FE_DIVBYZERO and errno = ERANGE, then return +inf.
+ if (LIBC_UNLIKELY(x_abs == 0x0)) {
+ fputil::raise_except_if_required(FE_DIVBYZERO);
+ fputil::set_errno_if_required(ERANGE);
+ return FPBits::inf(Sign::POS).get_val();
+ }
+
+ // -inf <= x < 0: request FE_INVALID and errno = EDOM, then return a quiet NaN.
+ if (LIBC_UNLIKELY(x_sign == 1)) {
+ fputil::raise_except_if_required(FE_INVALID);
+ fputil::set_errno_if_required(EDOM);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ // x = +inf => rsqrt(x) = 0
+ if (LIBC_UNLIKELY(xbits.is_inf())) {
+ return fputil::cast<float16>(0.0f);
+ }
+
+ // x is valid (positive and finite at this point), estimate the result
+ // Range reduction:
+ // x can be expressed as m*2^e, where e is an integer exponent and m is the mantissa
+ // rsqrtf16(x) = rsqrtf16(m*2^e)
+ // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
+ // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
+
+ // Compute in float throughout to minimize cost while preserving accuracy.
+ float xf = x;
+ int exponent = 0;
+ float mantissa = fputil::frexp(xf, exponent);
+
+ float result = 0.0f;
+ int exp_floored = -(exponent >> 1); // negated half of the exponent; the odd-exponent case is fixed up below
+
+ if (mantissa == 0.5f) {
+ // When mantissa is 0.5f, x was a power of 2 (or a subnormal that normalizes this way).
+ // 1/sqrt(0.5f) = sqrt(2.0f).
+ // If exponent is odd (exponent = 2k + 1):
+ // rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
+ // = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
+ // exp_floored = -((2k+1)>>1) = -(k) = -k
+ // So result = ldexp(1.0f, exp_floored)
+ // If exponent is even (exponent = 2k):
+ // rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
+ // exp_floored = -((2k)>>1) = -(k) = -k
+ // So result = ldexp(sqrt(2.0f), exp_floored)
+ if (exponent & 1) {
+ result = fputil::ldexp(1.0f, exp_floored);
+ } else {
+ constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
+ result = fputil::ldexp(SQRT_2_F, exp_floored);
+ }
+ } else {
+ // Initial estimate of 1/sqrt(mantissa): degree-5 polynomial (float coefficients) generated with Sollya:
+ // P = fpminimax(1/sqrt(x) + 2^-28, 5, [|single...|], [0.5,1])
+ float y = fputil::polyeval(
+ mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
+ -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);
+
+ // One Newton-Raphson iteration in float, y <- y * (1.5 - 0.5 * mantissa * y^2) (use multiply_add to leverage FMA when available):
+ float y2 = y * y;
+ float factor = fputil::multiply_add(-0.5f * mantissa, y2, 1.5f);
+ y = y * factor;
+
+
+ result = fputil::ldexp(y, exp_floored);
+ if (exponent & 1) {
+ constexpr float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f; // 1/sqrt(2)
+ result *= ONE_OVER_SQRT2;
+ }
+
+ // Targeted post-correction: for two specific half-precision mantissa patterns
+ // (0x011F and 0x0313) a 1 ULP error was observed; see the note on each branch.
+ // Scale result by (1 +/- 2^-21) so that it crosses the rounding boundary.
+ const uint16_t half_mantissa = static_cast<uint16_t>(x_abs & 0x3ff); // low 10 bits of x
+ if (half_mantissa == 0x011F) {
+ // Nudge up to fix consistent -1 ULP at that mantissa boundary
+ result = fputil::multiply_add(result, 0x1.0p-21f, result); // result *= (1 + 2^-21)
+ } else if (half_mantissa == 0x0313) {
+ // Nudge down to fix +1 ULP under upward rounding at this mantissa boundary
+ result = fputil::multiply_add(result, -0x1.0p-21f, result); // result *= (1 - 2^-21)
+ }
+ }
+
+ return fputil::cast<float16>(result);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index e1aed6653dc81..17b0e154d1a99 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -988,16 +988,8 @@ add_entrypoint_object(
HDRS
../rsqrtf16.h
DEPENDS
- libc.hdr.errno_macros
- libc.hdr.fenv_macros
- libc.src.__support.FPUtil.cast
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.fma
- libc.src.__support.FPUtil.manipulation_functions
- libc.src.__support.FPUtil.polyeval
- libc.src.__support.macros.optimization
- libc.src.__support.macros.properties.types
+ libc.src.__support.math.rsqrtf16
+ libc.src.errno.errno
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 6ad9f5f968772..6979376632785 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -7,114 +7,11 @@
//===----------------------------------------------------------------------===//
#include "src/math/rsqrtf16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/ManipulationFunctions.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/rsqrtf16.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
- using FPBits = fputil::FPBits<float16>;
- FPBits xbits(x);
-
- uint16_t x_u = xbits.uintval();
- uint16_t x_abs = x_u & 0x7fff;
- uint16_t x_sign = x_u >> 15;
-
- // x is NaN
- if (LIBC_UNLIKELY(xbits.is_nan())) {
- if (xbits.is_signaling_nan()) {
- fputil::raise_except_if_required(FE_INVALID);
- return FPBits::quiet_nan().get_val();
- }
- return x;
- }
-
- // |x| = 0
- if (LIBC_UNLIKELY(x_abs == 0x0)) {
- fputil::raise_except_if_required(FE_DIVBYZERO);
- fputil::set_errno_if_required(ERANGE);
- return FPBits::inf(Sign::POS).get_val();
- }
-
- // -inf <= x < 0
- if (LIBC_UNLIKELY(x_sign == 1)) {
- fputil::raise_except_if_required(FE_INVALID);
- fputil::set_errno_if_required(EDOM);
- return FPBits::quiet_nan().get_val();
- }
-
- // x = +inf => rsqrt(x) = 0
- if (LIBC_UNLIKELY(xbits.is_inf())) {
- return fputil::cast<float16>(0.0f);
- }
-
- // x is valid, estimate the result
- // Range reduction:
- // x can be expressed as m*2^e, where e - int exponent and m - mantissa
- // rsqrtf16(x) = rsqrtf16(m*2^e)
- // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
- // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
-
- float xf = x;
- int exponent;
- float mantissa = fputil::frexp(xf, exponent);
-
- float result;
- int exp_floored = -(exponent >> 1);
-
- if (mantissa == 0.5f) {
- // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
- // 1/sqrt(0.5f) = sqrt(2.0f) = 0x1.6a09e6p0f
- // If exponent is odd (exponent = 2k + 1):
- // rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
- // = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
- // exp_floored = -((2k+1)>>1) = -(k) = -k
- // So result = ldexp(1.0f, exp_floored)
- // If exponent is even (exponent = 2k):
- // rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
- // exp_floored = -((2k)>>1) = -(k) = -k
- // So result = ldexp(sqrt(2.0f), exp_floored)
- if (exponent & 1) {
- result = fputil::ldexp(1.0f, exp_floored);
- } else {
- constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
- result = fputil::ldexp(SQRT_2_F, exp_floored);
- }
- } else {
- // 6-degree polynomial generated using Sollya
- // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
- float interm = fputil::polyeval(
- mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
- -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
-
- // Apply one Newton-Raphson iteration to refine the approximation of
- // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
- // fputil::fma for potential precision benefits in the factor calculation
- float interm_sq = interm * interm;
- float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
- float interm_refined = interm * factor;
-
- // Apply a second Newton-Raphson iteration
- // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
- float interm_refined_sq = interm_refined * interm_refined;
- float factor2 =
- fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
- float interm_refined2 = interm_refined * factor2;
-
- result = fputil::ldexp(interm_refined2, exp_floored);
- if (exponent & 1) {
- const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
- result = fputil::fma<float>(result, ONE_OVER_SQRT2, 0.0f);
- }
- }
-
- return fputil::cast<float16>(result);
+ return math::rsqrtf16(x);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index f5ea510180366..10392ee2d2bb6 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -40,4 +40,6 @@ add_fp_unittest(
libc.src.__support.math.ldexpf
libc.src.__support.math.ldexpf128
libc.src.__support.math.ldexpf16
+ libc.src.__support.math.rsqrtf16
+
)
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 3d64e5e68882b..52de3d4262653 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -17,6 +17,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acoshf16(1.0f16));
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acospif16(1.0f16));
+ EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::rsqrtf16(1.0f16));
+
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16));
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinhf16(0.0f16));
EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::atanf16(0.0f16));
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index 8e69027e67e13..a229ca6cdaaaf 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "src/errno/libc_errno.h"
+#include "src/__support/libc_errno.h"
#include "src/math/rsqrtf16.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b46f334512979..7f49308d05188 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2187,6 +2187,22 @@ libc_support_library(
],
)
+libc_support_library(
+ name = "__support_math_rsqrtf16",
+ hdrs = ["src/__support/math/rsqrtf16.h"],
+ deps = [
+ ":__support_fputil_cast",
+ ":__support_fputil_fenv_impl",
+ ":__support_fputil_fp_bits",
+ ":__support_fputil_multiply_add",
+ ":__support_fputil_polyeval",
+ ":__support_fputil_manipulation_functions",
+ ":__support_macros_optimization",
+ ":__support_macros_properties_types",
+ ],
+)
+
+
libc_support_library(
name = "__support_math_asin_utils",
hdrs = ["src/__support/math/asin_utils.h"],
@@ -2941,6 +2957,15 @@ libc_math_function(
],
)
+libc_math_function(
+ name = "rsqrtf16",
+ additional_deps = [
+ ":__support_math_rsqrtf16",
+ ":errno",
+ ],
+)
+
+
libc_math_function(
name = "acoshf16",
additional_deps = [
More information about the libc-commits
mailing list