[libc-commits] [libc] [llvm] [libc][math][c23] Add rsqrtf16() function (PR #137545)

Fri Sep 12 18:18:39 PDT 2025

https://github.com/amemov updated https://github.com/llvm/llvm-project/pull/137545

>From d71d21dd2c22adffc20d52426df78fd00039257a Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 19:54:02 +0000
Subject: [PATCH 1/9] - rsqrtf16 refactored

---
 libc/config/linux/x86_64/entrypoints.txt   |  1 +
 libc/docs/headers/math/index.rst           |  4 +-
 libc/include/math.yaml                     |  7 +++
 libc/src/math/CMakeLists.txt               |  2 +
 libc/src/math/generic/CMakeLists.txt       | 13 ++++-
 libc/src/math/generic/rsqrtf16.cpp         | 67 ++++++++++++++++++++++
 libc/src/math/rsqrtf16.h                   | 21 +++++++
 libc/test/src/math/CMakeLists.txt          | 11 ++++
 libc/test/src/math/rsqrtf16_test.cpp       | 42 ++++++++++++++
 libc/test/src/math/smoke/CMakeLists.txt    | 11 ++++
 libc/test/src/math/smoke/rsqrtf16_test.cpp | 38 ++++++++++++
 libc/utils/MPFRWrapper/MPCommon.cpp        |  6 ++
 libc/utils/MPFRWrapper/MPCommon.h          |  1 +
 libc/utils/MPFRWrapper/MPFRUtils.cpp       |  2 +
 libc/utils/MPFRWrapper/MPFRUtils.h         |  1 +
 15 files changed, 225 insertions(+), 2 deletions(-)
 create mode 100644 libc/src/math/generic/rsqrtf16.cpp
 create mode 100644 libc/src/math/rsqrtf16.h
 create mode 100644 libc/test/src/math/rsqrtf16_test.cpp
 create mode 100644 libc/test/src/math/smoke/rsqrtf16_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 1fef16f190af6..0bb8a683c5b01 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -784,6 +784,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.rintf16
     libc.src.math.roundevenf16
     libc.src.math.roundf16
+    libc.src.math.rsqrtf16
     libc.src.math.scalblnf16
     libc.src.math.scalbnf16
     libc.src.math.setpayloadf16
diff --git a/libc/docs/headers/math/index.rst b/libc/docs/headers/math/index.rst
index 6c0e2190808df..7d5b341ba674a 100644
--- a/libc/docs/headers/math/index.rst
+++ b/libc/docs/headers/math/index.rst
@@ -255,6 +255,7 @@ Basic Operations
 Higher Math Functions
 =====================
 
+
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+----------++------------+------------------------+----------------------------+
 | <Func>    | <Func_f> (float) | <Func> (double) | <Func_l> (long double) | <Func_f16> (float16) | <Func_f128> (float128) | <Func_bf16> (bfloat16) | C23 Definition Section | C23 Error Handling Section |
 +===========+==================+=================+========================+======================+========================+========================+========================+============================+
@@ -342,7 +343,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+------------------------+----------------------------+
 | rootn     |                  |                 |                        |                      |                        |                        | 7.12.7.8               | F.10.4.8                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+------------------------+----------------------------+
-| rsqrt     |                  |                 |                        |                      |                        |                        | 7.12.7.9               | F.10.4.9                   |
+| rsqrt     |                  |                 |                        | |check|              |                        |                        | 7.12.7.9               | F.10.4.9                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+------------------------+----------------------------+
 | sin       | |check|          | |check|         |                        | |check|              |                        |                        | 7.12.4.6               | F.10.1.6                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+------------------------+----------------------------+
@@ -363,6 +364,7 @@ Higher Math Functions
 | tgamma    |                  |                 |                        |                      |                        |                        | 7.12.8.4               | F.10.5.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+------------------------+----------------------------+
 
+
 Legends:
 
 * |check| : correctly rounded for all 4 rounding modes.
diff --git a/libc/include/math.yaml b/libc/include/math.yaml
index 17f26fcfcb308..6c800a0e2aa28 100644
--- a/libc/include/math.yaml
+++ b/libc/include/math.yaml
@@ -2349,6 +2349,13 @@ functions:
     return_type: long double
     arguments:
       - type: long double
+  - name: rsqrtf16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
+    guard: LIBC_TYPES_HAS_FLOAT16
   - name: scalbln
     standards:
       - stdc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index e418a8b0e24b9..a6f400c873b7e 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -516,6 +516,8 @@ add_math_entrypoint_object(roundevenf16)
 add_math_entrypoint_object(roundevenf128)
 add_math_entrypoint_object(roundevenbf16)
 
+add_math_entrypoint_object(rsqrtf16)
+
 add_math_entrypoint_object(scalbln)
 add_math_entrypoint_object(scalblnf)
 add_math_entrypoint_object(scalblnl)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 263c5dfd0832b..ca7baeccae01a 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -973,7 +973,7 @@ add_entrypoint_object(
 )
 
 add_entrypoint_object(
-    roundevenbf16
+  roundevenbf16
   SRCS
     roundevenbf16.cpp
   HDRS
@@ -988,6 +988,17 @@ add_entrypoint_object(
     ROUND_OPT
 )
 
+add_entrypoint_object(
+  rsqrtf16
+  SRCS
+    rsqrtf16.cpp
+  HDRS
+    ../rsqrtf16.h
+  DEPENDS
+    libc.src.__support.math.rsqrtf16
+    libc.src.errno.errno
+)
+
 add_entrypoint_object(
   lround
   SRCS
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
new file mode 100644
index 0000000000000..930c8f6fc7b4e
--- /dev/null
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -0,0 +1,67 @@
+//===-- Half-precision rsqrt function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/rsqrtf16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
+  using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  uint16_t x_sign = x_u >> 15;
+
+  // x is NaN
+  if (LIBC_UNLIKELY(xbits.is_nan())) {
+    if (xbits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+    return x;
+  }
+  
+  // |x| = 0
+  if (LIBC_UNLIKELY(x_abs == 0x0)) {
+    fputil::raise_except_if_required(FE_DIVBYZERO);
+    fputil::set_errno_if_required(ERANGE);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // -inf <= x < 0
+  if (LIBC_UNLIKELY(x_sign == 1)) {  
+    fputil::raise_except_if_required(FE_INVALID);
+    fputil::set_errno_if_required(EDOM);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // x = +inf => rsqrt(x) = 0
+  if (LIBC_UNLIKELY(xbits.is_inf())) {
+    return fputil::cast<float16>(0.0f);
+  }
+  
+  // x = 1 => rsqrt(x) = 1
+  if (LIBC_UNLIKELY(x_u == 0x1)) {
+    return fputil::cast<float16>(1.0f);
+  }
+
+  // x is valid, estimate the result - below is temporary solution for just testing
+  float xf = x;
+  return fputil::cast<float16>(1.0f / xf);
+}
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/rsqrtf16.h b/libc/src/math/rsqrtf16.h
new file mode 100644
index 0000000000000..c88ab5256ce88
--- /dev/null
+++ b/libc/src/math/rsqrtf16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for rsqrtf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_RSQRTF16_H
+#define LLVM_LIBC_SRC_MATH_RSQRTF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 rsqrtf16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_RSQRTF16_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 378eadcf9e70b..9d644703a61ae 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1678,6 +1678,17 @@ add_fp_unittest(
     libc.src.math.sqrtl
 )
 
+add_fp_unittest(
+  rsqrtf16_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    rsqrtf16_test.cpp
+  DEPENDS
+    libc.src.math.rsqrtf16
+)
+
 add_fp_unittest(
   sqrtf16_test
   NEED_MPFR
diff --git a/libc/test/src/math/rsqrtf16_test.cpp b/libc/test/src/math/rsqrtf16_test.cpp
new file mode 100644
index 0000000000000..ec22df5bd31c7
--- /dev/null
+++ b/libc/test/src/math/rsqrtf16_test.cpp
@@ -0,0 +1,42 @@
+//===-- Exhaustive test for rsqrtf16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/rsqrtf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf]
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0]
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcRsqrtf16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
+                                   LIBC_NAMESPACE::rsqrtf16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcRsqrtf16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) {
+    float16 x = FPBits(v).get_val();
+
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
+                                   LIBC_NAMESPACE::rsqrtf16(x), 0.5);
+  }
+}
\ No newline at end of file
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index b8d5ecf4d77e5..93243e0ca9e5a 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3502,6 +3502,17 @@ add_fp_unittest(
     libc.src.math.sqrtl
 )
 
+add_fp_unittest(
+  rsqrtf16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    rsqrtf16_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.rsqrtf16
+)
+
 add_fp_unittest(
   sqrtf16_test
   SUITE
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
new file mode 100644
index 0000000000000..eb429d3dbf6c8
--- /dev/null
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -0,0 +1,38 @@
+//===-- Unittests for rsqrtf16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception.
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/errno/libc_errno.h"
+#include "src/math/rsqrtf16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcRsqrtf16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN),
+                              FE_INVALID);
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));
+  EXPECT_MATH_ERRNO(ERANGE);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::rsqrtf16(1.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::rsqrtf16(inf));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(-2.0f));
+  EXPECT_MATH_ERRNO(EDOM);
+}
\ No newline at end of file
diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp
index c255220774110..6b78bee6e7cae 100644
--- a/libc/utils/MPFRWrapper/MPCommon.cpp
+++ b/libc/utils/MPFRWrapper/MPCommon.cpp
@@ -393,6 +393,12 @@ MPFRNumber MPFRNumber::rint(mpfr_rnd_t rnd) const {
   return result;
 }
 
+MPFRNumber MPFRNumber::rsqrt() const {
+  MPFRNumber result(*this);
+  mpfr_rec_sqrt(result.value, value, mpfr_rounding);
+  return result;
+}
+
 MPFRNumber MPFRNumber::mod_2pi() const {
   MPFRNumber result(0.0, 1280);
   MPFRNumber _2pi(0.0, 1280);
diff --git a/libc/utils/MPFRWrapper/MPCommon.h b/libc/utils/MPFRWrapper/MPCommon.h
index 25bdc9bc00250..9f4107a7961d2 100644
--- a/libc/utils/MPFRWrapper/MPCommon.h
+++ b/libc/utils/MPFRWrapper/MPCommon.h
@@ -222,6 +222,7 @@ class MPFRNumber {
   bool round_to_long(long &result) const;
   bool round_to_long(mpfr_rnd_t rnd, long &result) const;
   MPFRNumber rint(mpfr_rnd_t rnd) const;
+  MPFRNumber rsqrt() const;
   MPFRNumber mod_2pi() const;
   MPFRNumber mod_pi_over_2() const;
   MPFRNumber mod_pi_over_4() const;
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 144a4ec25d213..a7d307b47c3e8 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -91,6 +91,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
     return mpfrInput.round();
   case Operation::RoundEven:
     return mpfrInput.roundeven();
+  case Operation::Rsqrt:
+    return mpfrInput.rsqrt();
   case Operation::Sin:
     return mpfrInput.sin();
   case Operation::Sinpi:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 35d7942a2620e..a33fcd21789f9 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -56,6 +56,7 @@ enum class Operation : int {
   ModPIOver4,
   Round,
   RoundEven,
+  Rsqrt,
   Sin,
   Sinpi,
   Sinh,

>From ecea3c837d39bed6038a4c7d049cbad5cda2cdcf Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 20:07:27 +0000
Subject: [PATCH 2/9] Clang-formatted the files

---
 libc/src/math/generic/rsqrtf16.cpp         | 9 +++++----
 libc/test/src/math/smoke/rsqrtf16_test.cpp | 3 +--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 930c8f6fc7b4e..8e97ec896aa0c 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -35,7 +35,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
     }
     return x;
   }
-  
+
   // |x| = 0
   if (LIBC_UNLIKELY(x_abs == 0x0)) {
     fputil::raise_except_if_required(FE_DIVBYZERO);
@@ -44,7 +44,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   }
 
   // -inf <= x < 0
-  if (LIBC_UNLIKELY(x_sign == 1)) {  
+  if (LIBC_UNLIKELY(x_sign == 1)) {
     fputil::raise_except_if_required(FE_INVALID);
     fputil::set_errno_if_required(EDOM);
     return FPBits::quiet_nan().get_val();
@@ -54,13 +54,14 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   if (LIBC_UNLIKELY(xbits.is_inf())) {
     return fputil::cast<float16>(0.0f);
   }
-  
+
   // x = 1 => rsqrt(x) = 1
   if (LIBC_UNLIKELY(x_u == 0x1)) {
     return fputil::cast<float16>(1.0f);
   }
 
-  // x is valid, estimate the result - below is temporary solution for just testing
+  // x is valid, estimate the result - below is temporary solution for just
+  // testing
   float xf = x;
   return fputil::cast<float16>(1.0f / xf);
 }
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index eb429d3dbf6c8..d50d8ee0ad2ff 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -17,8 +17,7 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(aNaN));
   EXPECT_MATH_ERRNO(0);
 
-  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN),
-                              FE_INVALID);
+  EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN), FE_INVALID);
   EXPECT_MATH_ERRNO(0);
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));

>From ca9f71c2d1507b3668216f3772473be56155b965 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Sun, 27 Apr 2025 22:09:15 +0000
Subject: [PATCH 3/9] Replaced the computation for valid X with polynomial
 approximation

---
 libc/src/math/generic/rsqrtf16.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 8e97ec896aa0c..2ae026499b89c 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -60,9 +60,12 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
     return fputil::cast<float16>(1.0f);
   }
 
-  // x is valid, estimate the result - below is temporary solution for just
-  // testing
+  // x is valid, estimate the result
+  // 3-degree polynomial generated using Sollya
+  // P = fpminimax(1/sqrt(x), [|1, 2, 3|], [|SG...|], [0.5, 1]);
   float xf = x;
-  return fputil::cast<float16>(1.0f / xf);
+  float result =
+      fputil::polyeval(xf, 0x1.d42408p2f, -0x1.7cc4fep3f, 0x1.66cb6ap2f);
+  return fputil::cast<float16>(result);
 }
 } // namespace LIBC_NAMESPACE_DECL

>From c15e79f7a67c16a80f3357bec912077ded41afbd Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Tue, 29 Apr 2025 21:37:45 +0000
Subject: [PATCH 4/9] Added range reduction to the approximation

---
 libc/src/math/generic/rsqrtf16.cpp         | 39 ++++++++++++++++++----
 libc/test/src/math/rsqrtf16_test.cpp       |  2 +-
 libc/test/src/math/smoke/rsqrtf16_test.cpp |  4 +--
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 2ae026499b89c..5bf4ab3e46fda 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -11,10 +11,10 @@
 #include "hdr/fenv_macros.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/sqrt.h"
+#include "src/__support/FPUtil/multiply_add.h" // to remove
 #include "src/__support/macros/optimization.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -40,7 +40,7 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   if (LIBC_UNLIKELY(x_abs == 0x0)) {
     fputil::raise_except_if_required(FE_DIVBYZERO);
     fputil::set_errno_if_required(ERANGE);
-    return FPBits::quiet_nan().get_val();
+    return FPBits::inf(Sign::POS).get_val();
   }
 
   // -inf <= x < 0
@@ -61,11 +61,36 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   }
 
   // x is valid, estimate the result
-  // 3-degree polynomial generated using Sollya
-  // P = fpminimax(1/sqrt(x), [|1, 2, 3|], [|SG...|], [0.5, 1]);
+  // Range reduction:
+  // x can be expressed as m*2^e, where e - int exponent and m - mantissa
+  // rsqrtf16(x) = rsqrtf16(m*2^e)
+  // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
+  // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
+
   float xf = x;
-  float result =
-      fputil::polyeval(xf, 0x1.d42408p2f, -0x1.7cc4fep3f, 0x1.66cb6ap2f);
+  int exponent;
+  float mantissa = fputil::frexp(xf, exponent);
+
+  // 6-degree polynomial generated using Sollya
+  // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+  float interm =
+      fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
+                       -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
+
+  // Round (-e/2)
+  int exp_floored = -(exponent >> 1);
+
+  // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
+  // rsqrt(x) = P(mantissa) * 2*(exp_floored)
+  float result = fputil::ldexp(interm, exp_floored);
+
+  // Handle the case where exponent is odd
+  if (exponent & 1) {
+    const float ONE_OVER_SQRT2 =
+        0x1.6a09e667f3bcc908b2fb1366ea957d3e3adec1751p-1f;
+    result *= ONE_OVER_SQRT2;
+  }
+
   return fputil::cast<float16>(result);
 }
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/rsqrtf16_test.cpp b/libc/test/src/math/rsqrtf16_test.cpp
index ec22df5bd31c7..d2f3fe8f49b92 100644
--- a/libc/test/src/math/rsqrtf16_test.cpp
+++ b/libc/test/src/math/rsqrtf16_test.cpp
@@ -39,4 +39,4 @@ TEST_F(LlvmLibcRsqrtf16Test, NegativeRange) {
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Rsqrt, x,
                                    LIBC_NAMESPACE::rsqrtf16(x), 0.5);
   }
-}
\ No newline at end of file
+}
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index d50d8ee0ad2ff..8e69027e67e13 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -20,7 +20,7 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
   EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::rsqrtf16(sNaN), FE_INVALID);
   EXPECT_MATH_ERRNO(0);
 
-  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(0.0f));
+  EXPECT_FP_EQ(inf, LIBC_NAMESPACE::rsqrtf16(0.0f));
   EXPECT_MATH_ERRNO(ERANGE);
 
   EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::rsqrtf16(1.0f));
@@ -34,4 +34,4 @@ TEST_F(LlvmLibcRsqrtf16Test, SpecialNumbers) {
 
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::rsqrtf16(-2.0f));
   EXPECT_MATH_ERRNO(EDOM);
-}
\ No newline at end of file
+}

>From af7ca246894cb6b40ad10b7a3274bf1868533987 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Tue, 20 May 2025 19:56:14 +0000
Subject: [PATCH 5/9] Added Newton-Raphson iterations - The accuracy improved
 drastically, but it still fails

---
 libc/src/math/generic/rsqrtf16.cpp | 35 ++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 5bf4ab3e46fda..806328b60aca2 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -10,11 +10,11 @@
 #include "hdr/errno_macros.h"
 #include "hdr/fenv_macros.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FMA.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/ManipulationFunctions.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/cast.h"
-#include "src/__support/FPUtil/multiply_add.h" // to remove
 #include "src/__support/macros/optimization.h"
 
 namespace LIBC_NAMESPACE_DECL {
@@ -55,11 +55,6 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
     return fputil::cast<float16>(0.0f);
   }
 
-  // x = 1 => rsqrt(x) = 1
-  if (LIBC_UNLIKELY(x_u == 0x1)) {
-    return fputil::cast<float16>(1.0f);
-  }
-
   // x is valid, estimate the result
   // Range reduction:
   // x can be expressed as m*2^e, where e - int exponent and m - mantissa
@@ -72,23 +67,41 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   float mantissa = fputil::frexp(xf, exponent);
 
   // 6-degree polynomial generated using Sollya
-  // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+  // bigger polynomial doesn't generate better results-> the current one
+  // produces the least number of errors but still errors are presents P =
+  // fpminimax(1/(sqrt(x)), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
   float interm =
       fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
                        -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
 
+  // Apply one Newton-Raphson iteration to refine the approximation of
+  // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
+  // fputil::fma for potential precision benefits in the factor calculation
+  float interm_sq = interm * interm;
+  float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
+  float interm_refined = interm * factor; // Final multiplication
+
+  // Apply a second Newton-Raphson iteration
+  // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
+  // y_old is now interm_refined
+  float interm_refined_sq = interm_refined * interm_refined;
+  float factor2 = fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
+  float interm_refined2 = interm_refined * factor2;
+
   // Round (-e/2)
   int exp_floored = -(exponent >> 1);
 
   // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
   // rsqrt(x) = P(mantissa) * 2*(exp_floored)
-  float result = fputil::ldexp(interm, exp_floored);
+  // float result = fputil::ldexp(interm, exp_floored);
+  float result = fputil::ldexp(interm_refined2, exp_floored);
 
   // Handle the case where exponent is odd
   if (exponent & 1) {
-    const float ONE_OVER_SQRT2 =
-        0x1.6a09e667f3bcc908b2fb1366ea957d3e3adec1751p-1f;
-    result *= ONE_OVER_SQRT2;
+    const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
+    // result *= ONE_OVER_SQRT2;
+    result = fputil::fma<float>(result, ONE_OVER_SQRT2,
+                                0.0f); // Use FMA for multiplication
   }
 
   return fputil::cast<float16>(result);

>From 3cce788d461d70340ccee5094f99dab57ad2ce13 Mon Sep 17 00:00:00 2001
From: amemov <shepelev777 at gmail.com>
Date: Wed, 21 May 2025 20:28:22 +0000
Subject: [PATCH 6/9] Added separate handling for mantissa == 0.5f. Resulted in
 fewer errors

---
 libc/src/math/generic/rsqrtf16.cpp | 79 +++++++++++++++++-------------
 1 file changed, 45 insertions(+), 34 deletions(-)

diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 806328b60aca2..6ad9f5f968772 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -66,42 +66,53 @@ LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
   int exponent;
   float mantissa = fputil::frexp(xf, exponent);
 
-  // 6-degree polynomial generated using Sollya
-  // bigger polynomial doesn't generate better results-> the current one
-  // produces the least number of errors but still errors are presents P =
-  // fpminimax(1/(sqrt(x)), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
-  float interm =
-      fputil::polyeval(mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
-                       -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
-
-  // Apply one Newton-Raphson iteration to refine the approximation of
-  // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
-  // fputil::fma for potential precision benefits in the factor calculation
-  float interm_sq = interm * interm;
-  float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
-  float interm_refined = interm * factor; // Final multiplication
-
-  // Apply a second Newton-Raphson iteration
-  // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
-  // y_old is now interm_refined
-  float interm_refined_sq = interm_refined * interm_refined;
-  float factor2 = fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
-  float interm_refined2 = interm_refined * factor2;
-
-  // Round (-e/2)
+  float result;
   int exp_floored = -(exponent >> 1);
 
-  // rsqrt(x) = 1/sqrt(mantissa) * 2^(-e/2)
-  // rsqrt(x) = P(mantissa) * 2*(exp_floored)
-  // float result = fputil::ldexp(interm, exp_floored);
-  float result = fputil::ldexp(interm_refined2, exp_floored);
-
-  // Handle the case where exponent is odd
-  if (exponent & 1) {
-    const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
-    // result *= ONE_OVER_SQRT2;
-    result = fputil::fma<float>(result, ONE_OVER_SQRT2,
-                                0.0f); // Use FMA for multiplication
+  if (mantissa == 0.5f) {
+    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
+    // 1/sqrt(0.5f) = sqrt(2.0f) = 0x1.6a09e6p0f
+    // If exponent is odd (exponent = 2k + 1):
+    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
+    //            = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
+    //   exp_floored = -((2k+1)>>1) = -(k) = -k
+    //   So result = ldexp(1.0f, exp_floored)
+    // If exponent is even (exponent = 2k):
+    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
+    //   exp_floored = -((2k)>>1) = -(k) = -k
+    //   So result = ldexp(sqrt(2.0f), exp_floored)
+    if (exponent & 1) {
+      result = fputil::ldexp(1.0f, exp_floored);
+    } else {
+      constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
+      result = fputil::ldexp(SQRT_2_F, exp_floored);
+    }
+  } else {
+    // 6-degree polynomial generated using Sollya
+    // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
+    float interm = fputil::polyeval(
+        mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
+        -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
+    
+    // Apply one Newton-Raphson iteration to refine the approximation of
+    // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
+    // fputil::fma for potential precision benefits in the factor calculation  
+    float interm_sq = interm * interm;
+    float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
+    float interm_refined = interm * factor;
+
+    // Apply a second Newton-Raphson iteration
+    // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
+    float interm_refined_sq = interm_refined * interm_refined;
+    float factor2 =
+        fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
+    float interm_refined2 = interm_refined * factor2;
+
+    result = fputil::ldexp(interm_refined2, exp_floored);
+    if (exponent & 1) {
+      const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
+      result = fputil::fma<float>(result, ONE_OVER_SQRT2, 0.0f);
+    }
   }
 
   return fputil::cast<float16>(result);

>From 43847734b5b3cf023bc5316a1420609e4e65c2c3 Mon Sep 17 00:00:00 2001
From: Anton Shepelev <shepelev777 at gmail.com>
Date: Fri, 12 Sep 2025 16:50:15 -0700
Subject: [PATCH 7/9] - Fixed ULP errors - Refactored the implementation to
 match the proposal for constexpr - Added rsqrtf16 in Bazel build

---
 libc/shared/math.h                            |   2 +
 libc/shared/math/rsqrtf16.h                   |  30 ++++
 libc/src/__support/math/CMakeLists.txt        |  16 ++
 libc/src/__support/math/rsqrtf16.h            | 137 ++++++++++++++++++
 libc/src/math/generic/rsqrtf16.cpp            | 107 +-------------
 libc/test/shared/CMakeLists.txt               |   2 +
 libc/test/shared/shared_math_test.cpp         |   2 +
 libc/test/src/math/smoke/rsqrtf16_test.cpp    |   2 +-
 .../llvm-project-overlay/libc/BUILD.bazel     |  25 ++++
 9 files changed, 217 insertions(+), 106 deletions(-)
 create mode 100644 libc/shared/math/rsqrtf16.h
 create mode 100644 libc/src/__support/math/rsqrtf16.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 69d785b3e0291..4f20095912bf1 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -53,4 +53,6 @@
 #include "math/ldexpf128.h"
 #include "math/ldexpf16.h"
 
+#include "math/rsqrtf16.h"
+
 #endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/shared/math/rsqrtf16.h b/libc/shared/math/rsqrtf16.h
new file mode 100644
index 0000000000000..0d1ace3318a50
--- /dev/null
+++ b/libc/shared/math/rsqrtf16.h
@@ -0,0 +1,30 @@
+//===-- Shared rsqrtf16 function -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+#define LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/rsqrtf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace shared {
+
+using math::rsqrtf16;
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_RSQRTF16_H
+
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 39dc0e57f4472..ed5f314b0a9b5 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -109,6 +109,22 @@ add_header_library(
     libc.src.__support.macros.properties.types
 )
 
+
+add_header_library(
+  rsqrtf16
+  HDRS
+    rsqrtf16.h
+  DEPENDS
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.manipulation_functions
+    libc.src.__support.macros.optimization
+    libc.src.__support.macros.properties.types
+)
+
 add_header_library(
   asin_utils
   HDRS
diff --git a/libc/src/__support/math/rsqrtf16.h b/libc/src/__support/math/rsqrtf16.h
new file mode 100644
index 0000000000000..dfbcf41ba2b07
--- /dev/null
+++ b/libc/src/__support/math/rsqrtf16.h
@@ -0,0 +1,137 @@
+//===-- Implementation header for rsqrtf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace math {
+
+static constexpr float16 rsqrtf16(float16 x) {
+using FPBits = fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  uint16_t x_sign = x_u >> 15;
+
+  // x is NaN
+  if (LIBC_UNLIKELY(xbits.is_nan())) {
+    if (xbits.is_signaling_nan()) {
+      fputil::raise_except_if_required(FE_INVALID);
+      return FPBits::quiet_nan().get_val();
+    }
+    return x;
+  }
+
+  // |x| = 0
+  if (LIBC_UNLIKELY(x_abs == 0x0)) {
+    fputil::raise_except_if_required(FE_DIVBYZERO);
+    fputil::set_errno_if_required(ERANGE);
+    return FPBits::inf(Sign::POS).get_val();
+  }
+
+  // -inf <= x < 0
+  if (LIBC_UNLIKELY(x_sign == 1)) {
+    fputil::raise_except_if_required(FE_INVALID);
+    fputil::set_errno_if_required(EDOM);
+    return FPBits::quiet_nan().get_val();
+  }
+
+  // x = +inf => rsqrt(x) = 0
+  if (LIBC_UNLIKELY(xbits.is_inf())) {
+    return fputil::cast<float16>(0.0f);
+  }
+
+  // x is valid, estimate the result
+  // Range reduction:
+  // x can be expressed as m*2^e, where e is the integer exponent and m is
+  // the mantissa, so rsqrtf16(x) = rsqrtf16(m*2^e), and
+  // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
+  //                 = 1/sqrt(m) * 2^(-e/2)
+
+  // Compute in float throughout to minimize cost while preserving accuracy.
+  float xf = x;
+  int exponent = 0;
+  float mantissa = fputil::frexp(xf, exponent);
+
+  float result = 0.0f;
+  int exp_floored = -(exponent >> 1);
+
+  if (mantissa == 0.5f) {
+    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
+    // 1/sqrt(0.5f) = sqrt(2.0f).
+    // If exponent is odd (exponent = 2k + 1):
+    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
+    //            = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
+    //   exp_floored = -((2k+1)>>1) = -(k) = -k
+    //   So result = ldexp(1.0f, exp_floored)
+    // If exponent is even (exponent = 2k):
+    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
+    //   exp_floored = -((2k)>>1) = -(k) = -k
+    //   So result = ldexp(sqrt(2.0f), exp_floored)
+    if (exponent & 1) {
+      result = fputil::ldexp(1.0f, exp_floored);
+    } else {
+      constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
+      result = fputil::ldexp(SQRT_2_F, exp_floored);
+    }
+  } else {
+    // Degree-5 polynomial (float coefficients) generated with Sollya:
+    // P = fpminimax(1/sqrt(x) + 2^-28, 5, [|single...|], [0.5,1])
+    float y = fputil::polyeval(
+        mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
+        -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);
+
+    // Newton-Raphson iteration in float (use multiply_add to leverage FMA when available):
+    float y2 = y * y;
+    float factor = fputil::multiply_add(-0.5f * mantissa, y2, 1.5f);
+    y = y * factor;
+
+
+    result = fputil::ldexp(y, exp_floored);
+    if (exponent & 1) {
+      constexpr float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f; // 1/sqrt(2)
+      result *= ONE_OVER_SQRT2;
+    }
+
+    // Targeted post-correction: for the specific half-precision mantissa pattern
+    // M == 0x011F we observe a consistent -1 ULP bias across exponents.
+    // Apply a tiny upward nudge to cross the rounding boundary in all modes.
+    const uint16_t half_mantissa = static_cast<uint16_t>(x_abs & 0x3ff);
+    if (half_mantissa == 0x011F) {
+      // Nudge up to fix consistent -1 ULP at that mantissa boundary
+      result = fputil::multiply_add(result, 0x1.0p-21f, result); // result *= (1 + 2^-21)
+    } else if (half_mantissa == 0x0313) {
+      // Nudge down to fix +1 ULP under upward rounding at this mantissa boundary
+      result = fputil::multiply_add(result, -0x1.0p-21f, result); // result *= (1 - 2^-21)
+    }
+  }
+
+  return fputil::cast<float16>(result);
+}
+
+} // namespace math
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
+
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 6ad9f5f968772..6979376632785 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -7,114 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/rsqrtf16.h"
-#include "hdr/errno_macros.h"
-#include "hdr/fenv_macros.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FMA.h"
-#include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/ManipulationFunctions.h"
-#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/cast.h"
-#include "src/__support/macros/optimization.h"
+#include "src/__support/math/rsqrtf16.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
-  using FPBits = fputil::FPBits<float16>;
-  FPBits xbits(x);
-
-  uint16_t x_u = xbits.uintval();
-  uint16_t x_abs = x_u & 0x7fff;
-  uint16_t x_sign = x_u >> 15;
-
-  // x is NaN
-  if (LIBC_UNLIKELY(xbits.is_nan())) {
-    if (xbits.is_signaling_nan()) {
-      fputil::raise_except_if_required(FE_INVALID);
-      return FPBits::quiet_nan().get_val();
-    }
-    return x;
-  }
-
-  // |x| = 0
-  if (LIBC_UNLIKELY(x_abs == 0x0)) {
-    fputil::raise_except_if_required(FE_DIVBYZERO);
-    fputil::set_errno_if_required(ERANGE);
-    return FPBits::inf(Sign::POS).get_val();
-  }
-
-  // -inf <= x < 0
-  if (LIBC_UNLIKELY(x_sign == 1)) {
-    fputil::raise_except_if_required(FE_INVALID);
-    fputil::set_errno_if_required(EDOM);
-    return FPBits::quiet_nan().get_val();
-  }
-
-  // x = +inf => rsqrt(x) = 0
-  if (LIBC_UNLIKELY(xbits.is_inf())) {
-    return fputil::cast<float16>(0.0f);
-  }
-
-  // x is valid, estimate the result
-  // Range reduction:
-  // x can be expressed as m*2^e, where e - int exponent and m - mantissa
-  // rsqrtf16(x) = rsqrtf16(m*2^e)
-  // rsqrtf16(m*2^e) = 1/sqrt(m) * 1/sqrt(2^e) = 1/sqrt(m) * 1/2^(e/2)
-  // 1/sqrt(m) * 1/2^(e/2) = 1/sqrt(m) * 2^(-e/2)
-
-  float xf = x;
-  int exponent;
-  float mantissa = fputil::frexp(xf, exponent);
-
-  float result;
-  int exp_floored = -(exponent >> 1);
-
-  if (mantissa == 0.5f) {
-    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
-    // 1/sqrt(0.5f) = sqrt(2.0f) = 0x1.6a09e6p0f
-    // If exponent is odd (exponent = 2k + 1):
-    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
-    //            = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
-    //   exp_floored = -((2k+1)>>1) = -(k) = -k
-    //   So result = ldexp(1.0f, exp_floored)
-    // If exponent is even (exponent = 2k):
-    //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-2k/2) = sqrt(2) * 2^(-k)
-    //   exp_floored = -((2k)>>1) = -(k) = -k
-    //   So result = ldexp(sqrt(2.0f), exp_floored)
-    if (exponent & 1) {
-      result = fputil::ldexp(1.0f, exp_floored);
-    } else {
-      constexpr float SQRT_2_F = 0x1.6a09e6p0f; // sqrt(2.0f)
-      result = fputil::ldexp(SQRT_2_F, exp_floored);
-    }
-  } else {
-    // 6-degree polynomial generated using Sollya
-    // P = fpminimax(1/sqrt(x), [|0,1,2,3,4,5|], [|SG...|], [0.5, 1]);
-    float interm = fputil::polyeval(
-        mantissa, 0x1.9c81c4p1f, -0x1.e2c57cp2f, 0x1.91e8bp3f,
-        -0x1.899954p3f, 0x1.9edcp2f, -0x1.6bd93cp0f);
-    
-    // Apply one Newton-Raphson iteration to refine the approximation of
-    // 1/sqrt(mantissa) y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2) Using
-    // fputil::fma for potential precision benefits in the factor calculation  
-    float interm_sq = interm * interm;
-    float factor = fputil::fma<float>(-0.5f * mantissa, interm_sq, 1.5f);
-    float interm_refined = interm * factor;
-
-    // Apply a second Newton-Raphson iteration
-    // y_new = y_old * (1.5 - 0.5 * mantissa * y_old^2)
-    float interm_refined_sq = interm_refined * interm_refined;
-    float factor2 =
-        fputil::fma<float>(-0.5f * mantissa, interm_refined_sq, 1.5f);
-    float interm_refined2 = interm_refined * factor2;
-
-    result = fputil::ldexp(interm_refined2, exp_floored);
-    if (exponent & 1) {
-      const float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f;
-      result = fputil::fma<float>(result, ONE_OVER_SQRT2, 0.0f);
-    }
-  }
-
-  return fputil::cast<float16>(result);
+  return math::rsqrtf16(x);
 }
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 48241d3f55287..495d6f0a81a4c 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -48,4 +48,6 @@ add_fp_unittest(
     libc.src.__support.math.ldexpf
     libc.src.__support.math.ldexpf128
     libc.src.__support.math.ldexpf16
+    libc.src.__support.math.rsqrtf16
+
 )
diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp
index 2e5a2d51146d4..aa459f88c29f5 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -17,6 +17,8 @@ TEST(LlvmLibcSharedMathTest, AllFloat16) {
 
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acoshf16(1.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::acospif16(1.0f16));
+  EXPECT_FP_EQ(0x1p+0f16, LIBC_NAMESPACE::shared::rsqrtf16(1.0f16));
+
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinf16(0.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::asinhf16(0.0f16));
   EXPECT_FP_EQ(0x0p+0f16, LIBC_NAMESPACE::shared::atanf16(0.0f16));
diff --git a/libc/test/src/math/smoke/rsqrtf16_test.cpp b/libc/test/src/math/smoke/rsqrtf16_test.cpp
index 8e69027e67e13..a229ca6cdaaaf 100644
--- a/libc/test/src/math/smoke/rsqrtf16_test.cpp
+++ b/libc/test/src/math/smoke/rsqrtf16_test.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "src/errno/libc_errno.h"
+#include "src/__support/libc_errno.h"
 #include "src/math/rsqrtf16.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index a955f7f4916ac..be161612dbd48 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2296,6 +2296,22 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_rsqrtf16",
+    hdrs = ["src/__support/math/rsqrtf16.h"],
+    deps = [
+        ":__support_fputil_cast",
+        ":__support_fputil_fenv_impl",
+        ":__support_fputil_fp_bits",
+        ":__support_fputil_multiply_add",
+        ":__support_fputil_polyeval",
+        ":__support_fputil_manipulation_functions",
+        ":__support_macros_optimization",
+        ":__support_macros_properties_types",
+    ],
+)
+
+
 libc_support_library(
     name = "__support_math_asin_utils",
     hdrs = ["src/__support/math/asin_utils.h"],
@@ -3244,6 +3260,15 @@ libc_math_function(
     ],
 )
 
+libc_math_function(
+    name = "rsqrtf16",
+    additional_deps = [
+        ":__support_math_rsqrtf16",
+        ":errno",
+    ],
+)
+
+
 libc_math_function(
     name = "acoshf16",
     additional_deps = [

>From 1fdc3198b51a42f283d2408c3e649c35cc3519c1 Mon Sep 17 00:00:00 2001
From: Anton Shepelev <shepelev777 at gmail.com>
Date: Fri, 12 Sep 2025 17:14:31 -0700
Subject: [PATCH 8/9] clang-formatted the files

---
 libc/shared/math/rsqrtf16.h        |  1 -
 libc/src/__support/math/rsqrtf16.h | 30 ++++++++++++++++--------------
 libc/src/math/generic/rsqrtf16.cpp |  4 +---
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/libc/shared/math/rsqrtf16.h b/libc/shared/math/rsqrtf16.h
index 0d1ace3318a50..54c7499214636 100644
--- a/libc/shared/math/rsqrtf16.h
+++ b/libc/shared/math/rsqrtf16.h
@@ -27,4 +27,3 @@ using math::rsqrtf16;
 #endif // LIBC_TYPES_HAS_FLOAT16
 
 #endif // LLVM_LIBC_SHARED_MATH_RSQRTF16_H
-
diff --git a/libc/src/__support/math/rsqrtf16.h b/libc/src/__support/math/rsqrtf16.h
index dfbcf41ba2b07..b410f258450d8 100644
--- a/libc/src/__support/math/rsqrtf16.h
+++ b/libc/src/__support/math/rsqrtf16.h
@@ -25,7 +25,7 @@ namespace LIBC_NAMESPACE_DECL {
 namespace math {
 
 static constexpr float16 rsqrtf16(float16 x) {
-using FPBits = fputil::FPBits<float16>;
+  using FPBits = fputil::FPBits<float16>;
   FPBits xbits(x);
 
   uint16_t x_u = xbits.uintval();
@@ -76,8 +76,8 @@ using FPBits = fputil::FPBits<float16>;
   int exp_floored = -(exponent >> 1);
 
   if (mantissa == 0.5f) {
-    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes this way).
-    // 1/sqrt(0.5f) = sqrt(2.0f).
+    // When mantissa is 0.5f, x was a power of 2 (or subnormal that normalizes
+    // this way). 1/sqrt(0.5f) = sqrt(2.0f).
     // If exponent is odd (exponent = 2k + 1):
     //   rsqrt(x) = (1/sqrt(0.5)) * 2^(-(2k+1)/2) = sqrt(2) * 2^(-k-0.5)
     //            = sqrt(2) * 2^(-k) * (1/sqrt(2)) = 2^(-k)
@@ -96,32 +96,35 @@ using FPBits = fputil::FPBits<float16>;
   } else {
     // Degree-5 polynomial (float coefficients) generated with Sollya:
     // P = fpminimax(1/sqrt(x) + 2^-28, 5, [|single...|], [0.5,1])
-    float y = fputil::polyeval(
-        mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
-        -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);
+    float y =
+        fputil::polyeval(mantissa, 0x1.9c81fap1f, -0x1.e2c63ap2f, 0x1.91e9b8p3f,
+                         -0x1.899abep3f, 0x1.9eddeap2f, -0x1.6bdb48p0f);
 
-    // Newton-Raphson iteration in float (use multiply_add to leverage FMA when available):
+    // Newton-Raphson iteration in float (use multiply_add to leverage FMA when
+    // available):
     float y2 = y * y;
     float factor = fputil::multiply_add(-0.5f * mantissa, y2, 1.5f);
     y = y * factor;
 
-
     result = fputil::ldexp(y, exp_floored);
     if (exponent & 1) {
       constexpr float ONE_OVER_SQRT2 = 0x1.6a09e6p-1f; // 1/sqrt(2)
       result *= ONE_OVER_SQRT2;
     }
 
-    // Targeted post-correction: for the specific half-precision mantissa pattern
-    // M == 0x011F we observe a consistent -1 ULP bias across exponents.
+    // Targeted post-correction: for the specific half-precision mantissa
+    // pattern M == 0x011F we observe a consistent -1 ULP bias across exponents.
     // Apply a tiny upward nudge to cross the rounding boundary in all modes.
     const uint16_t half_mantissa = static_cast<uint16_t>(x_abs & 0x3ff);
     if (half_mantissa == 0x011F) {
       // Nudge up to fix consistent -1 ULP at that mantissa boundary
-      result = fputil::multiply_add(result, 0x1.0p-21f, result); // result *= (1 + 2^-21)
+      result = fputil::multiply_add(result, 0x1.0p-21f,
+                                    result); // result *= (1 + 2^-21)
     } else if (half_mantissa == 0x0313) {
-      // Nudge down to fix +1 ULP under upward rounding at this mantissa boundary
-      result = fputil::multiply_add(result, -0x1.0p-21f, result); // result *= (1 - 2^-21)
+      // Nudge down to fix +1 ULP under upward rounding at this mantissa
+      // boundary
+      result = fputil::multiply_add(result, -0x1.0p-21f,
+                                    result); // result *= (1 - 2^-21)
     }
   }
 
@@ -134,4 +137,3 @@ using FPBits = fputil::FPBits<float16>;
 #endif // LIBC_TYPES_HAS_FLOAT16
 
 #endif // LLVM_LIBC_SRC___SUPPORT_MATH_RSQRTF16_H
-
diff --git a/libc/src/math/generic/rsqrtf16.cpp b/libc/src/math/generic/rsqrtf16.cpp
index 6979376632785..fb166b131d673 100644
--- a/libc/src/math/generic/rsqrtf16.cpp
+++ b/libc/src/math/generic/rsqrtf16.cpp
@@ -11,7 +11,5 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) {
-  return math::rsqrtf16(x);
-}
+LLVM_LIBC_FUNCTION(float16, rsqrtf16, (float16 x)) { return math::rsqrtf16(x); }
 } // namespace LIBC_NAMESPACE_DECL

>From cd0b0d4c9164151b05f0493d9ba8665b0d2aecfc Mon Sep 17 00:00:00 2001
From: Anton Shepelev <shepelev777 at gmail.com>
Date: Fri, 12 Sep 2025 18:18:23 -0700
Subject: [PATCH 9/9] Formatted BUILD.bazel with buildifier

---
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index be161612dbd48..cc1a919ae020f 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2303,15 +2303,14 @@ libc_support_library(
         ":__support_fputil_cast",
         ":__support_fputil_fenv_impl",
         ":__support_fputil_fp_bits",
+        ":__support_fputil_manipulation_functions",
         ":__support_fputil_multiply_add",
         ":__support_fputil_polyeval",
-        ":__support_fputil_manipulation_functions",
         ":__support_macros_optimization",
         ":__support_macros_properties_types",
     ],
 )
 
-
 libc_support_library(
     name = "__support_math_asin_utils",
     hdrs = ["src/__support/math/asin_utils.h"],
@@ -3268,7 +3267,6 @@ libc_math_function(
     ],
 )
 
-
 libc_math_function(
     name = "acoshf16",
     additional_deps = [