[libc-commits] [libc] [llvm] [libc][math][c23] Improve rsqrtf16() function (PR #160639)

Sat Jun 6 08:31:51 PDT 2026

================
@@ -24,6 +24,193 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace math {
 
+namespace rsqrtf16_internal {
+
+// Fixed-point computations below use Q29: the integer N represents
+// N * 2^-29.  Multiplying two Q29 values produces a Q58 value, so products are
+// shifted right by RSQRT_FRACTION_BITS to return to Q29.
+LIBC_INLINE_VAR constexpr int RSQRT_FRACTION_BITS = 29;
+LIBC_INLINE_VAR constexpr int64_t ONE = int64_t(1) << RSQRT_FRACTION_BITS;
+LIBC_INLINE_VAR constexpr int64_t THREE_HALVES = 3 * (ONE >> 1);
+
+// Midpoint lookup table for 1/sqrt(x) on 16 sub-intervals of [0.5;1).
+// Values are stored in Q29 fixed-point format.  The Newton step and exact
+// rounding below correct the seed before producing the final half result.
+LIBC_INLINE_VAR constexpr uint32_t RSQRT_APPROX[16] = {
+    747'657'839, 725'981'977, 706'088'274, 687'745'184,
+    670'761'200, 654'976'372, 640'255'922, 626'485'368,
+    613'566'757, 601'415'717, 589'959'130, 579'133'272,
+    568'882'316, 559'157'115, 549'914'212, 541'115'017,
+};
+LIBC_INLINE_VAR constexpr int64_t ONE_OVER_SQRT2 = 0x16a09e60;
+
+LIBC_INLINE constexpr int floor_log2(uint64_t x) {
+  return 63 - cpp::countl_zero(x);
+}
+
+LIBC_INLINE constexpr int64_t initial_approximation(uint32_t x_mant) {
+  return RSQRT_APPROX[(x_mant - 0x0400) >> 6];
+}
+
+LIBC_INLINE constexpr int64_t newton_raphson(uint32_t m, int64_t y) {
+  // Refine y ~= 1/sqrt(m) with:
+  //   y_{n+1} = y_n * (1.5 - 0.5 * m * y_n^2)
+  // where both m and y are stored in Q29.
+  int64_t y2 = (y * y) >> RSQRT_FRACTION_BITS;
+  int64_t my2 = (static_cast<int64_t>(m) * y2) >> RSQRT_FRACTION_BITS;
+  int64_t factor = THREE_HALVES - (my2 >> 1);
+  return (y * factor) >> RSQRT_FRACTION_BITS;
+}
+
+LIBC_INLINE constexpr uint16_t fixed_to_half_bits(uint64_t y, int scale_exp) {
+  int y_log2 = floor_log2(y);
+  int out_exp = scale_exp + y_log2 - RSQRT_FRACTION_BITS;
+  int biased_exp = out_exp + 15;
+
+  uint32_t out_sig = y_log2 >= 10 ? static_cast<uint32_t>(y >> (y_log2 - 10))
+                                  : static_cast<uint32_t>(y << (10 - y_log2));
+
+  if (biased_exp <= 0)
+    return 0x0400;
+  if (biased_exp >= 31)
+    return 0x7bff;
+
+  return static_cast<uint16_t>((biased_exp << 10) | (out_sig & 0x3ff));
+}
+
+struct ApproxResult {
+  uint16_t value;
+  uint32_t x_sig;
+  int x_exp;
+};
+
+LIBC_INLINE constexpr ApproxResult approximate_rsqrt(uint16_t x_abs) {
+  uint32_t x_mant = x_abs & 0x03ff;
+  uint32_t x_sig = x_mant;
+  int x_exp = -24;
+  int exponent = 0;
+
+  // Decompose the finite positive input as:
+  //   x = m * 2^exponent, with 0.5 <= m < 1.
+  // `x_sig` and `x_exp` keep the exact input as x_sig * 2^x_exp for the integer
+  // rounding test below.
+  if (x_abs >= 0x0400) {
+    int biased_exp = static_cast<int>(x_abs >> 10);
----------------
lntue wrote:

Can we demystify these constants, either via comments or via `FPBits` constants?

https://github.com/llvm/llvm-project/pull/160639