[libc-commits] [libc] [libc][math] Implement an integer-only version of double precision sin and cos with 1 ULP errors. (PR #184752)

via libc-commits libc-commits at lists.llvm.org
Fri Mar 6 11:05:34 PST 2026


================
@@ -0,0 +1,274 @@
+//===-- Trig range reduction and evaluation using integer-only --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_SINCOS_INTEGER_UTILS_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_SINCOS_INTEGER_UTILS_H
+
+#include "src/__support/CPP/bit.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/big_int.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/math_extras.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace integer_only {
+
+// A thin wrapper over UInt<128> whose value is interpreted as an unsigned
+// fixed-point fraction with 128 fractional bits: the stored integer N
+// represents the real number N * 2^-128, so every representable value lies
+// in [0, 1).
+struct Frac128 : public UInt<128> {
+  using UInt<128>::UInt;
+
+  // Bitwise complement of both 64-bit limbs (val[0] = low limb,
+  // val[1] = high limb).
+  constexpr Frac128 operator~() const {
+    Frac128 r;
+    r.val[0] = ~val[0];
+    r.val[1] = ~val[1];
+    return r;
+  }
+
+  // Addition modulo 2^128, i.e. fraction addition that wraps around 1.
+  constexpr Frac128 operator+(const Frac128 &other) const {
+    UInt<128> r = UInt<128>(*this) + (UInt<128>(other));
+    return Frac128(r.val);
+  }
+
+  // Subtraction modulo 2^128.
+  constexpr Frac128 operator-(const Frac128 &other) const {
+    UInt<128> r = UInt<128>(*this) - (UInt<128>(other));
+    return Frac128(r.val);
+  }
+
+  // Fraction multiplication: keeps the high 128 bits of the 256-bit
+  // product, which is a * b truncated back to 128 fractional bits.
+  // NOTE(review): quick_mul_hi presumably may drop carries propagating from
+  // the discarded low half -- confirm the error bound against big_int.h.
+  constexpr Frac128 operator*(const Frac128 &other) const {
+    UInt<128> r = UInt<128>::quick_mul_hi(UInt<128>(other));
+    return Frac128(r.val);
+  }
+};
+
+// 1280 + 64 bits of 2/pi, printed using MPFR.
+// Notice that if we store from the highest bytes to lowest bytes, it is
+// essentially having 2/pi in big-endian.  On the other hand, uint64_t type
+// that will be used for computations later are in little-endian.  So a few
+// bit-reversal instructions will be introduced when we extract the parts
+// out.  It's possible to skip the bit-reversal instructions entirely by
+// having this table presented in little-endian, meaning from the lowest
+// bytes to highest bytes.  The tradeoff will be a bit more complicated and
+// awkward in the computations of the index, but it might be worth it?
+// We also append 7 more zero bytes (= ceil(54 / 8)) so the indexing in
+// trig_range_reduction stays in range for all non-negative exponents.
+LIBC_INLINE_VAR constexpr unsigned TWO_OVER_PI_LENGTH = 1280 / 8 + 7;
+
+LIBC_INLINE_VAR constexpr uint8_t TWO_OVER_PI[TWO_OVER_PI_LENGTH] = {
+    0x1D, 0x36, 0xF4, 0x9A, 0x20, 0xBC, 0xCF, 0xF0, 0xAB, 0x6B, 0x7B, 0xFC,
+    0x46, 0x30, 0x03, 0x56, 0x08, 0x5D, 0x8D, 0x1F, 0xB1, 0x5F, 0xFB, 0x6B,
+    0xEA, 0x92, 0x52, 0x8A, 0xF7, 0x39, 0x07, 0x3D, 0x7B, 0xF1, 0xE5, 0xEB,
+    0xC7, 0xBA, 0x27, 0x75, 0x2D, 0xEA, 0x5F, 0x9E, 0x66, 0x3F, 0x46, 0x4F,
+    0xB7, 0x09, 0xCB, 0x27, 0xCF, 0x7E, 0x36, 0x6D, 0x1F, 0x6D, 0x0A, 0x5A,
+    0x8B, 0x11, 0x2F, 0xEF, 0x0F, 0x98, 0x05, 0xDE, 0xFF, 0x97, 0xF8, 0x1F,
+    0x3B, 0x28, 0xF9, 0xBD, 0x8B, 0x5F, 0x84, 0x9C, 0xF4, 0x39, 0x53, 0x83,
+    0x39, 0xD6, 0x91, 0x39, 0x41, 0x7E, 0x5F, 0xB4, 0x26, 0x70, 0x9C, 0xE9,
+    0x84, 0x44, 0xBB, 0x2E, 0xF5, 0x35, 0x82, 0xE8, 0x3E, 0xA7, 0x29, 0xB1,
+    0x1C, 0xEB, 0x1D, 0xFE, 0x1C, 0x92, 0xD1, 0x09, 0xEA, 0x2E, 0x49, 0x06,
+    0xE0, 0xD2, 0x4D, 0x42, 0x3A, 0x6E, 0x24, 0xB7, 0x61, 0xC5, 0xBB, 0xDE,
+    0xAB, 0x63, 0x51, 0xFE, 0x41, 0x90, 0x43, 0x3C, 0x99, 0x95, 0x62, 0xDB,
+    0xC0, 0xDD, 0x34, 0xF5, 0xD1, 0x57, 0x27, 0xFC, 0x29, 0x15, 0x44, 0x4E,
+    0x6E, 0x83, 0xF9, 0xA2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+// pi/2 - 1 ~= 0.57079632679... as an unsigned fixed-point fraction with 128
+// fractional bits (the "M1" suffix = "minus 1").  Limbs are little-endian:
+// the first initializer is the low 64 bits (val[0]), the second the high 64
+// bits (val[1]).
+LIBC_INLINE_VAR constexpr Frac128 PI_OVER_2_M1({0x898c'c517'01b8'39a2,
+                                                0x921f'b544'42d1'8469});
+
+// Perform range reduction mod pi/2
+//
+// Inputs:
+//   x_u: explicit mantissa
+//   x_e: biased exponent
+// Output:
+//   k     : round(x * 2/pi) mod 4
+//   x_frac: |x - k * pi/2|
+// Return:
+//   x_frac_is_neg.
+LIBC_INLINE constexpr bool trig_range_reduction(uint64_t x_u, unsigned x_e,
+                                                unsigned &k, Frac128 &x_frac) {
+  using FPBits = typename fputil::FPBits<double>;
+  bool x_frac_is_neg = false;
+  // We do multiplication x * (2/pi)
+  // Let T[i] be the i'th byte of 2/pi expansion:
+  // Then 2/pi = T[0] * 2^-8 + T[1] * 2^-16 + ...
+  //           = sum_i T[i] * 2^(-8(i + 1))
+  // To be able to drop all T[j] * 2^(-8(j + 1)) for small j < i, we will want
+  //   ulp(x) * lsb(T[i - 1] * 2^(-8 * i)) >= 4 = 2^2  (since 4 * pi/2 = 2*pi)
+  // So:
+  //   2^(e - 52) * 2^(-8 * i) >= 2^2
+  // Or equivalently,
+  //   e - 54 - 8*i >= 0.
+  // Define:
+  //   i = floor( (e - 54)/8 ),
+  // and let
+  //   s = e - 54 - 8i >= 0.
+  // Since we store the mantissa of x, which is 53 bits long in a 64 bit
+  // integer, we have some wiggle room to shuffle the lsb of x.
+  // By shifting mantissa of x_u to the left by s, the lsb of x_u will be:
+  //   2^(e - 52 - s), for which, the product of lsb's is now exactly 4
+  //   lsb(x_u) * 2^(-8 * i)) = 4.
+  // This will allow us to compute the full product:
+  //   x_u * (T[i] * 2^(-8(i + 1)) + ... ) in exact fixed point.
+  // From the formula of i, in order for i >= 0, e >= 54.  To support all the
+  // exponents e >= 0, we could add ceil(54 / 8) = 7 0x00 bytes and shift the
+  // index by 7.
+  unsigned e_num = x_e - FPBits::EXP_BIAS + 2; // e - 54 + 7*8
+  // With
+  //   i = floor( (e - 54) / 8 ),
+  // the shifted-by-7 index is:
+  //   j = i + 7 = floor( (e - 54) / 8 ) + 7
+  // Since the 64-bit integer chunk will be formed by T[j] ... T[j + 7],
+  // and we store the table in the little-endian form, we will index to the
+  // lowest part of the 64-bit integer chunk, which is:
+  //   idx = the index of the T[j + 7] part.
+  unsigned j = e_num >> 3;
+  unsigned idx = (TWO_OVER_PI_LENGTH - 1 - j - 7);
+  unsigned shift = e_num & 7; // s = e - 54 - 8*i
+  x_u <<= shift;              // lsb(x_u) = 2^(e - 52 - s)
+  UInt<64> x_u64(x_u);
+  // Gather parts
+  auto get_uint64 = [](const uint8_t *ptr) -> uint64_t {
+    return ptr[0] | (uint64_t(ptr[1]) << 8) | (uint64_t(ptr[2]) << 16) |
+           (uint64_t(ptr[3]) << 24) | (uint64_t(ptr[4]) << 32) |
+           (uint64_t(ptr[5]) << 40) | (uint64_t(ptr[6]) << 48) |
+           (uint64_t(ptr[7]) << 56);
+  };
+  // lsb(c0) = 2^(-8i - 64)
+  uint64_t c0 = get_uint64(&TWO_OVER_PI[idx]);
+  // lsb(p0) = lsb(x_u) * lsb(c0)
+  //         = 2^(e - 52 - s) * 2^(-8i - 64)
+  //         = 2^(-62)
+  // msb(p0) = 2^(-62 + 63) = 2^1.
+  uint64_t p0 = x_u * c0;
+  // lsb(c1) = lsb(c0) * 2^-64 = 2^(-8i - 128)
+  UInt<64> c1(get_uint64(&TWO_OVER_PI[idx - 8]));
+  // lsb(c2) = lsb(c1) * 2^-64 = 2^(-8i - 192)
+  UInt<64> c2(get_uint64(&TWO_OVER_PI[idx - 16]));
+  // lsb(p1) = lsb(x_u) * lsb(c1) = 2^(-62 - 64) = 2^-126
+  UInt<128> p1 = x_u64.ful_mul(c1);
+  // lsb(p2) = lsb(x_u) * lsb(c2) * 2^64 = 2^-126
+  UInt<128> p2(x_u64.quick_mul_hi(c2));
+  UInt<128> sum = p1 + p2;
+  sum.val[1] += p0;
+  // Get the highest 2 bits.
+  k = static_cast<unsigned>(sum.val[1] >> 62);
+  bool round_bit = sum.val[1] & 0x2000'0000'0000'0000;
+  // Shift so that the leading bit is 0.5.
+  x_frac.val[0] = (sum.val[0] << 2);
+  x_frac.val[1] = (sum.val[1] << 2) | (sum.val[0] >> 62);
----------------
lntue wrote:

I've added shifting operators to the Frac128 type and fixed this part.

https://github.com/llvm/llvm-project/pull/184752


More information about the libc-commits mailing list