[libc-commits] [libc] [libc][math] Implement double precision sin correctly rounded to all rounding modes. (PR #95736)

Fri Jun 21 08:44:00 PDT 2024

================
@@ -0,0 +1,173 @@
+//===-- Range reduction for double precision sin/cos/tan -*- C++ --------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_DOUBLE_COMMON_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+#include "src/__support/integer_literals.h"
+
+namespace LIBC_NAMESPACE {
+
+namespace generic {
+
+using LIBC_NAMESPACE::fputil::DoubleDouble;
+using Float128 = LIBC_NAMESPACE::fputil::DyadicFloat<128>;
+
+LIBC_INLINE constexpr Float128 PI_OVER_128_F128 = {
+    Sign::POS, -133, 0xc90f'daa2'2168'c234'c4c6'628b'80dc'1cd1_u128};
+
+// Note: The look-up tables ONE_TWENTY_EIGHT_OVER_PI is selected to be either
+// from fma:: or nofma:: namespace.
+
+// For large range |x| >= 2^32, we use the exponent of x to find 3 double-chunks
+// of 128/pi c_hi, c_mid, c_lo such that:
+//   1) ulp(round(x * c_hi, D, RN)) >= 256,
+//   2) If x * c_hi = ph_hi + ph_lo and x * c_mid = pm_hi + pm_lo, then
+//        min(ulp(ph_lo), ulp(pm_hi)) >= 2^-53.
+//   3) ulp(round(x * c_lo, D, RN)) <= 2^-7x.
+// This will allow us to do quick computations as:
+//   (x * 256/pi) ~ x * (c_hi + c_mid + c_lo)    (mod 256)
+//                ~ ph_lo + pm_hi + pm_lo + (x * c_lo)
+// Then,
+//   round(x * 128/pi) = round(ph_lo + pm_hi)    (mod 256)
+// And the high part of fractional part of (x * 128/pi) can simply be:
+//   {x * 128/pi}_hi = {ph_lo + pm_hi}.
+// To prevent overflow when x is very large, we simply scale up
+// (c_hi, c_mid, c_lo) by a fixed power of 2 (based on the index) and scale down
+// x by the same amount.
+//
+
+template <bool NO_FMA>
+LIBC_INLINE unsigned range_reduction_large(double x, DoubleDouble &u) {
+
+  // Digits of pi/128, generated by Sollya with:
+  // > a = round(pi/128, D, RN);
+  // > b = round(pi/128 - a, D, RN);
+  constexpr DoubleDouble PI_OVER_128_DD = {0x1.1a62633145c07p-60,
+                                           0x1.921fb54442d18p-6};
----------------
nickdesaulniers wrote:

Move this down closer to where it's used (near L104).

https://github.com/llvm/llvm-project/pull/95736