[libc-commits] [libc] ca6b354 - [libc] Add range reduction functions based on Paine and Hanek algorithm.

Sun Aug 22 22:19:02 PDT 2021

Author: Siva Chandra Reddy
Date: 2021-08-23T05:18:41Z
New Revision: ca6b354229702ba9a36044f0941669ee82da8761

URL: https://github.com/llvm/llvm-project/commit/ca6b354229702ba9a36044f0941669ee82da8761
DIFF: https://github.com/llvm/llvm-project/commit/ca6b354229702ba9a36044f0941669ee82da8761.diff

LOG: [libc] Add range reduction functions based on Paine and Hanek algorithm.

These functions will be used in a future patch to implement
trigonometric functions. Unit tests have been added but to the
libc-long-running-tests suite. The unit tests long running because we
compare against MPFR computations performed at 1280 bits of precision.

Some cleanups or elimination of repeated patterns can be done as follow
up changes.

Differential Revision: https://reviews.llvm.org/D104817

Added: 
    libc/src/__support/FPUtil/UInt.h
    libc/src/__support/FPUtil/XFloat.h
    libc/src/math/generic/dp_trig.cpp
    libc/src/math/generic/dp_trig.h
    libc/test/src/math/mod_k_pi_test.cpp

Modified: 
    libc/src/__support/FPUtil/CMakeLists.txt
    libc/src/math/generic/CMakeLists.txt
    libc/test/src/math/CMakeLists.txt
    libc/utils/MPFRWrapper/MPFRUtils.cpp
    libc/utils/MPFRWrapper/MPFRUtils.h

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
index 510b6917df421..589b9b597f813 100644

--- a/libc/src/__support/FPUtil/CMakeLists.txt
+++ b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -26,6 +26,8 @@ add_header_library(
     NormalFloat.h
     PlatformDefs.h
     PolyEval.h
+    UInt.h
+    XFloat.h
   DEPENDS
     libc.include.math
     libc.include.errno

diff  --git a/libc/src/__support/FPUtil/UInt.h b/libc/src/__support/FPUtil/UInt.h
new file mode 100644
index 0000000000000..9e3f02914eac1
--- /dev/null
+++ b/libc/src/__support/FPUtil/UInt.h
@@ -0,0 +1,236 @@
+//===-- A class to manipulate wide integers. --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_UTILS_FPUTIL_UINT_H
+#define LLVM_LIBC_UTILS_FPUTIL_UINT_H
+
+#include <stddef.h> // For size_t
+#include <stdint.h>
+
+namespace __llvm_libc {
+namespace fputil {
+
+template <size_t Bits> class UInt {
+
+  // This is mainly used for debugging.
+  enum Kind {
+    NotANumber,
+    Valid,
+  };
+
+  static_assert(Bits > 0 && Bits % 64 == 0,
+                "Number of bits in UInt should be a multiple of 64.");
+  static constexpr uint64_t Mask32 = 0xFFFFFFFF;
+  static constexpr size_t WordCount = Bits / 64;
+  static constexpr uint64_t InvalidHexDigit = 20;
+  uint64_t val[WordCount];
+  Kind kind;
+
+  uint64_t low(uint64_t v) { return v & Mask32; }
+
+  uint64_t high(uint64_t v) { return (v >> 32) & Mask32; }
+
+  uint64_t hexval(char c) {
+    uint64_t 
diff ;
+    if ((
diff  = uint64_t(c) - 'A') < 6)
+      return 
diff  + 10;
+    else if ((
diff  = uint64_t(c) - 'a') < 6)
+      return 
diff  + 10;
+    else if ((
diff  = uint64_t(c) - '0') < 10)
+      return 
diff ;
+    else
+      return InvalidHexDigit;
+  }
+
+  size_t strlen(const char *s) {
+    size_t len;
+    for (len = 0; *s != '\0'; ++s, ++len)
+      ;
+    return len;
+  }
+
+public:
+  UInt() { kind = Valid; }
+
+  UInt(const UInt<Bits> &other) : kind(other.kind) {
+    if (kind == Valid) {
+      for (size_t i = 0; i < WordCount; ++i)
+        val[i] = other.val[i];
+    }
+  }
+
+  // This constructor is used for debugging.
+  explicit UInt(const char *s) {
+    size_t len = strlen(s);
+    if (len > Bits / 4 + 2 || len < 3) {
+      kind = NotANumber;
+      return;
+    }
+
+    if (!(s[0] == '0' && s[1] == 'x')) {
+      kind = NotANumber;
+      return;
+    }
+
+    for (size_t i = 0; i < WordCount; ++i)
+      val[i] = 0;
+
+    for (size_t i = len - 1, w = 0; i >= 2; --i, w += 4) {
+      uint64_t hex = hexval(s[i]);
+      if (hex == InvalidHexDigit) {
+        kind = NotANumber;
+        return;
+      }
+      val[w / 64] |= (hex << (w % 64));
+    }
+
+    kind = Valid;
+  }
+
+  explicit UInt(uint64_t v) {
+    val[0] = v;
+    for (size_t i = 1; i < WordCount; ++i)
+      val[i] = 0;
+    kind = Valid;
+  }
+
+  explicit UInt(uint64_t data[WordCount]) {
+    for (size_t i = 0; i < WordCount; ++i)
+      val[i] = data[i];
+    kind = Valid;
+  }
+
+  bool is_valid() const { return kind == Valid; }
+
+  // Add x to this number and store the result in this number.
+  // Returns the carry value produced by the addition operation.
+  uint64_t add(const UInt<Bits> &x) {
+    uint64_t carry = 0;
+    for (size_t i = 0; i < WordCount; ++i) {
+      uint64_t res_lo = low(val[i]) + low(x.val[i]) + carry;
+      carry = high(res_lo);
+      res_lo = low(res_lo);
+
+      uint64_t res_hi = high(val[i]) + high(x.val[i]) + carry;
+      carry = high(res_hi);
+      res_hi = low(res_hi);
+
+      val[i] = res_lo + (res_hi << 32);
+    }
+    return carry;
+  }
+
+  // Multiply this number with x and store the result in this number. It is
+  // implemented using the long multiplication algorithm by splitting the
+  // 64-bit words of this number and |x| in to 32-bit halves but peforming
+  // the operations using 64-bit numbers. This ensures that we don't lose the
+  // carry bits.
+  // Returns the carry value produced by the multiplication operation.
+  uint64_t mul(uint64_t x) {
+    uint64_t x_lo = low(x);
+    uint64_t x_hi = high(x);
+
+    uint64_t row1[WordCount + 1];
+    uint64_t carry = 0;
+    for (size_t i = 0; i < WordCount; ++i) {
+      uint64_t l = low(val[i]);
+      uint64_t h = high(val[i]);
+      uint64_t p1 = x_lo * l;
+      uint64_t p2 = x_lo * h;
+
+      uint64_t res_lo = low(p1) + carry;
+      carry = high(res_lo);
+      uint64_t res_hi = high(p1) + low(p2) + carry;
+      carry = high(res_hi) + high(p2);
+
+      res_lo = low(res_lo);
+      res_hi = low(res_hi);
+      row1[i] = res_lo + (res_hi << 32);
+    }
+    row1[WordCount] = carry;
+
+    uint64_t row2[WordCount + 1];
+    row2[0] = 0;
+    carry = 0;
+    for (size_t i = 0; i < WordCount; ++i) {
+      uint64_t l = low(val[i]);
+      uint64_t h = high(val[i]);
+      uint64_t p1 = x_hi * l;
+      uint64_t p2 = x_hi * h;
+
+      uint64_t res_lo = low(p1) + carry;
+      carry = high(res_lo);
+      uint64_t res_hi = high(p1) + low(p2) + carry;
+      carry = high(res_hi) + high(p2);
+
+      res_lo = low(res_lo);
+      res_hi = low(res_hi);
+      row2[i] = res_lo + (res_hi << 32);
+    }
+    row2[WordCount] = carry;
+
+    UInt<(WordCount + 1) * 64> r1(row1), r2(row2);
+    r2.shift_left(32);
+    r1.add(r2);
+    for (size_t i = 0; i < WordCount; ++i) {
+      val[i] = r1[i];
+    }
+    return r1[WordCount];
+  }
+
+  void shift_left(size_t s) {
+    const size_t drop = s / 64;  // Number of words to drop
+    const size_t shift = s % 64; // Bits to shift in the remaining words.
+    const uint64_t mask = ((uint64_t(1) << shift) - 1) << (64 - shift);
+
+    for (size_t i = WordCount; drop > 0 && i > 0; --i) {
+      if (i - drop > 0)
+        val[i - 1] = val[i - drop - 1];
+      else
+        val[i - 1] = 0;
+    }
+    for (size_t i = WordCount; shift > 0 && i > drop; --i) {
+      uint64_t drop_val = (val[i - 1] & mask) >> (64 - shift);
+      val[i - 1] <<= shift;
+      if (i < WordCount)
+        val[i] |= drop_val;
+    }
+  }
+
+  void shift_right(size_t s) {
+    const size_t drop = s / 64;  // Number of words to drop
+    const size_t shift = s % 64; // Bit shift in the remaining words.
+    const uint64_t mask = (uint64_t(1) << shift) - 1;
+
+    for (size_t i = 0; drop > 0 && i < WordCount; ++i) {
+      if (i + drop < WordCount)
+        val[i] = val[i + drop];
+      else
+        val[i] = 0;
+    }
+    for (size_t i = 0; shift > 0 && i < WordCount; ++i) {
+      uint64_t drop_val = ((val[i] & mask) << (64 - shift));
+      val[i] >>= shift;
+      if (i > 0)
+        val[i - 1] |= drop_val;
+    }
+  }
+
+  const uint64_t &operator[](size_t i) const { return val[i]; }
+
+  uint64_t &operator[](size_t i) { return val[i]; }
+
+  uint64_t *data() { return val; }
+
+  const uint64_t *data() const { return val; }
+};
+
+} // namespace fputil
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_UTILS_FPUTIL_UINT_H

diff  --git a/libc/src/__support/FPUtil/XFloat.h b/libc/src/__support/FPUtil/XFloat.h
new file mode 100644
index 0000000000000..11dbf7df14758
--- /dev/null
+++ b/libc/src/__support/FPUtil/XFloat.h
@@ -0,0 +1,180 @@
+//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "FPBits.h"
+#include "NormalFloat.h"
+#include "UInt.h"
+
+#include <stdint.h>
+
+namespace __llvm_libc {
+namespace fputil {
+
+// Store and manipulate positive double precision numbers at |Precision| bits.
+template <size_t Precision> class XFloat {
+  static constexpr uint64_t OneMask = (uint64_t(1) << 63);
+  UInt<Precision> man;
+  static constexpr uint64_t WordCount = Precision / 64;
+  int exp;
+
+  size_t bit_width(uint64_t x) {
+    if (x == 0)
+      return 0;
+    size_t shift = 0;
+    while ((OneMask & x) == 0) {
+      ++shift;
+      x <<= 1;
+    }
+    return 64 - shift;
+  }
+
+public:
+  XFloat() : exp(0) {
+    for (int i = 0; i < WordCount; ++i)
+      man[i] = 0;
+  }
+
+  XFloat(const XFloat &other) : exp(other.exp) {
+    for (int i = 0; i < WordCount; ++i)
+      man[i] = other.man[i];
+  }
+
+  explicit XFloat(double x) {
+    auto xn = NormalFloat<double>(x);
+    exp = xn.exponent;
+    man[WordCount - 1] = xn.mantissa << 11;
+    for (int i = 0; i < WordCount - 1; ++i)
+      man[i] = 0;
+  }
+
+  XFloat(int e, const UInt<Precision> &bits) : exp(e) {
+    for (size_t i = 0; i < WordCount; ++i)
+      man[i] = bits[i];
+  }
+
+  // Multiply this number with x and store the result in this number.
+  void mul(double x) {
+    auto xn = NormalFloat<double>(x);
+    exp += xn.exponent;
+    uint64_t carry = man.mul(xn.mantissa << 11);
+    size_t carry_width = bit_width(carry);
+    carry_width = (carry_width == 64 ? 64 : 63);
+    man.shift_right(carry_width);
+    man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
+    exp += carry_width == 64 ? 1 : 0;
+    normalize();
+  }
+
+  void drop_int() {
+    if (exp < 0)
+      return;
+    if (exp > int(Precision - 1)) {
+      for (size_t i = 0; i < WordCount; ++i)
+        man[i] = 0;
+      return;
+    }
+
+    man.shift_left(exp + 1);
+    man.shift_right(exp + 1);
+
+    normalize();
+  }
+
+  double mul(const XFloat<Precision> &other) {
+    constexpr size_t row_words = 2 * WordCount + 1;
+    constexpr size_t row_precision = row_words * 64;
+    constexpr size_t result_bits = 2 * Precision;
+    UInt<row_precision> rows[WordCount];
+
+    for (size_t r = 0; r < WordCount; ++r) {
+      for (size_t i = 0; i < row_words; ++i) {
+        if (i < WordCount)
+          rows[r][i] = man[i];
+        else
+          rows[r][i] = 0;
+      }
+      rows[r].mul(other.man[r]);
+      rows[r].shift_left(r * 64);
+    }
+
+    for (size_t r = 1; r < WordCount; ++r) {
+      rows[0].add(rows[r]);
+    }
+    int result_exp = exp + other.exp;
+    uint64_t carry = rows[0][row_words - 1];
+    if (carry) {
+      size_t carry_width = bit_width(carry);
+      rows[0].shift_right(carry_width);
+      rows[0][row_words - 2] =
+          rows[0][row_words - 2] + (carry << (64 - carry_width));
+      result_exp += carry_width;
+    }
+
+    if (rows[0][row_words - 2] & OneMask) {
+      ++result_exp;
+    } else {
+      rows[0].shift_left(1);
+    }
+
+    UInt<result_bits> result_man;
+    for (size_t i = 0; i < result_bits / 64; ++i)
+      result_man[i] = rows[0][i];
+    XFloat<result_bits> result(result_exp, result_man);
+    result.normalize();
+    return double(result);
+  }
+
+  explicit operator double() {
+    normalize();
+
+    constexpr uint64_t one = uint64_t(1) << 10;
+    constexpr uint64_t excess_mask = (one << 1) - 1;
+    uint64_t excess = man[WordCount - 1] & excess_mask;
+    uint64_t new_man = man[WordCount - 1] >> 11;
+    if (excess > one) {
+      // We have to round up.
+      ++new_man;
+    } else if (excess == one) {
+      bool greater_than_one = false;
+      for (size_t i = 0; i < WordCount - 1; ++i) {
+        greater_than_one = (man[i] != 0);
+        if (greater_than_one)
+          break;
+      }
+      if (greater_than_one || (new_man & 1) != 0) {
+        ++new_man;
+      }
+    }
+
+    if (new_man == (uint64_t(1) << 53))
+      ++exp;
+
+    // We use NormalFloat as it can produce subnormal numbers or underflow to 0
+    // if necessary.
+    NormalFloat<double> d(exp, new_man, 0);
+    return double(d);
+  }
+
+  // Normalizes this number.
+  void normalize() {
+    uint64_t man_bits = 0;
+    for (size_t i = 0; i < WordCount; ++i)
+      man_bits |= man[i];
+
+    if (man_bits == 0)
+      return;
+
+    while ((man[WordCount - 1] & OneMask) == 0) {
+      man.shift_left(1);
+      --exp;
+    }
+  }
+};
+
+} // namespace fputil
+} // namespace __llvm_libc

diff  --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index ff5e814652eeb..6728e44e635fb 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -992,3 +992,15 @@ add_entrypoint_object(
   COMPILE_OPTIONS
     -O2
 )
+
+add_object_library(
+  dp_trig
+  SRCS
+    dp_trig.cpp
+  HDRS
+    dp_trig.h
+  DEPENDS
+    libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+   -O3
+)

diff  --git a/libc/src/math/generic/dp_trig.cpp b/libc/src/math/generic/dp_trig.cpp
new file mode 100644
index 0000000000000..77077925e4510
--- /dev/null
+++ b/libc/src/math/generic/dp_trig.cpp
@@ -0,0 +1,105 @@
+//===-- Utilities for double precision trigonometric functions ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/FPUtil/UInt.h"
+#include "src/__support/FPUtil/XFloat.h"
+
+using FPBits = __llvm_libc::fputil::FPBits<double>;
+
+namespace __llvm_libc {
+
+// Implementation is based on the Payne and Hanek range reduction algorithm.
+// The caller should ensure that x is positive.
+// Consider:
+//   x/y = x * 1/y = I + F
+// I is the integral part and F the fractional part of the result of the
+// division operation. Then M = mod(x, y) = F * y. In order to compute M, we
+// first compute F. We do it by dropping bits from 1/y which would only
+// contribute integral results in the operation x * 1/y. This helps us get
+// accurate values of F even when x is a very large number.
+//
+// Internal operations are performed at 192 bits of precision.
+static double mod_impl(double x, const uint64_t y_bits[3],
+                       const uint64_t inv_y_bits[20], int y_exponent,
+                       int inv_y_exponent) {
+  FPBits bits(x);
+  int exponent = bits.getExponent();
+  int bit_drop = (exponent - 52) + inv_y_exponent + 1;
+  bit_drop = bit_drop >= 0 ? bit_drop : 0;
+  int word_drop = bit_drop / 64;
+  bit_drop %= 64;
+  fputil::UInt<256> man4;
+  for (size_t i = 0; i < 4; ++i) {
+    man4[3 - i] = inv_y_bits[word_drop + i];
+  }
+  man4.shift_left(bit_drop);
+  fputil::UInt<192> man_bits;
+  for (size_t i = 0; i < 3; ++i)
+    man_bits[i] = man4[i + 1];
+  fputil::XFloat<192> result(inv_y_exponent - word_drop * 64 - bit_drop,
+                             man_bits);
+  result.mul(x);
+  result.drop_int(); // |result| now holds fractional part of x/y.
+
+  fputil::UInt<192> y_man;
+  for (size_t i = 0; i < 3; ++i)
+    y_man[i] = y_bits[2 - i];
+  fputil::XFloat<192> y_192(y_exponent, y_man);
+  return result.mul(y_192);
+}
+
+static constexpr int TwoPIExponent = 2;
+
+// The mantissa bits of 2 * PI.
+// The most signification bits are in the first uint64_t word
+// and the least signification bits are in the last word. The
+// first word includes the implicit '1' bit.
+static constexpr uint64_t TwoPI[] = {0xc90fdaa22168c234, 0xc4c6628b80dc1cd1,
+                                     0x29024e088a67cc74};
+
+static constexpr int InvTwoPIExponent = -3;
+
+// The mantissa bits of 1/(2 * PI).
+// The most signification bits are in the first uint64_t word
+// and the least signification bits are in the last word. The
+// first word includes the implicit '1' bit.
+static constexpr uint64_t InvTwoPI[] = {
+    0xa2f9836e4e441529, 0xfc2757d1f534ddc0, 0xdb6295993c439041,
+    0xfe5163abdebbc561, 0xb7246e3a424dd2e0, 0x6492eea09d1921c,
+    0xfe1deb1cb129a73e, 0xe88235f52ebb4484, 0xe99c7026b45f7e41,
+    0x3991d639835339f4, 0x9c845f8bbdf9283b, 0x1ff897ffde05980f,
+    0xef2f118b5a0a6d1f, 0x6d367ecf27cb09b7, 0x4f463f669e5fea2d,
+    0x7527bac7ebe5f17b, 0x3d0739f78a5292ea, 0x6bfb5fb11f8d5d08,
+    0x56033046fc7b6bab, 0xf0cfbc209af4361e};
+
+double mod_2pi(double x) {
+  static constexpr double _2pi = 6.283185307179586;
+  if (x < _2pi)
+    return x;
+  return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent, InvTwoPIExponent);
+}
+
+// Returns mod(x, pi/2)
+double mod_pi_over_2(double x) {
+  static constexpr double pi_over_2 = 1.5707963267948966;
+  if (x < pi_over_2)
+    return x;
+  return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 2, InvTwoPIExponent + 2);
+}
+
+// Returns mod(x, pi/4)
+double mod_pi_over_4(double x) {
+  static constexpr double pi_over_4 = 0.7853981633974483;
+  if (x < pi_over_4)
+    return x;
+  return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 3, InvTwoPIExponent + 3);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/math/generic/dp_trig.h b/libc/src/math/generic/dp_trig.h
new file mode 100644
index 0000000000000..a724da04c1717
--- /dev/null
+++ b/libc/src/math/generic/dp_trig.h
@@ -0,0 +1,22 @@
+//===-- Utilities for double precision trigonometric functions --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
+
+namespace __llvm_libc {
+
+double mod_2pi(double);
+
+double mod_pi_over_2(double);
+
+double mod_pi_over_4(double);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H

diff  --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index a0d06688a4e74..5a7a145dfd2c1 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1169,6 +1169,18 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fputil
 )
 
+add_fp_unittest(
+  mod_k_pi_test
+  NEED_MPFR
+  SUITE
+    libc-long-running-tests
+  SRCS
+    mod_k_pi_test.cpp
+  DEPENDS
+    libc.src.math.generic.dp_trig
+    libc.src.__support.FPUtil.fputil
+)
+
 add_subdirectory(generic)
 add_subdirectory(exhaustive)
 add_subdirectory(
diff erential_testing)

diff  --git a/libc/test/src/math/mod_k_pi_test.cpp b/libc/test/src/math/mod_k_pi_test.cpp
new file mode 100644
index 0000000000000..008e93da9519b
--- /dev/null
+++ b/libc/test/src/math/mod_k_pi_test.cpp
@@ -0,0 +1,56 @@
+//===-- Unittests mod_2pi, mod_pi_over_4 and mod_pi_over_2 ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/TestHelpers.h"
+#include "src/math/generic/dp_trig.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/Test.h"
+
+#include <math.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+using FPBits = __llvm_libc::fputil::FPBits<double>;
+using UIntType = FPBits::UIntType;
+
+TEST(LlvmLibcMod2PITest, Range) {
+  constexpr UIntType count = 1000000000;
+  constexpr UIntType step = UIntType(-1) / count;
+  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
+    double x = double(FPBits(v));
+    if (isnan(x) || isinf(x) || x <= 0.0)
+      continue;
+
+    ASSERT_MPFR_MATCH(mpfr::Operation::Mod2PI, x, __llvm_libc::mod_2pi(x), 0);
+  }
+}
+
+TEST(LlvmLibcModPIOver2Test, Range) {
+  constexpr UIntType count = 1000000000;
+  constexpr UIntType step = UIntType(-1) / count;
+  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
+    double x = double(FPBits(v));
+    if (isnan(x) || isinf(x) || x <= 0.0)
+      continue;
+
+    ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver2, x,
+                      __llvm_libc::mod_pi_over_2(x), 0);
+  }
+}
+
+TEST(LlvmLibcModPIOver4Test, Range) {
+  constexpr UIntType count = 1000000000;
+  constexpr UIntType step = UIntType(-1) / count;
+  for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
+    double x = double(FPBits(v));
+    if (isnan(x) || isinf(x) || x <= 0.0)
+      continue;
+
+    ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver4, x,
+                      __llvm_libc::mod_pi_over_4(x), 0);
+  }
+}

diff  --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index d0c20987cb173..86305af009173 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -62,7 +62,7 @@ class MPFRNumber {
   mpfr_t value;
 
 public:
-  MPFRNumber() : mpfrPrecision(128) { mpfr_init2(value, mpfrPrecision); }
+  MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); }
 
   // We use explicit EnableIf specializations to disallow implicit
   // conversions. Implicit conversions can potentially lead to loss of
@@ -202,6 +202,33 @@ class MPFRNumber {
     return result;
   }
 
+  MPFRNumber mod_2pi() const {
+    MPFRNumber result(0.0, 1280);
+    MPFRNumber _2pi(0.0, 1280);
+    mpfr_const_pi(_2pi.value, MPFR_RNDN);
+    mpfr_mul_si(_2pi.value, _2pi.value, 2, MPFR_RNDN);
+    mpfr_fmod(result.value, value, _2pi.value, MPFR_RNDN);
+    return result;
+  }
+
+  MPFRNumber mod_pi_over_2() const {
+    MPFRNumber result(0.0, 1280);
+    MPFRNumber pi_over_2(0.0, 1280);
+    mpfr_const_pi(pi_over_2.value, MPFR_RNDN);
+    mpfr_mul_d(pi_over_2.value, pi_over_2.value, 0.5, MPFR_RNDN);
+    mpfr_fmod(result.value, value, pi_over_2.value, MPFR_RNDN);
+    return result;
+  }
+
+  MPFRNumber mod_pi_over_4() const {
+    MPFRNumber result(0.0, 1280);
+    MPFRNumber pi_over_4(0.0, 1280);
+    mpfr_const_pi(pi_over_4.value, MPFR_RNDN);
+    mpfr_mul_d(pi_over_4.value, pi_over_4.value, 0.25, MPFR_RNDN);
+    mpfr_fmod(result.value, value, pi_over_4.value, MPFR_RNDN);
+    return result;
+  }
+
   MPFRNumber sin() const {
     MPFRNumber result;
     mpfr_sin(result.value, value, MPFR_RNDN);
@@ -281,6 +308,9 @@ class MPFRNumber {
   template <typename T>
   cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, double> ulp(T input) {
     T thisAsT = as<T>();
+    if (thisAsT == input)
+      return T(0.0);
+
     int thisExponent = fputil::FPBits<T>(thisAsT).getExponent();
     int inputExponent = fputil::FPBits<T>(input).getExponent();
     if (thisAsT * input < 0 || thisExponent == inputExponent) {
@@ -339,6 +369,12 @@ unaryOperation(Operation op, InputType input) {
     return mpfrInput.expm1();
   case Operation::Floor:
     return mpfrInput.floor();
+  case Operation::Mod2PI:
+    return mpfrInput.mod_2pi();
+  case Operation::ModPIOver2:
+    return mpfrInput.mod_pi_over_2();
+  case Operation::ModPIOver4:
+    return mpfrInput.mod_pi_over_4();
   case Operation::Round:
     return mpfrInput.round();
   case Operation::Sin:

diff  --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index ee98c8f7ecc2d..4e8dd385d4f9b 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -30,6 +30,9 @@ enum class Operation : int {
   Exp2,
   Expm1,
   Floor,
+  Mod2PI,
+  ModPIOver2,
+  ModPIOver4,
   Round,
   Sin,
   Sqrt,