[libc-commits] [libc] [libc][math] Implemented sinpif function (PR #97149)

Sat Jun 29 00:47:16 PDT 2024

https://github.com/HendrikHuebner updated https://github.com/llvm/llvm-project/pull/97149

>From 4d7a5143fcb970c3cc8b6d5ece7c5118e4a725c3 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Tue, 11 Jun 2024 11:28:39 +0200
Subject: [PATCH 1/8] Added sinpi declarations and test

---
 libc/config/linux/x86_64/entrypoints.txt |   1 +
 libc/docs/math/index.rst                 |   2 +-
 libc/src/math/generic/CMakeLists.txt     |  22 ++++
 libc/src/math/generic/sinpif.cpp         | 160 +++++++++++++++++++++++
 libc/src/math/sinpif.h                   |  18 +++
 libc/test/src/math/CMakeLists.txt        |  16 +++
 libc/test/src/math/sinpif_test.cpp       | 127 ++++++++++++++++++
 libc/utils/MPFRWrapper/MPFRUtils.cpp     |   2 +
 libc/utils/MPFRWrapper/MPFRUtils.h       |   1 +
 9 files changed, 348 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/math/generic/sinpif.cpp
 create mode 100644 libc/src/math/sinpif.h
 create mode 100644 libc/test/src/math/sinpif_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9581f7f2604c4..9a7ad32b66470 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -514,6 +514,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sincosf
     libc.src.math.sinf
     libc.src.math.sinhf
+    libc.src.math.sinpif
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index 56cc8d658257d..9adc93b5e104d 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -330,7 +330,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | sinh      | |check|          |                 |                        |                      |                        | 7.12.5.5               | F.10.2.5                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| sinpi     |                  |                 |                        |                      |                        | 7.12.4.13              | F.10.1.13                  |
+| sinpi     | |check|          |                 |                        |                      |                        | 7.12.4.13              | F.10.1.13                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | sqrt      | |check|          | |check|         | |check|                |                      | |check|                | 7.12.7.10              | F.10.4.10                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 3773a2b49c416..de9701dec1594 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -283,6 +283,28 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  sinpif
+  SRCS
+    sinpif.cpp
+  HDRS
+    ../sinpif.h
+  DEPENDS
+    .range_reduction
+    .sincosf_utils
+    libc.src.errno.errno
+    libc.src.__support.FPUtil.basic_operations
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.fma
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.rounding_mode
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   sincosf
   SRCS
diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
new file mode 100644
index 0000000000000..50c0494bb2233
--- /dev/null
+++ b/libc/src/math/generic/sinpif.cpp
@@ -0,0 +1,160 @@
+//===-- Single-precision sinpi function -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/sinpif.h"
+#include "sincosf_utils.h"
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
+#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+
+#include <errno.h>
+
+#if defined(LIBC_TARGET_CPU_HAS_FMA)
+#include "range_reduction_fma.h"
+#else
+#include "range_reduction.h"
+#endif
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+
+  uint32_t x_u = xbits.uintval();
+  uint32_t x_abs = x_u & 0x7fff'ffffU;
+  double xd = static_cast<double>(x);
+
+  // Range reduction:
+  // For |x| > pi/32, we perform range reduction as follows:
+  // Find k and y such that:
+  //   x = (k + y) * pi/32
+  //   k is an integer
+  //   |y| < 0.5
+  // For small range (|x| < 2^45 when FMA instructions are available, 2^22
+  // otherwise), this is done by performing:
+  //   k = round(x * 32/pi)
+  //   y = x * 32/pi - k
+  // For large range, we will omit all the higher parts of 32/pi such that the
+  // least significant bits of their full products with x are larger than 63,
+  // since sin((k + y + 64*i) * pi/32) = sin(x + i * 2pi) = sin(x).
+  //
+  // When FMA instructions are not available, we store the digits of 32/pi in
+  // chunks of 28-bit precision.  This will make sure that the products:
+  //   x * THIRTYTWO_OVER_PI_28[i] are all exact.
+  // When FMA instructions are available, we simply store the digits of 32/pi in
+  // chunks of doubles (53-bit of precision).
+  // So when multiplying by the largest values of single precision, the
+  // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80.  By the
+  // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
+  // us more than 40 bits of accuracy. For the worst-case estimation of range
+  // reduction, see for instances:
+  //   Elementary Functions by J-M. Muller, Chapter 11,
+  //   Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
+  //   Chapter 10.2.
+  //
+  // Once k and y are computed, we then deduce the answer by the sine of sum
+  // formula:
+  //   sin(x) = sin((k + y)*pi/32)
+  //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
+  // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
+  // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
+  // computed using degree-7 and degree-6 minimax polynomials generated by
+  // Sollya respectively.
+
+  // |x| <= 1/16
+  if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U)) {
+
+    // |x| < 0x1.d12ed2p-12f
+    if (LIBC_UNLIKELY(x_abs < 0x39e8'9769U)) {
+      if (LIBC_UNLIKELY(x_abs == 0U)) {
+        // For signed zeros.
+        return x;
+      }
+      // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x
+      // is:
+      //   |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
+      //                           = x^2 / 6
+      //                           < 2^-25
+      //                           < epsilon(1)/2.
+      // So the correctly rounded values of sin(x) are:
+      //   = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
+      //                        or (rounding mode = FE_UPWARD and x is
+      //                        negative),
+      //   = x otherwise.
+      // To simplify the rounding decision and make it more efficient, we use
+      //   fma(x, -2^-25, x) instead.
+      // An exhaustive test shows that this formula work correctly for all
+      // rounding modes up to |x| < 0x1.c555dep-11f.
+      // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
+      // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
+      // |x| < 2^-125. For targets without FMA instructions, we simply use
+      // double for intermediate results as it is more efficient than using an
+      // emulated version of FMA.
+#if defined(LIBC_TARGET_CPU_HAS_FMA)
+      return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+      return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
+#endif // LIBC_TARGET_CPU_HAS_FMAx
+    }
+
+    // |x| < pi/16.
+    double xsq = xd * xd;
+
+    // Degree-9 polynomial approximation:
+    //   sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9
+    //          = x (1 + a_3 x^2 + ... + a_9 x^8)
+    //          = x * P(x^2)
+    // generated by Sollya with the following commands:
+    // > display = hexadecimal;
+    // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
+    double result =
+        fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7,
+                         -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19);
+    return static_cast<float>(xd * result);
+  }
+
+  if (LIBC_UNLIKELY(x_abs == 0x4619'9998U)) { // x = 0x1.33333p13
+    float r = -0x1.63f4bap-2f;
+    int rounding = fputil::quick_get_round();
+    if ((rounding == FE_DOWNWARD && xbits.is_pos()) ||
+        (rounding == FE_UPWARD && xbits.is_neg()))
+      r = -0x1.63f4bcp-2f;
+
+    return xbits.is_neg() ? -r : r;
+  }
+
+  if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
+    if (x_abs == 0x7f80'0000U) {
+      fputil::set_errno_if_required(EDOM);
+      fputil::raise_except_if_required(FE_INVALID);
+    }
+    return x + FPBits::quiet_nan().get_val();
+  }
+
+  // Combine the results with the sine of sum formula:
+  //   sin(x) = sin((k + y)*pi/32)
+  //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
+  //          = sin_y * cos_k + (1 + cosm1_y) * sin_k
+  //          = sin_y * cos_k + (cosm1_y * sin_k + sin_k)
+  double sin_k, cos_k, sin_y, cosm1_y;
+
+  sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
+
+  return static_cast<float>(fputil::multiply_add(
+      sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k)));
+}
+
+} // namespace LIBC_NAMESPACE
+
diff --git a/libc/src/math/sinpif.h b/libc/src/math/sinpif.h
new file mode 100644
index 0000000000000..a5bef435efae6
--- /dev/null
+++ b/libc/src/math/sinpif.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for sinpif --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_SINPIF_H
+#define LLVM_LIBC_SRC_MATH_SINPIF_H
+
+namespace LIBC_NAMESPACE {
+
+float sinpif(float x);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_SINPIF_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 36d2a2fbfd302..6e9ea9bd0466b 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -44,6 +44,22 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  sinpif_test
+  NEED_MPFR
+  SUITE
+    libc-math-unittests
+  SRCS
+    sinpif_test.cpp
+  HDRS
+    sdcomp26094.h
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.sinpif
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   sin_test
   NEED_MPFR
diff --git a/libc/test/src/math/sinpif_test.cpp b/libc/test/src/math/sinpif_test.cpp
new file mode 100644
index 0000000000000..9c2b1210ea3bc
--- /dev/null
+++ b/libc/test/src/math/sinpif_test.cpp
@@ -0,0 +1,127 @@
+//===-- Unittests for sinpif ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/sinpif.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "test/src/math/sdcomp26094.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+#include <errno.h>
+#include <stdint.h>
+
+using LlvmLibcSinpifTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+using LIBC_NAMESPACE::testing::SDCOMP26094_VALUES;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+TEST_F(LlvmLibcSinpifTest, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::sinpif(-0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+}
+
+TEST_F(LlvmLibcSinpifTest, InFloatRange) {
+  constexpr uint32_t COUNT = 100'000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = FPBits(v).get_val();
+    if (isnan(x) || isinf(x))
+      continue;
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                   LIBC_NAMESPACE::sinpif(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcSinpifTest, SpecificBitPatterns) {
+  constexpr int N = 36;
+  constexpr uint32_t INPUTS[N] = {
+      0x3f06'0a92U, // x = pi/6
+      0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+      0x3f49'0fdbU, // x = pi/4
+      0x3f86'0a92U, // x = pi/3
+      0x3fa7'832aU, // x = 0x1.4f0654p+0f
+      0x3fc9'0fdbU, // x = pi/2
+      0x4017'1973U, // x = 0x1.2e32e6p+1f
+      0x4049'0fdbU, // x = pi
+      0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+      0x40c9'0fdbU, // x = 2*pi
+      0x433b'7490U, // x = 0x1.76e92p+7f
+      0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+      0x4619'9998U, // x = 0x1.33333p+13f
+      0x474d'246fU, // x = 0x1.9a48dep+15f
+      0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+      0x4c23'32e9U, // x = 0x1.4665d2p+25f
+      0x50a3'e87fU, // x = 0x1.47d0fep+34f
+      0x5239'47f6U, // x = 0x1.728fecp+37f
+      0x53b1'46a6U, // x = 0x1.628d4cp+40f
+      0x55ca'fb2aU, // x = 0x1.95f654p+44f
+      0x588e'f060U, // x = 0x1.1de0cp+50f
+      0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+      0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
+      0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+      0x61a4'0b40U, // x = 0x1.48168p+68f
+      0x6386'134eU, // x = 0x1.0c269cp+72f
+      0x6589'8498U, // x = 0x1.13093p+76f
+      0x6600'0001U, // x = 0x1.000002p+77f
+      0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+      0x66b0'14aaU, // x = 0x1.602954p+78f
+      0x67a9'242bU, // x = 0x1.524856p+80f
+      0x6a19'76f1U, // x = 0x1.32ede2p+85f
+      0x6c55'da58U, // x = 0x1.abb4bp+89f
+      0x6f79'be45U, // x = 0x1.f37c8ap+95f
+      0x7276'69d4U, // x = 0x1.ecd3a8p+101f
+      0x7758'4625U, // x = 0x1.b08c4ap+111f
+  };
+
+  for (int i = 0; i < N; ++i) {
+    float x = FPBits(INPUTS[i]).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                   LIBC_NAMESPACE::sinpif(x), 0.5);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, -x,
+                                   LIBC_NAMESPACE::sinpif(-x), 0.5);
+  }
+}
+
+// For small values, sin(x) is x.
+TEST_F(LlvmLibcSinpifTest, SmallValues) {
+  float x = FPBits(0x1780'0000U).get_val();
+  EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                 LIBC_NAMESPACE::sinpif(x), 0.5);
+
+  x = FPBits(0x0040'0000U).get_val();
+  EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                 LIBC_NAMESPACE::sinpif(x), 0.5);
+}
+
+// SDCOMP-26094: check sinpif in the cases for which the range reducer
+// returns values furthest beyond its nominal upper bound of pi/4.
+TEST_F(LlvmLibcSinpifTest, SDCOMP_26094) {
+  for (uint32_t v : SDCOMP26094_VALUES) {
+    float x = FPBits((v)).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                   LIBC_NAMESPACE::sinpif(x), 0.5);
+  }
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index d69309077e099..e6e1b8331140d 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -677,6 +677,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
     return mpfrInput.roundeven();
   case Operation::Sin:
     return mpfrInput.sin();
+  case Operation::Sinpi:
+    return mpfrInput.sinpi();
   case Operation::Sinh:
     return mpfrInput.sinh();
   case Operation::Sqrt:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 46f3375fd4b7e..11e323bf6881d 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -51,6 +51,7 @@ enum class Operation : int {
   Round,
   RoundEven,
   Sin,
+  Sinpi,
   Sinh,
   Sqrt,
   Tan,

>From d55a543769fb08edc6894d9d4f8ebc1fa017e32e Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Fri, 21 Jun 2024 13:26:29 +0200
Subject: [PATCH 2/8] Add mpfr sinpi function and update cmake

---
 libc/src/math/CMakeLists.txt                 |  1 +
 libc/src/math/generic/sinpif.cpp             |  3 +++
 libc/test/src/math/exhaustive/CMakeLists.txt | 16 ++++++++++++++++
 libc/test/src/math/sinpif_test.cpp           | 11 -----------
 libc/utils/MPFRWrapper/MPFRUtils.cpp         |  6 ++++++
 5 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 3dfc4ac94827d..3037c39c85506 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -370,6 +370,7 @@ add_math_entrypoint_object(sincosf)
 
 add_math_entrypoint_object(sin)
 add_math_entrypoint_object(sinf)
+add_math_entrypoint_object(sinpif)
 
 add_math_entrypoint_object(sinh)
 add_math_entrypoint_object(sinhf)
diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index 50c0494bb2233..f76b34c3d1b4d 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -101,6 +101,9 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
       // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
       // |x| < 2^-125. For targets without FMA instructions, we simply use
       // double for intermediate results as it is more efficient than using an
+      double xdpi =  xd * 3.1415926535897;
+      return static_cast<float>(fputil::multiply_add(xd, -0x1.65p-34, xdpi));
+
       // emulated version of FMA.
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
       return fputil::multiply_add(x, -0x1.0p-25f, x);
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index fb3596c3378ff..412ca031d0e99 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -42,6 +42,22 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  sinpif_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    sinpif_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.src.math.sinpif
+    libc.src.__support.FPUtil.fp_bits
+  LINK_LIBRARIES
+    -lpthread
+)
+
 add_fp_unittest(
   cosf_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/sinpif_test.cpp b/libc/test/src/math/sinpif_test.cpp
index 9c2b1210ea3bc..40bcb9541c5f5 100644
--- a/libc/test/src/math/sinpif_test.cpp
+++ b/libc/test/src/math/sinpif_test.cpp
@@ -43,17 +43,6 @@ TEST_F(LlvmLibcSinpifTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-TEST_F(LlvmLibcSinpifTest, InFloatRange) {
-  constexpr uint32_t COUNT = 100'000;
-  constexpr uint32_t STEP = UINT32_MAX / COUNT;
-  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
-    float x = FPBits(v).get_val();
-    if (isnan(x) || isinf(x))
-      continue;
-    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
-                                   LIBC_NAMESPACE::sinpif(x), 0.5);
-  }
-}
 
 TEST_F(LlvmLibcSinpifTest, SpecificBitPatterns) {
   constexpr int N = 36;
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index e6e1b8331140d..7aaefa671e2ab 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -437,6 +437,12 @@ class MPFRNumber {
     return result;
   }
 
+  MPFRNumber sinpi() const {
+    MPFRNumber result(*this);
+    mpfr_sinpi(result.value, value, mpfr_rounding);
+    return result;
+  }
+
   MPFRNumber sinh() const {
     MPFRNumber result(*this);
     mpfr_sinh(result.value, value, mpfr_rounding);

>From 0c4a4f938bf2a33aaf5b381735c39325b1841c76 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 00:05:46 +0200
Subject: [PATCH 3/8] Computed new polynom factors for sinpi function

---
 libc/src/math/generic/range_reduction_fma.h   | 10 ++++
 libc/src/math/generic/sincosf_utils.h         | 39 ++++++++++----
 libc/src/math/generic/sinpif.cpp              | 48 +++++++++--------
 libc/test/src/math/exhaustive/sinpif_test.cpp | 26 ++++++++++
 libc/test/src/math/sinpif_test.cpp            | 51 -------------------
 5 files changed, 92 insertions(+), 82 deletions(-)
 create mode 100644 libc/test/src/math/exhaustive/sinpif_test.cpp

diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
index 82b4ae1c705e1..b65031311f0d9 100644
--- a/libc/src/math/generic/range_reduction_fma.h
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -38,6 +38,16 @@ LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
   return static_cast<int64_t>(kd);
 }
 
+// Return k and y, where
+//   k = round(x * 32) and y = (x * 32) - k.
+//   => pi * x = (k + y) * pi / 32
+LIBC_INLINE int64_t small_range_reduction_mul_pi(double x, double &y) {
+  double kd = fputil::nearest_integer(x * 32);
+  y = fputil::fma(x, 32.0, -kd);
+  
+  return static_cast<int64_t>(kd);
+}
+
 // Return k and y, where
 //   k = round(x * 32 / pi) and y = (x * 32 / pi) - k.
 // This is used for sinf, cosf, sincosf.
diff --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h
index 904df4f4ed08e..3f5d2777f7cfa 100644
--- a/libc/src/math/generic/sincosf_utils.h
+++ b/libc/src/math/generic/sincosf_utils.h
@@ -11,14 +11,18 @@
 
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/macros/attributes.h"
 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
+#include <cstdint>
 
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 #include "range_reduction_fma.h"
 // using namespace LIBC_NAMESPACE::fma;
 using LIBC_NAMESPACE::fma::FAST_PASS_BOUND;
 using LIBC_NAMESPACE::fma::large_range_reduction;
+using LIBC_NAMESPACE::fma::small_range_reduction_mul_pi;
 using LIBC_NAMESPACE::fma::small_range_reduction;
+
 #else
 #include "range_reduction.h"
 // using namespace LIBC_NAMESPACE::generic;
@@ -58,18 +62,9 @@ const double SIN_K_PI_OVER_32[64] = {
     -0x1.917a6bc29b42cp-4,
 };
 
-LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k,
-                              double &cos_k, double &sin_y, double &cosm1_y) {
-  int64_t k;
-  double y;
-
-  if (LIBC_LIKELY(x_abs < FAST_PASS_BOUND)) {
-    k = small_range_reduction(xd, y);
-  } else {
-    fputil::FPBits<float> x_bits(x_abs);
-    k = large_range_reduction(xd, x_bits.get_exponent(), y);
-  }
 
+static LIBC_INLINE void sincosf_poly_eval(int64_t k, double y, double &sin_k,
+                              double &cos_k, double &sin_y, double &cosm1_y) {
   // After range reduction, k = round(x * 32 / pi) and y = (x * 32 / pi) - k.
   // So k is an integer and -0.5 <= y <= 0.5.
   // Then sin(x) = sin((k + y)*pi/32)
@@ -95,6 +90,28 @@ LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k,
                                    0x1.03c1f070c2e27p-18, -0x1.55cc84bd942p-30);
 }
 
+LIBC_INLINE void sincosf_eval(double xd, uint32_t x_abs, double &sin_k,
+                              double &cos_k, double &sin_y, double &cosm1_y) {
+  int64_t k;
+  double y;
+
+  if (LIBC_LIKELY(x_abs < FAST_PASS_BOUND)) {
+    k = small_range_reduction(xd, y);
+  } else {
+    fputil::FPBits<float> x_bits(x_abs);
+    k = large_range_reduction(xd, x_bits.get_exponent(), y);
+  }
+
+  sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y);
+}
+
+LIBC_INLINE void sincospif_eval(double xd, double &sin_k, double &cos_k,
+                                double &sin_y, double &cosm1_y) {
+  double y;
+  int64_t k = small_range_reduction_mul_pi(xd, y);
+  sincosf_poly_eval(k, y, sin_k, cos_k, sin_y, cosm1_y);
+}
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF_UTILS_H
diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index f76b34c3d1b4d..dd0f3afb050b0 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -18,7 +18,9 @@
 #include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
 
+#include <cstdio>
 #include <errno.h>
+#include <iostream>
 
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 #include "range_reduction_fma.h"
@@ -28,6 +30,23 @@
 
 namespace LIBC_NAMESPACE {
 
+float tof(int i) {
+    return *(float * )&i;
+}
+
+int toi(float f) {
+    return *(int *)&f;
+}
+
+
+double dtof(long i) {
+    return *(double * )&i;
+}
+
+long dtoi(double f) {
+    return *(long *)&f;
+}
+
 LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
@@ -76,8 +95,7 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   // |x| <= 1/16
   if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U)) {
 
-    // |x| < 0x1.d12ed2p-12f
-    if (LIBC_UNLIKELY(x_abs < 0x39e8'9769U)) {
+    if (LIBC_UNLIKELY(x_abs < 0x34b0'0000U)) {
       if (LIBC_UNLIKELY(x_abs == 0U)) {
         // For signed zeros.
         return x;
@@ -102,17 +120,16 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
       // |x| < 2^-125. For targets without FMA instructions, we simply use
       // double for intermediate results as it is more efficient than using an
       double xdpi =  xd * 3.1415926535897;
-      return static_cast<float>(fputil::multiply_add(xd, -0x1.65p-34, xdpi));
+      return static_cast<float>(xdpi);
 
       // emulated version of FMA.
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
-      return fputil::multiply_add(x, -0x1.0p-25f, x);
 #else
       return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
 #endif // LIBC_TARGET_CPU_HAS_FMAx
     }
 
-    // |x| < pi/16.
+    // |x| < 1/16.
     double xsq = xd * xd;
 
     // Degree-9 polynomial approximation:
@@ -121,22 +138,13 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
     //          = x * P(x^2)
     // generated by Sollya with the following commands:
     // > display = hexadecimal;
-    // > Q = fpminimax(sin(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/16]);
-    double result =
-        fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7,
-                         -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19);
+    // > Q = fpminimax(sin(pi * x)/x, [|0, 2, 4, 6, 8|], [|D...|], [0, 1/16]);
+    double result = fputil::polyeval(
+        xsq, 0x1.921fb54442d18p1, -0x1.4abbce625bbf2p2, 0x1.466bc675e116ap1,
+        -0x1.32d2c0b62d41cp-1, 0x1.501ec4497cb7dp-4);
     return static_cast<float>(xd * result);
   }
 
-  if (LIBC_UNLIKELY(x_abs == 0x4619'9998U)) { // x = 0x1.33333p13
-    float r = -0x1.63f4bap-2f;
-    int rounding = fputil::quick_get_round();
-    if ((rounding == FE_DOWNWARD && xbits.is_pos()) ||
-        (rounding == FE_UPWARD && xbits.is_neg()))
-      r = -0x1.63f4bcp-2f;
-
-    return xbits.is_neg() ? -r : r;
-  }
 
   if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
     if (x_abs == 0x7f80'0000U) {
@@ -147,13 +155,13 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   }
 
   // Combine the results with the sine of sum formula:
-  //   sin(x) = sin((k + y)*pi/32)
+  //   sin(x * pi) = sin((k + y)*pi/32)
   //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
   //          = sin_y * cos_k + (1 + cosm1_y) * sin_k
   //          = sin_y * cos_k + (cosm1_y * sin_k + sin_k)
   double sin_k, cos_k, sin_y, cosm1_y;
 
-  sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
+  sincospif_eval(xd, sin_k, cos_k, sin_y, cosm1_y);
 
   return static_cast<float>(fputil::multiply_add(
       sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k)));
diff --git a/libc/test/src/math/exhaustive/sinpif_test.cpp b/libc/test/src/math/exhaustive/sinpif_test.cpp
new file mode 100644
index 0000000000000..96870ed200d96
--- /dev/null
+++ b/libc/test/src/math/exhaustive/sinpif_test.cpp
@@ -0,0 +1,26 @@
+//===-- Exhaustive test for sinpif ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/math/sinpif.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "mpfr.h"
+#include <sys/types.h>
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+using LlvmLibcSinpifExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Sinpi,
+                                      LIBC_NAMESPACE::sinpif>;
+
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcSinpifExhaustiveTest, PostiveRange) {
+  test_full_range_all_roundings(POS_START, POS_STOP);
+}
diff --git a/libc/test/src/math/sinpif_test.cpp b/libc/test/src/math/sinpif_test.cpp
index 40bcb9541c5f5..05f886ba8bfa8 100644
--- a/libc/test/src/math/sinpif_test.cpp
+++ b/libc/test/src/math/sinpif_test.cpp
@@ -43,57 +43,6 @@ TEST_F(LlvmLibcSinpifTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-
-TEST_F(LlvmLibcSinpifTest, SpecificBitPatterns) {
-  constexpr int N = 36;
-  constexpr uint32_t INPUTS[N] = {
-      0x3f06'0a92U, // x = pi/6
-      0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
-      0x3f49'0fdbU, // x = pi/4
-      0x3f86'0a92U, // x = pi/3
-      0x3fa7'832aU, // x = 0x1.4f0654p+0f
-      0x3fc9'0fdbU, // x = pi/2
-      0x4017'1973U, // x = 0x1.2e32e6p+1f
-      0x4049'0fdbU, // x = pi
-      0x4096'cbe4U, // x = 0x1.2d97c8p+2f
-      0x40c9'0fdbU, // x = 2*pi
-      0x433b'7490U, // x = 0x1.76e92p+7f
-      0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
-      0x4619'9998U, // x = 0x1.33333p+13f
-      0x474d'246fU, // x = 0x1.9a48dep+15f
-      0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
-      0x4c23'32e9U, // x = 0x1.4665d2p+25f
-      0x50a3'e87fU, // x = 0x1.47d0fep+34f
-      0x5239'47f6U, // x = 0x1.728fecp+37f
-      0x53b1'46a6U, // x = 0x1.628d4cp+40f
-      0x55ca'fb2aU, // x = 0x1.95f654p+44f
-      0x588e'f060U, // x = 0x1.1de0cp+50f
-      0x5c07'bcd0U, // x = 0x1.0f79ap+57f
-      0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
-      0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
-      0x61a4'0b40U, // x = 0x1.48168p+68f
-      0x6386'134eU, // x = 0x1.0c269cp+72f
-      0x6589'8498U, // x = 0x1.13093p+76f
-      0x6600'0001U, // x = 0x1.000002p+77f
-      0x664e'46e4U, // x = 0x1.9c8dc8p+77f
-      0x66b0'14aaU, // x = 0x1.602954p+78f
-      0x67a9'242bU, // x = 0x1.524856p+80f
-      0x6a19'76f1U, // x = 0x1.32ede2p+85f
-      0x6c55'da58U, // x = 0x1.abb4bp+89f
-      0x6f79'be45U, // x = 0x1.f37c8ap+95f
-      0x7276'69d4U, // x = 0x1.ecd3a8p+101f
-      0x7758'4625U, // x = 0x1.b08c4ap+111f
-  };
-
-  for (int i = 0; i < N; ++i) {
-    float x = FPBits(INPUTS[i]).get_val();
-    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
-                                   LIBC_NAMESPACE::sinpif(x), 0.5);
-    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, -x,
-                                   LIBC_NAMESPACE::sinpif(-x), 0.5);
-  }
-}
-
 // For small values, sin(x) is x.
 TEST_F(LlvmLibcSinpifTest, SmallValues) {
   float x = FPBits(0x1780'0000U).get_val();

>From 65c815cc34eefe08749e3f0469b01506e733feab Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 00:20:07 +0200
Subject: [PATCH 4/8] Update factor for very small x

---
 libc/src/math/generic/sinpif.cpp | 30 +++---------------------------
 1 file changed, 3 insertions(+), 27 deletions(-)

diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index dd0f3afb050b0..d42952ac08f43 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -95,38 +95,14 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   // |x| <= 1/16
   if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U)) {
 
-    if (LIBC_UNLIKELY(x_abs < 0x34b0'0000U)) {
+    if (LIBC_UNLIKELY(x_abs < 0x33FC'1538U)) {
       if (LIBC_UNLIKELY(x_abs == 0U)) {
         // For signed zeros.
         return x;
       }
-      // When |x| < 2^-12, the relative error of the approximation sin(x) ~ x
-      // is:
-      //   |sin(x) - x| / |sin(x)| < |x^3| / (6|x|)
-      //                           = x^2 / 6
-      //                           < 2^-25
-      //                           < epsilon(1)/2.
-      // So the correctly rounded values of sin(x) are:
-      //   = x - sign(x)*eps(x) if rounding mode = FE_TOWARDZERO,
-      //                        or (rounding mode = FE_UPWARD and x is
-      //                        negative),
-      //   = x otherwise.
-      // To simplify the rounding decision and make it more efficient, we use
-      //   fma(x, -2^-25, x) instead.
-      // An exhaustive test shows that this formula work correctly for all
-      // rounding modes up to |x| < 0x1.c555dep-11f.
-      // Note: to use the formula x - 2^-25*x to decide the correct rounding, we
-      // do need fma(x, -2^-25, x) to prevent underflow caused by -2^-25*x when
-      // |x| < 2^-125. For targets without FMA instructions, we simply use
-      // double for intermediate results as it is more efficient than using an
-      double xdpi =  xd * 3.1415926535897;
-      return static_cast<float>(xdpi);
 
-      // emulated version of FMA.
-#if defined(LIBC_TARGET_CPU_HAS_FMA)
-#else
-      return static_cast<float>(fputil::multiply_add(xd, -0x1.0p-25, xd));
-#endif // LIBC_TARGET_CPU_HAS_FMAx
+      double xdpi =  xd * 0x1.921fb54442d18p1; // pi
+      return static_cast<float>(xdpi);
     }
 
     // |x| < 1/16.

>From 349be9c2f99d138891851780b10109e7d49354c2 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 00:31:47 +0200
Subject: [PATCH 5/8] remove debug functions

---
 libc/src/math/generic/sinpif.cpp | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index d42952ac08f43..5ee2de3d5c640 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -30,23 +30,6 @@
 
 namespace LIBC_NAMESPACE {
 
-float tof(int i) {
-    return *(float * )&i;
-}
-
-int toi(float f) {
-    return *(int *)&f;
-}
-
-
-double dtof(long i) {
-    return *(double * )&i;
-}
-
-long dtoi(double f) {
-    return *(long *)&f;
-}
-
 LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);

>From 67ec7772a215a8a81dcd920a75f11e43334ddb97 Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 00:48:28 +0200
Subject: [PATCH 6/8] Added smoke and unit test

---
 libc/src/math/generic/sinpif.cpp              |  8 +--
 libc/test/src/math/exhaustive/sinpif_test.cpp |  9 ++++
 libc/test/src/math/sinpif_test.cpp            | 54 ++++++++++++++++++-
 libc/test/src/math/smoke/CMakeLists.txt       | 13 +++++
 libc/test/src/math/smoke/sinpif_test.cpp      | 38 +++++++++++++
 5 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 libc/test/src/math/smoke/sinpif_test.cpp

diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index 5ee2de3d5c640..2cedb6f29021b 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -78,13 +78,15 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   // |x| <= 1/16
   if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U)) {
 
-    if (LIBC_UNLIKELY(x_abs < 0x33FC'1538U)) {
+    if (LIBC_UNLIKELY(x_abs < 0x33CD'01D7U)) {
       if (LIBC_UNLIKELY(x_abs == 0U)) {
         // For signed zeros.
         return x;
       }
-
-      double xdpi =  xd * 0x1.921fb54442d18p1; // pi
+      
+      // For very small values we can approximate sinpi(x) with x * pi
+      // An exhaustive test shows that this is accurate for |x| < 9.546391 × 10-8
+      double xdpi =  xd * 0x1.921fb54442d18p1;
       return static_cast<float>(xdpi);
     }
 
diff --git a/libc/test/src/math/exhaustive/sinpif_test.cpp b/libc/test/src/math/exhaustive/sinpif_test.cpp
index 96870ed200d96..3c74c2872bef0 100644
--- a/libc/test/src/math/exhaustive/sinpif_test.cpp
+++ b/libc/test/src/math/exhaustive/sinpif_test.cpp
@@ -21,6 +21,15 @@ using LlvmLibcSinpifExhaustiveTest =
 static constexpr uint32_t POS_START = 0x0000'0000U;
 static constexpr uint32_t POS_STOP = 0x7f80'0000U;
 
+// Range: [0, Inf]
 TEST_F(LlvmLibcSinpifExhaustiveTest, PostiveRange) {
   test_full_range_all_roundings(POS_START, POS_STOP);
 }
+
+// Range: [-Inf, 0]
+static constexpr uint32_t NEG_START = 0xb000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcSinpifExhaustiveTest, NegativeRange) {
+  test_full_range_all_roundings(NEG_START, NEG_STOP);
+}
diff --git a/libc/test/src/math/sinpif_test.cpp b/libc/test/src/math/sinpif_test.cpp
index 05f886ba8bfa8..15f25b86574ac 100644
--- a/libc/test/src/math/sinpif_test.cpp
+++ b/libc/test/src/math/sinpif_test.cpp
@@ -43,7 +43,57 @@ TEST_F(LlvmLibcSinpifTest, SpecialNumbers) {
   EXPECT_MATH_ERRNO(EDOM);
 }
 
-// For small values, sin(x) is x.
+TEST_F(LlvmLibcSinpifTest, SpecificBitPatterns) {
+  constexpr int N = 36;
+  constexpr uint32_t INPUTS[N] = {
+      0x3f06'0a92U, // x = pi/6
+      0x3f3a'dc51U, // x = 0x1.75b8a2p-1f
+      0x3f49'0fdbU, // x = pi/4
+      0x3f86'0a92U, // x = pi/3
+      0x3fa7'832aU, // x = 0x1.4f0654p+0f
+      0x3fc9'0fdbU, // x = pi/2
+      0x4017'1973U, // x = 0x1.2e32e6p+1f
+      0x4049'0fdbU, // x = pi
+      0x4096'cbe4U, // x = 0x1.2d97c8p+2f
+      0x40c9'0fdbU, // x = 2*pi
+      0x433b'7490U, // x = 0x1.76e92p+7f
+      0x437c'e5f1U, // x = 0x1.f9cbe2p+7f
+      0x4619'9998U, // x = 0x1.33333p+13f
+      0x474d'246fU, // x = 0x1.9a48dep+15f
+      0x4afd'ece4U, // x = 0x1.fbd9c8p+22f
+      0x4c23'32e9U, // x = 0x1.4665d2p+25f
+      0x50a3'e87fU, // x = 0x1.47d0fep+34f
+      0x5239'47f6U, // x = 0x1.728fecp+37f
+      0x53b1'46a6U, // x = 0x1.628d4cp+40f
+      0x55ca'fb2aU, // x = 0x1.95f654p+44f
+      0x588e'f060U, // x = 0x1.1de0cp+50f
+      0x5c07'bcd0U, // x = 0x1.0f79ap+57f
+      0x5ebc'fddeU, // x = 0x1.79fbbcp+62f
+      0x5fa6'eba7U, // x = 0x1.4dd74ep+64f
+      0x61a4'0b40U, // x = 0x1.48168p+68f
+      0x6386'134eU, // x = 0x1.0c269cp+72f
+      0x6589'8498U, // x = 0x1.13093p+76f
+      0x6600'0001U, // x = 0x1.000002p+77f
+      0x664e'46e4U, // x = 0x1.9c8dc8p+77f
+      0x66b0'14aaU, // x = 0x1.602954p+78f
+      0x67a9'242bU, // x = 0x1.524856p+80f
+      0x6a19'76f1U, // x = 0x1.32ede2p+85f
+      0x6c55'da58U, // x = 0x1.abb4bp+89f
+      0x6f79'be45U, // x = 0x1.f37c8ap+95f
+      0x7276'69d4U, // x = 0x1.ecd3a8p+101f
+      0x7758'4625U, // x = 0x1.b08c4ap+111f
+  };
+
+  for (int i = 0; i < N; ++i) {
+    float x = FPBits(INPUTS[i]).get_val();
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
+                                   LIBC_NAMESPACE::sinpif(x), 0.5);
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, -x,
+                                   LIBC_NAMESPACE::sinpif(-x), 0.5);
+  }
+}
+
+// For small values, sinpi(x) is pi * x.
 TEST_F(LlvmLibcSinpifTest, SmallValues) {
   float x = FPBits(0x1780'0000U).get_val();
   EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinpi, x,
@@ -54,7 +104,7 @@ TEST_F(LlvmLibcSinpifTest, SmallValues) {
                                  LIBC_NAMESPACE::sinpif(x), 0.5);
 }
 
-// SDCOMP-26094: check sinpif in the cases for which the range reducer
+// SDCOMP-26094: check sinfpi in the cases for which the range reducer
 // returns values furthest beyond its nominal upper bound of pi/4.
 TEST_F(LlvmLibcSinpifTest, SDCOMP_26094) {
   for (uint32_t v : SDCOMP26094_VALUES) {
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index ee6f1595fe0fd..c58167a150adc 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -27,6 +27,19 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  sinpif_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    sinpif_test.cpp
+  DEPENDS
+    libc.src.errno.errno
+    libc.src.math.sinpif
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   sincosf_test
   SUITE
diff --git a/libc/test/src/math/smoke/sinpif_test.cpp b/libc/test/src/math/smoke/sinpif_test.cpp
new file mode 100644
index 0000000000000..9b3b6a6948345
--- /dev/null
+++ b/libc/test/src/math/smoke/sinpif_test.cpp
@@ -0,0 +1,38 @@
+//===-- Unittests for sinpif ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/math_macros.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/sinpif.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+#include <errno.h>
+#include <stdint.h>
+
+using LlvmLibcSinpifTest = LIBC_NAMESPACE::testing::FPTest<float>;
+
+TEST_F(LlvmLibcSinpifTest, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::sinpif(-0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+}

>From 16b93842602c88f5632dec0397bd28599f7da84d Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 00:56:51 +0200
Subject: [PATCH 7/8] Removed unnecessary headers

---
 libc/src/math/generic/range_reduction_fma.h | 2 +-
 libc/src/math/generic/sincosf_utils.h       | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h
index b65031311f0d9..a270fdc0033f2 100644
--- a/libc/src/math/generic/range_reduction_fma.h
+++ b/libc/src/math/generic/range_reduction_fma.h
@@ -43,7 +43,7 @@ LIBC_INLINE int64_t small_range_reduction(double x, double &y) {
 //   => pi * x = (k + y) * pi / 32
 LIBC_INLINE int64_t small_range_reduction_mul_pi(double x, double &y) {
   double kd = fputil::nearest_integer(x * 32);
-  y = fputil::fma(x, 32.0, -kd);
+  y = fputil::fma<double>(x, 32.0, -kd);
   
   return static_cast<int64_t>(kd);
 }
diff --git a/libc/src/math/generic/sincosf_utils.h b/libc/src/math/generic/sincosf_utils.h
index 3f5d2777f7cfa..5d93b287057a1 100644
--- a/libc/src/math/generic/sincosf_utils.h
+++ b/libc/src/math/generic/sincosf_utils.h
@@ -11,9 +11,7 @@
 
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/macros/attributes.h"
 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
-#include <cstdint>
 
 #if defined(LIBC_TARGET_CPU_HAS_FMA)
 #include "range_reduction_fma.h"

>From b0fa41c6b116ca370be2f9578243c199c681e5ec Mon Sep 17 00:00:00 2001
From: hhuebner <hendrik.huebner18 at gmail.com>
Date: Sat, 29 Jun 2024 09:45:43 +0200
Subject: [PATCH 8/8] Update explaination comments for sinpif

---
 libc/src/math/generic/sinpif.cpp | 29 ++++++-----------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/libc/src/math/generic/sinpif.cpp b/libc/src/math/generic/sinpif.cpp
index 2cedb6f29021b..5008e1cdab19d 100644
--- a/libc/src/math/generic/sinpif.cpp
+++ b/libc/src/math/generic/sinpif.cpp
@@ -1,4 +1,4 @@
-//===-- Single-precision sinpi function -------------------------------------===//
+//===-- Single-precision sinpif function -------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -41,34 +41,17 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
   // Range reduction:
   // For |x| > pi/32, we perform range reduction as follows:
   // Find k and y such that:
-  //   x = (k + y) * pi/32
+  //   x = (k + y) * 1/32
   //   k is an integer
   //   |y| < 0.5
   // For small range (|x| < 2^45 when FMA instructions are available, 2^22
   // otherwise), this is done by performing:
-  //   k = round(x * 32/pi)
-  //   y = x * 32/pi - k
-  // For large range, we will omit all the higher parts of 32/pi such that the
-  // least significant bits of their full products with x are larger than 63,
-  // since sin((k + y + 64*i) * pi/32) = sin(x + i * 2pi) = sin(x).
-  //
-  // When FMA instructions are not available, we store the digits of 32/pi in
-  // chunks of 28-bit precision.  This will make sure that the products:
-  //   x * THIRTYTWO_OVER_PI_28[i] are all exact.
-  // When FMA instructions are available, we simply store the digits of 32/pi in
-  // chunks of doubles (53-bit of precision).
-  // So when multiplying by the largest values of single precision, the
-  // resulting output should be correct up to 2^(-208 + 128) ~ 2^-80.  By the
-  // worst-case analysis of range reduction, |y| >= 2^-38, so this should give
-  // us more than 40 bits of accuracy. For the worst-case estimation of range
-  // reduction, see for instances:
-  //   Elementary Functions by J-M. Muller, Chapter 11,
-  //   Handbook of Floating-Point Arithmetic by J-M. Muller et. al.,
-  //   Chapter 10.2.
+  //   k = round(x * 32)
+  //   y = x * 32 - k
   //
   // Once k and y are computed, we then deduce the answer by the sine of sum
   // formula:
-  //   sin(x) = sin((k + y)*pi/32)
+  //   sin(x * pi) = sin((k + y)*pi/32)
   //          = sin(y*pi/32) * cos(k*pi/32) + cos(y*pi/32) * sin(k*pi/32)
   // The values of sin(k*pi/32) and cos(k*pi/32) for k = 0..31 are precomputed
   // and stored using a vector of 32 doubles. Sin(y*pi/32) and cos(y*pi/32) are
@@ -94,7 +77,7 @@ LLVM_LIBC_FUNCTION(float, sinpif, (float x)) {
     double xsq = xd * xd;
 
     // Degree-9 polynomial approximation:
-    //   sin(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9
+    //   sinpi(x) ~ x + a_3 x^3 + a_5 x^5 + a_7 x^7 + a_9 x^9
     //          = x (1 + a_3 x^2 + ... + a_9 x^8)
     //          = x * P(x^2)
     // generated by Sollya with the following commands: