[libc-commits] [libc] [libc] Add sinpif16 function (PR #110994)

Thu Oct 3 06:46:52 PDT 2024

https://github.com/wldfngrs created https://github.com/llvm/llvm-project/pull/110994

Adds sinpif16, a half-precision (16-bit) floating-point implementation of sinpi, i.e. sin(pi * x).

>From 1dd121c8f55d32c7152389e20d1cb2005d983630 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Mon, 23 Sep 2024 23:29:33 +0100
Subject: [PATCH 1/3] add sinpif16 function implementation and unittests

---
 libc/config/linux/aarch64/entrypoints.txt  |   1 +
 libc/config/linux/arm/entrypoints.txt      |   1 +
 libc/config/linux/riscv/entrypoints.txt    |   1 +
 libc/config/linux/x86_64/entrypoints.txt   |   1 +
 libc/config/windows/entrypoints.txt        |   3 +-
 libc/newhdrgen/yaml/math.yaml              |   6 +
 libc/src/math/CMakeLists.txt               |   1 +
 libc/src/math/generic/CMakeLists.txt       |  16 +++
 libc/src/math/generic/sinpif16.cpp         | 151 +++++++++++++++++++++
 libc/src/math/sinpif16.h                   |  23 ++++
 libc/test/src/math/smoke/CMakeLists.txt    |  13 ++
 libc/test/src/math/smoke/sinpif16_test.cpp |  44 ++++++
 libc/utils/MPFRWrapper/MPFRUtils.h         |   1 +
 13 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/math/generic/sinpif16.cpp
 create mode 100644 libc/src/math/sinpif16.h
 create mode 100644 libc/test/src/math/smoke/sinpif16_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 64fbe1a250c0ba..47f46c4b9160e3 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -564,6 +564,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sinf
     libc.src.math.sinhf
     libc.src.math.sinpif
+    libc.src.math.sinpif16
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 1be9a872dd2f7f..dbd21f6b5d4388 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -390,6 +390,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sincosf
     libc.src.math.sinf
     libc.src.math.sinhf
+    libc.src.math.sinpif16
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index ff3d821c664c5b..c45ba2fe5b9aa4 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -567,6 +567,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sinf
     libc.src.math.sinhf
     libc.src.math.sinpif
+    libc.src.math.sinpif16
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index dd658af3bfb674..12b86f541bc91e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -567,6 +567,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sinf
     libc.src.math.sinhf
     libc.src.math.sinpif
+    libc.src.math.sinpif16
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 8f0b50bcc83ea2..5b0d84669e9244 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -275,7 +275,8 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.sincosf
     libc.src.math.sincosf
     libc.src.math.sinf
-    libc.src.math.sinhf
+    libc.src.math.sinhfi
+    libc.src.math.sinpif16
     libc.src.math.sqrt
     libc.src.math.sqrtf
     libc.src.math.sqrtl
diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
index 04b6a073deace0..fb79f15a314604 100644
--- a/libc/newhdrgen/yaml/math.yaml
+++ b/libc/newhdrgen/yaml/math.yaml
@@ -2290,6 +2290,12 @@ functions:
     return_type: float
     arguments:
       - type: float
+  - name: sinpif16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
   - name: sqrt
     standards:
       - stdc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 3cba34fc249322..dbfc0ce6fcaeb0 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -462,6 +462,7 @@ add_math_entrypoint_object(sincosf)
 add_math_entrypoint_object(sin)
 add_math_entrypoint_object(sinf)
 add_math_entrypoint_object(sinpif)
+add_math_entrypoint_object(sinpif16)
 
 add_math_entrypoint_object(sinh)
 add_math_entrypoint_object(sinhf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 5a1ee3b8b83c77..6a2e45dbd4023d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -473,6 +473,22 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  sinpif16
+  SRCS
+    sinpif16.cpp
+  HDRS
+    ../sinpif16.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.common
+    libc.src.__support.macros.config
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   tan
   SRCS
diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
new file mode 100644
index 00000000000000..bb028d5b88fc08
--- /dev/null
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -0,0 +1,151 @@
+//===-- Half-precision sinpif function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define M_PI 3.1415925f
+
+#include "src/math/sinpif16.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+// TODO: Should probably create a new file; sincospif16_utils.h
+// To store the following helper functions and constants. 
+// I'd defer to @lntue for suggestions regarding that
+
+// HELPER_START
+namespace LIBC_NAMESPACE_DECL {
+
+constexpr float PI_OVER_32 = M_PI / 32;
+
+// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial 
+// approximating the sine function in [-pi / 32, pi / 32] with the following 
+// commands:
+// > prec=23;
+// > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
+// > TL[0];
+const float SIN_COEFF[10] = {
+  0x1.801p-27, 0x1.000078p0,
+  -0x1.7e98p-14, -0x1.6bf4p-3,
+  0x1.95ccp-5, 0x1.1baep2,
+  -0x1.030ap3, -0x1.3dap9, 
+  0x1.98e4p8, 0x1.d3d8p14
+};
+
+// In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
+// approximating the sine function in [-pi/32, pi/32] with the following
+// commands:
+// > prec = 23;
+// > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
+// > TL[0];
+const float COS_COEFF[10] = {
+  0x1.00001p0, -0x1.48p-17,
+  -0x1.01259cp-1, -0x1.17fp-6,
+  0x1.283p0, 0x1.5d1p3,
+  -0x1.6278p7, -0x1.c23p10,
+  0x1.1444p13, 0x1.5fcp16
+};
+
+// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
+// Table is generated with Sollya as follows:
+// > display = hexadecimmal;
+// > prec = 23;
+// > for k from 0 to 63 do {sin(k * pi/32);};
+
+const float SIN_K_PI_OVER_32[64] = {
+  0, 0x1.917a6cp-4, 0x1.8f8b84p-3,
+  0x1.294064p-2, 0x1.87de2cp-2, 0x1.e2b5d4p-2,
+  0x1.1c73b4p-1, 0x1.44cf34p-1, 0x1.6a09e8p-1,
+  0x1.8bc808p-1, 0x1.a9b664p-1, 0x1.c38b3p-1,
+  0x1.d906bcp-1, 0x1.e9f414p-1, 0x1.f6297cp-1,
+  0x1.fd88dcp-1, 0x1p0, 0x1.fd88dcp-1,
+  0x1.f6297cp-1, 0x1.e9f414p-1, 0x1.d906bcp-1,
+  0x1.c38b3p-1, 0x1.a9b664p-1, 0x1.8bc808p-1,
+  0x1.6a09e8p-1, 0x1.44cf34p-1, 0x1.1c73b4p-1,
+  0x1.e2b5d4p-2, 0x1.87de2cp-2, 0x1.294064p-2,
+  0x1.8f8b84p-3, 0x1.917a6cp-4, 0,
+  -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294064p-2,
+  -0x1.87de2cp-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1,
+  -0x1.44cf34p-1, -0x1.6a09e8p-1, -0x1.8bc808p-1,
+  -0x1.a9b664p-1, -0x1.c38b3p-1, -0x1.d906bcp-1,
+  -0x1.e9f414p-1, -0x1.f6297cp-1, -0x1.fd88dcp-1,
+  -0x1p0, -0x1.fd88dcp-1, -0x1.f6297cp-1,
+  -0x1.e9f414p-1, -0x1.d906bcp-1, -0x1.c38b3p-1,
+  -0x1.a9b664p-1, -0x1.8bc808p-1, -0x1.6a09e8p-1,
+  -0x1.44cf34p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+  -0x1.87de2cp-2, -0x1.294064p-2, -0x1.8f8b84p-3,
+  -0x1.917a6cp-4
+};
+
+// horner's algorithm to accurately and efficiently evaluate a degree-9
+// polynomial iteratively
+float horners(float x, const float COEFF[10]) {
+  float b8 = fputil::multiply_add<float>(COEFF[9], x, COEFF[8]);
+  float b7 = fputil::multiply_add<float>(b8, x, COEFF[7]);
+  float b6 = fputil::multiply_add<float>(b7, x, COEFF[6]);
+  float b5 = fputil::multiply_add<float>(b6, x, COEFF[5]);
+  float b4 = fputil::multiply_add<float>(b5, x, COEFF[4]);
+  float b3 = fputil::multiply_add<float>(b4, x, COEFF[3]);
+  float b2 = fputil::multiply_add<float>(b3, x, COEFF[2]);
+  float b1 = fputil::multiply_add<float>(b2, x, COEFF[1]);
+  return fputil::multiply_add<float>(b1, x, COEFF[0]);
+}
+
+float range_reduction(float x, float& y) {
+  float kf = fputil::nearest_integer(x * 32);
+  y = fputil::multiply_add<float>(x, 32.0, -kf);
+
+  return static_cast<int32_t>(kf);
+}
+// HELPER_END
+
+LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
+  using FPBits = typename fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+
+  // Range reduction:
+  // For |x| > 1/32, we perform range reduction as follows:
+  // Find k and y such that:
+  //   x = (k + y) * 1/32
+  //   k is an integer
+  //   |y| < 0.5
+  //
+  // This is done by performing:
+  //   k = round(x * 32)
+  //   y = x * 32 - k
+  //
+  // Once k and y are computed, we then deduce the answer by the sine of sum
+  // formula:
+  //   sin(x * pi) = sin((k + y) * pi/32)
+  //           = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k * pi/32)
+  // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are precomputed
+  // and stored using a vector of 64 single precision floats. sin(y * pi/32) and cos(y * pi/32) are
+  // computed using degree-9 chebyshev polynomials generated by Sollya.
+
+  float f32 = x;
+  float y;
+  int32_t k = range_reduction(f32, y);
+
+  float sin_k = SIN_K_PI_OVER_32[k & 63];
+  float cos_k = SIN_K_PI_OVER_32[(k + 16) & 63];
+
+  float cos_y, sin_y;
+  if (y == 0) {
+    cos_y = 1;
+    sin_y = 0;
+  } else {
+    cos_y = horners(y * PI_OVER_32, COS_COEFF);
+    sin_y = horners(y * PI_OVER_32, SIN_COEFF);
+  }
+
+  return static_cast<float16>(fputil::multiply_add(sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
+}
+}
diff --git a/libc/src/math/sinpif16.h b/libc/src/math/sinpif16.h
new file mode 100644
index 00000000000000..ce091c1737145c
--- /dev/null
+++ b/libc/src/math/sinpif16.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for sinpif16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_SINPIF16_H
+#define LLVM_LIBC_SRC_MATH_SINPIF16_H
+
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 sinpif16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif //LLVM_LIBC_SRC_MATH_SINPIF16_H
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 47e16926f10df1..33d03710abd035 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -51,6 +51,19 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  sinpif16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    sinpif16_test.cpp
+  DEPENDS
+    libc.src.math.sinpif16
+    libc.src.errno.errno
+    libc.src.__support.CPP.array
+    libc.src.__support.FPUtil.fp_bits  
+)
+
 add_fp_unittest(
   sincosf_test
   SUITE
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
new file mode 100644
index 00000000000000..e1356f493a1039
--- /dev/null
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -0,0 +1,44 @@
+//===-- Unittests for sinpif16 ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+// ===----------------------------------------------------------------------==//
+
+#include "src/math/sinpif16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "src/errno/libc_errno.h"
+
+#include <stdint.h>
+
+using LlvmLibcSinpif16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcSinpif16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0;
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(aNaN));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif16(0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::sinpif16(-0.0f));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(inf));
+  EXPECT_MATH_ERRNO(EDOM);
+
+  EXPECT_FP_EQ(aNan, LIBC_NAMESPACE::sinpif16(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM);
+}
+
+TEST_F(LlvmLibcSinpif16Test, Integers) {
+  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x420));
+  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+43));
+  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+64));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x420));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+106));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+21));
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 8d51fa4e477267..00c298368e5a9f 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,6 +55,7 @@ enum class Operation : int {
   RoundEven,
   Sin,
   Sinpi,
+  Sinpif16
   Sinh,
   Sqrt,
   Tan,

>From d7bbbc20d1509e989a88bcb9cfd9a345dc4a2662 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Mon, 23 Sep 2024 23:30:36 +0100
Subject: [PATCH 2/3] clang format changes

---
 libc/src/math/generic/sinpif16.cpp         | 127 +++++++++++++--------
 libc/src/math/sinpif16.h                   |   3 +-
 libc/test/src/math/smoke/sinpif16_test.cpp |   2 +-
 libc/utils/MPFRWrapper/MPFRUtils.h         |   3 +-
 4 files changed, 84 insertions(+), 51 deletions(-)

diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
index bb028d5b88fc08..dc33188d5a5d56 100644
--- a/libc/src/math/generic/sinpif16.cpp
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -15,7 +15,7 @@
 #include "src/__support/macros/config.h"
 
 // TODO: Should probably create a new file; sincospif16_utils.h
-// To store the following helper functions and constants. 
+// To store the following helper functions and constants.
 // I'd defer to @lntue for suggestions regarding that
 
 // HELPER_START
@@ -23,19 +23,15 @@ namespace LIBC_NAMESPACE_DECL {
 
 constexpr float PI_OVER_32 = M_PI / 32;
 
-// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial 
-// approximating the sine function in [-pi / 32, pi / 32] with the following 
+// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
+// approximating the sine function in [-pi / 32, pi / 32] with the following
 // commands:
 // > prec=23;
 // > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
 // > TL[0];
 const float SIN_COEFF[10] = {
-  0x1.801p-27, 0x1.000078p0,
-  -0x1.7e98p-14, -0x1.6bf4p-3,
-  0x1.95ccp-5, 0x1.1baep2,
-  -0x1.030ap3, -0x1.3dap9, 
-  0x1.98e4p8, 0x1.d3d8p14
-};
+    0x1.801p-27, 0x1.000078p0, -0x1.7e98p-14, -0x1.6bf4p-3, 0x1.95ccp-5,
+    0x1.1baep2,  -0x1.030ap3,  -0x1.3dap9,    0x1.98e4p8,   0x1.d3d8p14};
 
 // In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
 // approximating the sine function in [-pi/32, pi/32] with the following
@@ -44,12 +40,8 @@ const float SIN_COEFF[10] = {
 // > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
 // > TL[0];
 const float COS_COEFF[10] = {
-  0x1.00001p0, -0x1.48p-17,
-  -0x1.01259cp-1, -0x1.17fp-6,
-  0x1.283p0, 0x1.5d1p3,
-  -0x1.6278p7, -0x1.c23p10,
-  0x1.1444p13, 0x1.5fcp16
-};
+    0x1.00001p0, -0x1.48p-17, -0x1.01259cp-1, -0x1.17fp-6, 0x1.283p0,
+    0x1.5d1p3,   -0x1.6278p7, -0x1.c23p10,    0x1.1444p13, 0x1.5fcp16};
 
 // Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
 // Table is generated with Sollya as follows:
@@ -57,30 +49,70 @@ const float COS_COEFF[10] = {
 // > prec = 23;
 // > for k from 0 to 63 do {sin(k * pi/32);};
 
-const float SIN_K_PI_OVER_32[64] = {
-  0, 0x1.917a6cp-4, 0x1.8f8b84p-3,
-  0x1.294064p-2, 0x1.87de2cp-2, 0x1.e2b5d4p-2,
-  0x1.1c73b4p-1, 0x1.44cf34p-1, 0x1.6a09e8p-1,
-  0x1.8bc808p-1, 0x1.a9b664p-1, 0x1.c38b3p-1,
-  0x1.d906bcp-1, 0x1.e9f414p-1, 0x1.f6297cp-1,
-  0x1.fd88dcp-1, 0x1p0, 0x1.fd88dcp-1,
-  0x1.f6297cp-1, 0x1.e9f414p-1, 0x1.d906bcp-1,
-  0x1.c38b3p-1, 0x1.a9b664p-1, 0x1.8bc808p-1,
-  0x1.6a09e8p-1, 0x1.44cf34p-1, 0x1.1c73b4p-1,
-  0x1.e2b5d4p-2, 0x1.87de2cp-2, 0x1.294064p-2,
-  0x1.8f8b84p-3, 0x1.917a6cp-4, 0,
-  -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294064p-2,
-  -0x1.87de2cp-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1,
-  -0x1.44cf34p-1, -0x1.6a09e8p-1, -0x1.8bc808p-1,
-  -0x1.a9b664p-1, -0x1.c38b3p-1, -0x1.d906bcp-1,
-  -0x1.e9f414p-1, -0x1.f6297cp-1, -0x1.fd88dcp-1,
-  -0x1p0, -0x1.fd88dcp-1, -0x1.f6297cp-1,
-  -0x1.e9f414p-1, -0x1.d906bcp-1, -0x1.c38b3p-1,
-  -0x1.a9b664p-1, -0x1.8bc808p-1, -0x1.6a09e8p-1,
-  -0x1.44cf34p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
-  -0x1.87de2cp-2, -0x1.294064p-2, -0x1.8f8b84p-3,
-  -0x1.917a6cp-4
-};
+const float SIN_K_PI_OVER_32[64] = {0,
+                                    0x1.917a6cp-4,
+                                    0x1.8f8b84p-3,
+                                    0x1.294064p-2,
+                                    0x1.87de2cp-2,
+                                    0x1.e2b5d4p-2,
+                                    0x1.1c73b4p-1,
+                                    0x1.44cf34p-1,
+                                    0x1.6a09e8p-1,
+                                    0x1.8bc808p-1,
+                                    0x1.a9b664p-1,
+                                    0x1.c38b3p-1,
+                                    0x1.d906bcp-1,
+                                    0x1.e9f414p-1,
+                                    0x1.f6297cp-1,
+                                    0x1.fd88dcp-1,
+                                    0x1p0,
+                                    0x1.fd88dcp-1,
+                                    0x1.f6297cp-1,
+                                    0x1.e9f414p-1,
+                                    0x1.d906bcp-1,
+                                    0x1.c38b3p-1,
+                                    0x1.a9b664p-1,
+                                    0x1.8bc808p-1,
+                                    0x1.6a09e8p-1,
+                                    0x1.44cf34p-1,
+                                    0x1.1c73b4p-1,
+                                    0x1.e2b5d4p-2,
+                                    0x1.87de2cp-2,
+                                    0x1.294064p-2,
+                                    0x1.8f8b84p-3,
+                                    0x1.917a6cp-4,
+                                    0,
+                                    -0x1.917a6cp-4,
+                                    -0x1.8f8b84p-3,
+                                    -0x1.294064p-2,
+                                    -0x1.87de2cp-2,
+                                    -0x1.e2b5d4p-2,
+                                    -0x1.1c73b4p-1,
+                                    -0x1.44cf34p-1,
+                                    -0x1.6a09e8p-1,
+                                    -0x1.8bc808p-1,
+                                    -0x1.a9b664p-1,
+                                    -0x1.c38b3p-1,
+                                    -0x1.d906bcp-1,
+                                    -0x1.e9f414p-1,
+                                    -0x1.f6297cp-1,
+                                    -0x1.fd88dcp-1,
+                                    -0x1p0,
+                                    -0x1.fd88dcp-1,
+                                    -0x1.f6297cp-1,
+                                    -0x1.e9f414p-1,
+                                    -0x1.d906bcp-1,
+                                    -0x1.c38b3p-1,
+                                    -0x1.a9b664p-1,
+                                    -0x1.8bc808p-1,
+                                    -0x1.6a09e8p-1,
+                                    -0x1.44cf34p-1,
+                                    -0x1.1c73b4p-1,
+                                    -0x1.e2b5d4p-2,
+                                    -0x1.87de2cp-2,
+                                    -0x1.294064p-2,
+                                    -0x1.8f8b84p-3,
+                                    -0x1.917a6cp-4};
 
 // horner's algorithm to accurately and efficiently evaluate a degree-9
 // polynomial iteratively
@@ -96,7 +128,7 @@ float horners(float x, const float COEFF[10]) {
   return fputil::multiply_add<float>(b1, x, COEFF[0]);
 }
 
-float range_reduction(float x, float& y) {
+float range_reduction(float x, float &y) {
   float kf = fputil::nearest_integer(x * 32);
   y = fputil::multiply_add<float>(x, 32.0, -kf);
 
@@ -125,10 +157,12 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
   // Once k and y are computed, we then deduce the answer by the sine of sum
   // formula:
   //   sin(x * pi) = sin((k + y) * pi/32)
-  //           = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k * pi/32)
-  // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are precomputed
-  // and stored using a vector of 64 single precision floats. sin(y * pi/32) and cos(y * pi/32) are
-  // computed using degree-9 chebyshev polynomials generated by Sollya.
+  //           = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k *
+  //           pi/32)
+  // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are
+  // precomputed and stored using a vector of 64 single precision floats. sin(y
+  // * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev
+  // polynomials generated by Sollya.
 
   float f32 = x;
   float y;
@@ -146,6 +180,7 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
     sin_y = horners(y * PI_OVER_32, SIN_COEFF);
   }
 
-  return static_cast<float16>(fputil::multiply_add(sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
-}
+  return static_cast<float16>(fputil::multiply_add(
+      sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
 }
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/sinpif16.h b/libc/src/math/sinpif16.h
index ce091c1737145c..f7c653383ba5ef 100644
--- a/libc/src/math/sinpif16.h
+++ b/libc/src/math/sinpif16.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_LIBC_SRC_MATH_SINPIF16_H
 #define LLVM_LIBC_SRC_MATH_SINPIF16_H
 
-
 #include "include/llvm-libc-macros/float16-macros.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/properties/types.h"
@@ -20,4 +19,4 @@ float16 sinpif16(float16 x);
 
 } // namespace LIBC_NAMESPACE_DECL
 
-#endif //LLVM_LIBC_SRC_MATH_SINPIF16_H
+#endif // LLVM_LIBC_SRC_MATH_SINPIF16_H
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
index e1356f493a1039..54eb12ae100efa 100644
--- a/libc/test/src/math/smoke/sinpif16_test.cpp
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -7,9 +7,9 @@
 //
 // ===----------------------------------------------------------------------==//
 
+#include "src/errno/libc_errno.h"
 #include "src/math/sinpif16.h"
 #include "test/UnitTest/FPMatcher.h"
-#include "src/errno/libc_errno.h"
 
 #include <stdint.h>
 
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 00c298368e5a9f..43a30ad87c57ae 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,8 +55,7 @@ enum class Operation : int {
   RoundEven,
   Sin,
   Sinpi,
-  Sinpif16
-  Sinh,
+  Sinpif16 Sinh,
   Sqrt,
   Tan,
   Tanh,

>From 663fbd51d8992daf763772bc61b5942a5a1e8249 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Thu, 3 Oct 2024 14:42:57 +0100
Subject: [PATCH 3/3] add implementation of sinpif16 function

---
 libc/src/math/generic/sinpif16.cpp         | 181 ++++++++++-----------
 libc/test/src/math/smoke/sinpif16_test.cpp |  12 +-
 libc/utils/MPFRWrapper/MPFRUtils.h         |   2 +-
 3 files changed, 91 insertions(+), 104 deletions(-)

diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
index dc33188d5a5d56..b878e09d542007 100644
--- a/libc/src/math/generic/sinpif16.cpp
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -5,12 +5,12 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#define M_PI 3.1415925f
-
 #include "src/math/sinpif16.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 
@@ -21,114 +21,70 @@
 // HELPER_START
 namespace LIBC_NAMESPACE_DECL {
 
-constexpr float PI_OVER_32 = M_PI / 32;
+constexpr float PI_OVER_32 = 0x1.921fb6p-4f;
 
 // In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
 // approximating the sine function in [-pi / 32, pi / 32] with the following
 // commands:
-// > prec=23;
+// > prec=24;
 // > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
 // > TL[0];
 const float SIN_COEFF[10] = {
-    0x1.801p-27, 0x1.000078p0, -0x1.7e98p-14, -0x1.6bf4p-3, 0x1.95ccp-5,
-    0x1.1baep2,  -0x1.030ap3,  -0x1.3dap9,    0x1.98e4p8,   0x1.d3d8p14};
-
+  0x1.d333p-26, 0x1.000048p0, -0x1.a5d2p-14, -0x1.628588p-3, 0x1.c1eep-5,
+  0x1.4455p1, -0x1.317a8p3, -0x1.6bb9p8, 0x1.00ef8p9, 0x1.0edcp14
+};
 // In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
 // approximating the sine function in [-pi/32, pi/32] with the following
 // commands:
-// > prec = 23;
+// > prec = 24;
 // > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
 // > TL[0];
 const float COS_COEFF[10] = {
-    0x1.00001p0, -0x1.48p-17, -0x1.01259cp-1, -0x1.17fp-6, 0x1.283p0,
-    0x1.5d1p3,   -0x1.6278p7, -0x1.c23p10,    0x1.1444p13, 0x1.5fcp16};
-
+  0x1.000006p0, 0x1.e1eap-15, -0x1.0071p-1, -0x1.3b56p-4, 0x1.f3dfp-2,
+  0x1.ccbap4, -0x1.3034p6, -0x1.f817p11, 0x1.fc59p11, 0x1.7079p17
+};
 // Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
 // Table is generated with Sollya as follows:
 // > display = hexadecimmal;
-// > prec = 23;
+// > prec = 24;
 // > for k from 0 to 63 do {sin(k * pi/32);};
 
-const float SIN_K_PI_OVER_32[64] = {0,
-                                    0x1.917a6cp-4,
-                                    0x1.8f8b84p-3,
-                                    0x1.294064p-2,
-                                    0x1.87de2cp-2,
-                                    0x1.e2b5d4p-2,
-                                    0x1.1c73b4p-1,
-                                    0x1.44cf34p-1,
-                                    0x1.6a09e8p-1,
-                                    0x1.8bc808p-1,
-                                    0x1.a9b664p-1,
-                                    0x1.c38b3p-1,
-                                    0x1.d906bcp-1,
-                                    0x1.e9f414p-1,
-                                    0x1.f6297cp-1,
-                                    0x1.fd88dcp-1,
-                                    0x1p0,
-                                    0x1.fd88dcp-1,
-                                    0x1.f6297cp-1,
-                                    0x1.e9f414p-1,
-                                    0x1.d906bcp-1,
-                                    0x1.c38b3p-1,
-                                    0x1.a9b664p-1,
-                                    0x1.8bc808p-1,
-                                    0x1.6a09e8p-1,
-                                    0x1.44cf34p-1,
-                                    0x1.1c73b4p-1,
-                                    0x1.e2b5d4p-2,
-                                    0x1.87de2cp-2,
-                                    0x1.294064p-2,
-                                    0x1.8f8b84p-3,
-                                    0x1.917a6cp-4,
-                                    0,
-                                    -0x1.917a6cp-4,
-                                    -0x1.8f8b84p-3,
-                                    -0x1.294064p-2,
-                                    -0x1.87de2cp-2,
-                                    -0x1.e2b5d4p-2,
-                                    -0x1.1c73b4p-1,
-                                    -0x1.44cf34p-1,
-                                    -0x1.6a09e8p-1,
-                                    -0x1.8bc808p-1,
-                                    -0x1.a9b664p-1,
-                                    -0x1.c38b3p-1,
-                                    -0x1.d906bcp-1,
-                                    -0x1.e9f414p-1,
-                                    -0x1.f6297cp-1,
-                                    -0x1.fd88dcp-1,
-                                    -0x1p0,
-                                    -0x1.fd88dcp-1,
-                                    -0x1.f6297cp-1,
-                                    -0x1.e9f414p-1,
-                                    -0x1.d906bcp-1,
-                                    -0x1.c38b3p-1,
-                                    -0x1.a9b664p-1,
-                                    -0x1.8bc808p-1,
-                                    -0x1.6a09e8p-1,
-                                    -0x1.44cf34p-1,
-                                    -0x1.1c73b4p-1,
-                                    -0x1.e2b5d4p-2,
-                                    -0x1.87de2cp-2,
-                                    -0x1.294064p-2,
-                                    -0x1.8f8b84p-3,
-                                    -0x1.917a6cp-4};
-
-// horner's algorithm to accurately and efficiently evaluate a degree-9
-// polynomial iteratively
-float horners(float x, const float COEFF[10]) {
-  float b8 = fputil::multiply_add<float>(COEFF[9], x, COEFF[8]);
-  float b7 = fputil::multiply_add<float>(b8, x, COEFF[7]);
-  float b6 = fputil::multiply_add<float>(b7, x, COEFF[6]);
-  float b5 = fputil::multiply_add<float>(b6, x, COEFF[5]);
-  float b4 = fputil::multiply_add<float>(b5, x, COEFF[4]);
-  float b3 = fputil::multiply_add<float>(b4, x, COEFF[3]);
-  float b2 = fputil::multiply_add<float>(b3, x, COEFF[2]);
-  float b1 = fputil::multiply_add<float>(b2, x, COEFF[1]);
-  return fputil::multiply_add<float>(b1, x, COEFF[0]);
-}
-
-float range_reduction(float x, float &y) {
+const float SIN_K_PI_OVER_32[64] = { // entry k holds sin(k * pi / 32)
+  0, 0x1.917a6cp-4,
+  0x1.8f8b84p-3, 0x1.294062p-2,
+  0x1.87de2ap-2, 0x1.e2b5d4p-2,
+  0x1.1c73b4p-1, 0x1.44cf32p-1,
+  0x1.6a09e6p-1, 0x1.8bc806p-1,
+  0x1.a9b662p-1, 0x1.c38b3p-1,
+  0x1.d906bcp-1, 0x1.e9f416p-1,
+  0x1.f6297cp-1, 0x1.fd88dap-1,
+  0x1p0, 0x1.fd88dap-1,
+  0x1.f6297cp-1, 0x1.e9f416p-1,
+  0x1.d906bcp-1, 0x1.c38b3p-1,
+  0x1.a9b662p-1, 0x1.8bc806p-1,
+  0x1.6a09e6p-1, 0x1.44cf32p-1,
+  0x1.1c73b4p-1, 0x1.e2b5d4p-2,
+  0x1.87de2ap-2, 0x1.294062p-2,
+  0x1.8f8b84p-3, 0x1.917a6cp-4,
+  0, -0x1.917a6cp-4,
+  -0x1.8f8b84p-3, -0x1.294062p-2,
+  -0x1.87de2ap-2, -0x1.e2b5d4p-2,
+  -0x1.1c73b4p-1, -0x1.44cf32p-1,
+  -0x1.6a09e6p-1, -0x1.8bc806p-1,
+  -0x1.a9b662p-1, -0x1.c38b3p-1,
+  -0x1.d906bcp-1, -0x1.e9f416p-1,
+  -0x1.f6297cp-1, -0x1.fd88dap-1,
+  -0x1p0, -0x1.fd88dap-1,
+  -0x1.f6297cp-1, -0x1.e9f416p-1,
+  -0x1.d906bcp-1, -0x1.c38b3p-1,
+  -0x1.a9b662p-1, -0x1.8bc806p-1,
+  -0x1.6a09e6p-1, -0x1.44cf32p-1,
+  -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+  -0x1.87de2ap-2, -0x1.294062p-2,
+  -0x1.8f8b84p-3, -0x1.917a6cp-4
+};
+
+int32_t range_reduction(float x, float &y) {
   float kf = fputil::nearest_integer(x * 32);
   y = fputil::multiply_add<float>(x, 32.0, -kf);
 
@@ -164,7 +120,28 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
   // * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev
   // polynomials generated by Sollya.
 
-  float f32 = x;
+  if (LIBC_UNLIKELY(x_abs == 0U)) {
+    // For signed zeros
+    return x;
+  }
+
+  // Numbers greater than or equal to 2^10 are integers, infinity, or NaN
+  if (LIBC_UNLIKELY(x_abs >= 0x6400)) {
+    // Check for NaN or infinity values
+    if (LIBC_UNLIKELY(x_abs >= 0x7c00)) {
+      // If value is equal to infinity
+      if (x_abs == 0x7c00) {
+        fputil::set_errno_if_required(EDOM);
+	fputil::raise_except_if_required(FE_INVALID);
+      }
+      
+      // x + NaN yields NaN for both infinity and NaN inputs
+      return x + FPBits::quiet_nan().get_val();
+    }
+    return FPBits::zero(xbits.sign()).get_val();
+  }
+
+  float f32 = static_cast<float>(x);
   float y;
   int32_t k = range_reduction(f32, y);
 
@@ -176,11 +153,21 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
     cos_y = 1;
     sin_y = 0;
   } else {
-    cos_y = horners(y * PI_OVER_32, COS_COEFF);
-    sin_y = horners(y * PI_OVER_32, SIN_COEFF);
+    cos_y = fputil::polyeval(y * PI_OVER_32,
+	COS_COEFF[0], COS_COEFF[1],
+	COS_COEFF[2], COS_COEFF[3],
+	COS_COEFF[4], COS_COEFF[5],
+	COS_COEFF[6], COS_COEFF[7],
+	COS_COEFF[8], COS_COEFF[9]);
+    sin_y = fputil::polyeval(y * PI_OVER_32,
+        SIN_COEFF[0], SIN_COEFF[1],
+	SIN_COEFF[2], SIN_COEFF[3],
+	SIN_COEFF[4], SIN_COEFF[5],
+	SIN_COEFF[6], SIN_COEFF[7],
+	SIN_COEFF[8], SIN_COEFF[9]);
   }
 
   return static_cast<float16>(fputil::multiply_add(
-      sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
+      sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0.0f)));
 }
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
index 54eb12ae100efa..bf55a4854bfb3a 100644
--- a/libc/test/src/math/smoke/sinpif16_test.cpp
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -1,4 +1,4 @@
-//===-- Unittests for sinpif16 ------------------------------------===//
+//===-- Unittests for sinpif16 --------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -30,15 +30,15 @@ TEST_F(LlvmLibcSinpif16Test, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(inf));
   EXPECT_MATH_ERRNO(EDOM);
 
-  EXPECT_FP_EQ(aNan, LIBC_NAMESPACE::sinpif16(neg_inf));
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
 
 TEST_F(LlvmLibcSinpif16Test, Integers) {
   EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x420));
-  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+43));
-  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+64));
+  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+10));
+  EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+14));
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x420));
-  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+106));
-  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+21));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+15));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+7));
 }
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 43a30ad87c57ae..8d51fa4e477267 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,7 +55,7 @@ enum class Operation : int {
   RoundEven,
   Sin,
   Sinpi,
-  Sinpif16 Sinh,
+  Sinh,
   Sqrt,
   Tan,
   Tanh,