[libc-commits] [libc] [libc] Add sinpif16 function (PR #110994)
via libc-commits
libc-commits at lists.llvm.org
Thu Oct 3 06:46:52 PDT 2024
https://github.com/wldfngrs created https://github.com/llvm/llvm-project/pull/110994
Half-precision (16-bit) floating-point implementation of sinpi, the sine function with its input scaled by pi: sinpif16(x) = sin(pi * x).
>From 1dd121c8f55d32c7152389e20d1cb2005d983630 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Mon, 23 Sep 2024 23:29:33 +0100
Subject: [PATCH 1/3] add sinpif16 function implementation and unittests
---
libc/config/linux/aarch64/entrypoints.txt | 1 +
libc/config/linux/arm/entrypoints.txt | 1 +
libc/config/linux/riscv/entrypoints.txt | 1 +
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/config/windows/entrypoints.txt | 3 +-
libc/newhdrgen/yaml/math.yaml | 6 +
libc/src/math/CMakeLists.txt | 1 +
libc/src/math/generic/CMakeLists.txt | 16 +++
libc/src/math/generic/sinpif16.cpp | 151 +++++++++++++++++++++
libc/src/math/sinpif16.h | 23 ++++
libc/test/src/math/smoke/CMakeLists.txt | 13 ++
libc/test/src/math/smoke/sinpif16_test.cpp | 44 ++++++
libc/utils/MPFRWrapper/MPFRUtils.h | 1 +
13 files changed, 261 insertions(+), 1 deletion(-)
create mode 100644 libc/src/math/generic/sinpif16.cpp
create mode 100644 libc/src/math/sinpif16.h
create mode 100644 libc/test/src/math/smoke/sinpif16_test.cpp
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 64fbe1a250c0ba..47f46c4b9160e3 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -564,6 +564,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sinf
libc.src.math.sinhf
libc.src.math.sinpif
+ libc.src.math.sinpif16
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt
index 1be9a872dd2f7f..dbd21f6b5d4388 100644
--- a/libc/config/linux/arm/entrypoints.txt
+++ b/libc/config/linux/arm/entrypoints.txt
@@ -390,6 +390,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sincosf
libc.src.math.sinf
libc.src.math.sinhf
+ libc.src.math.sinpif16
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
index ff3d821c664c5b..c45ba2fe5b9aa4 100644
--- a/libc/config/linux/riscv/entrypoints.txt
+++ b/libc/config/linux/riscv/entrypoints.txt
@@ -567,6 +567,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sinf
libc.src.math.sinhf
libc.src.math.sinpif
+ libc.src.math.sinpif16
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index dd658af3bfb674..12b86f541bc91e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -567,6 +567,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sinf
libc.src.math.sinhf
libc.src.math.sinpif
+ libc.src.math.sinpif16
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
index 8f0b50bcc83ea2..5b0d84669e9244 100644
--- a/libc/config/windows/entrypoints.txt
+++ b/libc/config/windows/entrypoints.txt
@@ -275,7 +275,8 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.sincosf
libc.src.math.sincosf
libc.src.math.sinf
- libc.src.math.sinhf
+ libc.src.math.sinhfi
+ libc.src.math.sinpif16
libc.src.math.sqrt
libc.src.math.sqrtf
libc.src.math.sqrtl
diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
index 04b6a073deace0..fb79f15a314604 100644
--- a/libc/newhdrgen/yaml/math.yaml
+++ b/libc/newhdrgen/yaml/math.yaml
@@ -2290,6 +2290,12 @@ functions:
return_type: float
arguments:
- type: float
+ - name: sinpif16
+ standards:
+ - stdc
+ return_type: _Float16
+ arguments:
+ - type: _Float16
- name: sqrt
standards:
- stdc
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index 3cba34fc249322..dbfc0ce6fcaeb0 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -462,6 +462,7 @@ add_math_entrypoint_object(sincosf)
add_math_entrypoint_object(sin)
add_math_entrypoint_object(sinf)
add_math_entrypoint_object(sinpif)
+add_math_entrypoint_object(sinpif16)
add_math_entrypoint_object(sinh)
add_math_entrypoint_object(sinhf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 5a1ee3b8b83c77..6a2e45dbd4023d 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -473,6 +473,22 @@ add_entrypoint_object(
-O3
)
+add_entrypoint_object(
+ sinpif16
+ SRCS
+ sinpif16.cpp
+ HDRS
+ ../sinpif16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.common
+ libc.src.__support.macros.config
+ COMPILE_OPTIONS
+ -O3
+)
+
add_entrypoint_object(
tan
SRCS
diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
new file mode 100644
index 00000000000000..bb028d5b88fc08
--- /dev/null
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -0,0 +1,151 @@
+//===-- Half-precision sinpif function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define M_PI 3.1415925f
+
+#include "src/math/sinpif16.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+// TODO: Should probably create a new file; sincospif16_utils.h
+// To store the following helper functions and constants.
+// I'd defer to @lntue for suggestions regarding that
+
+// HELPER_START
+namespace LIBC_NAMESPACE_DECL {
+
+constexpr float PI_OVER_32 = M_PI / 32;
+
+// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
+// approximating the sine function in [-pi / 32, pi / 32] with the following
+// commands:
+// > prec=23;
+// > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
+// > TL[0];
+const float SIN_COEFF[10] = {
+ 0x1.801p-27, 0x1.000078p0,
+ -0x1.7e98p-14, -0x1.6bf4p-3,
+ 0x1.95ccp-5, 0x1.1baep2,
+ -0x1.030ap3, -0x1.3dap9,
+ 0x1.98e4p8, 0x1.d3d8p14
+};
+
+// In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
+// approximating the cosine function in [-pi/32, pi/32] with the following
+// commands:
+// > prec = 23;
+// > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
+// > TL[0];
+const float COS_COEFF[10] = {
+ 0x1.00001p0, -0x1.48p-17,
+ -0x1.01259cp-1, -0x1.17fp-6,
+ 0x1.283p0, 0x1.5d1p3,
+ -0x1.6278p7, -0x1.c23p10,
+ 0x1.1444p13, 0x1.5fcp16
+};
+
+// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
+// Table is generated with Sollya as follows:
+// > display = hexadecimal;
+// > prec = 23;
+// > for k from 0 to 63 do {sin(k * pi/32);};
+
+const float SIN_K_PI_OVER_32[64] = {
+ 0, 0x1.917a6cp-4, 0x1.8f8b84p-3,
+ 0x1.294064p-2, 0x1.87de2cp-2, 0x1.e2b5d4p-2,
+ 0x1.1c73b4p-1, 0x1.44cf34p-1, 0x1.6a09e8p-1,
+ 0x1.8bc808p-1, 0x1.a9b664p-1, 0x1.c38b3p-1,
+ 0x1.d906bcp-1, 0x1.e9f414p-1, 0x1.f6297cp-1,
+ 0x1.fd88dcp-1, 0x1p0, 0x1.fd88dcp-1,
+ 0x1.f6297cp-1, 0x1.e9f414p-1, 0x1.d906bcp-1,
+ 0x1.c38b3p-1, 0x1.a9b664p-1, 0x1.8bc808p-1,
+ 0x1.6a09e8p-1, 0x1.44cf34p-1, 0x1.1c73b4p-1,
+ 0x1.e2b5d4p-2, 0x1.87de2cp-2, 0x1.294064p-2,
+ 0x1.8f8b84p-3, 0x1.917a6cp-4, 0,
+ -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294064p-2,
+ -0x1.87de2cp-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1,
+ -0x1.44cf34p-1, -0x1.6a09e8p-1, -0x1.8bc808p-1,
+ -0x1.a9b664p-1, -0x1.c38b3p-1, -0x1.d906bcp-1,
+ -0x1.e9f414p-1, -0x1.f6297cp-1, -0x1.fd88dcp-1,
+ -0x1p0, -0x1.fd88dcp-1, -0x1.f6297cp-1,
+ -0x1.e9f414p-1, -0x1.d906bcp-1, -0x1.c38b3p-1,
+ -0x1.a9b664p-1, -0x1.8bc808p-1, -0x1.6a09e8p-1,
+ -0x1.44cf34p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+ -0x1.87de2cp-2, -0x1.294064p-2, -0x1.8f8b84p-3,
+ -0x1.917a6cp-4
+};
+
+// horner's algorithm to accurately and efficiently evaluate a degree-9
+// polynomial iteratively
+float horners(float x, const float COEFF[10]) {
+ float b8 = fputil::multiply_add<float>(COEFF[9], x, COEFF[8]);
+ float b7 = fputil::multiply_add<float>(b8, x, COEFF[7]);
+ float b6 = fputil::multiply_add<float>(b7, x, COEFF[6]);
+ float b5 = fputil::multiply_add<float>(b6, x, COEFF[5]);
+ float b4 = fputil::multiply_add<float>(b5, x, COEFF[4]);
+ float b3 = fputil::multiply_add<float>(b4, x, COEFF[3]);
+ float b2 = fputil::multiply_add<float>(b3, x, COEFF[2]);
+ float b1 = fputil::multiply_add<float>(b2, x, COEFF[1]);
+ return fputil::multiply_add<float>(b1, x, COEFF[0]);
+}
+
+float range_reduction(float x, float& y) {
+ float kf = fputil::nearest_integer(x * 32);
+ y = fputil::multiply_add<float>(x, 32.0, -kf);
+
+ return static_cast<int32_t>(kf);
+}
+// HELPER_END
+
+LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
+ using FPBits = typename fputil::FPBits<float16>;
+ FPBits xbits(x);
+
+ uint16_t x_u = xbits.uintval();
+ uint16_t x_abs = x_u & 0x7fff;
+
+ // Range reduction:
+ // For |x| > 1/32, we perform range reduction as follows:
+ // Find k and y such that:
+ // x = (k + y) * 1/32
+ // k is an integer
+ // |y| < 0.5
+ //
+ // This is done by performing:
+ // k = round(x * 32)
+ // y = x * 32 - k
+ //
+ // Once k and y are computed, we then deduce the answer by the sine of sum
+ // formula:
+ // sin(x * pi) = sin((k + y) * pi/32)
+ // = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k * pi/32)
+ // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are precomputed
+ // and stored using a vector of 64 single precision floats. sin(y * pi/32) and cos(y * pi/32) are
+ // computed using degree-9 chebyshev polynomials generated by Sollya.
+
+ float f32 = x;
+ float y;
+ int32_t k = range_reduction(f32, y);
+
+ float sin_k = SIN_K_PI_OVER_32[k & 63];
+ float cos_k = SIN_K_PI_OVER_32[(k + 16) & 63];
+
+ float cos_y, sin_y;
+ if (y == 0) {
+ cos_y = 1;
+ sin_y = 0;
+ } else {
+ cos_y = horners(y * PI_OVER_32, COS_COEFF);
+ sin_y = horners(y * PI_OVER_32, SIN_COEFF);
+ }
+
+ return static_cast<float16>(fputil::multiply_add(sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
+}
+}
diff --git a/libc/src/math/sinpif16.h b/libc/src/math/sinpif16.h
new file mode 100644
index 00000000000000..ce091c1737145c
--- /dev/null
+++ b/libc/src/math/sinpif16.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for sinpif16 ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_SINPIF16_H
+#define LLVM_LIBC_SRC_MATH_SINPIF16_H
+
+
+#include "include/llvm-libc-macros/float16-macros.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 sinpif16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif //LLVM_LIBC_SRC_MATH_SINPIF16_H
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index 47e16926f10df1..33d03710abd035 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -51,6 +51,19 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)
+add_fp_unittest(
+ sinpif16_test
+ SUITE
+ libc-math-smoke-tests
+ SRCS
+ sinpif16_test.cpp
+ DEPENDS
+ libc.src.math.sinpif16
+ libc.src.errno.errno
+ libc.src.__support.CPP.array
+ libc.src.__support.FPUtil.fp_bits
+)
+
add_fp_unittest(
sincosf_test
SUITE
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
new file mode 100644
index 00000000000000..e1356f493a1039
--- /dev/null
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -0,0 +1,44 @@
+//===-- Unittests for sinpif16 ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//
+// ===----------------------------------------------------------------------==//
+
+#include "src/math/sinpif16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "src/errno/libc_errno.h"
+
+#include <stdint.h>
+
+using LlvmLibcSinpif16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcSinpif16Test, SpecialNumbers) {
+ LIBC_NAMESPACE::libc_errno = 0;
+
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(aNaN));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif16(0.0f));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::sinpif16(-0.0f));
+ EXPECT_MATH_ERRNO(0);
+
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(inf));
+ EXPECT_MATH_ERRNO(EDOM);
+
+ EXPECT_FP_EQ(aNan, LIBC_NAMESPACE::sinpif16(neg_inf));
+ EXPECT_MATH_ERRNO(EDOM);
+}
+
+TEST_F(LlvmLibcSinpif16Test, Integers) {
+ EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x420));
+ EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+43));
+ EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+64));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x420));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+106));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+21));
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 8d51fa4e477267..00c298368e5a9f 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,6 +55,7 @@ enum class Operation : int {
RoundEven,
Sin,
Sinpi,
+ Sinpif16
Sinh,
Sqrt,
Tan,
>From d7bbbc20d1509e989a88bcb9cfd9a345dc4a2662 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Mon, 23 Sep 2024 23:30:36 +0100
Subject: [PATCH 2/3] clang format changes
---
libc/src/math/generic/sinpif16.cpp | 127 +++++++++++++--------
libc/src/math/sinpif16.h | 3 +-
libc/test/src/math/smoke/sinpif16_test.cpp | 2 +-
libc/utils/MPFRWrapper/MPFRUtils.h | 3 +-
4 files changed, 84 insertions(+), 51 deletions(-)
diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
index bb028d5b88fc08..dc33188d5a5d56 100644
--- a/libc/src/math/generic/sinpif16.cpp
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -15,7 +15,7 @@
#include "src/__support/macros/config.h"
// TODO: Should probably create a new file; sincospif16_utils.h
-// To store the following helper functions and constants.
+// To store the following helper functions and constants.
// I'd defer to @lntue for suggestions regarding that
// HELPER_START
@@ -23,19 +23,15 @@ namespace LIBC_NAMESPACE_DECL {
constexpr float PI_OVER_32 = M_PI / 32;
-// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
-// approximating the sine function in [-pi / 32, pi / 32] with the following
+// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
+// approximating the sine function in [-pi / 32, pi / 32] with the following
// commands:
// > prec=23;
// > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
// > TL[0];
const float SIN_COEFF[10] = {
- 0x1.801p-27, 0x1.000078p0,
- -0x1.7e98p-14, -0x1.6bf4p-3,
- 0x1.95ccp-5, 0x1.1baep2,
- -0x1.030ap3, -0x1.3dap9,
- 0x1.98e4p8, 0x1.d3d8p14
-};
+ 0x1.801p-27, 0x1.000078p0, -0x1.7e98p-14, -0x1.6bf4p-3, 0x1.95ccp-5,
+ 0x1.1baep2, -0x1.030ap3, -0x1.3dap9, 0x1.98e4p8, 0x1.d3d8p14};
// In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
// approximating the cosine function in [-pi/32, pi/32] with the following
@@ -44,12 +40,8 @@ const float SIN_COEFF[10] = {
// > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
// > TL[0];
const float COS_COEFF[10] = {
- 0x1.00001p0, -0x1.48p-17,
- -0x1.01259cp-1, -0x1.17fp-6,
- 0x1.283p0, 0x1.5d1p3,
- -0x1.6278p7, -0x1.c23p10,
- 0x1.1444p13, 0x1.5fcp16
-};
+ 0x1.00001p0, -0x1.48p-17, -0x1.01259cp-1, -0x1.17fp-6, 0x1.283p0,
+ 0x1.5d1p3, -0x1.6278p7, -0x1.c23p10, 0x1.1444p13, 0x1.5fcp16};
// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
// Table is generated with Sollya as follows:
@@ -57,30 +49,70 @@ const float COS_COEFF[10] = {
// > prec = 23;
// > for k from 0 to 63 do {sin(k * pi/32);};
-const float SIN_K_PI_OVER_32[64] = {
- 0, 0x1.917a6cp-4, 0x1.8f8b84p-3,
- 0x1.294064p-2, 0x1.87de2cp-2, 0x1.e2b5d4p-2,
- 0x1.1c73b4p-1, 0x1.44cf34p-1, 0x1.6a09e8p-1,
- 0x1.8bc808p-1, 0x1.a9b664p-1, 0x1.c38b3p-1,
- 0x1.d906bcp-1, 0x1.e9f414p-1, 0x1.f6297cp-1,
- 0x1.fd88dcp-1, 0x1p0, 0x1.fd88dcp-1,
- 0x1.f6297cp-1, 0x1.e9f414p-1, 0x1.d906bcp-1,
- 0x1.c38b3p-1, 0x1.a9b664p-1, 0x1.8bc808p-1,
- 0x1.6a09e8p-1, 0x1.44cf34p-1, 0x1.1c73b4p-1,
- 0x1.e2b5d4p-2, 0x1.87de2cp-2, 0x1.294064p-2,
- 0x1.8f8b84p-3, 0x1.917a6cp-4, 0,
- -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294064p-2,
- -0x1.87de2cp-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1,
- -0x1.44cf34p-1, -0x1.6a09e8p-1, -0x1.8bc808p-1,
- -0x1.a9b664p-1, -0x1.c38b3p-1, -0x1.d906bcp-1,
- -0x1.e9f414p-1, -0x1.f6297cp-1, -0x1.fd88dcp-1,
- -0x1p0, -0x1.fd88dcp-1, -0x1.f6297cp-1,
- -0x1.e9f414p-1, -0x1.d906bcp-1, -0x1.c38b3p-1,
- -0x1.a9b664p-1, -0x1.8bc808p-1, -0x1.6a09e8p-1,
- -0x1.44cf34p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
- -0x1.87de2cp-2, -0x1.294064p-2, -0x1.8f8b84p-3,
- -0x1.917a6cp-4
-};
+const float SIN_K_PI_OVER_32[64] = {0,
+ 0x1.917a6cp-4,
+ 0x1.8f8b84p-3,
+ 0x1.294064p-2,
+ 0x1.87de2cp-2,
+ 0x1.e2b5d4p-2,
+ 0x1.1c73b4p-1,
+ 0x1.44cf34p-1,
+ 0x1.6a09e8p-1,
+ 0x1.8bc808p-1,
+ 0x1.a9b664p-1,
+ 0x1.c38b3p-1,
+ 0x1.d906bcp-1,
+ 0x1.e9f414p-1,
+ 0x1.f6297cp-1,
+ 0x1.fd88dcp-1,
+ 0x1p0,
+ 0x1.fd88dcp-1,
+ 0x1.f6297cp-1,
+ 0x1.e9f414p-1,
+ 0x1.d906bcp-1,
+ 0x1.c38b3p-1,
+ 0x1.a9b664p-1,
+ 0x1.8bc808p-1,
+ 0x1.6a09e8p-1,
+ 0x1.44cf34p-1,
+ 0x1.1c73b4p-1,
+ 0x1.e2b5d4p-2,
+ 0x1.87de2cp-2,
+ 0x1.294064p-2,
+ 0x1.8f8b84p-3,
+ 0x1.917a6cp-4,
+ 0,
+ -0x1.917a6cp-4,
+ -0x1.8f8b84p-3,
+ -0x1.294064p-2,
+ -0x1.87de2cp-2,
+ -0x1.e2b5d4p-2,
+ -0x1.1c73b4p-1,
+ -0x1.44cf34p-1,
+ -0x1.6a09e8p-1,
+ -0x1.8bc808p-1,
+ -0x1.a9b664p-1,
+ -0x1.c38b3p-1,
+ -0x1.d906bcp-1,
+ -0x1.e9f414p-1,
+ -0x1.f6297cp-1,
+ -0x1.fd88dcp-1,
+ -0x1p0,
+ -0x1.fd88dcp-1,
+ -0x1.f6297cp-1,
+ -0x1.e9f414p-1,
+ -0x1.d906bcp-1,
+ -0x1.c38b3p-1,
+ -0x1.a9b664p-1,
+ -0x1.8bc808p-1,
+ -0x1.6a09e8p-1,
+ -0x1.44cf34p-1,
+ -0x1.1c73b4p-1,
+ -0x1.e2b5d4p-2,
+ -0x1.87de2cp-2,
+ -0x1.294064p-2,
+ -0x1.8f8b84p-3,
+ -0x1.917a6cp-4};
// horner's algorithm to accurately and efficiently evaluate a degree-9
// polynomial iteratively
@@ -96,7 +128,7 @@ float horners(float x, const float COEFF[10]) {
return fputil::multiply_add<float>(b1, x, COEFF[0]);
}
-float range_reduction(float x, float& y) {
+float range_reduction(float x, float &y) {
float kf = fputil::nearest_integer(x * 32);
y = fputil::multiply_add<float>(x, 32.0, -kf);
@@ -125,10 +157,12 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
// Once k and y are computed, we then deduce the answer by the sine of sum
// formula:
// sin(x * pi) = sin((k + y) * pi/32)
- // = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k * pi/32)
- // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are precomputed
- // and stored using a vector of 64 single precision floats. sin(y * pi/32) and cos(y * pi/32) are
- // computed using degree-9 chebyshev polynomials generated by Sollya.
+ // = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k *
+ // pi/32)
+ // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are
+ // precomputed and stored using a vector of 64 single precision floats. sin(y
+ // * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev
+ // polynomials generated by Sollya.
float f32 = x;
float y;
@@ -146,6 +180,7 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
sin_y = horners(y * PI_OVER_32, SIN_COEFF);
}
- return static_cast<float16>(fputil::multiply_add(sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
-}
+ return static_cast<float16>(fputil::multiply_add(
+ sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
}
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/sinpif16.h b/libc/src/math/sinpif16.h
index ce091c1737145c..f7c653383ba5ef 100644
--- a/libc/src/math/sinpif16.h
+++ b/libc/src/math/sinpif16.h
@@ -9,7 +9,6 @@
#ifndef LLVM_LIBC_SRC_MATH_SINPIF16_H
#define LLVM_LIBC_SRC_MATH_SINPIF16_H
-
#include "include/llvm-libc-macros/float16-macros.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/types.h"
@@ -20,4 +19,4 @@ float16 sinpif16(float16 x);
} // namespace LIBC_NAMESPACE_DECL
-#endif //LLVM_LIBC_SRC_MATH_SINPIF16_H
+#endif // LLVM_LIBC_SRC_MATH_SINPIF16_H
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
index e1356f493a1039..54eb12ae100efa 100644
--- a/libc/test/src/math/smoke/sinpif16_test.cpp
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -7,9 +7,9 @@
//
// ===----------------------------------------------------------------------==//
+#include "src/errno/libc_errno.h"
#include "src/math/sinpif16.h"
#include "test/UnitTest/FPMatcher.h"
-#include "src/errno/libc_errno.h"
#include <stdint.h>
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 00c298368e5a9f..43a30ad87c57ae 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,8 +55,7 @@ enum class Operation : int {
RoundEven,
Sin,
Sinpi,
- Sinpif16
- Sinh,
+ Sinpif16 Sinh,
Sqrt,
Tan,
Tanh,
>From 663fbd51d8992daf763772bc61b5942a5a1e8249 Mon Sep 17 00:00:00 2001
From: wldfngrs <wldfngrs at gmail.com>
Date: Thu, 3 Oct 2024 14:42:57 +0100
Subject: [PATCH 3/3] add implementation of sinpif16 function
---
libc/src/math/generic/sinpif16.cpp | 181 ++++++++++-----------
libc/test/src/math/smoke/sinpif16_test.cpp | 12 +-
libc/utils/MPFRWrapper/MPFRUtils.h | 2 +-
3 files changed, 91 insertions(+), 104 deletions(-)
diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
index dc33188d5a5d56..b878e09d542007 100644
--- a/libc/src/math/generic/sinpif16.cpp
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -5,12 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-
-#define M_PI 3.1415925f
-
#include "src/math/sinpif16.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
@@ -21,114 +21,70 @@
// HELPER_START
namespace LIBC_NAMESPACE_DECL {
-constexpr float PI_OVER_32 = M_PI / 32;
+constexpr float PI_OVER_32 = 0x1.921fb6p-4f;
// In Sollya generate 10 coeffecients for a degree-9 chebyshev polynomial
// approximating the sine function in [-pi / 32, pi / 32] with the following
// commands:
-// > prec=23;
+// > prec=24;
// > TL = chebyshevform(sin(x), 9, [-pi / 32, pi / 32]);
// > TL[0];
const float SIN_COEFF[10] = {
- 0x1.801p-27, 0x1.000078p0, -0x1.7e98p-14, -0x1.6bf4p-3, 0x1.95ccp-5,
- 0x1.1baep2, -0x1.030ap3, -0x1.3dap9, 0x1.98e4p8, 0x1.d3d8p14};
-
+ 0x1.d333p-26, 0x1.000048p0, -0x1.a5d2p-14, -0x1.628588p-3, 0x1.c1eep-5,
+ 0x1.4455p1, -0x1.317a8p3, -0x1.6bb9p8, 0x1.00ef8p9, 0x1.0edcp14
+};
// In Sollya generate 10 coefficients for a degree-9 chebyshev polynomial
// approximating the cosine function in [-pi/32, pi/32] with the following
// commands:
-// > prec = 23;
+// > prec = 24;
// > TL = chebyshevform(cos(x), 9, [-pi / 32, pi / 32]);
// > TL[0];
const float COS_COEFF[10] = {
- 0x1.00001p0, -0x1.48p-17, -0x1.01259cp-1, -0x1.17fp-6, 0x1.283p0,
- 0x1.5d1p3, -0x1.6278p7, -0x1.c23p10, 0x1.1444p13, 0x1.5fcp16};
-
+ 0x1.000006p0, 0x1.e1eap-15, -0x1.0071p-1, -0x1.3b56p-4, 0x1.f3dfp-2,
+ 0x1.ccbap4, -0x1.3034p6, -0x1.f817p11, 0x1.fc59p11, 0x1.7079p17
+};
// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
// Table is generated with Sollya as follows:
// > display = hexadecimal;
-// > prec = 23;
+// > prec = 24;
// > for k from 0 to 63 do {sin(k * pi/32);};
-const float SIN_K_PI_OVER_32[64] = {0,
- 0x1.917a6cp-4,
- 0x1.8f8b84p-3,
- 0x1.294064p-2,
- 0x1.87de2cp-2,
- 0x1.e2b5d4p-2,
- 0x1.1c73b4p-1,
- 0x1.44cf34p-1,
- 0x1.6a09e8p-1,
- 0x1.8bc808p-1,
- 0x1.a9b664p-1,
- 0x1.c38b3p-1,
- 0x1.d906bcp-1,
- 0x1.e9f414p-1,
- 0x1.f6297cp-1,
- 0x1.fd88dcp-1,
- 0x1p0,
- 0x1.fd88dcp-1,
- 0x1.f6297cp-1,
- 0x1.e9f414p-1,
- 0x1.d906bcp-1,
- 0x1.c38b3p-1,
- 0x1.a9b664p-1,
- 0x1.8bc808p-1,
- 0x1.6a09e8p-1,
- 0x1.44cf34p-1,
- 0x1.1c73b4p-1,
- 0x1.e2b5d4p-2,
- 0x1.87de2cp-2,
- 0x1.294064p-2,
- 0x1.8f8b84p-3,
- 0x1.917a6cp-4,
- 0,
- -0x1.917a6cp-4,
- -0x1.8f8b84p-3,
- -0x1.294064p-2,
- -0x1.87de2cp-2,
- -0x1.e2b5d4p-2,
- -0x1.1c73b4p-1,
- -0x1.44cf34p-1,
- -0x1.6a09e8p-1,
- -0x1.8bc808p-1,
- -0x1.a9b664p-1,
- -0x1.c38b3p-1,
- -0x1.d906bcp-1,
- -0x1.e9f414p-1,
- -0x1.f6297cp-1,
- -0x1.fd88dcp-1,
- -0x1p0,
- -0x1.fd88dcp-1,
- -0x1.f6297cp-1,
- -0x1.e9f414p-1,
- -0x1.d906bcp-1,
- -0x1.c38b3p-1,
- -0x1.a9b664p-1,
- -0x1.8bc808p-1,
- -0x1.6a09e8p-1,
- -0x1.44cf34p-1,
- -0x1.1c73b4p-1,
- -0x1.e2b5d4p-2,
- -0x1.87de2cp-2,
- -0x1.294064p-2,
- -0x1.8f8b84p-3,
- -0x1.917a6cp-4};
-
-// horner's algorithm to accurately and efficiently evaluate a degree-9
-// polynomial iteratively
-float horners(float x, const float COEFF[10]) {
- float b8 = fputil::multiply_add<float>(COEFF[9], x, COEFF[8]);
- float b7 = fputil::multiply_add<float>(b8, x, COEFF[7]);
- float b6 = fputil::multiply_add<float>(b7, x, COEFF[6]);
- float b5 = fputil::multiply_add<float>(b6, x, COEFF[5]);
- float b4 = fputil::multiply_add<float>(b5, x, COEFF[4]);
- float b3 = fputil::multiply_add<float>(b4, x, COEFF[3]);
- float b2 = fputil::multiply_add<float>(b3, x, COEFF[2]);
- float b1 = fputil::multiply_add<float>(b2, x, COEFF[1]);
- return fputil::multiply_add<float>(b1, x, COEFF[0]);
-}
-
-float range_reduction(float x, float &y) {
+const float SIN_K_PI_OVER_32[64] = {
+ 0, 0x1.917a6cp-4,
+ 0x1.8f8b84p-3, 0x1.294062p-2,
+ 0x1.87de2ap-2, 0x1.e2b5d4p-2,
+ 0x1.1c73b4p-1, 0x1.44cf32p-1,
+ 0x1.6a09e6p-1, 0x1.8bc806p-1,
+ 0x1.a9b662p-1, 0x1.c38b3p-1,
+ 0x1.d906bcp-1, 0x1.e9f416p-1,
+ 0x1.f6297cp-1, 0x1.fd88dap-1,
+ 0x1p0, 0x1.fd88dap-1,
+ 0x1.f6297cp-1, 0x1.e9f416p-1,
+ 0x1.d906bcp-1, 0x1.c38b3p-1,
+ 0x1.a9b662p-1, 0x1.8bc806p-1,
+ 0x1.6a09e6p-1, 0x1.44cf32p-1,
+ 0x1.1c73b4p-1, 0x1.e2b5d4p-2,
+ 0x1.87de2ap-2, 0x1.294062p-2,
+ 0x1.8f8b84p-3, 0x1.917a6cp-4,
+ 0, -0x1.917a6cp-4,
+ -0x1.8f8b84p-3, -0x1.294062p-2,
+ -0x1.87de2ap-2, -0x1.e2b5d4p-2,
+ -0x1.1c73b4p-1, -0x1.44cf32p-1,
+ -0x1.6a09e6p-1, -0x1.8bc806p-1,
+ -0x1.a9b662p-1, -0x1.c38b3p-1,
+ -0x1.d906bcp-1, -0x1.e9f416p-1,
+ -0x1.f6297ep-1, -0x1.fd88dap-1,
+ -0x1p0, -0x1.fd88dap-1,
+ -0x1.f6297cp-1, -0x1.e9f416p-1,
+ -0x1.d906bcp-1, -0x1.c38b3p-1,
+ -0x1.a9b662p-1, -0x1.8bc806p-1,
+ -0x1.6a09e6p-1, -0x1.44cf32p-1,
+ -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+ -0x1.87de2ap-2, -0x1.294062p-2,
+ -0x1.8f8b84p-3, -0x1.917a6cp-4
+};
+
+int32_t range_reduction(float x, float &y) {
float kf = fputil::nearest_integer(x * 32);
y = fputil::multiply_add<float>(x, 32.0, -kf);
@@ -164,7 +120,28 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
// * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev
// polynomials generated by Sollya.
- float f32 = x;
+ if (LIBC_UNLIKELY(x_abs == 0U)) {
+ // For signed zeros
+ return x;
+ }
+
+ // Values with |x| >= 2^10 are integers, infinities, or NaN
+ if (LIBC_UNLIKELY(x_abs >= 0x6400)) {
+ // Check for NaN or infinity values
+ if (LIBC_UNLIKELY(x_abs >= 0x7c00)) {
+ // If value is equal to infinity
+ if (x_abs == 0x7c00) {
+ fputil::set_errno_if_required(EDOM);
+ fputil::raise_except_if_required(FE_INVALID);
+ }
+
+ // Reached for both infinity and NaN inputs; the result is a quiet NaN
+ return x + FPBits::quiet_nan().get_val();
+ }
+ return FPBits::zero(xbits.sign()).get_val();
+ }
+
+ float f32 = static_cast<float>(x);
float y;
int32_t k = range_reduction(f32, y);
@@ -176,11 +153,21 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
cos_y = 1;
sin_y = 0;
} else {
- cos_y = horners(y * PI_OVER_32, COS_COEFF);
- sin_y = horners(y * PI_OVER_32, SIN_COEFF);
+ cos_y = fputil::polyeval(y * PI_OVER_32,
+ COS_COEFF[0], COS_COEFF[1],
+ COS_COEFF[2], COS_COEFF[3],
+ COS_COEFF[4], COS_COEFF[5],
+ COS_COEFF[6], COS_COEFF[7],
+ COS_COEFF[8], COS_COEFF[9]);
+ sin_y = fputil::polyeval(y * PI_OVER_32,
+ SIN_COEFF[0], SIN_COEFF[1],
+ SIN_COEFF[2], SIN_COEFF[3],
+ SIN_COEFF[4], SIN_COEFF[5],
+ SIN_COEFF[6], SIN_COEFF[7],
+ SIN_COEFF[8], SIN_COEFF[9]);
}
return static_cast<float16>(fputil::multiply_add(
- sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0)));
+ sin_k, cos_y, fputil::multiply_add(sin_y, cos_k, 0.0f)));
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/src/math/smoke/sinpif16_test.cpp b/libc/test/src/math/smoke/sinpif16_test.cpp
index 54eb12ae100efa..bf55a4854bfb3a 100644
--- a/libc/test/src/math/smoke/sinpif16_test.cpp
+++ b/libc/test/src/math/smoke/sinpif16_test.cpp
@@ -1,4 +1,4 @@
-//===-- Unittests for sinpif16 ------------------------------------===//
+//===-- Unittests for sinpif16 --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,15 +30,15 @@ TEST_F(LlvmLibcSinpif16Test, SpecialNumbers) {
EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(inf));
EXPECT_MATH_ERRNO(EDOM);
- EXPECT_FP_EQ(aNan, LIBC_NAMESPACE::sinpif16(neg_inf));
+ EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinpif16(neg_inf));
EXPECT_MATH_ERRNO(EDOM);
}
TEST_F(LlvmLibcSinpif16Test, Integers) {
EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x420));
- EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+43));
- EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+64));
+ EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1p+10));
+ EXPECT_FP_EQ(-0.0, LIBC_NAMESPACE::sinpif16(-0x1.4p+14));
EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x420));
- EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+106));
- EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+21));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+15));
+ EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif16(0x1.cp+7));
}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 43a30ad87c57ae..8d51fa4e477267 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -55,7 +55,7 @@ enum class Operation : int {
RoundEven,
Sin,
Sinpi,
- Sinpif16 Sinh,
+ Sinh,
Sqrt,
Tan,
Tanh,
More information about the libc-commits
mailing list