[libclc] 13a313f - [libclc] Move sinpi/cospi/tanpi to the CLC library (#133889)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 1 04:03:25 PDT 2025
Author: Fraser Cormack
Date: 2025-04-01T12:03:21+01:00
New Revision: 13a313fe582a3c41fb5c50ca2325c0987c0af6d7
URL: https://github.com/llvm/llvm-project/commit/13a313fe582a3c41fb5c50ca2325c0987c0af6d7
DIFF: https://github.com/llvm/llvm-project/commit/13a313fe582a3c41fb5c50ca2325c0987c0af6d7.diff
LOG: [libclc] Move sinpi/cospi/tanpi to the CLC library (#133889)
Additionally, these builtins are now vectorized.
This also moves the native_recip and native_divide builtins as they are
used by the tanpi builtin.
Added:
libclc/clc/include/clc/math/clc_cospi.h
libclc/clc/include/clc/math/clc_native_divide.h
libclc/clc/include/clc/math/clc_native_recip.h
libclc/clc/include/clc/math/clc_sincos_piby4.h
libclc/clc/include/clc/math/clc_sincos_piby4.inc
libclc/clc/include/clc/math/clc_sinpi.h
libclc/clc/include/clc/math/clc_tanpi.h
libclc/clc/lib/generic/math/clc_cospi.cl
libclc/clc/lib/generic/math/clc_cospi.inc
libclc/clc/lib/generic/math/clc_native_divide.cl
libclc/clc/lib/generic/math/clc_native_divide.inc
libclc/clc/lib/generic/math/clc_native_recip.cl
libclc/clc/lib/generic/math/clc_native_recip.inc
libclc/clc/lib/generic/math/clc_sinpi.cl
libclc/clc/lib/generic/math/clc_sinpi.inc
libclc/clc/lib/generic/math/clc_tanpi.cl
libclc/clc/lib/generic/math/clc_tanpi.inc
Modified:
libclc/CMakeLists.txt
libclc/clc/include/clc/math/clc_sincos_helpers.inc
libclc/clc/lib/generic/SOURCES
libclc/clc/lib/generic/math/clc_sincos_helpers.cl
libclc/clc/lib/generic/math/clc_sincos_helpers.inc
libclc/clspv/lib/SOURCES
libclc/generic/lib/SOURCES
libclc/generic/lib/math/clc_tan.cl
libclc/generic/lib/math/cospi.cl
libclc/generic/lib/math/native_divide.cl
libclc/generic/lib/math/native_recip.cl
libclc/generic/lib/math/sincos_helpers.cl
libclc/generic/lib/math/sincos_helpers.h
libclc/generic/lib/math/sinpi.cl
libclc/generic/lib/math/tanpi.cl
libclc/spirv/lib/SOURCES
Removed:
libclc/generic/lib/math/clc_tanpi.cl
libclc/generic/lib/math/native_divide.inc
libclc/generic/lib/math/native_recip.inc
libclc/generic/lib/math/sincosD_piby4.h
libclc/generic/lib/math/sincospiF_piby4.h
################################################################################
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index efe7f5804e8fb..d4753b22ed01c 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -265,11 +265,13 @@ endif()
set_source_files_properties(
# CLC builtins
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_cos.cl
+ ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_divide.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp2.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_exp.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log10.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log2.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_log.cl
+ ${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_recip.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_rsqrt.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sin.cl
${CMAKE_CURRENT_SOURCE_DIR}/clc/lib/generic/math/clc_native_sqrt.cl
diff --git a/libclc/clc/include/clc/math/clc_cospi.h b/libclc/clc/include/clc/math/clc_cospi.h
new file mode 100644
index 0000000000000..07565c23a2f07
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_cospi.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_COSPI_H__
+#define __CLC_MATH_CLC_COSPI_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_cospi
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COSPI_H__
diff --git a/libclc/clc/include/clc/math/clc_native_divide.h b/libclc/clc/include/clc/math/clc_native_divide.h
new file mode 100644
index 0000000000000..b48c3e5d03b36
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_divide.h
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_NATIVE_DIVIDE_H__
+#define __CLC_MATH_CLC_NATIVE_DIVIDE_H__
+
+#define __FLOAT_ONLY
+#define __CLC_FUNCTION __clc_native_divide
+#define __CLC_BODY <clc/shared/binary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+#undef __FLOAT_ONLY
+
+#endif // __CLC_MATH_CLC_NATIVE_DIVIDE_H__
diff --git a/libclc/clc/include/clc/math/clc_native_recip.h b/libclc/clc/include/clc/math/clc_native_recip.h
new file mode 100644
index 0000000000000..9af36b0c7ce85
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_native_recip.h
@@ -0,0 +1,22 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_NATIVE_RECIP_H__
+#define __CLC_MATH_CLC_NATIVE_RECIP_H__
+
+#define __FLOAT_ONLY
+#define __CLC_FUNCTION __clc_native_recip
+#define __CLC_BODY <clc/shared/unary_decl.inc>
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+#undef __FLOAT_ONLY
+
+#endif // __CLC_MATH_CLC_NATIVE_RECIP_H__
diff --git a/libclc/clc/include/clc/math/clc_sincos_helpers.inc b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
index c891dd91dfd2b..4daff92955cd7 100644
--- a/libclc/clc/include/clc/math/clc_sincos_helpers.inc
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
@@ -10,6 +10,8 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
__CLC_FLOATN y);
_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
__CLC_FLOATN y);
+_CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
+ __CLC_INTN regn);
_CLC_DECL _CLC_OVERLOAD __CLC_INTN __clc_argReductionS(private __CLC_FLOATN *r,
private __CLC_FLOATN *rr,
diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.h b/libclc/clc/include/clc/math/clc_sincos_piby4.h
new file mode 100644
index 0000000000000..50608ae24e947
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sincos_piby4.h
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc/math/clc_sincos_piby4.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/include/clc/math/clc_sincos_piby4.inc b/libclc/clc/include/clc/math/clc_sincos_piby4.inc
new file mode 100644
index 0000000000000..91ec518b70e97
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sincos_piby4.inc
@@ -0,0 +1,174 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+// Evaluate single-precision sin and cos of x in the interval [-pi/4, pi/4].
+_CLC_INLINE _CLC_OVERLOAD void
+__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval,
+ private __CLC_GENTYPE *cosval) {
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...)
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...)
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...)
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+
+ const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F;
+ const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F;
+ const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F;
+ const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F;
+
+ const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F;
+ const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F;
+ const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F;
+ const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F;
+
+ __CLC_GENTYPE x2 = x * x;
+ // Horner form: sin = x + x^3 * S(x2), cos = (1 - x2/2) + x2^2 * C(x2).
+ *sinval = __clc_mad(
+ x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
+ x);
+ *cosval = __clc_mad(
+ x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
+ __clc_mad(x2, -0.5f, 1.0f));
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_INLINE _CLC_OVERLOAD void
+__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx,
+ private __CLC_GENTYPE *sinval,
+ private __CLC_GENTYPE *cosval) {
+ // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+ // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...)
+ // = x * f(w)
+ // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...)
+ // We use a minimax approximation of (f(w) - 1) / w
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we add a correction
+ // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
+ // is an approximation to cos(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+ // = f(w)
+ // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...)
+ // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+ // because this produces an expansion in even powers of x.
+ // If xx (the tail of x) is non-zero, we subtract a correction
+ // term g(x,xx) = x*xx to the result, where g(x,xx)
+ // is an approximation to sin(x)*sin(xx) valid because
+ // xx is tiny relative to x.
+
+ const __CLC_GENTYPE sc1 = -0.166666666666666646259241729;
+ const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2;
+ const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3;
+ const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5;
+ const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7;
+ const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9;
+
+ const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1;
+ const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2;
+ const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4;
+ const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6;
+ const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8;
+ const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10;
+ // x is the head and xx the tail of the argument; outputs go to sinval/cosval.
+ __CLC_GENTYPE x2 = x * x;
+ __CLC_GENTYPE x3 = x2 * x;
+ __CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2;
+ __CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r;
+
+ __CLC_GENTYPE sp = __clc_fma(
+ __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
+
+ __CLC_GENTYPE cp =
+ t +
+ __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
+ x2, cc4),
+ x2, cc3),
+ x2, cc2),
+ x2, cc1),
+ x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
+
+ *sinval =
+ x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
+ *cosval = cp;
+}
+
+_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x,
+ __CLC_GENTYPE xx,
+ private __CLC_GENTYPE *leadval,
+ private __CLC_GENTYPE *tailval) {
+ // 0x3fe921fb54442d18
+ const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01;
+ // 0x3c81a62633145c06
+ const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17;
+
+ // In order to maintain relative precision transform using the identity:
+ // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
+ // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
+
+ __CLC_LONGN ca = x > 0.68;
+ __CLC_LONGN cb = x < -0.68;
+ __CLC_GENTYPE transform = ca ? 1.0 : 0.0;
+ transform = cb ? -1.0 : transform;
+
+ __CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) +
+ __clc_fma(-transform, xx, piby4_tail);
+ __CLC_LONGN c = ca | cb;
+ x = c ? tx : x;
+ xx = c ? 0.0 : xx;
+
+ // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
+ __CLC_GENTYPE t1 = x;
+ __CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x);
+
+ __CLC_GENTYPE a = __clc_fma(r,
+ __clc_fma(r, 0.224044448537022097264602535574e-3,
+ -0.229345080057565662883358588111e-1),
+ 0.372379159759792203640806338901e0);
+
+ __CLC_GENTYPE b =
+ __clc_fma(r,
+ __clc_fma(r,
+ __clc_fma(r, -0.232371494088563558304549252913e-3,
+ 0.260656620398645407524064091208e-1),
+ -0.515658515729031149329237816945e0),
+ 0.111713747927937668539901657944e1);
+
+ __CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
+
+ __CLC_GENTYPE tp = t1 + t2;
+
+ // Compute -1.0/(t1 + t2) accurately
+ __CLC_GENTYPE z1 =
+ __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
+ __CLC_GENTYPE z2 = t2 - (z1 - t1);
+ __CLC_GENTYPE trec = -MATH_RECIP(tp);
+ __CLC_GENTYPE trec_top =
+ __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
+
+ __CLC_GENTYPE tpr = __clc_fma(
+ __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
+
+ __CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
+ __CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
+
+ *leadval = c ? tpt : tp;
+ *tailval = c ? tptr : tpr;
+}
+
+#endif
diff --git a/libclc/clc/include/clc/math/clc_sinpi.h b/libclc/clc/include/clc/math/clc_sinpi.h
new file mode 100644
index 0000000000000..46fec465ceb03
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sinpi.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_SINPI_H__
+#define __CLC_MATH_CLC_SINPI_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sinpi
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SINPI_H__
diff --git a/libclc/clc/include/clc/math/clc_tanpi.h b/libclc/clc/include/clc/math/clc_tanpi.h
new file mode 100644
index 0000000000000..0b8efce27dee8
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_tanpi.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_TANPI_H__
+#define __CLC_MATH_CLC_TANPI_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_tanpi
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_TANPI_H__
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index c31963c59e950..474b11d745a44 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -30,6 +30,7 @@ math/clc_atanh.cl
math/clc_atanpi.cl
math/clc_ceil.cl
math/clc_copysign.cl
+math/clc_cospi.cl
math/clc_ep_log.cl
math/clc_fabs.cl
math/clc_fma.cl
@@ -46,12 +47,14 @@ math/clc_mad.cl
math/clc_modf.cl
math/clc_nan.cl
math/clc_native_cos.cl
+math/clc_native_divide.cl
math/clc_native_exp.cl
math/clc_native_exp2.cl
math/clc_native_log.cl
math/clc_native_log10.cl
math/clc_native_log2.cl
math/clc_native_rsqrt.cl
+math/clc_native_recip.cl
math/clc_native_sin.cl
math/clc_native_sqrt.cl
math/clc_nextafter.cl
@@ -65,9 +68,11 @@ math/clc_rootn.cl
math/clc_round.cl
math/clc_rsqrt.cl
math/clc_sincos_helpers.cl
+math/clc_sinpi.cl
math/clc_sqrt.cl
math/clc_sw_fma.cl
math/clc_tables.cl
+math/clc_tanpi.cl
math/clc_trunc.cl
relational/clc_all.cl
relational/clc_any.cl
diff --git a/libclc/clc/lib/generic/math/clc_cospi.cl b/libclc/clc/lib/generic/math/clc_cospi.cl
new file mode 100644
index 0000000000000..07e1b49cc9e02
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cospi.cl
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_sincos_helpers.h>
+#include <clc/math/clc_sincos_piby4.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc_cospi.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_cospi.inc b/libclc/clc/lib/generic/math/clc_cospi.inc
new file mode 100644
index 0000000000000..b037f82872dde
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cospi.inc
@@ -0,0 +1,116 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_INTN ix = __CLC_AS_INTN(absx);
+ __CLC_INTN iax = __CLC_CONVERT_INTN(absx);
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
+ __CLC_INTN xodd = (iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0;
+ // r = |x| - iax; xodd is the sign bit when iax is odd, otherwise 0.
+ // Initialize with the return value for +-Inf and NaN
+ __CLC_INTN ir = QNANBITPATT_SP32;
+
+ // 2^24 <= |x| < Inf: x is always an even integer, so the result is 1.0
+ ir = ix < PINFBITPATT_SP32 ? 0x3f800000 : ir;
+
+ // 2^23 <= |x| < 2^24: x is always an integer; the result is +-1.0 by parity
+ ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
+
+ // |x| < 2^23: the result depends on which 0.25 interval r falls in
+
+ // r < 1.0
+ __CLC_GENTYPE a = 1.0f - r;
+ __CLC_INTN e = 1;
+ __CLC_INTN s = xodd ^ (__CLC_INTN)0x80000000;
+
+ // r <= 0.75
+ __CLC_INTN c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 1 : e;
+ // e picks cos (non-zero) or sin (zero) of a * pi; s is XORed in as the sign.
+ __CLC_GENTYPE sinval, cosval;
+ __clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
+ __CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
+
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_LONGN ix = __CLC_AS_LONGN(absx);
+ __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
+ __CLC_LONGN xodd =
+ (iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L;
+ // r = |x| - iax; xodd is the sign bit when iax is odd, otherwise 0.
+ // Initialize with the return value for +-Inf and NaN
+ __CLC_LONGN ir = QNANBITPATT_DP64;
+
+ // 2^53 <= |x| < Inf: x is always an even integer, so the result is 1.0
+ ir = ix < PINFBITPATT_DP64 ? 0x3ff0000000000000L : ir;
+
+ // 2^52 <= |x| < 2^53: x is always an integer; the result is +-1.0 by parity
+ ir = absx < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir;
+
+ // |x| < 2^52: the result depends on which 0.25 interval r falls in
+
+ // r < 1.0
+ __CLC_GENTYPE a = 1.0 - r;
+ __CLC_LONGN e = 1;
+ __CLC_LONGN s = xodd ^ (__CLC_LONGN)0x8000000000000000L;
+
+ // r <= 0.75
+ __CLC_LONGN c = r <= 0.75;
+ __CLC_GENTYPE t = r - 0.5;
+ a = c ? t : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5;
+ t = 0.5 - r;
+ a = c ? t : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25;
+ a = c ? r : a;
+ e = c ? 1 : e;
+ // e picks cos (non-zero) or sin (zero) of a * pi; s is XORed in as the sign.
+ __CLC_GENTYPE sinval, cosval;
+ __clc_sincos_piby4(a * M_PI, 0.0, &sinval, &cosval);
+ __CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
+
+ ir = absx < 0x1.0p+52 ? jr : ir;
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cospi(__CLC_GENTYPE x) {
+ return __CLC_CONVERT_GENTYPE(__clc_cospi(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_native_divide.cl b/libclc/clc/lib/generic/math/clc_native_divide.cl
new file mode 100644
index 0000000000000..005089b1ba15d
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_divide.cl
@@ -0,0 +1,14 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/internal/clc.h>
+
+#define __FLOAT_ONLY
+#define __CLC_BODY <clc_native_divide.inc>
+
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_native_divide.inc b/libclc/clc/lib/generic/math/clc_native_divide.inc
new file mode 100644
index 0000000000000..fdf1794812c5a
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_native_divide.inc
@@ -0,0 +1,12 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_divide(__CLC_GENTYPE x,
+ __CLC_GENTYPE y) {
+ return x / y;
+}
diff --git a/libclc/generic/lib/math/native_divide.inc b/libclc/clc/lib/generic/math/clc_native_recip.cl
similarity index 74%
rename from libclc/generic/lib/math/native_divide.inc
rename to libclc/clc/lib/generic/math/clc_native_recip.cl
index b0c83d503b965..4377f10b1543f 100644
--- a/libclc/generic/lib/math/native_divide.inc
+++ b/libclc/clc/lib/generic/math/clc_native_recip.cl
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_divide(__CLC_GENTYPE x, __CLC_GENTYPE y) {
- return x / y;
-}
+#include <clc/internal/clc.h>
+
+#define __FLOAT_ONLY
+#define __CLC_BODY <clc_native_recip.inc>
+
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/native_recip.inc b/libclc/clc/lib/generic/math/clc_native_recip.inc
similarity index 83%
rename from libclc/generic/lib/math/native_recip.inc
rename to libclc/clc/lib/generic/math/clc_native_recip.inc
index d6652fc2d2c69..57eb35a9522f8 100644
--- a/libclc/generic/lib/math/native_recip.inc
+++ b/libclc/clc/lib/generic/math/clc_native_recip.inc
@@ -6,6 +6,6 @@
//
//===----------------------------------------------------------------------===//
-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_recip(__CLC_GENTYPE val) {
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_recip(__CLC_GENTYPE val) {
return 1.0f / val;
}
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
index d8a7b10d8e868..24676d3c7711c 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.cl
@@ -12,6 +12,7 @@
#include <clc/internal/clc.h>
#include <clc/math/clc_fma.h>
#include <clc/math/clc_mad.h>
+#include <clc/math/clc_native_divide.h>
#include <clc/math/clc_trunc.h>
#include <clc/math/math.h>
diff --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index d9d2b81226b72..516a40c4672a9 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -74,6 +74,25 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
return ret;
}
+_CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
+ __CLC_INTN regn) {
+ // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
+ __CLC_FLOATN r = x * x;
+ // Numerator a(r) is degree 1 and denominator b(r) degree 2 in r = x^2.
+ __CLC_FLOATN a =
+ __clc_mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f);
+
+ __CLC_FLOATN b = __clc_mad(
+ r,
+ __clc_mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f),
+ 1.15588821434688393452299f);
+ // t = x + x^3 * a/b approximates tan(x); tr = -1/t.
+ __CLC_FLOATN t = __clc_mad(x * r, __clc_native_divide(a, b), x);
+ __CLC_FLOATN tr = -MATH_RECIP(t);
+ // Odd regn selects tr, even regn selects t; "!= 0" keeps vectors correct.
+ return (regn & 1) != 0 ? tr : t;
+}
+
_CLC_DEF _CLC_OVERLOAD void __clc_fullMulS(private __CLC_FLOATN *hi,
private __CLC_FLOATN *lo,
__CLC_FLOATN a, __CLC_FLOATN b,
diff --git a/libclc/clc/lib/generic/math/clc_sinpi.cl b/libclc/clc/lib/generic/math/clc_sinpi.cl
new file mode 100644
index 0000000000000..6cff247707845
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sinpi.cl
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_sincos_helpers.h>
+#include <clc/math/clc_sincos_piby4.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc_sinpi.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_sinpi.inc b/libclc/clc/lib/generic/math/clc_sinpi.inc
new file mode 100644
index 0000000000000..264609aeaca45
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sinpi.inc
@@ -0,0 +1,114 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
+ __CLC_INTN ix = __CLC_AS_INTN(x);
+ __CLC_INTN xsgn = ix & (__CLC_INTN)0x80000000;
+ ix ^= xsgn;
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_INTN iax = __CLC_CONVERT_INTN(absx);
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
+ __CLC_INTN xodd =
+ xsgn ^ ((iax & 0x1) != 0 ? (__CLC_INTN)0x80000000 : (__CLC_INTN)0);
+ // xsgn is the sign bit of x; xodd is xsgn flipped when iax is odd.
+ // Initialize with the return value for +-Inf and NaN
+ __CLC_INTN ir = QNANBITPATT_SP32;
+
+ // 2^23 <= |x| < Inf: x is always an integer; the result is zero signed as x
+ ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
+
+ // |x| < 2^23: the result depends on which 0.25 interval r falls in
+
+ // r < 1.0
+ __CLC_GENTYPE a = 1.0f - r;
+ __CLC_INTN e = 0;
+
+ // r <= 0.75
+ __CLC_INTN c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 1 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 0 : e;
+ // e picks cos (non-zero) or sin (zero) of a * pi; xodd is XORed in as sign.
+ __CLC_GENTYPE sinval, cosval;
+ __clc_sincos_piby4(a * M_PI_F, &sinval, &cosval);
+ __CLC_INTN jr = xodd ^ __CLC_AS_INTN(e != 0 ? cosval : sinval);
+
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
+ __CLC_LONGN ix = __CLC_AS_LONGN(x);
+ __CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L;
+ ix ^= xsgn;
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx);
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax);
+ __CLC_LONGN xodd =
+ xsgn ^
+ ((iax & 0x1L) != 0 ? (__CLC_LONGN)0x8000000000000000L : (__CLC_LONGN)0L);
+ // xsgn is the sign bit of x; xodd is xsgn flipped when iax is odd.
+ // Initialize with the return value for +-Inf and NaN
+ __CLC_LONGN ir = QNANBITPATT_DP64;
+
+ // 2^52 <= |x| < Inf: x is always an integer; the result is zero signed as x
+ ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
+
+ // |x| < 2^52: the result depends on which 0.25 interval r falls in
+
+ // r < 1.0
+ __CLC_GENTYPE a = 1.0 - r;
+ __CLC_LONGN e = 0;
+
+ // r <= 0.75
+ __CLC_LONGN c = r <= 0.75;
+ __CLC_GENTYPE t = r - 0.5;
+ a = c ? t : a;
+ e = c ? 1 : e;
+
+ // r < 0.5
+ c = r < 0.5;
+ t = 0.5 - r;
+ a = c ? t : a;
+
+ // r <= 0.25
+ c = r <= 0.25;
+ a = c ? r : a;
+ e = c ? 0 : e;
+
+ __CLC_GENTYPE api = a * M_PI;
+ // e picks cos (non-zero) or sin (zero) of api; xodd is XORed in as the sign.
+ __CLC_GENTYPE sinval, cosval;
+ __clc_sincos_piby4(api, 0.0, &sinval, &cosval);
+ __CLC_LONGN jr = xodd ^ __CLC_AS_LONGN(e != 0 ? cosval : sinval);
+
+ ir = absx < 0x1.0p+52 ? jr : ir;
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinpi(__CLC_GENTYPE x) {
+ return __CLC_CONVERT_GENTYPE(__clc_sinpi(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_tanpi.cl b/libclc/clc/lib/generic/math/clc_tanpi.cl
new file mode 100644
index 0000000000000..f1265892d107b
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_tanpi.cl
@@ -0,0 +1,19 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_native_recip.h>
+#include <clc/math/clc_sincos_helpers.h>
+#include <clc/math/clc_sincos_piby4.h>
+#include <clc/math/math.h>
+
+#define __CLC_BODY <clc_tanpi.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_tanpi.inc b/libclc/clc/lib/generic/math/clc_tanpi.inc
new file mode 100644
index 0000000000000..3a2f5dcf7b1ee
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_tanpi.inc
@@ -0,0 +1,132 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { // tan(pi * x), single precision
+ __CLC_INTN ix = __CLC_AS_INTN(x); // bit pattern of x
+ __CLC_INTN xsgn = ix & (__CLC_INTN)SIGNBIT_SP32; // sign bit of x
+ __CLC_INTN xnsgn = xsgn ^ (__CLC_INTN)SIGNBIT_SP32; // inverted sign bit of x
+ ix ^= xsgn; // ix now holds the bits of |x|
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_INTN iax = __CLC_CONVERT_INTN(absx); // |x| converted to integer
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); // fractional part of |x| (only used when |x| < 2^23)
+ __CLC_INTN xodd = xsgn ^ __CLC_AS_INTN((iax & 0x1) != 0 ? SIGNBIT_SP32 : 0); // sign of x, flipped when iax is odd
+
+ // Initialize with the quiet-NaN bit pattern, the return value for +-Inf and NaN
+ __CLC_INTN ir = QNANBITPATT_SP32;
+
+ // 2^24 <= |x| < Inf: x is always an even integer, result is zero with the sign of x
+ ir = ix < PINFBITPATT_SP32 ? xsgn : ir;
+
+ // 2^23 <= |x| < 2^24: x is always an integer, result is zero whose sign also flips for odd x
+ ir = ix < 0x4b800000 ? xodd : ir;
+
+ // |x| < 2^23: result depends on which 0.25 interval the fractional part r falls in.
+
+ // Default, effective for 0.75 < r < 1.0: tan(pi * r) = -tan(pi * (1 - r))
+ __CLC_GENTYPE a = 1.0f - r;
+ __CLC_INTN e = 0;
+ __CLC_INTN s = xnsgn;
+
+ // r <= 0.75, effective for 0.5 <= r <= 0.75: tan(pi * r) = -1 / tan(pi * (r - 0.5))
+ __CLC_INTN c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 1 : e;
+ s = c ? xsgn : s;
+
+ // r < 0.5, effective for 0.25 < r < 0.5: tan(pi * r) = 1 / tan(pi * (0.5 - r))
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xnsgn : s;
+
+ // 0 <= r <= 0.25: tan(pi * r) is evaluated directly on r
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 0 : e;
+ s = c ? xsgn : s;
+
+ __CLC_GENTYPE t = __clc_tanf_piby4(a * M_PI_F, 0); // tangent of the reduced argument pi * a
+ __CLC_GENTYPE tr = -__clc_native_recip(t); // negated reciprocal of t
+ __CLC_INTN jr = s ^ __CLC_AS_INTN(e != 0 ? tr : t); // pick t or tr per e, then apply the sign s
+
+ jr = r == 0.5f ? xodd | 0x7f800000 : jr; // half-integers: infinity bit pattern carrying the xodd sign
+
+ ir = ix < 0x4b000000 ? jr : ir; // use the reduced result only when |x| < 2^23
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { // tan(pi * x), double precision
+ __CLC_LONGN ix = __CLC_AS_LONGN(x); // bit pattern of x
+ __CLC_LONGN xsgn = ix & (__CLC_LONGN)0x8000000000000000L; // sign bit of x
+ __CLC_LONGN xnsgn = xsgn ^ (__CLC_LONGN)0x8000000000000000L; // inverted sign bit of x
+ ix ^= xsgn; // ix now holds the bits of |x|
+ __CLC_GENTYPE absx = __clc_fabs(x);
+ __CLC_LONGN iax = __CLC_CONVERT_LONGN(absx); // |x| converted to integer
+ __CLC_GENTYPE r = absx - __CLC_CONVERT_GENTYPE(iax); // fractional part of |x| (only used when |x| < 2^52)
+ __CLC_LONGN xodd =
+ xsgn ^ __CLC_AS_LONGN((iax & 0x1) != 0 ? 0x8000000000000000L : 0L); // sign of x, flipped when iax is odd
+
+ // Initialize with the quiet-NaN bit pattern, the return value for +-Inf and NaN
+ __CLC_LONGN ir = QNANBITPATT_DP64;
+
+ // 2^53 <= |x| < Inf: x is always an even integer, result is zero with the sign of x
+ ir = ix < PINFBITPATT_DP64 ? xsgn : ir;
+
+ // 2^52 <= |x| < 2^53: x is always an integer, result is zero whose sign also flips for odd x
+ ir = ix < 0x4340000000000000L ? xodd : ir;
+
+ // |x| < 2^52: result depends on which 0.25 interval the fractional part r falls in.
+
+ // Default, effective for 0.75 < r < 1.0: tan(pi * r) = -tan(pi * (1 - r))
+ __CLC_GENTYPE a = 1.0 - r;
+ __CLC_LONGN e = 0;
+ __CLC_LONGN s = xnsgn;
+
+ // r <= 0.75, effective for 0.5 <= r <= 0.75: tan(pi * r) = -1 / tan(pi * (r - 0.5))
+ __CLC_LONGN c = r <= 0.75;
+ __CLC_GENTYPE t = r - 0.5;
+ a = c ? t : a;
+ e = c ? 1 : e;
+ s = c ? xsgn : s;
+
+ // r < 0.5, effective for 0.25 < r < 0.5: tan(pi * r) = 1 / tan(pi * (0.5 - r))
+ c = r < 0.5;
+ t = 0.5 - r;
+ a = c ? t : a;
+ s = c ? xnsgn : s;
+
+ // 0 <= r <= 0.25: tan(pi * r) is evaluated directly on r
+ c = r <= 0.25;
+ a = c ? r : a;
+ e = c ? 0 : e;
+ s = c ? xsgn : s;
+
+ __CLC_GENTYPE api = a * M_PI;
+ __CLC_GENTYPE lo, hi;
+ __clc_tan_piby4(api, 0.0, &lo, &hi); // presumably lo = tan(api) and hi = -1 / tan(api); confirm in clc_sincos_piby4.inc
+ __CLC_LONGN jr = s ^ __CLC_AS_LONGN(e != 0 ? hi : lo); // pick lo or hi per e, then apply the sign s
+
+ __CLC_LONGN si = xodd | 0x7ff0000000000000L; // infinity bit pattern carrying the xodd sign
+ jr = r == 0.5 ? si : jr; // half-integers return that signed infinity
+
+ ir = ix < 0x4330000000000000L ? jr : ir; // use the reduced result only when |x| < 2^52
+
+ return __CLC_AS_GENTYPE(ir);
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanpi(__CLC_GENTYPE x) { // fp16: evaluate on the __CLC_CONVERT_FLOATN-converted value, then convert back
+ return __CLC_CONVERT_GENTYPE(__clc_tanpi(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clspv/lib/SOURCES b/libclc/clspv/lib/SOURCES
index 1c86fb0fbc8e8..0ef09d275243b 100644
--- a/libclc/clspv/lib/SOURCES
+++ b/libclc/clspv/lib/SOURCES
@@ -18,7 +18,6 @@ subnormal_config.cl
../../generic/lib/math/cbrt.cl
../../generic/lib/math/clc_exp10.cl
../../generic/lib/math/clc_tan.cl
-../../generic/lib/math/clc_tanpi.cl
../../generic/lib/math/cos.cl
../../generic/lib/math/cosh.cl
../../generic/lib/math/cospi.cl
diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES
index a93444af0c954..9b5bbc5d9b53c 100644
--- a/libclc/generic/lib/SOURCES
+++ b/libclc/generic/lib/SOURCES
@@ -174,7 +174,6 @@ math/sqrt.cl
math/clc_tan.cl
math/tan.cl
math/tanh.cl
-math/clc_tanpi.cl
math/tanpi.cl
math/tgamma.cl
math/trunc.cl
diff --git a/libclc/generic/lib/math/clc_tan.cl b/libclc/generic/lib/math/clc_tan.cl
index eb02879339307..7e28e9ffed3b6 100644
--- a/libclc/generic/lib/math/clc_tan.cl
+++ b/libclc/generic/lib/math/clc_tan.cl
@@ -35,7 +35,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_tan(float x) {
_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tan, float);
#ifdef cl_khr_fp64
-#include "sincosD_piby4.h"
+#include <clc/math/clc_sincos_piby4.h>
_CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
double y = __clc_fabs(x);
@@ -48,9 +48,10 @@ _CLC_DEF _CLC_OVERLOAD double __clc_tan(double x) {
else
__clc_remainder_piby2_large(y, &r, &rr, &regn);
- double2 tt = __clc_tan_piby4(r, rr);
+ double lead, tail;
+ __clc_tan_piby4(r, rr, &lead, &tail);
- int2 t = as_int2(regn & 1 ? tt.y : tt.x);
+ int2 t = as_int2(regn & 1 ? tail : lead);
t.hi ^= (x < 0.0) << 31;
return __clc_isnan(x) || __clc_isinf(x) ? as_double(QNANBITPATT_DP64)
diff --git a/libclc/generic/lib/math/clc_tanpi.cl b/libclc/generic/lib/math/clc_tanpi.cl
deleted file mode 100644
index 533db5e4d1877..0000000000000
--- a/libclc/generic/lib/math/clc_tanpi.cl
+++ /dev/null
@@ -1,132 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "sincos_helpers.h"
-#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
-
-_CLC_DEF _CLC_OVERLOAD float __clc_tanpi(float x)
-{
- int ix = as_int(x);
- int xsgn = ix & 0x80000000;
- int xnsgn = xsgn ^ 0x80000000;
- ix ^= xsgn;
- float ax = as_float(ix);
- int iax = (int)ax;
- float r = ax - iax;
- int xodd = xsgn ^ (iax & 0x1 ? 0x80000000 : 0);
-
- // Initialize with return for +-Inf and NaN
- int ir = 0x7fc00000;
-
- // 2^24 <= |x| < Inf, the result is always even integer
- ir = ix < 0x7f800000 ? xsgn : ir;
-
- // 2^23 <= |x| < 2^24, the result is always integer
- ir = ix < 0x4b800000 ? xodd : ir;
-
- // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
-
- // r < 1.0
- float a = 1.0f - r;
- int e = 0;
- int s = xnsgn;
-
- // r <= 0.75
- int c = r <= 0.75f;
- a = c ? r - 0.5f : a;
- e = c ? 1 : e;
- s = c ? xsgn : s;
-
- // r < 0.5
- c = r < 0.5f;
- a = c ? 0.5f - r : a;
- s = c ? xnsgn : s;
-
- // 0 < r <= 0.25
- c = r <= 0.25f;
- a = c ? r : a;
- e = c ? 0 : e;
- s = c ? xsgn : s;
-
- float t = __clc_tanf_piby4(a * M_PI_F, 0);
- float tr = -native_recip(t);
- int jr = s ^ as_int(e ? tr : t);
-
- jr = r == 0.5f ? xodd | 0x7f800000 : jr;
-
- ir = ix < 0x4b000000 ? jr : ir;
-
- return as_float(ir);
-}
-_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_tanpi, float);
-
-#ifdef cl_khr_fp64
-#include "sincosD_piby4.h"
-
-_CLC_DEF _CLC_OVERLOAD double __clc_tanpi(double x)
-{
- long ix = as_long(x);
- long xsgn = ix & 0x8000000000000000L;
- long xnsgn = xsgn ^ 0x8000000000000000L;
- ix ^= xsgn;
- double ax = as_double(ix);
- long iax = (long)ax;
- double r = ax - iax;
- long xodd = xsgn ^ (iax & 0x1 ? 0x8000000000000000L : 0L);
-
- // Initialize with return for +-Inf and NaN
- long ir = 0x7ff8000000000000L;
-
- // 2^53 <= |x| < Inf, the result is always even integer
- ir = ix < 0x7ff0000000000000L ? xsgn : ir;
-
- // 2^52 <= |x| < 2^53, the result is always integer
- ir = ix < 0x4340000000000000L ? xodd : ir;
-
- // 0x1.0p-14 <= |x| < 2^53, result depends on which 0.25 interval
-
- // r < 1.0
- double a = 1.0 - r;
- int e = 0;
- long s = xnsgn;
-
- // r <= 0.75
- int c = r <= 0.75;
- double t = r - 0.5;
- a = c ? t : a;
- e = c ? 1 : e;
- s = c ? xsgn : s;
-
- // r < 0.5
- c = r < 0.5;
- t = 0.5 - r;
- a = c ? t : a;
- s = c ? xnsgn : s;
-
- // r <= 0.25
- c = r <= 0.25;
- a = c ? r : a;
- e = c ? 0 : e;
- s = c ? xsgn : s;
-
- double api = a * M_PI;
- double2 tt = __clc_tan_piby4(api, 0.0);
- long jr = s ^ as_long(e ? tt.hi : tt.lo);
-
- long si = xodd | 0x7ff0000000000000L;
- jr = r == 0.5 ? si : jr;
-
- ir = ix < 0x4330000000000000L ? jr : ir;
-
- return as_double(ir);
-}
-_CLC_UNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_tanpi, double);
-#endif
diff --git a/libclc/generic/lib/math/cospi.cl b/libclc/generic/lib/math/cospi.cl
index c8d4148de700f..f78935664c65b 100644
--- a/libclc/generic/lib/math/cospi.cl
+++ b/libclc/generic/lib/math/cospi.cl
@@ -7,124 +7,9 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_cospi.h>
-#include "sincos_helpers.h"
-#include "sincospiF_piby4.h"
-#ifdef cl_khr_fp64
-#include "sincosD_piby4.h"
-#endif
+#define FUNCTION cospi
+#define __CLC_BODY <clc/shared/unary_def.inc>
-_CLC_OVERLOAD _CLC_DEF float cospi(float x)
-{
- int ix = as_int(x) & 0x7fffffff;
- float ax = as_float(ix);
- int iax = (int)ax;
- float r = ax - iax;
- int xodd = iax & 0x1 ? 0x80000000 : 0;
-
- // Initialize with return for +-Inf and NaN
- int ir = 0x7fc00000;
-
- // 2^24 <= |x| < Inf, the result is always even integer
- ir = ix < 0x7f800000 ? 0x3f800000 : ir;
-
- // 2^23 <= |x| < 2^24, the result is always integer
- ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
-
- // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
-
- // r < 1.0
- float a = 1.0f - r;
- int e = 1;
- int s = xodd ^ 0x80000000;
-
- // r <= 0.75
- int c = r <= 0.75f;
- a = c ? r - 0.5f : a;
- e = c ? 0 : e;
-
- // r < 0.5
- c = r < 0.5f;
- a = c ? 0.5f - r : a;
- s = c ? xodd : s;
-
- // r <= 0.25
- c = r <= 0.25f;
- a = c ? r : a;
- e = c ? 1 : e;
-
- float2 t = __libclc__sincosf_piby4(a * M_PI_F);
- int jr = s ^ as_int(e ? t.hi : t.lo);
-
- ir = ix < 0x4b000000 ? jr : ir;
-
- return as_float(ir);
-}
-
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
-
- long ix = as_long(x) & 0x7fffffffffffffffL;
- double ax = as_double(ix);
- long iax = (long)ax;
- double r = ax - (double)iax;
- long xodd = iax & 0x1L ? 0x8000000000000000L : 0L;
-
- // Initialize with return for +-Inf and NaN
- long ir = 0x7ff8000000000000L;
-
- // 2^53 <= |x| < Inf, the result is always even integer
- ir = ix < 0x7ff0000000000000 ? 0x3ff0000000000000L : ir;
-
- // 2^52 <= |x| < 2^53, the result is always integer
- ir = ax < 0x1.0p+53 ? xodd | 0x3ff0000000000000L : ir;
-
- // 0x1.0p-7 <= |x| < 2^52, result depends on which 0.25 interval
-
- // r < 1.0
- double a = 1.0 - r;
- int e = 1;
- long s = xodd ^ 0x8000000000000000L;
-
- // r <= 0.75
- int c = r <= 0.75;
- double t = r - 0.5;
- a = c ? t : a;
- e = c ? 0 : e;
-
- // r < 0.5
- c = r < 0.5;
- t = 0.5 - r;
- a = c ? t : a;
- s = c ? xodd : s;
-
- // r <= 0.25
- c = r <= 0.25;
- a = c ? r : a;
- e = c ? 1 : e;
-
- double2 sc = __libclc__sincos_piby4(a * M_PI, 0.0);
- long jr = s ^ as_long(e ? sc.hi : sc.lo);
-
- ir = ax < 0x1.0p+52 ? jr : ir;
-
- return as_double(ir);
-}
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(cospi)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/native_divide.cl b/libclc/generic/lib/math/native_divide.cl
index a1e9008a90c94..a4d9b830b5519 100644
--- a/libclc/generic/lib/math/native_divide.cl
+++ b/libclc/generic/lib/math/native_divide.cl
@@ -7,7 +7,10 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
+#include <clc/math/clc_native_divide.h>
-#define __CLC_BODY <native_divide.inc>
#define __FLOAT_ONLY
+#define FUNCTION native_divide
+#define __CLC_BODY <clc/shared/binary_def.inc>
+
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/native_recip.cl b/libclc/generic/lib/math/native_recip.cl
index b43248e6aeae6..3c844495046f1 100644
--- a/libclc/generic/lib/math/native_recip.cl
+++ b/libclc/generic/lib/math/native_recip.cl
@@ -7,7 +7,10 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
+#include <clc/math/clc_native_recip.h>
-#define __CLC_BODY <native_recip.inc>
#define __FLOAT_ONLY
+#define FUNCTION native_recip
+#define __CLC_BODY <clc/shared/unary_def.inc>
+
#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/sincosD_piby4.h b/libclc/generic/lib/math/sincosD_piby4.h
deleted file mode 100644
index cce3d1554583f..0000000000000
--- a/libclc/generic/lib/math/sincosD_piby4.h
+++ /dev/null
@@ -1,119 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_INLINE double2
-__libclc__sincos_piby4(double x, double xx)
-{
- // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
- // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
- // = x * f(w)
- // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
- // We use a minimax approximation of (f(w) - 1) / w
- // because this produces an expansion in even powers of x.
- // If xx (the tail of x) is non-zero, we add a correction
- // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
- // is an approximation to cos(x)*sin(xx) valid because
- // xx is tiny relative to x.
-
- // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
- // = f(w)
- // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
- // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
- // because this produces an expansion in even powers of x.
- // If xx (the tail of x) is non-zero, we subtract a correction
- // term g(x,xx) = x*xx to the result, where g(x,xx)
- // is an approximation to sin(x)*sin(xx) valid because
- // xx is tiny relative to x.
-
- const double sc1 = -0.166666666666666646259241729;
- const double sc2 = 0.833333333333095043065222816e-2;
- const double sc3 = -0.19841269836761125688538679e-3;
- const double sc4 = 0.275573161037288022676895908448e-5;
- const double sc5 = -0.25051132068021699772257377197e-7;
- const double sc6 = 0.159181443044859136852668200e-9;
-
- const double cc1 = 0.41666666666666665390037e-1;
- const double cc2 = -0.13888888888887398280412e-2;
- const double cc3 = 0.248015872987670414957399e-4;
- const double cc4 = -0.275573172723441909470836e-6;
- const double cc5 = 0.208761463822329611076335e-8;
- const double cc6 = -0.113826398067944859590880e-10;
-
- double x2 = x * x;
- double x3 = x2 * x;
- double r = 0.5 * x2;
- double t = 1.0 - r;
-
- double sp = fma(fma(fma(fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
-
- double cp = t + fma(fma(fma(fma(fma(fma(cc6, x2, cc5), x2, cc4), x2, cc3), x2, cc2), x2, cc1),
- x2*x2, fma(x, xx, (1.0 - t) - r));
-
- double2 ret;
- ret.lo = x - fma(-x3, sc1, fma(fma(-x3, sp, 0.5*xx), x2, -xx));
- ret.hi = cp;
-
- return ret;
-}
-
-_CLC_INLINE double2
-__clc_tan_piby4(double x, double xx)
-{
- const double piby4_lead = 7.85398163397448278999e-01; // 0x3fe921fb54442d18
- const double piby4_tail = 3.06161699786838240164e-17; // 0x3c81a62633145c06
-
- // In order to maintain relative precision transform using the identity:
- // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
- // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
-
- int ca = x > 0.68;
- int cb = x < -0.68;
- double transform = ca ? 1.0 : 0.0;
- transform = cb ? -1.0 : transform;
-
- double tx = fma(-transform, x, piby4_lead) + fma(-transform, xx, piby4_tail);
- int c = ca | cb;
- x = c ? tx : x;
- xx = c ? 0.0 : xx;
-
- // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
- double t1 = x;
- double r = fma(2.0, x*xx, x*x);
-
- double a = fma(r,
- fma(r, 0.224044448537022097264602535574e-3, -0.229345080057565662883358588111e-1),
- 0.372379159759792203640806338901e0);
-
- double b = fma(r,
- fma(r,
- fma(r, -0.232371494088563558304549252913e-3, 0.260656620398645407524064091208e-1),
- -0.515658515729031149329237816945e0),
- 0.111713747927937668539901657944e1);
-
- double t2 = fma(MATH_DIVIDE(a, b), x*r, xx);
-
- double tp = t1 + t2;
-
- // Compute -1.0/(t1 + t2) accurately
- double z1 = as_double(as_long(tp) & 0xffffffff00000000L);
- double z2 = t2 - (z1 - t1);
- double trec = -MATH_RECIP(tp);
- double trec_top = as_double(as_long(trec) & 0xffffffff00000000L);
-
- double tpr = fma(fma(trec_top, z2, fma(trec_top, z1, 1.0)), trec, trec_top);
-
- double tpt = transform * (1.0 - MATH_DIVIDE(2.0*tp, 1.0 + tp));
- double tptr = transform * (MATH_DIVIDE(2.0*tp, tp - 1.0) - 1.0);
-
- double2 ret;
- ret.lo = c ? tpt : tp;
- ret.hi = c ? tptr : tpr;
- return ret;
-}
diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl
index 32ab5af4ca90c..651cd11ccf016 100644
--- a/libclc/generic/lib/math/sincos_helpers.cl
+++ b/libclc/generic/lib/math/sincos_helpers.cl
@@ -17,31 +17,13 @@
#include <clc/math/tables.h>
#include <clc/shared/clc_max.h>
-#define bytealign(src0, src1, src2) \
- ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8)))
-
-_CLC_DEF float __clc_tanf_piby4(float x, int regn) {
- // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].
- float r = x * x;
-
- float a =
- __clc_mad(r, -0.0172032480471481694693109f, 0.385296071263995406715129f);
-
- float b = __clc_mad(
- r,
- __clc_mad(r, 0.01844239256901656082986661f, -0.51396505478854532132342f),
- 1.15588821434688393452299f);
-
- float t = __clc_mad(x * r, native_divide(a, b), x);
- float tr = -MATH_RECIP(t);
-
- return regn & 1 ? tr : t;
-}
-
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#define bytealign(src0, src1, src2) \
+ ((uint)(((((long)(src0)) << 32) | (long)(src1)) >> (((src2) & 3) * 8)))
+
// Reduction for medium sized arguments
_CLC_DEF void __clc_remainder_piby2_medium(double x, private double *r,
private double *rr,
diff --git a/libclc/generic/lib/math/sincos_helpers.h b/libclc/generic/lib/math/sincos_helpers.h
index c94784081cd64..11cb93f34850d 100644
--- a/libclc/generic/lib/math/sincos_helpers.h
+++ b/libclc/generic/lib/math/sincos_helpers.h
@@ -9,8 +9,6 @@
#include <clc/clcfunc.h>
#include <clc/clctypes.h>
-_CLC_DECL float __clc_tanf_piby4(float x, int y);
-
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
diff --git a/libclc/generic/lib/math/sincospiF_piby4.h b/libclc/generic/lib/math/sincospiF_piby4.h
deleted file mode 100644
index 66596395fdd1e..0000000000000
--- a/libclc/generic/lib/math/sincospiF_piby4.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_mad.h>
-
-// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
-_CLC_INLINE float2 __libclc__sincosf_piby4(float x) {
- // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
- // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
- // = x * f(w)
- // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
- // We use a minimax approximation of (f(w) - 1) / w
- // because this produces an expansion in even powers of x.
-
- // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
- // = f(w)
- // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
- // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
- // because this produces an expansion in even powers of x.
-
- const float sc1 = -0.166666666638608441788607926e0F;
- const float sc2 = 0.833333187633086262120839299e-2F;
- const float sc3 = -0.198400874359527693921333720e-3F;
- const float sc4 = 0.272500015145584081596826911e-5F;
-
- const float cc1 = 0.41666666664325175238031e-1F;
- const float cc2 = -0.13888887673175665567647e-2F;
- const float cc3 = 0.24800600878112441958053e-4F;
- const float cc4 = -0.27301013343179832472841e-6F;
-
- float x2 = x * x;
-
- float2 ret;
- ret.x = __clc_mad(
- x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
- x);
- ret.y = __clc_mad(
- x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
- __clc_mad(x2, -0.5f, 1.0f));
- return ret;
-}
diff --git a/libclc/generic/lib/math/sinpi.cl b/libclc/generic/lib/math/sinpi.cl
index 1261b7f9c5277..223e7db142117 100644
--- a/libclc/generic/lib/math/sinpi.cl
+++ b/libclc/generic/lib/math/sinpi.cl
@@ -7,119 +7,9 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_sinpi.h>
-#include "sincospiF_piby4.h"
-#ifdef cl_khr_fp64
-#include "sincosD_piby4.h"
-#endif
+#define FUNCTION sinpi
+#define __CLC_BODY <clc/shared/unary_def.inc>
-_CLC_OVERLOAD _CLC_DEF float sinpi(float x)
-{
- int ix = as_int(x);
- int xsgn = ix & 0x80000000;
- ix ^= xsgn;
- float ax = as_float(ix);
- int iax = (int)ax;
- float r = ax - iax;
- int xodd = xsgn ^ (iax & 0x1 ? 0x80000000 : 0);
-
- // Initialize with return for +-Inf and NaN
- int ir = 0x7fc00000;
-
- // 2^23 <= |x| < Inf, the result is always integer
- ir = ix < 0x7f800000 ? xsgn : ir;
-
- // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
-
- // r < 1.0
- float a = 1.0f - r;
- int e = 0;
-
- // r <= 0.75
- int c = r <= 0.75f;
- a = c ? r - 0.5f : a;
- e = c ? 1 : e;
-
- // r < 0.5
- c = r < 0.5f;
- a = c ? 0.5f - r : a;
-
- // 0 < r <= 0.25
- c = r <= 0.25f;
- a = c ? r : a;
- e = c ? 0 : e;
-
- float2 t = __libclc__sincosf_piby4(a * M_PI_F);
- int jr = xodd ^ as_int(e ? t.hi : t.lo);
-
- ir = ix < 0x4b000000 ? jr : ir;
-
- return as_float(ir);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinpi, float);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double sinpi(double x)
-{
- long ix = as_long(x);
- long xsgn = ix & 0x8000000000000000L;
- ix ^= xsgn;
- double ax = as_double(ix);
- long iax = (long)ax;
- double r = ax - (double)iax;
- long xodd = xsgn ^ (iax & 0x1L ? 0x8000000000000000L : 0L);
-
- // Initialize with return for +-Inf and NaN
- long ir = 0x7ff8000000000000L;
-
- // 2^23 <= |x| < Inf, the result is always integer
- ir = ix < 0x7ff0000000000000 ? xsgn : ir;
-
- // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
-
- // r < 1.0
- double a = 1.0 - r;
- int e = 0;
-
- // r <= 0.75
- int c = r <= 0.75;
- double t = r - 0.5;
- a = c ? t : a;
- e = c ? 1 : e;
-
- // r < 0.5
- c = r < 0.5;
- t = 0.5 - r;
- a = c ? t : a;
-
- // r <= 0.25
- c = r <= 0.25;
- a = c ? r : a;
- e = c ? 0 : e;
-
- double api = a * M_PI;
- double2 sc = __libclc__sincos_piby4(api, 0.0);
- long jr = xodd ^ as_long(e ? sc.hi : sc.lo);
-
- ir = ax < 0x1.0p+52 ? jr : ir;
-
- return as_double(ir);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinpi, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(sinpi)
-
-#endif
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/tanpi.cl b/libclc/generic/lib/math/tanpi.cl
index 0f0461b1742c1..8015d32adb38d 100644
--- a/libclc/generic/lib/math/tanpi.cl
+++ b/libclc/generic/lib/math/tanpi.cl
@@ -7,9 +7,9 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
+#include <clc/math/clc_tanpi.h>
-#include <math/clc_tanpi.h>
+#define FUNCTION tanpi
+#define __CLC_BODY <clc/shared/unary_def.inc>
-#define __CLC_FUNC tanpi
-#define __CLC_BODY <clc_sw_unary.inc>
#include <clc/math/gentype.inc>
diff --git a/libclc/spirv/lib/SOURCES b/libclc/spirv/lib/SOURCES
index ad9f44a6149ae..5358577ea1805 100644
--- a/libclc/spirv/lib/SOURCES
+++ b/libclc/spirv/lib/SOURCES
@@ -72,7 +72,6 @@ math/fma.cl
../../generic/lib/math/clc_tan.cl
../../generic/lib/math/tan.cl
../../generic/lib/math/tanh.cl
-../../generic/lib/math/clc_tanpi.cl
../../generic/lib/math/tanpi.cl
../../generic/lib/math/tgamma.cl
../../generic/lib/shared/vload.cl
More information about the cfe-commits
mailing list