[libclc] 377148f - [libclc] Move functions definition from header clc_sincos_piby4.inc into clc_sincos_helpers.cl (#164028)

Sun Oct 19 19:08:40 PDT 2025

Author: Wenju He
Date: 2025-10-20T10:08:35+08:00
New Revision: 377148fd60df55e486dc72b26248006ae092725a

URL: https://github.com/llvm/llvm-project/commit/377148fd60df55e486dc72b26248006ae092725a
DIFF: https://github.com/llvm/llvm-project/commit/377148fd60df55e486dc72b26248006ae092725a.diff

LOG: [libclc] Move functions definition from header clc_sincos_piby4.inc into clc_sincos_helpers.cl (#164028)

Inline functions defined in clc_sincos_piby4.inc lack the static specifier
and are deleted by EliminateAvailableExternallyPass when not inlined.

This PR fixes the problem by removing inline and moving the function
definitions
into clc/lib/generic/math/clc_sincos_helpers.cl. It makes sense to put
all sin/cos helper definitions in one file, clc_sincos_helpers.cl.

Added: 
    

Modified: 
    libclc/clc/include/clc/math/clc_sincos_helpers.inc
    libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
    libclc/clc/lib/generic/math/clc_cos.cl
    libclc/clc/lib/generic/math/clc_cospi.cl
    libclc/clc/lib/generic/math/clc_sin.cl
    libclc/clc/lib/generic/math/clc_sincos_helpers.inc
    libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
    libclc/clc/lib/generic/math/clc_sinpi.cl
    libclc/clc/lib/generic/math/clc_tan.cl
    libclc/clc/lib/generic/math/clc_tanpi.cl

Removed: 
    libclc/clc/include/clc/math/clc_sincos_piby4.h
    libclc/clc/include/clc/math/clc_sincos_piby4.inc


################################################################################
diff  --git a/libclc/clc/include/clc/math/clc_sincos_helpers.inc b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
index 4daff92955cd7..0a3b816cb8c89 100644

--- a/libclc/clc/include/clc/math/clc_sincos_helpers.inc
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers.inc
@@ -10,6 +10,11 @@ _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_sinf_piby4(__CLC_FLOATN x,
                                                       __CLC_FLOATN y);
 _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
                                                       __CLC_FLOATN y);
+
+_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
+                                                private __CLC_FLOATN *sinval,
+                                                private __CLC_FLOATN *cosval);
+
 _CLC_DECL _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
                                                       __CLC_INTN regn);
 

diff  --git a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
index 09c6e1c965f64..15934cab32751 100644
--- a/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
+++ b/libclc/clc/include/clc/math/clc_sincos_helpers_fp64.inc
@@ -6,6 +6,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+_CLC_DECL _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
+                                                __CLC_DOUBLEN xx,
+                                                private __CLC_DOUBLEN *sinval,
+                                                private __CLC_DOUBLEN *cosval);
+
+_CLC_DECL _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
+                                             private __CLC_DOUBLEN *leadval,
+                                             private __CLC_DOUBLEN *tailval);
+
 _CLC_DECL _CLC_OVERLOAD void
 __clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,
                              private __CLC_DOUBLEN *rr,

diff  --git a/libclc/clc/include/clc/math/clc_sincos_piby4.h b/libclc/clc/include/clc/math/clc_sincos_piby4.h
deleted file mode 100644
index 50608ae24e947..0000000000000
--- a/libclc/clc/include/clc/math/clc_sincos_piby4.h
+++ /dev/null
@@ -1,14 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/math/clc_fma.h>
-#include <clc/math/clc_mad.h>
-#include <clc/math/math.h>
-
-#define __CLC_BODY <clc/math/clc_sincos_piby4.inc>
-#include <clc/math/gentype.inc>

diff  --git a/libclc/clc/include/clc/math/clc_sincos_piby4.inc b/libclc/clc/include/clc/math/clc_sincos_piby4.inc
deleted file mode 100644
index 91ec518b70e97..0000000000000
--- a/libclc/clc/include/clc/math/clc_sincos_piby4.inc
+++ /dev/null
@@ -1,174 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#if __CLC_FPSIZE == 32
-
-// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
-_CLC_INLINE _CLC_OVERLOAD void
-__clc_sincos_piby4(__CLC_GENTYPE x, private __CLC_GENTYPE *sinval,
-                   private __CLC_GENTYPE *cosval) {
-  // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
-  // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
-  // = x * f(w)
-  // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
-  // We use a minimax approximation of (f(w) - 1) / w
-  // because this produces an expansion in even powers of x.
-
-  // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
-  // = f(w)
-  // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
-  // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
-  // because this produces an expansion in even powers of x.
-
-  const __CLC_GENTYPE sc1 = -0.166666666638608441788607926e0F;
-  const __CLC_GENTYPE sc2 = 0.833333187633086262120839299e-2F;
-  const __CLC_GENTYPE sc3 = -0.198400874359527693921333720e-3F;
-  const __CLC_GENTYPE sc4 = 0.272500015145584081596826911e-5F;
-
-  const __CLC_GENTYPE cc1 = 0.41666666664325175238031e-1F;
-  const __CLC_GENTYPE cc2 = -0.13888887673175665567647e-2F;
-  const __CLC_GENTYPE cc3 = 0.24800600878112441958053e-4F;
-  const __CLC_GENTYPE cc4 = -0.27301013343179832472841e-6F;
-
-  __CLC_GENTYPE x2 = x * x;
-
-  *sinval = __clc_mad(
-      x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
-      x);
-  *cosval = __clc_mad(
-      x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
-      __clc_mad(x2, -0.5f, 1.0f));
-}
-
-#elif __CLC_FPSIZE == 64
-
-_CLC_INLINE _CLC_OVERLOAD void
-__clc_sincos_piby4(__CLC_GENTYPE x, __CLC_GENTYPE xx,
-                   private __CLC_GENTYPE *sinval,
-                   private __CLC_GENTYPE *cosval) {
-  // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
-  //                      = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
-  //                      = x * f(w)
-  // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
-  // We use a minimax approximation of (f(w) - 1) / w
-  // because this produces an expansion in even powers of x.
-  // If xx (the tail of x) is non-zero, we add a correction
-  // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
-  // is an approximation to cos(x)*sin(xx) valid because
-  // xx is tiny relative to x.
-
-  // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
-  //                      = f(w)
-  // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
-  // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
-  // because this produces an expansion in even powers of x.
-  // If xx (the tail of x) is non-zero, we subtract a correction
-  // term g(x,xx) = x*xx to the result, where g(x,xx)
-  // is an approximation to sin(x)*sin(xx) valid because
-  // xx is tiny relative to x.
-
-  const __CLC_GENTYPE sc1 = -0.166666666666666646259241729;
-  const __CLC_GENTYPE sc2 = 0.833333333333095043065222816e-2;
-  const __CLC_GENTYPE sc3 = -0.19841269836761125688538679e-3;
-  const __CLC_GENTYPE sc4 = 0.275573161037288022676895908448e-5;
-  const __CLC_GENTYPE sc5 = -0.25051132068021699772257377197e-7;
-  const __CLC_GENTYPE sc6 = 0.159181443044859136852668200e-9;
-
-  const __CLC_GENTYPE cc1 = 0.41666666666666665390037e-1;
-  const __CLC_GENTYPE cc2 = -0.13888888888887398280412e-2;
-  const __CLC_GENTYPE cc3 = 0.248015872987670414957399e-4;
-  const __CLC_GENTYPE cc4 = -0.275573172723441909470836e-6;
-  const __CLC_GENTYPE cc5 = 0.208761463822329611076335e-8;
-  const __CLC_GENTYPE cc6 = -0.113826398067944859590880e-10;
-
-  __CLC_GENTYPE x2 = x * x;
-  __CLC_GENTYPE x3 = x2 * x;
-  __CLC_GENTYPE r = (__CLC_GENTYPE)0.5 * x2;
-  __CLC_GENTYPE t = (__CLC_GENTYPE)1.0 - r;
-
-  __CLC_GENTYPE sp = __clc_fma(
-      __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
-
-  __CLC_GENTYPE cp =
-      t +
-      __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
-                                                        x2, cc4),
-                                              x2, cc3),
-                                    x2, cc2),
-                          x2, cc1),
-                x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
-
-  *sinval =
-      x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
-  *cosval = cp;
-}
-
-_CLC_INLINE _CLC_OVERLOAD void __clc_tan_piby4(__CLC_GENTYPE x,
-                                               __CLC_GENTYPE xx,
-                                               private __CLC_GENTYPE *leadval,
-                                               private __CLC_GENTYPE *tailval) {
-  // 0x3fe921fb54442d18
-  const __CLC_GENTYPE piby4_lead = 7.85398163397448278999e-01;
-  // 0x3c81a62633145c06
-  const __CLC_GENTYPE piby4_tail = 3.06161699786838240164e-17;
-
-  // In order to maintain relative precision transform using the identity:
-  // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
-  // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
-
-  __CLC_LONGN ca = x > 0.68;
-  __CLC_LONGN cb = x < -0.68;
-  __CLC_GENTYPE transform = ca ? 1.0 : 0.0;
-  transform = cb ? -1.0 : transform;
-
-  __CLC_GENTYPE tx = __clc_fma(-transform, x, piby4_lead) +
-                     __clc_fma(-transform, xx, piby4_tail);
-  __CLC_LONGN c = ca | cb;
-  x = c ? tx : x;
-  xx = c ? 0.0 : xx;
-
-  // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
-  __CLC_GENTYPE t1 = x;
-  __CLC_GENTYPE r = __clc_fma(2.0, x * xx, x * x);
-
-  __CLC_GENTYPE a = __clc_fma(r,
-                              __clc_fma(r, 0.224044448537022097264602535574e-3,
-                                        -0.229345080057565662883358588111e-1),
-                              0.372379159759792203640806338901e0);
-
-  __CLC_GENTYPE b =
-      __clc_fma(r,
-                __clc_fma(r,
-                          __clc_fma(r, -0.232371494088563558304549252913e-3,
-                                    0.260656620398645407524064091208e-1),
-                          -0.515658515729031149329237816945e0),
-                0.111713747927937668539901657944e1);
-
-  __CLC_GENTYPE t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
-
-  __CLC_GENTYPE tp = t1 + t2;
-
-  // Compute -1.0/(t1 + t2) accurately
-  __CLC_GENTYPE z1 =
-      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
-  __CLC_GENTYPE z2 = t2 - (z1 - t1);
-  __CLC_GENTYPE trec = -MATH_RECIP(tp);
-  __CLC_GENTYPE trec_top =
-      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
-
-  __CLC_GENTYPE tpr = __clc_fma(
-      __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
-
-  __CLC_GENTYPE tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
-  __CLC_GENTYPE tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
-
-  *leadval = c ? tpt : tp;
-  *tailval = c ? tptr : tpr;
-}
-
-#endif

diff  --git a/libclc/clc/lib/generic/math/clc_cos.cl b/libclc/clc/lib/generic/math/clc_cos.cl
index e7e4d6ad39ede..5529ec411a195 100644
--- a/libclc/clc/lib/generic/math/clc_cos.cl
+++ b/libclc/clc/lib/generic/math/clc_cos.cl
@@ -10,7 +10,6 @@
 #include <clc/float/definitions.h>
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/math.h>
 #include <clc/relational/clc_isinf.h>
 #include <clc/relational/clc_isnan.h>

diff  --git a/libclc/clc/lib/generic/math/clc_cospi.cl b/libclc/clc/lib/generic/math/clc_cospi.cl
index 07e1b49cc9e02..6a10171c723d0 100644
--- a/libclc/clc/lib/generic/math/clc_cospi.cl
+++ b/libclc/clc/lib/generic/math/clc_cospi.cl
@@ -11,7 +11,6 @@
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/math.h>
 
 #define __CLC_BODY <clc_cospi.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_sin.cl b/libclc/clc/lib/generic/math/clc_sin.cl
index 741383f94c456..99338c95eb60c 100644
--- a/libclc/clc/lib/generic/math/clc_sin.cl
+++ b/libclc/clc/lib/generic/math/clc_sin.cl
@@ -11,7 +11,6 @@
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>

diff  --git a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
index 9a46170a3db38..2a71b5626ccc5 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers.inc
@@ -74,6 +74,43 @@ _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_cosf_piby4(__CLC_FLOATN x,
   return ret;
 }
 
+// Evaluate single-precision sin and cos of a value in interval [-pi/4, pi/4]
+_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_FLOATN x,
+                                               private __CLC_FLOATN *sinval,
+                                               private __CLC_FLOATN *cosval) {
+  // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+  // = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+  // = x * f(w)
+  // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+  // We use a minimax approximation of (f(w) - 1) / w
+  // because this produces an expansion in even powers of x.
+
+  // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+  // = f(w)
+  // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+  // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+  // because this produces an expansion in even powers of x.
+
+  const __CLC_FLOATN sc1 = -0.166666666638608441788607926e0F;
+  const __CLC_FLOATN sc2 = 0.833333187633086262120839299e-2F;
+  const __CLC_FLOATN sc3 = -0.198400874359527693921333720e-3F;
+  const __CLC_FLOATN sc4 = 0.272500015145584081596826911e-5F;
+
+  const __CLC_FLOATN cc1 = 0.41666666664325175238031e-1F;
+  const __CLC_FLOATN cc2 = -0.13888887673175665567647e-2F;
+  const __CLC_FLOATN cc3 = 0.24800600878112441958053e-4F;
+  const __CLC_FLOATN cc4 = -0.27301013343179832472841e-6F;
+
+  __CLC_FLOATN x2 = x * x;
+
+  *sinval = __clc_mad(
+      x * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, sc4, sc3), sc2), sc1),
+      x);
+  *cosval = __clc_mad(
+      x2 * x2, __clc_mad(x2, __clc_mad(x2, __clc_mad(x2, cc4, cc3), cc2), cc1),
+      __clc_mad(x2, -0.5f, 1.0f));
+}
+
 _CLC_DEF _CLC_OVERLOAD __CLC_FLOATN __clc_tanf_piby4(__CLC_FLOATN x,
                                                      __CLC_INTN regn) {
   // Core Remez [1,2] approximation to tan(x) on the interval [0,pi/4].

diff  --git a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
index 8fae90c9cc5a5..e029c6dcfaa02 100644
--- a/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
+++ b/libclc/clc/lib/generic/math/clc_sincos_helpers_fp64.inc
@@ -6,6 +6,129 @@
 //
 //===----------------------------------------------------------------------===//
 
+_CLC_DEF _CLC_OVERLOAD void __clc_sincos_piby4(__CLC_DOUBLEN x,
+                                               __CLC_DOUBLEN xx,
+                                               private __CLC_DOUBLEN *sinval,
+                                               private __CLC_DOUBLEN *cosval) {
+  // Taylor series for sin(x) is x - x^3/3! + x^5/5! - x^7/7! ...
+  //                      = x * (1 - x^2/3! + x^4/5! - x^6/7! ...
+  //                      = x * f(w)
+  // where w = x*x and f(w) = (1 - w/3! + w^2/5! - w^3/7! ...
+  // We use a minimax approximation of (f(w) - 1) / w
+  // because this produces an expansion in even powers of x.
+  // If xx (the tail of x) is non-zero, we add a correction
+  // term g(x,xx) = (1-x*x/2)*xx to the result, where g(x,xx)
+  // is an approximation to cos(x)*sin(xx) valid because
+  // xx is tiny relative to x.
+
+  // Taylor series for cos(x) is 1 - x^2/2! + x^4/4! - x^6/6! ...
+  //                      = f(w)
+  // where w = x*x and f(w) = (1 - w/2! + w^2/4! - w^3/6! ...
+  // We use a minimax approximation of (f(w) - 1 + w/2) / (w*w)
+  // because this produces an expansion in even powers of x.
+  // If xx (the tail of x) is non-zero, we subtract a correction
+  // term g(x,xx) = x*xx to the result, where g(x,xx)
+  // is an approximation to sin(x)*sin(xx) valid because
+  // xx is tiny relative to x.
+
+  const __CLC_DOUBLEN sc1 = -0.166666666666666646259241729;
+  const __CLC_DOUBLEN sc2 = 0.833333333333095043065222816e-2;
+  const __CLC_DOUBLEN sc3 = -0.19841269836761125688538679e-3;
+  const __CLC_DOUBLEN sc4 = 0.275573161037288022676895908448e-5;
+  const __CLC_DOUBLEN sc5 = -0.25051132068021699772257377197e-7;
+  const __CLC_DOUBLEN sc6 = 0.159181443044859136852668200e-9;
+
+  const __CLC_DOUBLEN cc1 = 0.41666666666666665390037e-1;
+  const __CLC_DOUBLEN cc2 = -0.13888888888887398280412e-2;
+  const __CLC_DOUBLEN cc3 = 0.248015872987670414957399e-4;
+  const __CLC_DOUBLEN cc4 = -0.275573172723441909470836e-6;
+  const __CLC_DOUBLEN cc5 = 0.208761463822329611076335e-8;
+  const __CLC_DOUBLEN cc6 = -0.113826398067944859590880e-10;
+
+  __CLC_DOUBLEN x2 = x * x;
+  __CLC_DOUBLEN x3 = x2 * x;
+  __CLC_DOUBLEN r = (__CLC_DOUBLEN)0.5 * x2;
+  __CLC_DOUBLEN t = (__CLC_DOUBLEN)1.0 - r;
+
+  __CLC_DOUBLEN sp = __clc_fma(
+      __clc_fma(__clc_fma(__clc_fma(sc6, x2, sc5), x2, sc4), x2, sc3), x2, sc2);
+
+  __CLC_DOUBLEN cp =
+      t +
+      __clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(__clc_fma(cc6, x2, cc5),
+                                                        x2, cc4),
+                                              x2, cc3),
+                                    x2, cc2),
+                          x2, cc1),
+                x2 * x2, __clc_fma(x, xx, (1.0 - t) - r));
+
+  *sinval =
+      x - __clc_fma(-x3, sc1, __clc_fma(__clc_fma(-x3, sp, 0.5 * xx), x2, -xx));
+  *cosval = cp;
+}
+
+_CLC_DEF _CLC_OVERLOAD void __clc_tan_piby4(__CLC_DOUBLEN x, __CLC_DOUBLEN xx,
+                                            private __CLC_DOUBLEN *leadval,
+                                            private __CLC_DOUBLEN *tailval) {
+  // 0x3fe921fb54442d18
+  const __CLC_DOUBLEN piby4_lead = 7.85398163397448278999e-01;
+  // 0x3c81a62633145c06
+  const __CLC_DOUBLEN piby4_tail = 3.06161699786838240164e-17;
+
+  // In order to maintain relative precision transform using the identity:
+  // tan(pi/4-x) = (1-tan(x))/(1+tan(x)) for arguments close to pi/4.
+  // Similarly use tan(x-pi/4) = (tan(x)-1)/(tan(x)+1) close to -pi/4.
+
+  __CLC_LONGN ca = x > 0.68;
+  __CLC_LONGN cb = x < -0.68;
+  __CLC_DOUBLEN transform = ca ? 1.0 : 0.0;
+  transform = cb ? -1.0 : transform;
+
+  __CLC_DOUBLEN tx = __clc_fma(-transform, x, piby4_lead) +
+                     __clc_fma(-transform, xx, piby4_tail);
+  __CLC_LONGN c = ca | cb;
+  x = c ? tx : x;
+  xx = c ? 0.0 : xx;
+
+  // Core Remez [2,3] approximation to tan(x+xx) on the interval [0,0.68].
+  __CLC_DOUBLEN t1 = x;
+  __CLC_DOUBLEN r = __clc_fma(2.0, x * xx, x * x);
+
+  __CLC_DOUBLEN a = __clc_fma(r,
+                              __clc_fma(r, 0.224044448537022097264602535574e-3,
+                                        -0.229345080057565662883358588111e-1),
+                              0.372379159759792203640806338901e0);
+
+  __CLC_DOUBLEN b =
+      __clc_fma(r,
+                __clc_fma(r,
+                          __clc_fma(r, -0.232371494088563558304549252913e-3,
+                                    0.260656620398645407524064091208e-1),
+                          -0.515658515729031149329237816945e0),
+                0.111713747927937668539901657944e1);
+
+  __CLC_DOUBLEN t2 = __clc_fma(MATH_DIVIDE(a, b), x * r, xx);
+
+  __CLC_DOUBLEN tp = t1 + t2;
+
+  // Compute -1.0/(t1 + t2) accurately
+  __CLC_DOUBLEN z1 =
+      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(tp) & 0xffffffff00000000L);
+  __CLC_DOUBLEN z2 = t2 - (z1 - t1);
+  __CLC_DOUBLEN trec = -MATH_RECIP(tp);
+  __CLC_DOUBLEN trec_top =
+      __CLC_AS_GENTYPE(__CLC_AS_ULONGN(trec) & 0xffffffff00000000L);
+
+  __CLC_DOUBLEN tpr = __clc_fma(
+      __clc_fma(trec_top, z2, __clc_fma(trec_top, z1, 1.0)), trec, trec_top);
+
+  __CLC_DOUBLEN tpt = transform * (1.0 - MATH_DIVIDE(2.0 * tp, 1.0 + tp));
+  __CLC_DOUBLEN tptr = transform * (MATH_DIVIDE(2.0 * tp, tp - 1.0) - 1.0);
+
+  *leadval = c ? tpt : tp;
+  *tailval = c ? tptr : tpr;
+}
+
 // Reduction for medium sized arguments
 _CLC_DEF _CLC_OVERLOAD void
 __clc_remainder_piby2_medium(__CLC_DOUBLEN x, private __CLC_DOUBLEN *r,

diff  --git a/libclc/clc/lib/generic/math/clc_sinpi.cl b/libclc/clc/lib/generic/math/clc_sinpi.cl
index 6cff247707845..bb5de09f03c08 100644
--- a/libclc/clc/lib/generic/math/clc_sinpi.cl
+++ b/libclc/clc/lib/generic/math/clc_sinpi.cl
@@ -11,7 +11,6 @@
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/math.h>
 
 #define __CLC_BODY <clc_sinpi.inc>

diff  --git a/libclc/clc/lib/generic/math/clc_tan.cl b/libclc/clc/lib/generic/math/clc_tan.cl
index adf42c43d0484..7e68216ca43aa 100644
--- a/libclc/clc/lib/generic/math/clc_tan.cl
+++ b/libclc/clc/lib/generic/math/clc_tan.cl
@@ -11,7 +11,6 @@
 #include <clc/internal/clc.h>
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/math.h>
 #include <clc/math/tables.h>
 #include <clc/relational/clc_isinf.h>

diff  --git a/libclc/clc/lib/generic/math/clc_tanpi.cl b/libclc/clc/lib/generic/math/clc_tanpi.cl
index f1265892d107b..099457c186314 100644
--- a/libclc/clc/lib/generic/math/clc_tanpi.cl
+++ b/libclc/clc/lib/generic/math/clc_tanpi.cl
@@ -12,7 +12,6 @@
 #include <clc/math/clc_fabs.h>
 #include <clc/math/clc_native_recip.h>
 #include <clc/math/clc_sincos_helpers.h>
-#include <clc/math/clc_sincos_piby4.h>
 #include <clc/math/math.h>
 
 #define __CLC_BODY <clc_tanpi.inc>