[Libclc-dev] [PATCH 06/15] Implement atan2 for doubles
Tom Stellard
thomas.stellard at amd.com
Tue Apr 7 11:05:37 PDT 2015
This implementation was ported from the AMD builtin library
and has been tested with piglit, OpenCV, and the ocl conformance tests.
---
generic/lib/math/atan2.cl | 160 +++++++++++++++++++++++++++-
generic/lib/math/tables.cl | 253 +++++++++++++++++++++++++++++++++++++++++++++
generic/lib/math/tables.h | 1 +
3 files changed, 412 insertions(+), 2 deletions(-)
diff --git a/generic/lib/math/atan2.cl b/generic/lib/math/atan2.cl
index 9e5fb58..a2f104f 100644
--- a/generic/lib/math/atan2.cl
+++ b/generic/lib/math/atan2.cl
@@ -20,11 +20,12 @@
* THE SOFTWARE.
*/
+#include <clc/clc.h>
+
#include "math.h"
+#include "tables.h"
#include "../clcmacro.h"
-#include <clc/clc.h>
-
_CLC_OVERLOAD _CLC_DEF float atan2(float y, float x)
{
const float pi = 0x1.921fb6p+1f;
@@ -79,3 +80,158 @@ _CLC_OVERLOAD _CLC_DEF float atan2(float y, float x)
}
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, atan2, float, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double atan2(double y, double x)
+{ // Branch-free double-precision atan2(y, x): every path is evaluated and the result is picked with ?: selects.
+ const double pi = 3.1415926535897932e+00; /* 0x400921fb54442d18 */
+ const double piby2 = 1.5707963267948966e+00; /* 0x3ff921fb54442d18 */
+ const double piby4 = 7.8539816339744831e-01; /* 0x3fe921fb54442d18 */
+ const double three_piby4 = 2.3561944901923449e+00; /* 0x4002d97c7f3321d2 */
+ const double pi_head = 3.1415926218032836e+00; /* 0x400921fb50000000 */
+ const double pi_tail = 3.1786509547056392e-08; /* 0x3e6110b4611a6263 */
+ const double piby2_head = 1.5707963267948965e+00; /* 0x3ff921fb54442d18 */
+ const double piby2_tail = 6.1232339957367660e-17; /* 0x3c91a62633145c07 */
+ // x2/y2 keep the caller's arguments for the special-case tests at the end; x and y may be rescaled below.
+ double x2 = x;
+ int xneg = as_int2(x).hi < 0; // sign bit of x, read from the .hi word
+ int xexp = (as_int2(x).hi >> 20) & 0x7ff; // 11-bit biased exponent field of x
+
+ double y2 = y;
+ int yneg = as_int2(y).hi < 0; // sign bit of y, read from the .hi word
+ int yexp = (as_int2(y).hi >> 20) & 0x7ff; // 11-bit biased exponent field of y
+
+ int cond2 = (xexp < 1021) & (yexp < 1021); // both exponent fields below 1021, i.e. both magnitudes below 2^-2
+ int diffexp = yexp - xexp; // exponent difference, used for the dominance tests near the end
+
+ // Scale up both x and y if they are both below 1/4 (the scaled values are computed unconditionally and selected by cond2)
+ double x1 = ldexp(x, 1024);
+ int xexp1 = (as_int2(x1).hi >> 20) & 0x7ff;
+ double y1 = ldexp(y, 1024);
+ int yexp1 = (as_int2(y1).hi >> 20) & 0x7ff;
+ int diffexp1 = yexp1 - xexp1;
+
+ diffexp = cond2 ? diffexp1 : diffexp;
+ x = cond2 ? x1 : x;
+ y = cond2 ? y1 : y;
+
+ // General case: take absolute values of arguments
+ double u = fabs(x);
+ double v = fabs(y);
+
+ // Swap u and v if necessary to obtain 0 < v < u. Compute v/u.
+ int swap_vu = u < v;
+ double uu = u;
+ u = swap_vu ? v : u;
+ v = swap_vu ? uu : v;
+
+ double vbyu = v / u; // v <= u after the swap, so this is at most 1 for ordinary inputs
+ double q1, q2; // leading / trailing parts of the angle under construction
+
+ // General values of v/u. Use a look-up table and series expansion.
+
+ {
+ double val = vbyu > 0.0625 ? vbyu : 0.063; // floor the ratio so that index below is at least 16 (also replaces NaN)
+ int index = convert_int(fma(256.0, val, 0.5)); // j nearest to 256 * val
+ double2 tv = USE_TABLE(atan_jby256_tbl, index - 16); // entry for j = index; the table starts at j = 16
+ q1 = tv.s0; // leading part of atan(j/256)
+ q2 = tv.s1; // trailing part of atan(j/256)
+ double c = (double)index * 0x1.0p-8; // c = index / 256
+
+ // We're going to scale u and v by 2^(-u_exponent) to bring them close to 1
+ // u_exponent could be EMAX so we have to do it in 2 steps
+ int m = -((int)(as_ulong(u) >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64); // negated unbiased exponent of u, assuming the usual 52/1023 for these macros - TODO confirm (defined outside this patch)
+ // The AMD original called __amdil_ldexp_f64(u, m) and __amdil_ldexp_f64(v, m) here; this port uses ldexp.
+ // NOTE(review): the "2 steps" remark above does not match the single ldexp call per value below - confirm it is stale.
+ double um = ldexp(u, m);
+ double vm = ldexp(v, m);
+
+ // 26 leading bits of u
+ double u1 = as_double(as_ulong(um) & 0xfffffffff8000000UL);
+ double u2 = um - u1; // remainder, so that u1 + u2 equals um
+
+ double r = MATH_DIVIDE(fma(-c, u2, fma(-c, u1, vm)), fma(c, vm, um)); // (vm - c*um) / (um + c*vm), i.e. tan(atan(v/u) - atan(c))
+
+ // Polynomial approximation to atan(r)
+ double s = r * r;
+ q2 = q2 + fma((s * fma(-s, 0.19999918038989143496, 0.33333333333224095522)), -r, r); // tail += r - r^3/3 + r^5/5 (coefficients slightly adjusted)
+ }
+
+ // Tiny-ratio path: the angle is taken to be vbyu itself, with a zero leading part.
+ double q3, q4;
+ {
+ q3 = 0.0;
+ q4 = vbyu;
+ }
+ // Small-ratio path: odd polynomial in vbyu plus a correction for the rounding error of the division.
+ double q5, q6;
+ {
+ double u1 = as_double(as_ulong(u) & 0xffffffff00000000UL); // upper 32 bits of u
+ double u2 = u - u1; // low-order remainder of u
+ double vu1 = as_double(as_ulong(vbyu) & 0xffffffff00000000UL); // upper 32 bits of vbyu
+ double vu2 = vbyu - vu1; // low-order remainder of vbyu
+
+ q5 = 0.0; // no leading part on this path
+ double s = vbyu * vbyu;
+ q6 = vbyu + fma(-vbyu * s,
+ fma(-s,
+ fma(-s,
+ fma(-s,
+ fma(-s, 0.90029810285449784439E-01,
+ 0.11110736283514525407),
+ 0.14285713561807169030),
+ 0.19999999999393223405),
+ 0.33333333333333170500),
+ MATH_DIVIDE(fma(-u, vu2, fma(-u2, vu1, fma(-u1, vu1, v))), u)); // (v - u*vbyu) / u: correction for the rounding error in vbyu
+ }
+
+ // Below the tiny threshold keep (q3, q4) = (0, vbyu); otherwise take the polynomial result (q5, q6).
+ q3 = vbyu < 0x1.d12ed0af1a27fp-27 ? q3 : q5;
+ q4 = vbyu < 0x1.d12ed0af1a27fp-27 ? q4 : q6;
+ // Above 0.0625 keep the table result; otherwise take whatever was chosen just above.
+ q1 = vbyu > 0.0625 ? q1 : q3;
+ q2 = vbyu > 0.0625 ? q2 : q4;
+
+ // Tidy-up according to which quadrant the arguments lie in
+ double res1, res2, res3, res4;
+ q1 = swap_vu ? piby2_head - q1 : q1; // swapped operands: angle -> pi/2 - angle, applied to lead and tail separately
+ q2 = swap_vu ? piby2_tail - q2 : q2;
+ q1 = xneg ? pi_head - q1 : q1; // negative x: angle -> pi - angle, again on lead and tail separately
+ q2 = xneg ? pi_tail - q2 : q2;
+ q1 = q1 + q2; // combine lead and tail
+ res4 = yneg ? -q1 : q1; // negative y: negate the angle
+ // Both arguments infinite: +-3pi/4 for negative x, +-pi/4 otherwise, signed by y.
+ res1 = yneg ? -three_piby4 : three_piby4;
+ res2 = yneg ? -piby4 : piby4;
+ res3 = xneg ? res1 : res2;
+
+ res3 = isinf(x2) & isinf(y2) ? res3 : res4;
+ res1 = yneg ? -pi : pi; // +-pi, signed by y
+
+ // abs(x)/abs(y) > 2^56 and x < 0
+ res3 = (diffexp < -56 && xneg) ? res1 : res3;
+
+ res4 = MATH_DIVIDE(y, x); // uses the (possibly rescaled) x and y
+ // x positive and dominant over y by a factor of 2^28
+ res3 = diffexp < -28 & xneg == 0 ? res4 : res3;
+
+ // abs(y)/abs(x) > 2^56
+ res4 = yneg ? -piby2 : piby2; // atan(y/x) is insignificant compared to piby2
+ res3 = diffexp > 56 ? res4 : res3;
+
+ res3 = x2 == 0.0 ? res4 : res3; // Zero x gives +- pi/2 depending on sign of y
+ res4 = xneg ? res1 : y2; // +-pi for negative x, otherwise y itself (a signed zero when selected below)
+
+ res3 = y2 == 0.0 ? res4 : res3; // Zero y gives +-0 for positive x and +-pi for negative x
+ res3 = isnan(y2) ? y2 : res3; // NaN arguments propagate and override everything above
+ res3 = isnan(x2) ? x2 : res3;
+
+ return res3;
+}
+
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, atan2, double, double);
+
+#endif
diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
index b5345a2..f22a6af 100644
--- a/generic/lib/math/tables.cl
+++ b/generic/lib/math/tables.cl
@@ -363,4 +363,257 @@ DECLARE_TABLE(double2, LN_TBL, 65) = {
TABLE_FUNCTION(double2, LN_TBL, ln_tbl);
+
+// ATAN_JBY256_TBL holds precomputed values of atan(j/256) for
+// j = 16, 17, ..., 256, one double2 per j (241 entries in total).
+// The first component is the leading part, carrying the first
+// 21 bits of precision; the second component is the trailing
+// part, carrying a further 53 bits of precision.
+
+DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
+ (double2)(0x1.ff55b00000000p-5, 0x1.6e59fbd38db2cp-26),
+ (double2)(0x1.0f99e00000000p-4, 0x1.4e3aa54dedf96p-25),
+ (double2)(0x1.1f86d00000000p-4, 0x1.7e105ab1bda88p-25),
+ (double2)(0x1.2f71900000000p-4, 0x1.8c5254d013fd0p-27),
+ (double2)(0x1.3f59f00000000p-4, 0x1.cf8ab3ad62670p-29),
+ (double2)(0x1.4f3fd00000000p-4, 0x1.9dca4bec80468p-26),
+ (double2)(0x1.5f23200000000p-4, 0x1.3f4b5ec98a8dap-26),
+ (double2)(0x1.6f03b00000000p-4, 0x1.b9d49619d81fep-25),
+ (double2)(0x1.7ee1800000000p-4, 0x1.3017887460934p-27),
+ (double2)(0x1.8ebc500000000p-4, 0x1.11e3eca0b9944p-26),
+ (double2)(0x1.9e94100000000p-4, 0x1.4f3f73c5a332ep-26),
+ (double2)(0x1.ae68a00000000p-4, 0x1.c71c8ae0e00a6p-26),
+ (double2)(0x1.be39e00000000p-4, 0x1.7cde0f86fbdc7p-25),
+ (double2)(0x1.ce07c00000000p-4, 0x1.70f328c889c72p-26),
+ (double2)(0x1.ddd2100000000p-4, 0x1.c07ae9b994efep-26),
+ (double2)(0x1.ed98c00000000p-4, 0x1.0c8021d7b1698p-27),
+ (double2)(0x1.fd5ba00000000p-4, 0x1.35585edb8cb22p-25),
+ (double2)(0x1.068d500000000p-3, 0x1.0842567b30e96p-24),
+ (double2)(0x1.0e6ad00000000p-3, 0x1.99e811031472ep-24),
+ (double2)(0x1.1646500000000p-3, 0x1.041821416bceep-25),
+ (double2)(0x1.1e1fa00000000p-3, 0x1.f6086e4dc96f4p-24),
+ (double2)(0x1.25f6e00000000p-3, 0x1.71a535c5f1b58p-27),
+ (double2)(0x1.2dcbd00000000p-3, 0x1.65f743fe63ca1p-24),
+ (double2)(0x1.359e800000000p-3, 0x1.dbd733472d014p-24),
+ (double2)(0x1.3d6ee00000000p-3, 0x1.d18cc4d8b0d1dp-24),
+ (double2)(0x1.453ce00000000p-3, 0x1.8c12553c8fb29p-24),
+ (double2)(0x1.4d08700000000p-3, 0x1.53b49e2e8f991p-24),
+ (double2)(0x1.54d1800000000p-3, 0x1.7422ae148c141p-24),
+ (double2)(0x1.5c98100000000p-3, 0x1.e3ec269df56a8p-27),
+ (double2)(0x1.645bf00000000p-3, 0x1.ff6754e7e0ac9p-24),
+ (double2)(0x1.6c1d400000000p-3, 0x1.131267b1b5aadp-24),
+ (double2)(0x1.73dbd00000000p-3, 0x1.d14fa403a94bcp-24),
+ (double2)(0x1.7b97b00000000p-3, 0x1.2f396c089a3d8p-25),
+ (double2)(0x1.8350b00000000p-3, 0x1.c731d78fa95bbp-24),
+ (double2)(0x1.8b06e00000000p-3, 0x1.c50f385177399p-24),
+ (double2)(0x1.92ba300000000p-3, 0x1.f41409c6f2c20p-25),
+ (double2)(0x1.9a6a800000000p-3, 0x1.d2d90c4c39ec0p-24),
+ (double2)(0x1.a217e00000000p-3, 0x1.80420696f2106p-25),
+ (double2)(0x1.a9c2300000000p-3, 0x1.b40327943a2e8p-27),
+ (double2)(0x1.b169600000000p-3, 0x1.5d35e02f3d2a2p-25),
+ (double2)(0x1.b90d700000000p-3, 0x1.4a498288117b0p-25),
+ (double2)(0x1.c0ae500000000p-3, 0x1.35da119afb324p-25),
+ (double2)(0x1.c84bf00000000p-3, 0x1.14e85cdb9a908p-24),
+ (double2)(0x1.cfe6500000000p-3, 0x1.38754e5547b9ap-25),
+ (double2)(0x1.d77d500000000p-3, 0x1.be40ae6ce3246p-24),
+ (double2)(0x1.df11000000000p-3, 0x1.0c993b3bea7e7p-24),
+ (double2)(0x1.e6a1400000000p-3, 0x1.1d2dd89ac3359p-24),
+ (double2)(0x1.ee2e100000000p-3, 0x1.1476603332c46p-25),
+ (double2)(0x1.f5b7500000000p-3, 0x1.f25901bac55b7p-24),
+ (double2)(0x1.fd3d100000000p-3, 0x1.f881b7c826e28p-24),
+ (double2)(0x1.025fa00000000p-2, 0x1.441996d698d20p-24),
+ (double2)(0x1.061ee00000000p-2, 0x1.407ac521ea089p-23),
+ (double2)(0x1.09dc500000000p-2, 0x1.2fb0c6c4b1723p-23),
+ (double2)(0x1.0d97e00000000p-2, 0x1.ca135966a3e18p-23),
+ (double2)(0x1.1151a00000000p-2, 0x1.b1218e4d646e4p-25),
+ (double2)(0x1.1509700000000p-2, 0x1.d4e72a350d288p-25),
+ (double2)(0x1.18bf500000000p-2, 0x1.4617e2f04c329p-23),
+ (double2)(0x1.1c73500000000p-2, 0x1.096ec41e82650p-25),
+ (double2)(0x1.2025500000000p-2, 0x1.9f91f25773e6ep-24),
+ (double2)(0x1.23d5600000000p-2, 0x1.59c0820f1d674p-25),
+ (double2)(0x1.2783700000000p-2, 0x1.02bf7a2df1064p-25),
+ (double2)(0x1.2b2f700000000p-2, 0x1.fb36bfc40508fp-23),
+ (double2)(0x1.2ed9800000000p-2, 0x1.ea08f3f8dc892p-24),
+ (double2)(0x1.3281800000000p-2, 0x1.3ed6254656a0ep-24),
+ (double2)(0x1.3627700000000p-2, 0x1.b83f5e5e69c58p-25),
+ (double2)(0x1.39cb400000000p-2, 0x1.d6ec2af768592p-23),
+ (double2)(0x1.3d6d100000000p-2, 0x1.493889a226f94p-25),
+ (double2)(0x1.410cb00000000p-2, 0x1.5ad8fa65279bap-23),
+ (double2)(0x1.44aa400000000p-2, 0x1.b615784d45434p-25),
+ (double2)(0x1.4845a00000000p-2, 0x1.09a184368f145p-23),
+ (double2)(0x1.4bdee00000000p-2, 0x1.61a2439b0d91cp-24),
+ (double2)(0x1.4f75f00000000p-2, 0x1.ce1a65e39a978p-24),
+ (double2)(0x1.530ad00000000p-2, 0x1.32a39a93b6a66p-23),
+ (double2)(0x1.569d800000000p-2, 0x1.1c3699af804e7p-23),
+ (double2)(0x1.5a2e000000000p-2, 0x1.75e0f4e44ede8p-26),
+ (double2)(0x1.5dbc300000000p-2, 0x1.f77ced1a7a83bp-23),
+ (double2)(0x1.6148400000000p-2, 0x1.84e7f0cb1b500p-29),
+ (double2)(0x1.64d1f00000000p-2, 0x1.ec6b838b02dfep-23),
+ (double2)(0x1.6859700000000p-2, 0x1.3ebf4dfbeda87p-23),
+ (double2)(0x1.6bdea00000000p-2, 0x1.9397aed9cb475p-23),
+ (double2)(0x1.6f61900000000p-2, 0x1.07937bc239c54p-24),
+ (double2)(0x1.72e2200000000p-2, 0x1.aa754553131b6p-23),
+ (double2)(0x1.7660700000000p-2, 0x1.4a05d407c45dcp-24),
+ (double2)(0x1.79dc600000000p-2, 0x1.132231a206dd0p-23),
+ (double2)(0x1.7d56000000000p-2, 0x1.2d8ecfdd69c88p-24),
+ (double2)(0x1.80cd400000000p-2, 0x1.a852c74218606p-24),
+ (double2)(0x1.8442200000000p-2, 0x1.71bf2baeebb50p-23),
+ (double2)(0x1.87b4b00000000p-2, 0x1.83d7db7491820p-27),
+ (double2)(0x1.8b24d00000000p-2, 0x1.ca50d92b6da14p-25),
+ (double2)(0x1.8e92900000000p-2, 0x1.6f5cde8530298p-26),
+ (double2)(0x1.91fde00000000p-2, 0x1.f343198910740p-24),
+ (double2)(0x1.9566d00000000p-2, 0x1.0e8d241ccd80ap-24),
+ (double2)(0x1.98cd500000000p-2, 0x1.1535ac619e6c8p-24),
+ (double2)(0x1.9c31600000000p-2, 0x1.7316041c36cd2p-24),
+ (double2)(0x1.9f93000000000p-2, 0x1.985a000637d8ep-24),
+ (double2)(0x1.a2f2300000000p-2, 0x1.f2f29858c0a68p-25),
+ (double2)(0x1.a64ee00000000p-2, 0x1.879847f96d909p-23),
+ (double2)(0x1.a9a9200000000p-2, 0x1.ab3d319e12e42p-23),
+ (double2)(0x1.ad00f00000000p-2, 0x1.5088162dfc4c2p-24),
+ (double2)(0x1.b056400000000p-2, 0x1.05749a1cd9d8cp-25),
+ (double2)(0x1.b3a9100000000p-2, 0x1.da65c6c6b8618p-26),
+ (double2)(0x1.b6f9600000000p-2, 0x1.739bf7df1ad64p-25),
+ (double2)(0x1.ba47300000000p-2, 0x1.bc31252aa3340p-25),
+ (double2)(0x1.bd92800000000p-2, 0x1.e528191ad3aa8p-26),
+ (double2)(0x1.c0db400000000p-2, 0x1.929d93df19f18p-23),
+ (double2)(0x1.c421900000000p-2, 0x1.ff11eb693a080p-26),
+ (double2)(0x1.c765500000000p-2, 0x1.55ae3f145a3a0p-27),
+ (double2)(0x1.caa6800000000p-2, 0x1.cbcd8c6c0ca82p-24),
+ (double2)(0x1.cde5300000000p-2, 0x1.0cb04d425d304p-24),
+ (double2)(0x1.d121500000000p-2, 0x1.9adfcab5be678p-24),
+ (double2)(0x1.d45ae00000000p-2, 0x1.93d90c5662508p-23),
+ (double2)(0x1.d791f00000000p-2, 0x1.68489bd35ff40p-24),
+ (double2)(0x1.dac6700000000p-2, 0x1.586ed3da2b7e0p-28),
+ (double2)(0x1.ddf8500000000p-2, 0x1.7604d2e850eeep-23),
+ (double2)(0x1.e127b00000000p-2, 0x1.ac1d12bfb53d8p-24),
+ (double2)(0x1.e454800000000p-2, 0x1.9b3d468274740p-28),
+ (double2)(0x1.e77eb00000000p-2, 0x1.fc5d68d10e53cp-24),
+ (double2)(0x1.eaa6500000000p-2, 0x1.8f9e51884becbp-23),
+ (double2)(0x1.edcb600000000p-2, 0x1.a87f0869c06d1p-23),
+ (double2)(0x1.f0ede00000000p-2, 0x1.31e7279f685fap-23),
+ (double2)(0x1.f40dd00000000p-2, 0x1.6a8282f9719b0p-27),
+ (double2)(0x1.f72b200000000p-2, 0x1.0d2724a8a44e0p-25),
+ (double2)(0x1.fa45d00000000p-2, 0x1.a60524b11ad4ep-23),
+ (double2)(0x1.fd5e000000000p-2, 0x1.75fdf832750f0p-26),
+ (double2)(0x1.0039c00000000p-1, 0x1.cf06902e4cd36p-23),
+ (double2)(0x1.01c3400000000p-1, 0x1.e82422d4f6d10p-25),
+ (double2)(0x1.034b700000000p-1, 0x1.24a091063e6c0p-26),
+ (double2)(0x1.04d2500000000p-1, 0x1.8a1a172dc6f38p-24),
+ (double2)(0x1.0657e00000000p-1, 0x1.29b6619f8a92dp-22),
+ (double2)(0x1.07dc300000000p-1, 0x1.9274d9c1b70c8p-24),
+ (double2)(0x1.095f300000000p-1, 0x1.0c34b1fbb7930p-26),
+ (double2)(0x1.0ae0e00000000p-1, 0x1.639866c20eb50p-25),
+ (double2)(0x1.0c61400000000p-1, 0x1.6d6d0f6832e9ep-23),
+ (double2)(0x1.0de0500000000p-1, 0x1.af54def99f25ep-22),
+ (double2)(0x1.0f5e200000000p-1, 0x1.16cfc52a00262p-22),
+ (double2)(0x1.10daa00000000p-1, 0x1.dcc1e83569c32p-23),
+ (double2)(0x1.1255d00000000p-1, 0x1.37f7a551ed425p-22),
+ (double2)(0x1.13cfb00000000p-1, 0x1.f6360adc98887p-22),
+ (double2)(0x1.1548500000000p-1, 0x1.2c6ec8d35a2c1p-22),
+ (double2)(0x1.16bfa00000000p-1, 0x1.bd44df84cb036p-23),
+ (double2)(0x1.1835a00000000p-1, 0x1.117cf826e310ep-22),
+ (double2)(0x1.19aa500000000p-1, 0x1.ca533f332cfc9p-22),
+ (double2)(0x1.1b1dc00000000p-1, 0x1.0f208509dbc2ep-22),
+ (double2)(0x1.1c8fe00000000p-1, 0x1.cd07d93c945dep-23),
+ (double2)(0x1.1e00b00000000p-1, 0x1.57bdfd67e6d72p-22),
+ (double2)(0x1.1f70400000000p-1, 0x1.aab89c516c658p-24),
+ (double2)(0x1.20de800000000p-1, 0x1.3e823b1a1b8a0p-25),
+ (double2)(0x1.224b700000000p-1, 0x1.307464a9d6d3cp-23),
+ (double2)(0x1.23b7100000000p-1, 0x1.c5993cd438843p-22),
+ (double2)(0x1.2521700000000p-1, 0x1.ba2fca02ab554p-22),
+ (double2)(0x1.268a900000000p-1, 0x1.01a5b6983a268p-23),
+ (double2)(0x1.27f2600000000p-1, 0x1.273d1b350efc8p-25),
+ (double2)(0x1.2958e00000000p-1, 0x1.64c238c37b0c6p-23),
+ (double2)(0x1.2abe200000000p-1, 0x1.aded07370a300p-25),
+ (double2)(0x1.2c22100000000p-1, 0x1.78091197eb47ep-23),
+ (double2)(0x1.2d84c00000000p-1, 0x1.4b0f245e0dabcp-24),
+ (double2)(0x1.2ee6200000000p-1, 0x1.080d9794e2eafp-22),
+ (double2)(0x1.3046400000000p-1, 0x1.d4ec242b60c76p-23),
+ (double2)(0x1.31a5200000000p-1, 0x1.221d2f940caa0p-27),
+ (double2)(0x1.3302b00000000p-1, 0x1.cdbc42b2bba5cp-24),
+ (double2)(0x1.345f000000000p-1, 0x1.cce37bb440840p-25),
+ (double2)(0x1.35ba000000000p-1, 0x1.6c1d999cf1dd0p-22),
+ (double2)(0x1.3713d00000000p-1, 0x1.bed8a07eb0870p-26),
+ (double2)(0x1.386c500000000p-1, 0x1.69ed88f490e3cp-24),
+ (double2)(0x1.39c3900000000p-1, 0x1.cd41719b73ef0p-25),
+ (double2)(0x1.3b19800000000p-1, 0x1.cbc4ac95b41b7p-22),
+ (double2)(0x1.3c6e400000000p-1, 0x1.238f1b890f5d7p-22),
+ (double2)(0x1.3dc1c00000000p-1, 0x1.50c4282259cc4p-24),
+ (double2)(0x1.3f13f00000000p-1, 0x1.713d2de87b3e2p-22),
+ (double2)(0x1.4064f00000000p-1, 0x1.1d5a7d2255276p-23),
+ (double2)(0x1.41b4a00000000p-1, 0x1.c0dfd48227ac1p-22),
+ (double2)(0x1.4303200000000p-1, 0x1.1c964dab76753p-22),
+ (double2)(0x1.4450600000000p-1, 0x1.6de56d5704496p-23),
+ (double2)(0x1.459c600000000p-1, 0x1.4aeb71fd19968p-23),
+ (double2)(0x1.46e7200000000p-1, 0x1.fbf91c57b1918p-23),
+ (double2)(0x1.4830a00000000p-1, 0x1.d6bef7fbe5d9ap-22),
+ (double2)(0x1.4978f00000000p-1, 0x1.464d3dc249066p-22),
+ (double2)(0x1.4ac0000000000p-1, 0x1.638e2ec4d9073p-22),
+ (double2)(0x1.4c05e00000000p-1, 0x1.16f4a7247ea7cp-24),
+ (double2)(0x1.4d4a800000000p-1, 0x1.1a0a740f1d440p-28),
+ (double2)(0x1.4e8de00000000p-1, 0x1.6edbb0114a33cp-23),
+ (double2)(0x1.4fd0100000000p-1, 0x1.dbee8bf1d513cp-24),
+ (double2)(0x1.5111000000000p-1, 0x1.5b8bdb0248f73p-22),
+ (double2)(0x1.5250c00000000p-1, 0x1.7de3d3f5eac64p-22),
+ (double2)(0x1.538f500000000p-1, 0x1.ee24187ae448ap-23),
+ (double2)(0x1.54cca00000000p-1, 0x1.e06c591ec5192p-22),
+ (double2)(0x1.5608d00000000p-1, 0x1.4e3861a332738p-24),
+ (double2)(0x1.5743c00000000p-1, 0x1.a9599dcc2bfe4p-24),
+ (double2)(0x1.587d800000000p-1, 0x1.f732fbad43468p-25),
+ (double2)(0x1.59b6000000000p-1, 0x1.eb9f573b727d9p-22),
+ (double2)(0x1.5aed600000000p-1, 0x1.8b212a2eb9897p-22),
+ (double2)(0x1.5c23900000000p-1, 0x1.384884c167215p-22),
+ (double2)(0x1.5d58900000000p-1, 0x1.0e2d363020051p-22),
+ (double2)(0x1.5e8c600000000p-1, 0x1.2820879fbd022p-22),
+ (double2)(0x1.5fbf000000000p-1, 0x1.a1ab9893e4b30p-22),
+ (double2)(0x1.60f0800000000p-1, 0x1.2d1b817a24478p-23),
+ (double2)(0x1.6220d00000000p-1, 0x1.15d7b8ded4878p-25),
+ (double2)(0x1.634ff00000000p-1, 0x1.8968f9db3a5e4p-24),
+ (double2)(0x1.647de00000000p-1, 0x1.71c4171fe135fp-22),
+ (double2)(0x1.65aab00000000p-1, 0x1.6d80f605d0d8cp-22),
+ (double2)(0x1.66d6600000000p-1, 0x1.c91f043691590p-24),
+ (double2)(0x1.6800e00000000p-1, 0x1.39f8a15fce2b2p-23),
+ (double2)(0x1.692a400000000p-1, 0x1.55beda9d94b80p-27),
+ (double2)(0x1.6a52700000000p-1, 0x1.b12c15d60949ap-23),
+ (double2)(0x1.6b79800000000p-1, 0x1.24167b312bfe3p-22),
+ (double2)(0x1.6c9f700000000p-1, 0x1.0ab8633070277p-22),
+ (double2)(0x1.6dc4400000000p-1, 0x1.54554ebbc80eep-23),
+ (double2)(0x1.6ee7f00000000p-1, 0x1.0204aef5a4bb8p-25),
+ (double2)(0x1.700a700000000p-1, 0x1.8af08c679cf2cp-22),
+ (double2)(0x1.712be00000000p-1, 0x1.0852a330ae6c8p-22),
+ (double2)(0x1.724c300000000p-1, 0x1.6d3eb9ec32916p-23),
+ (double2)(0x1.736b600000000p-1, 0x1.685cb7fcbbafep-23),
+ (double2)(0x1.7489700000000p-1, 0x1.1f751c1e0bd95p-22),
+ (double2)(0x1.75a6700000000p-1, 0x1.705b1b0f72560p-26),
+ (double2)(0x1.76c2400000000p-1, 0x1.b98d8d808ca92p-22),
+ (double2)(0x1.77dd100000000p-1, 0x1.2ea22c75cc980p-25),
+ (double2)(0x1.78f6b00000000p-1, 0x1.7aba62bca0350p-22),
+ (double2)(0x1.7a0f400000000p-1, 0x1.d73833442278cp-22),
+ (double2)(0x1.7b26c00000000p-1, 0x1.5a5ca1fb18bf9p-22),
+ (double2)(0x1.7c3d300000000p-1, 0x1.1a6092b6ecf28p-25),
+ (double2)(0x1.7d52800000000p-1, 0x1.44fd049aac104p-24),
+ (double2)(0x1.7e66c00000000p-1, 0x1.c114fd8df5180p-29),
+ (double2)(0x1.7f79e00000000p-1, 0x1.5972f130feae5p-22),
+ (double2)(0x1.808c000000000p-1, 0x1.ca034a55fe198p-24),
+ (double2)(0x1.819d000000000p-1, 0x1.6e2b149990227p-22),
+ (double2)(0x1.82ad000000000p-1, 0x1.b00000294592cp-24),
+ (double2)(0x1.83bbe00000000p-1, 0x1.8b9bdc442620ep-22),
+ (double2)(0x1.84c9c00000000p-1, 0x1.d94fdfabf3e4ep-23),
+ (double2)(0x1.85d6900000000p-1, 0x1.5db30b145ad9ap-23),
+ (double2)(0x1.86e2500000000p-1, 0x1.e3e1eb95022b0p-23),
+ (double2)(0x1.87ed000000000p-1, 0x1.d5b8b45442bd6p-22),
+ (double2)(0x1.88f6b00000000p-1, 0x1.7a046231ecd2ep-22),
+ (double2)(0x1.89ff500000000p-1, 0x1.feafe3ef55232p-22),
+ (double2)(0x1.8b06f00000000p-1, 0x1.839e7bfd78267p-22),
+ (double2)(0x1.8c0d900000000p-1, 0x1.45cf49d6fa900p-25),
+ (double2)(0x1.8d13200000000p-1, 0x1.be3132b27f380p-27),
+ (double2)(0x1.8e17a00000000p-1, 0x1.533980bb84f9fp-22),
+ (double2)(0x1.8f1b300000000p-1, 0x1.889e2ce3ba390p-26),
+ (double2)(0x1.901db00000000p-1, 0x1.f7778c3ad0cc8p-24),
+ (double2)(0x1.911f300000000p-1, 0x1.46660cec4eba2p-23),
+ (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
+};
+
+TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
+
#endif // cl_khr_fp64
diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
index 9255440..1e82901 100644
--- a/generic/lib/math/tables.h
+++ b/generic/lib/math/tables.h
@@ -46,5 +46,6 @@ TABLE_FUNCTION_DECL(float, log_inv_tbl);
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
TABLE_FUNCTION_DECL(double2, ln_tbl);
+TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
#endif // cl_khr_fp64
--
2.0.4
More information about the Libclc-dev
mailing list