[libclc] r219230 - Implement log1p builtin
Tom Stellard
thomas.stellard at amd.com
Tue Oct 7 13:22:42 PDT 2014
Author: tstellar
Date: Tue Oct 7 15:22:42 2014
New Revision: 219230
URL: http://llvm.org/viewvc/llvm-project?rev=219230&view=rev
Log:
Implement log1p builtin
Added:
libclc/trunk/generic/include/clc/math/log1p.h
libclc/trunk/generic/include/clc/math/log1p.inc
libclc/trunk/generic/lib/math/log1p.cl
libclc/trunk/generic/lib/math/tables.cl
libclc/trunk/generic/lib/math/tables.h
- copied, changed from r219087, libclc/trunk/generic/lib/math/math.h
Modified:
libclc/trunk/generic/include/clc/clc.h
libclc/trunk/generic/lib/SOURCES
libclc/trunk/generic/lib/math/math.h
Modified: libclc/trunk/generic/include/clc/clc.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/clc.h?rev=219230&r1=219229&r2=219230&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/clc.h (original)
+++ libclc/trunk/generic/include/clc/clc.h Tue Oct 7 15:22:42 2014
@@ -50,6 +50,7 @@
#include <clc/math/fmod.h>
#include <clc/math/hypot.h>
#include <clc/math/log.h>
+#include <clc/math/log1p.h>
#include <clc/math/log2.h>
#include <clc/math/mad.h>
#include <clc/math/mix.h>
Added: libclc/trunk/generic/include/clc/math/log1p.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/math/log1p.h?rev=219230&view=auto
==============================================================================
--- libclc/trunk/generic/include/clc/math/log1p.h (added)
+++ libclc/trunk/generic/include/clc/math/log1p.h Tue Oct 7 15:22:42 2014
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#define __CLC_BODY <clc/math/log1p.inc> /* header holding the log1p prototype; presumably consumed by gentype.inc below - TODO confirm */
+#include <clc/math/gentype.inc> /* not shown here; presumably includes __CLC_BODY once per supported __CLC_GENTYPE - TODO confirm */
Added: libclc/trunk/generic/include/clc/math/log1p.inc
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/math/log1p.inc?rev=219230&view=auto
==============================================================================
--- libclc/trunk/generic/include/clc/math/log1p.inc (added)
+++ libclc/trunk/generic/include/clc/math/log1p.inc Tue Oct 7 15:22:42 2014
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log1p(__CLC_GENTYPE a); /* log1p prototype for whatever type __CLC_GENTYPE is defined as when this file is included */
Modified: libclc/trunk/generic/lib/SOURCES
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/SOURCES?rev=219230&r1=219229&r2=219230&view=diff
==============================================================================
--- libclc/trunk/generic/lib/SOURCES (original)
+++ libclc/trunk/generic/lib/SOURCES Tue Oct 7 15:22:42 2014
@@ -63,8 +63,10 @@ math/fmax.cl
math/fmin.cl
math/fmod.cl
math/hypot.cl
+math/log1p.cl
math/mad.cl
math/mix.cl
+math/tables.cl
math/clc_nextafter.cl
math/nextafter.cl
math/pown.cl
Added: libclc/trunk/generic/lib/math/log1p.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/log1p.cl?rev=219230&view=auto
==============================================================================
--- libclc/trunk/generic/lib/math/log1p.cl (added)
+++ libclc/trunk/generic/lib/math/log1p.cl Tue Oct 7 15:22:42 2014
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float log1p(float x) // Scalar float log1p: natural log of (1 + x).
+{
+ float w = x; // untouched copy of the argument, used by the edge-case tests below
+ uint ux = as_uint(x); // raw bits of x
+ uint ax = ux & EXSIGNBIT_SP32; // raw bits of x with the sign bit masked off
+
+ // Candidate result for |x| < 2^-4 (selected below when ax < 0x3d800000)
+ float u2 = MATH_DIVIDE(x, 2.0f + x);
+ float u = u2 + u2;
+ float v = u * u;
+ // Polynomial coefficients applied to v = u^2: 2/(5 * 2^5), 2/(3 * 2^3)
+ float zsmall = mad(-u2, x, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f) * v * u) + x;
+
+ // Candidate result for |x| >= 2^-4: table-driven, working from the bits of (1 + x)
+ ux = as_uint(x + 1.0f);
+
+ int m = (int)((ux >> EXPSHIFTBITS_SP32) & 0xff) - EXPBIAS_SP32; // unbiased exponent of (1 + x)
+ float mf = (float)m;
+ uint indx = (ux & 0x007f0000) + ((ux & 0x00008000) << 1); // top 7 mantissa bits, rounded up by the next lower bit
+ float F = as_float(indx | 0x3f000000); // indx placed under the exponent field of 0.5
+
+ // Reduced argument used when the exponent of (1 + x) is greater than 24 (see the select on mf below)
+ float fg24 = F - as_float(0x3f000000 | (ux & MANTBITS_SP32));
+
+ // Reduced argument used otherwise: xt = (1 - xh) + w is the part of 1 + w not held in xh, then scaled by xnm
+ uint xhi = ux & 0xffff8000;
+ float xh = as_float(xhi);
+ float xt = (1.0f - xh) + w;
+ uint xnm = ((~(xhi & 0x7f800000)) - 0x00800000) & 0x7f800000;
+ xt = xt * as_float(xnm) * 0.5f;
+ float fl24 = F - as_float(0x3f000000 | (xhi & MANTBITS_SP32)) - xt;
+
+ float f = mf > 24.0f ? fg24 : fl24;
+
+ indx = indx >> 16; // table index, 0..128 (both tables have 129 entries)
+ float r = f * USE_TABLE(log_inv_tbl, indx);
+
+ // poly = r + r^2/2 + r^3/3 (coefficients 1/3, 1/2)
+ float poly = mad(mad(r, 0x1.555556p-2f, 0x1.0p-1f), r*r, r);
+
+ const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234
+ const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+
+ float2 tv = USE_TABLE(loge_tbl, indx);
+ float z1 = mad(mf, LOG2_HEAD, tv.s0);
+ float z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
+ float z = z1 + z2;
+
+ z = ax < 0x3d800000U ? zsmall : z; // 0x3d800000 is the bit pattern of 2^-4
+
+
+
+ // Edge cases (later lines override earlier ones, so the order matters)
+ z = ax >= PINFBITPATT_SP32 ? w : z; // infinity or NaN input: pass it through (-infinity is overridden on the next line)
+ z = w < -1.0f ? as_float(QNANBITPATT_SP32) : z; // x < -1 gives NaN
+ z = w == -1.0f ? as_float(NINFBITPATT_SP32) : z; // x == -1 gives -infinity
+ // |x| < 2^-24 (0x33800000; this includes subnormals and zero): return x itself
+ z = ax < 0x33800000 ? x : z;
+
+ return z;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log1p, float); // macro from ../clcmacro.h (not shown); presumably emits the vector overloads from the scalar one - TODO confirm
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double log1p(double x) // Scalar double log1p; both candidate results are computed and one is selected at the end.
+{
+ // Computes natural log(1+x). Algorithm based on:
+ // Ping-Tak Peter Tang
+ // "Table-driven implementation of the logarithm function in IEEE
+ // floating-point arithmetic"
+ // ACM Transactions on Mathematical Software (TOMS)
+ // Volume 16, Issue 4 (December 1990)
+ // Note that we use a lookup table of size 64 rather than 128,
+ // and compensate by having extra terms in the minimax polynomial
+ // for the kernel approximation.
+
+ // Table-driven path: its result1 is selected when x is outside [log1p_thresh1, log1p_thresh2]
+ ulong ux = as_ulong(1.0 + x); // raw bits of (1 + x)
+ int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64; // unbiased exponent of (1 + x)
+ double f = as_double(ONEEXPBITS_DP64 | (ux & MANTBITS_DP64)); // mantissa of (1 + x) under the exponent of 1.0
+
+ int j = as_int2(ux).hi >> 13;
+ j = ((0x80 | (j & 0x7e)) >> 1) + (j & 0x1); // 64 + top 6 mantissa bits, rounded up by the 7th bit
+ double f1 = (double)j * 0x1.0p-6; // f1 = j / 64
+ j -= 64; // index into ln_tbl
+
+ double f2temp = f - f1;
+ double m2 = as_double(convert_ulong(0x3ff - xexp) << EXPSHIFTBITS_DP64); // 2^-xexp, built directly in the exponent field
+ double f2l = fma(m2, x, m2 - f1);
+ double f2g = fma(m2, x, -f1) + m2;
+ double f2 = xexp <= MANTLENGTH_DP64-1 ? f2l : f2g;
+ f2 = (xexp <= -2) | (xexp >= MANTLENGTH_DP64+8) ? f2temp : f2; // at these exponent extremes use f - f1 directly
+
+ double2 tv = USE_TABLE(ln_tbl, j);
+ double z1 = tv.s0;
+ double q = tv.s1;
+
+ double u = MATH_DIVIDE(f2, fma(0.5, f2, f1));
+ double v = u * u;
+
+ double poly = v * fma(v,
+ fma(v, 2.23219810758559851206e-03, 1.24999999978138668903e-02),
+ 8.33333333333333593622e-02);
+
+ // log2_lead and log2_tail sum to an extra-precise version of log(2)
+ const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */
+ const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
+
+ double z2 = q + fma(u, poly, u);
+ double dxexp = (double)xexp;
+ double r1 = fma(dxexp, log2_lead, z1);
+ double r2 = fma(dxexp, log2_tail, z2);
+ double result1 = r1 + r2;
+
+ // Small-|x| path: its result2 is kept when x is inside [log1p_thresh1, log1p_thresh2]
+ double r = x;
+ u = r / (2.0 + r);
+ double correction = r * u;
+ u = u + u;
+ v = u * u;
+ r1 = r;
+
+ poly = fma(v,
+ fma(v,
+ fma(v, 4.34887777707614552256e-04, 2.23213998791944806202e-03),
+ 1.25000000037717509602e-02),
+ 8.33333333333317923934e-02);
+
+ r2 = fma(u*v, poly, -correction);
+
+ // The values exp(-1/16)-1 and exp(1/16)-1
+ const double log1p_thresh1 = -0x1.f0540438fd5c3p-5;
+ const double log1p_thresh2 = 0x1.082b577d34ed8p-4;
+ double result2 = r1 + r2;
+ result2 = x < log1p_thresh1 | x > log1p_thresh2 ? result1 : result2; // comparisons bind tighter than |, so this is (x < t1) | (x > t2)
+
+ result2 = isinf(x) ? x : result2; // infinite input passes through (-infinity is overridden on the next line)
+ result2 = x < -1.0 ? as_double(QNANBITPATT_DP64) : result2; // x < -1 gives NaN
+ result2 = x == -1.0 ? as_double(NINFBITPATT_DP64) : result2; // x == -1 gives -infinity
+ return result2;
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log1p, double); // macro from ../clcmacro.h (not shown); presumably emits the vector overloads from the scalar one - TODO confirm
+
+#endif // cl_khr_fp64
Modified: libclc/trunk/generic/lib/math/math.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/math.h?rev=219230&r1=219229&r2=219230&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/math.h (original)
+++ libclc/trunk/generic/lib/math/math.h Tue Oct 7 15:22:42 2014
@@ -61,4 +61,30 @@
#define MANTLENGTH_SP32 24
#define BASEDIGITS_SP32 7
+#ifdef cl_khr_fp64 /* 64-bit (double) bit-pattern constants, mirroring the *_SP32 set above */
+
+#define SIGNBIT_DP64 0x8000000000000000L /* bit 63: sign */
+#define EXSIGNBIT_DP64 0x7fffffffffffffffL /* every bit except the sign */
+#define EXPBITS_DP64 0x7ff0000000000000L /* bits 52..62: exponent field */
+#define MANTBITS_DP64 0x000fffffffffffffL /* bits 0..51: stored mantissa */
+#define ONEEXPBITS_DP64 0x3ff0000000000000L /* bit pattern of 1.0 */
+#define TWOEXPBITS_DP64 0x4000000000000000L /* bit pattern of 2.0 */
+#define HALFEXPBITS_DP64 0x3fe0000000000000L /* bit pattern of 0.5 */
+#define IMPBIT_DP64 0x0010000000000000L /* bit 52: position of the implicit leading mantissa bit */
+#define QNANBITPATT_DP64 0x7ff8000000000000L /* a positive quiet NaN */
+#define INDEFBITPATT_DP64 0xfff8000000000000L /* the same quiet NaN with the sign bit set */
+#define PINFBITPATT_DP64 0x7ff0000000000000L /* +infinity */
+#define NINFBITPATT_DP64 0xfff0000000000000L /* -infinity */
+#define EXPBIAS_DP64 1023 /* exponent bias */
+#define EXPSHIFTBITS_DP64 52 /* shift that moves the exponent field to bit 0 */
+#define BIASEDEMIN_DP64 1 /* smallest biased exponent of a normal number */
+#define EMIN_DP64 -1022 /* BIASEDEMIN_DP64 - EXPBIAS_DP64 */
+#define BIASEDEMAX_DP64 2046 /* 0x7fe */
+#define EMAX_DP64 1023 /* 0x3ff */
+#define LAMBDA_DP64 1.0e300 /* NOTE(review): purpose not visible in this change - confirm at its users */
+#define MANTLENGTH_DP64 53 /* mantissa length in bits, counting the implicit bit */
+#define BASEDIGITS_DP64 15 /* presumably the decimal digits of precision - TODO confirm */
+
+#endif // cl_khr_fp64
+
#define ALIGNED(x) __attribute__((aligned(x)))
Added: libclc/trunk/generic/lib/math/tables.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/tables.cl?rev=219230&view=auto
==============================================================================
--- libclc/trunk/generic/lib/math/tables.cl (added)
+++ libclc/trunk/generic/lib/math/tables.cl Tue Oct 7 15:22:42 2014
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "tables.h"
+
+DECLARE_TABLE(float2, LOGE_TBL, 129) = {
+ (float2)(0x0.000000p+0f, 0x0.000000p+0f),
+ (float2)(0x1.fe0000p-8f, 0x1.535882p-23f),
+ (float2)(0x1.fc0000p-7f, 0x1.5161f8p-20f),
+ (float2)(0x1.7b8000p-6f, 0x1.1b07d4p-18f),
+ (float2)(0x1.f82000p-6f, 0x1.361cf0p-19f),
+ (float2)(0x1.39e000p-5f, 0x1.0f73fcp-18f),
+ (float2)(0x1.774000p-5f, 0x1.63d8cap-19f),
+ (float2)(0x1.b42000p-5f, 0x1.bae232p-18f),
+ (float2)(0x1.f0a000p-5f, 0x1.86008ap-20f),
+ (float2)(0x1.164000p-4f, 0x1.36eea2p-16f),
+ (float2)(0x1.340000p-4f, 0x1.d7961ap-16f),
+ (float2)(0x1.51a000p-4f, 0x1.073f06p-16f),
+ (float2)(0x1.6f0000p-4f, 0x1.a515cap-17f),
+ (float2)(0x1.8c2000p-4f, 0x1.45d630p-16f),
+ (float2)(0x1.a92000p-4f, 0x1.b4e92ap-18f),
+ (float2)(0x1.c5e000p-4f, 0x1.523d6ep-18f),
+ (float2)(0x1.e26000p-4f, 0x1.076e2ap-16f),
+ (float2)(0x1.fec000p-4f, 0x1.2263b6p-17f),
+ (float2)(0x1.0d6000p-3f, 0x1.7e7cd0p-15f),
+ (float2)(0x1.1b6000p-3f, 0x1.2ad52ep-15f),
+ (float2)(0x1.294000p-3f, 0x1.52f81ep-15f),
+ (float2)(0x1.370000p-3f, 0x1.fc201ep-15f),
+ (float2)(0x1.44c000p-3f, 0x1.2b6ccap-15f),
+ (float2)(0x1.526000p-3f, 0x1.cbc742p-16f),
+ (float2)(0x1.5fe000p-3f, 0x1.3070a6p-15f),
+ (float2)(0x1.6d6000p-3f, 0x1.fce33ap-20f),
+ (float2)(0x1.7aa000p-3f, 0x1.890210p-15f),
+ (float2)(0x1.87e000p-3f, 0x1.a06520p-15f),
+ (float2)(0x1.952000p-3f, 0x1.6a73d0p-17f),
+ (float2)(0x1.a22000p-3f, 0x1.bc1fe2p-15f),
+ (float2)(0x1.af2000p-3f, 0x1.c94e80p-15f),
+ (float2)(0x1.bc2000p-3f, 0x1.0ce85ap-16f),
+ (float2)(0x1.c8e000p-3f, 0x1.f7c79ap-15f),
+ (float2)(0x1.d5c000p-3f, 0x1.0b5a7cp-18f),
+ (float2)(0x1.e26000p-3f, 0x1.076e2ap-15f),
+ (float2)(0x1.ef0000p-3f, 0x1.5b97b8p-16f),
+ (float2)(0x1.fb8000p-3f, 0x1.186d5ep-15f),
+ (float2)(0x1.040000p-2f, 0x1.2ca5a6p-17f),
+ (float2)(0x1.0a2000p-2f, 0x1.24e272p-14f),
+ (float2)(0x1.104000p-2f, 0x1.8bf9aep-14f),
+ (float2)(0x1.166000p-2f, 0x1.5cabaap-14f),
+ (float2)(0x1.1c8000p-2f, 0x1.3182d2p-15f),
+ (float2)(0x1.228000p-2f, 0x1.41fbcep-14f),
+ (float2)(0x1.288000p-2f, 0x1.5a13dep-14f),
+ (float2)(0x1.2e8000p-2f, 0x1.c575c2p-15f),
+ (float2)(0x1.346000p-2f, 0x1.dd9a98p-14f),
+ (float2)(0x1.3a6000p-2f, 0x1.3155a4p-16f),
+ (float2)(0x1.404000p-2f, 0x1.843434p-17f),
+ (float2)(0x1.460000p-2f, 0x1.8bc21cp-14f),
+ (float2)(0x1.4be000p-2f, 0x1.7e55dcp-16f),
+ (float2)(0x1.51a000p-2f, 0x1.5b0e5ap-15f),
+ (float2)(0x1.576000p-2f, 0x1.dc5d14p-16f),
+ (float2)(0x1.5d0000p-2f, 0x1.bdbf58p-14f),
+ (float2)(0x1.62c000p-2f, 0x1.05e572p-15f),
+ (float2)(0x1.686000p-2f, 0x1.903d36p-15f),
+ (float2)(0x1.6e0000p-2f, 0x1.1d5456p-15f),
+ (float2)(0x1.738000p-2f, 0x1.d7f6bap-14f),
+ (float2)(0x1.792000p-2f, 0x1.4abfbap-15f),
+ (float2)(0x1.7ea000p-2f, 0x1.f07704p-15f),
+ (float2)(0x1.842000p-2f, 0x1.a3b43cp-15f),
+ (float2)(0x1.89a000p-2f, 0x1.9c360ap-17f),
+ (float2)(0x1.8f0000p-2f, 0x1.1e8736p-14f),
+ (float2)(0x1.946000p-2f, 0x1.941c20p-14f),
+ (float2)(0x1.99c000p-2f, 0x1.958116p-14f),
+ (float2)(0x1.9f2000p-2f, 0x1.23ecbep-14f),
+ (float2)(0x1.a48000p-2f, 0x1.024396p-16f),
+ (float2)(0x1.a9c000p-2f, 0x1.d93534p-15f),
+ (float2)(0x1.af0000p-2f, 0x1.293246p-14f),
+ (float2)(0x1.b44000p-2f, 0x1.eef798p-15f),
+ (float2)(0x1.b98000p-2f, 0x1.625a4cp-16f),
+ (float2)(0x1.bea000p-2f, 0x1.4d9da6p-14f),
+ (float2)(0x1.c3c000p-2f, 0x1.d7a7ccp-14f),
+ (float2)(0x1.c8e000p-2f, 0x1.f7c79ap-14f),
+ (float2)(0x1.ce0000p-2f, 0x1.af0b84p-14f),
+ (float2)(0x1.d32000p-2f, 0x1.fcfc00p-15f),
+ (float2)(0x1.d82000p-2f, 0x1.e7258ap-14f),
+ (float2)(0x1.dd4000p-2f, 0x1.a81306p-16f),
+ (float2)(0x1.e24000p-2f, 0x1.1034f8p-15f),
+ (float2)(0x1.e74000p-2f, 0x1.09875ap-16f),
+ (float2)(0x1.ec2000p-2f, 0x1.99d246p-14f),
+ (float2)(0x1.f12000p-2f, 0x1.1ebf5ep-15f),
+ (float2)(0x1.f60000p-2f, 0x1.23fa70p-14f),
+ (float2)(0x1.fae000p-2f, 0x1.588f78p-14f),
+ (float2)(0x1.ffc000p-2f, 0x1.2e0856p-14f),
+ (float2)(0x1.024000p-1f, 0x1.52a5a4p-13f),
+ (float2)(0x1.04a000p-1f, 0x1.df9da8p-13f),
+ (float2)(0x1.072000p-1f, 0x1.f2e0e6p-16f),
+ (float2)(0x1.098000p-1f, 0x1.bd3d5cp-15f),
+ (float2)(0x1.0be000p-1f, 0x1.cb9094p-15f),
+ (float2)(0x1.0e4000p-1f, 0x1.261746p-15f),
+ (float2)(0x1.108000p-1f, 0x1.f39e2cp-13f),
+ (float2)(0x1.12e000p-1f, 0x1.719592p-13f),
+ (float2)(0x1.154000p-1f, 0x1.87a5e8p-14f),
+ (float2)(0x1.178000p-1f, 0x1.eabbd8p-13f),
+ (float2)(0x1.19e000p-1f, 0x1.cd68cep-14f),
+ (float2)(0x1.1c2000p-1f, 0x1.b81f70p-13f),
+ (float2)(0x1.1e8000p-1f, 0x1.7d79c0p-15f),
+ (float2)(0x1.20c000p-1f, 0x1.b9a324p-14f),
+ (float2)(0x1.230000p-1f, 0x1.30d7bep-13f),
+ (float2)(0x1.254000p-1f, 0x1.5bce98p-13f),
+ (float2)(0x1.278000p-1f, 0x1.5e1288p-13f),
+ (float2)(0x1.29c000p-1f, 0x1.37fec2p-13f),
+ (float2)(0x1.2c0000p-1f, 0x1.d3da88p-14f),
+ (float2)(0x1.2e4000p-1f, 0x1.d0db90p-15f),
+ (float2)(0x1.306000p-1f, 0x1.d7334ep-13f),
+ (float2)(0x1.32a000p-1f, 0x1.133912p-13f),
+ (float2)(0x1.34e000p-1f, 0x1.44ece6p-16f),
+ (float2)(0x1.370000p-1f, 0x1.17b546p-13f),
+ (float2)(0x1.392000p-1f, 0x1.e0d356p-13f),
+ (float2)(0x1.3b6000p-1f, 0x1.0893fep-14f),
+ (float2)(0x1.3d8000p-1f, 0x1.026a70p-13f),
+ (float2)(0x1.3fa000p-1f, 0x1.5b84d0p-13f),
+ (float2)(0x1.41c000p-1f, 0x1.8fe846p-13f),
+ (float2)(0x1.43e000p-1f, 0x1.9fe2f8p-13f),
+ (float2)(0x1.460000p-1f, 0x1.8bc21cp-13f),
+ (float2)(0x1.482000p-1f, 0x1.53d1eap-13f),
+ (float2)(0x1.4a4000p-1f, 0x1.f0bb60p-14f),
+ (float2)(0x1.4c6000p-1f, 0x1.e6bf32p-15f),
+ (float2)(0x1.4e6000p-1f, 0x1.d811b6p-13f),
+ (float2)(0x1.508000p-1f, 0x1.13cc00p-13f),
+ (float2)(0x1.52a000p-1f, 0x1.6932dep-16f),
+ (float2)(0x1.54a000p-1f, 0x1.246798p-13f),
+ (float2)(0x1.56a000p-1f, 0x1.f9d5b2p-13f),
+ (float2)(0x1.58c000p-1f, 0x1.5b6b9ap-14f),
+ (float2)(0x1.5ac000p-1f, 0x1.404c34p-13f),
+ (float2)(0x1.5cc000p-1f, 0x1.b1dc6cp-13f),
+ (float2)(0x1.5ee000p-1f, 0x1.54920ap-20f),
+ (float2)(0x1.60e000p-1f, 0x1.97a23cp-16f),
+ (float2)(0x1.62e000p-1f, 0x1.0bfbe8p-15f),
+};
+
+DECLARE_TABLE(float, LOG_INV_TBL, 129) = {
+ 0x1.000000p+1f,
+ 0x1.fc07f0p+0f,
+ 0x1.f81f82p+0f,
+ 0x1.f4465ap+0f,
+ 0x1.f07c20p+0f,
+ 0x1.ecc07cp+0f,
+ 0x1.e9131ap+0f,
+ 0x1.e573acp+0f,
+ 0x1.e1e1e2p+0f,
+ 0x1.de5d6ep+0f,
+ 0x1.dae608p+0f,
+ 0x1.d77b66p+0f,
+ 0x1.d41d42p+0f,
+ 0x1.d0cb58p+0f,
+ 0x1.cd8568p+0f,
+ 0x1.ca4b30p+0f,
+ 0x1.c71c72p+0f,
+ 0x1.c3f8f0p+0f,
+ 0x1.c0e070p+0f,
+ 0x1.bdd2b8p+0f,
+ 0x1.bacf92p+0f,
+ 0x1.b7d6c4p+0f,
+ 0x1.b4e81cp+0f,
+ 0x1.b20364p+0f,
+ 0x1.af286cp+0f,
+ 0x1.ac5702p+0f,
+ 0x1.a98ef6p+0f,
+ 0x1.a6d01ap+0f,
+ 0x1.a41a42p+0f,
+ 0x1.a16d40p+0f,
+ 0x1.9ec8eap+0f,
+ 0x1.9c2d14p+0f,
+ 0x1.99999ap+0f,
+ 0x1.970e50p+0f,
+ 0x1.948b10p+0f,
+ 0x1.920fb4p+0f,
+ 0x1.8f9c18p+0f,
+ 0x1.8d3018p+0f,
+ 0x1.8acb90p+0f,
+ 0x1.886e60p+0f,
+ 0x1.861862p+0f,
+ 0x1.83c978p+0f,
+ 0x1.818182p+0f,
+ 0x1.7f4060p+0f,
+ 0x1.7d05f4p+0f,
+ 0x1.7ad220p+0f,
+ 0x1.78a4c8p+0f,
+ 0x1.767dcep+0f,
+ 0x1.745d18p+0f,
+ 0x1.724288p+0f,
+ 0x1.702e06p+0f,
+ 0x1.6e1f76p+0f,
+ 0x1.6c16c2p+0f,
+ 0x1.6a13cep+0f,
+ 0x1.681682p+0f,
+ 0x1.661ec6p+0f,
+ 0x1.642c86p+0f,
+ 0x1.623fa8p+0f,
+ 0x1.605816p+0f,
+ 0x1.5e75bcp+0f,
+ 0x1.5c9882p+0f,
+ 0x1.5ac056p+0f,
+ 0x1.58ed24p+0f,
+ 0x1.571ed4p+0f,
+ 0x1.555556p+0f,
+ 0x1.539094p+0f,
+ 0x1.51d07ep+0f,
+ 0x1.501502p+0f,
+ 0x1.4e5e0ap+0f,
+ 0x1.4cab88p+0f,
+ 0x1.4afd6ap+0f,
+ 0x1.49539ep+0f,
+ 0x1.47ae14p+0f,
+ 0x1.460cbcp+0f,
+ 0x1.446f86p+0f,
+ 0x1.42d662p+0f,
+ 0x1.414142p+0f,
+ 0x1.3fb014p+0f,
+ 0x1.3e22ccp+0f,
+ 0x1.3c995ap+0f,
+ 0x1.3b13b2p+0f,
+ 0x1.3991c2p+0f,
+ 0x1.381382p+0f,
+ 0x1.3698e0p+0f,
+ 0x1.3521d0p+0f,
+ 0x1.33ae46p+0f,
+ 0x1.323e34p+0f,
+ 0x1.30d190p+0f,
+ 0x1.2f684cp+0f,
+ 0x1.2e025cp+0f,
+ 0x1.2c9fb4p+0f,
+ 0x1.2b404ap+0f,
+ 0x1.29e412p+0f,
+ 0x1.288b02p+0f,
+ 0x1.27350cp+0f,
+ 0x1.25e228p+0f,
+ 0x1.24924ap+0f,
+ 0x1.234568p+0f,
+ 0x1.21fb78p+0f,
+ 0x1.20b470p+0f,
+ 0x1.1f7048p+0f,
+ 0x1.1e2ef4p+0f,
+ 0x1.1cf06ap+0f,
+ 0x1.1bb4a4p+0f,
+ 0x1.1a7b96p+0f,
+ 0x1.194538p+0f,
+ 0x1.181182p+0f,
+ 0x1.16e068p+0f,
+ 0x1.15b1e6p+0f,
+ 0x1.1485f0p+0f,
+ 0x1.135c82p+0f,
+ 0x1.12358ep+0f,
+ 0x1.111112p+0f,
+ 0x1.0fef02p+0f,
+ 0x1.0ecf56p+0f,
+ 0x1.0db20ap+0f,
+ 0x1.0c9714p+0f,
+ 0x1.0b7e6ep+0f,
+ 0x1.0a6810p+0f,
+ 0x1.0953f4p+0f,
+ 0x1.084210p+0f,
+ 0x1.073260p+0f,
+ 0x1.0624dep+0f,
+ 0x1.051980p+0f,
+ 0x1.041042p+0f,
+ 0x1.03091cp+0f,
+ 0x1.020408p+0f,
+ 0x1.010102p+0f,
+ 0x1.000000p+0f,
+};
+
+TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl); /* defines __clc_loge_tbl(idx), which returns LOGE_TBL[idx] */
+TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); /* defines __clc_log_inv_tbl(idx), which returns LOG_INV_TBL[idx] */
+
+#ifdef cl_khr_fp64
+
+DECLARE_TABLE(double2, LN_TBL, 65) = {
+ (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0),
+ (double2)(0x1.fc0a800000000p-7, 0x1.61f807c79f3dbp-28),
+ (double2)(0x1.f829800000000p-6, 0x1.873c1980267c8p-25),
+ (double2)(0x1.7745800000000p-5, 0x1.ec65b9f88c69ep-26),
+ (double2)(0x1.f0a3000000000p-5, 0x1.8022c54cc2f99p-26),
+ (double2)(0x1.341d700000000p-4, 0x1.2c37a3a125330p-25),
+ (double2)(0x1.6f0d200000000p-4, 0x1.15cad69737c93p-25),
+ (double2)(0x1.a926d00000000p-4, 0x1.d256ab1b285e9p-27),
+ (double2)(0x1.e270700000000p-4, 0x1.b8abcb97a7aa2p-26),
+ (double2)(0x1.0d77e00000000p-3, 0x1.f34239659a5dcp-25),
+ (double2)(0x1.2955280000000p-3, 0x1.e07fd48d30177p-25),
+ (double2)(0x1.44d2b00000000p-3, 0x1.b32df4799f4f6p-25),
+ (double2)(0x1.5ff3000000000p-3, 0x1.c29e4f4f21cf8p-25),
+ (double2)(0x1.7ab8900000000p-3, 0x1.086c848df1b59p-30),
+ (double2)(0x1.9525a80000000p-3, 0x1.cf456b4764130p-27),
+ (double2)(0x1.af3c900000000p-3, 0x1.3a02ffcb63398p-25),
+ (double2)(0x1.c8ff780000000p-3, 0x1.1e6a6886b0976p-25),
+ (double2)(0x1.e270700000000p-3, 0x1.b8abcb97a7aa2p-25),
+ (double2)(0x1.fb91800000000p-3, 0x1.b578f8aa35552p-25),
+ (double2)(0x1.0a324c0000000p-2, 0x1.139c871afb9fcp-25),
+ (double2)(0x1.1675c80000000p-2, 0x1.5d5d30701ce64p-25),
+ (double2)(0x1.22941c0000000p-2, 0x1.de7bcb2d12142p-25),
+ (double2)(0x1.2e8e280000000p-2, 0x1.d708e984e1664p-25),
+ (double2)(0x1.3a64c40000000p-2, 0x1.56945e9c72f36p-26),
+ (double2)(0x1.4618bc0000000p-2, 0x1.0e2f613e85bdap-29),
+ (double2)(0x1.51aad80000000p-2, 0x1.cb7e0b42724f6p-28),
+ (double2)(0x1.5d1bd80000000p-2, 0x1.fac04e52846c7p-25),
+ (double2)(0x1.686c800000000p-2, 0x1.e9b14aec442bep-26),
+ (double2)(0x1.739d7c0000000p-2, 0x1.b5de8034e7126p-25),
+ (double2)(0x1.7eaf800000000p-2, 0x1.dc157e1b259d3p-25),
+ (double2)(0x1.89a3380000000p-2, 0x1.b05096ad69c62p-28),
+ (double2)(0x1.9479400000000p-2, 0x1.c2116faba4cddp-26),
+ (double2)(0x1.9f323c0000000p-2, 0x1.65fcc25f95b47p-25),
+ (double2)(0x1.a9cec80000000p-2, 0x1.a9a08498d4850p-26),
+ (double2)(0x1.b44f740000000p-2, 0x1.de647b1465f77p-25),
+ (double2)(0x1.beb4d80000000p-2, 0x1.da71b7bf7861dp-26),
+ (double2)(0x1.c8ff7c0000000p-2, 0x1.e6a6886b09760p-28),
+ (double2)(0x1.d32fe40000000p-2, 0x1.f0075eab0ef64p-25),
+ (double2)(0x1.dd46a00000000p-2, 0x1.3071282fb989bp-28),
+ (double2)(0x1.e744240000000p-2, 0x1.0eb43c3f1bed2p-25),
+ (double2)(0x1.f128f40000000p-2, 0x1.faf06ecb35c84p-26),
+ (double2)(0x1.faf5880000000p-2, 0x1.ef1e63db35f68p-27),
+ (double2)(0x1.02552a0000000p-1, 0x1.69743fb1a71a5p-27),
+ (double2)(0x1.0723e40000000p-1, 0x1.c1cdf404e5796p-25),
+ (double2)(0x1.0be72e0000000p-1, 0x1.094aa0ada625ep-27),
+ (double2)(0x1.109f380000000p-1, 0x1.e2d4c96fde3ecp-25),
+ (double2)(0x1.154c3c0000000p-1, 0x1.2f4d5e9a98f34p-25),
+ (double2)(0x1.19ee6a0000000p-1, 0x1.467c96ecc5cbep-25),
+ (double2)(0x1.1e85f40000000p-1, 0x1.e7040d03dec5ap-25),
+ (double2)(0x1.23130c0000000p-1, 0x1.7bebf4282de36p-25),
+ (double2)(0x1.2795e00000000p-1, 0x1.289b11aeb783fp-25),
+ (double2)(0x1.2c0e9e0000000p-1, 0x1.a891d1772f538p-26),
+ (double2)(0x1.307d720000000p-1, 0x1.34f10be1fb591p-25),
+ (double2)(0x1.34e2880000000p-1, 0x1.d9ce1d316eb93p-25),
+ (double2)(0x1.393e0c0000000p-1, 0x1.3562a19a9c442p-25),
+ (double2)(0x1.3d90260000000p-1, 0x1.4e2adf548084cp-26),
+ (double2)(0x1.41d8fe0000000p-1, 0x1.08ce55cc8c97ap-26),
+ (double2)(0x1.4618bc0000000p-1, 0x1.0e2f613e85bdap-28),
+ (double2)(0x1.4a4f840000000p-1, 0x1.db03ebb0227bfp-25),
+ (double2)(0x1.4e7d800000000p-1, 0x1.1b75bb09cb098p-25),
+ (double2)(0x1.52a2d20000000p-1, 0x1.96f16abb9df22p-27),
+ (double2)(0x1.56bf9c0000000p-1, 0x1.5b3f399411c62p-25),
+ (double2)(0x1.5ad4040000000p-1, 0x1.86b3e59f65355p-26),
+ (double2)(0x1.5ee02a0000000p-1, 0x1.2482ceae1ac12p-26),
+ (double2)(0x1.62e42e0000000p-1, 0x1.efa39ef35793cp-25),
+};
+
+TABLE_FUNCTION(double2, LN_TBL, ln_tbl); /* defines __clc_ln_tbl(idx), which returns LN_TBL[idx] */
+
+#endif // cl_khr_fp64
Copied: libclc/trunk/generic/lib/math/tables.h (from r219087, libclc/trunk/generic/lib/math/math.h)
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/tables.h?p2=libclc/trunk/generic/lib/math/tables.h&p1=libclc/trunk/generic/lib/math/math.h&r1=219087&r2=219230&rev=219230&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/math.h (original)
+++ libclc/trunk/generic/lib/math/tables.h Tue Oct 7 15:22:42 2014
@@ -20,45 +20,31 @@
* THE SOFTWARE.
*/
-#define SNAN 0x001
-#define QNAN 0x002
-#define NINF 0x004
-#define NNOR 0x008
-#define NSUB 0x010
-#define NZER 0x020
-#define PZER 0x040
-#define PSUB 0x080
-#define PNOR 0x100
-#define PINF 0x200
-
-#define HAVE_HW_FMA32() (1)
-#define HAVE_BITALIGN() (0)
-#define HAVE_FAST_FMA32() (0)
-
-#define MATH_DIVIDE(X, Y) ((X) / (Y))
-#define MATH_RECIP(X) (1.0f / (X))
-#define MATH_SQRT(X) sqrt(X)
-
-#define SIGNBIT_SP32 0x80000000
-#define EXSIGNBIT_SP32 0x7fffffff
-#define EXPBITS_SP32 0x7f800000
-#define MANTBITS_SP32 0x007fffff
-#define ONEEXPBITS_SP32 0x3f800000
-#define TWOEXPBITS_SP32 0x40000000
-#define HALFEXPBITS_SP32 0x3f000000
-#define IMPBIT_SP32 0x00800000
-#define QNANBITPATT_SP32 0x7fc00000
-#define INDEFBITPATT_SP32 0xffc00000
-#define PINFBITPATT_SP32 0x7f800000
-#define NINFBITPATT_SP32 0xff800000
-#define EXPBIAS_SP32 127
-#define EXPSHIFTBITS_SP32 23
-#define BIASEDEMIN_SP32 1
-#define EMIN_SP32 -126
-#define BIASEDEMAX_SP32 254
-#define EMAX_SP32 127
-#define LAMBDA_SP32 1.0e30
-#define MANTLENGTH_SP32 24
-#define BASEDIGITS_SP32 7
+#define TABLE_SPACE __constant /* address-space qualifier that DECLARE_TABLE puts on every lookup table */
-#define ALIGNED(x) __attribute__((aligned(x)))
+#define TABLE_MANGLE(NAME) __clc_##NAME /* accessor function name: NAME with a __clc_ prefix */
+
+#define DECLARE_TABLE(TYPE,NAME,LENGTH) \
+ TABLE_SPACE TYPE NAME [ LENGTH ] /* array declarator only; the user appends "= { ... };" */
+
+#define TABLE_FUNCTION(TYPE,TABLE,NAME) \
+ TYPE TABLE_MANGLE(NAME)(size_t idx) { \
+ return TABLE[idx]; \
+ } /* defines the accessor; idx is not range-checked */
+
+#define TABLE_FUNCTION_DECL(TYPE, NAME) \
+ TYPE TABLE_MANGLE(NAME)(size_t idx); /* prototype for the accessor; the ';' is part of the macro, so a ';' at the use site is an extra empty declaration */
+
+#define USE_TABLE(NAME, IDX) \
+ TABLE_MANGLE(NAME)(IDX) /* reads entry IDX of table NAME through its accessor function */
+
+TABLE_FUNCTION_DECL(float2, loge_tbl); /* float2 __clc_loge_tbl(size_t idx) */
+TABLE_FUNCTION_DECL(float, log_inv_tbl); /* float __clc_log_inv_tbl(size_t idx) */
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+TABLE_FUNCTION_DECL(double2, ln_tbl); /* double2 __clc_ln_tbl(size_t idx) */
+
+#endif // cl_khr_fp64
More information about the cfe-commits
mailing list