[libclc] r243131 - Implement accurate log2 function

Tom Stellard thomas.stellard at amd.com
Fri Jul 24 11:07:12 PDT 2015


Author: tstellar
Date: Fri Jul 24 13:07:12 2015
New Revision: 243131

URL: http://llvm.org/viewvc/llvm-project?rev=243131&view=rev
Log:
Implement accurate log2 function

Use the implementation ported from the AMD builtin library rather
than the LLVM intrinsics.

This has been tested with piglit, OpenCV, and the ocl conformance tests.

Added:
    libclc/trunk/generic/include/clc/math/log2.inc
      - copied, changed from r243130, libclc/trunk/generic/lib/math/tables.h
    libclc/trunk/generic/lib/math/log2.cl
      - copied, changed from r243130, libclc/trunk/generic/lib/math/tables.h
    libclc/trunk/generic/lib/math/log_base.h
Modified:
    libclc/trunk/generic/include/clc/math/log.h
    libclc/trunk/generic/include/clc/math/log2.h
    libclc/trunk/generic/lib/SOURCES
    libclc/trunk/generic/lib/math/tables.cl
    libclc/trunk/generic/lib/math/tables.h

Modified: libclc/trunk/generic/include/clc/math/log.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/math/log.h?rev=243131&r1=243130&r2=243131&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/math/log.h (original)
+++ libclc/trunk/generic/include/clc/math/log.h Fri Jul 24 13:07:12 2015
@@ -1,4 +1,4 @@
 #undef log
 
 // log(x) = log2(x) * (1/log2(e))
-#define log(val) (__clc_log2(val) * 0.693147181f)
+#define log(val) (log2(val) * 0.693147181f) // 0.693147181f ~= 1/log2(e); NOTE(review): the float constant also scales double arguments — confirm the precision is acceptable

Modified: libclc/trunk/generic/include/clc/math/log2.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/math/log2.h?rev=243131&r1=243130&r2=243131&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/math/log2.h (original)
+++ libclc/trunk/generic/include/clc/math/log2.h Fri Jul 24 13:07:12 2015
@@ -1,6 +1,24 @@
-#undef log2
-#define log2 __clc_log2
+/*
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
 
-#define __CLC_FUNCTION __clc_log2
-#define __CLC_INTRINSIC "llvm.log2"
-#include <clc/math/unary_intrin.inc>
+#define __CLC_BODY <clc/math/log2.inc>
+#include <clc/math/gentype.inc>

Copied: libclc/trunk/generic/include/clc/math/log2.inc (from r243130, libclc/trunk/generic/lib/math/tables.h)
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/math/log2.inc?p2=libclc/trunk/generic/include/clc/math/log2.inc&p1=libclc/trunk/generic/lib/math/tables.h&r1=243130&r2=243131&rev=243131&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/tables.h (original)
+++ libclc/trunk/generic/include/clc/math/log2.inc Fri Jul 24 13:07:12 2015
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -20,33 +20,4 @@
  * THE SOFTWARE.
  */
 
-#define TABLE_SPACE __constant
-
-#define TABLE_MANGLE(NAME) __clc_##NAME
-
-#define DECLARE_TABLE(TYPE,NAME,LENGTH) \
-    TABLE_SPACE TYPE NAME [ LENGTH ]
-
-#define TABLE_FUNCTION(TYPE,TABLE,NAME) \
-    TYPE TABLE_MANGLE(NAME)(size_t idx) { \
-        return TABLE[idx]; \
-    }
-
-#define TABLE_FUNCTION_DECL(TYPE, NAME) \
-    TYPE TABLE_MANGLE(NAME)(size_t idx);
-
-#define USE_TABLE(NAME, IDX) \
-    TABLE_MANGLE(NAME)(IDX)
-
-TABLE_FUNCTION_DECL(float2, loge_tbl);
-TABLE_FUNCTION_DECL(float, log_inv_tbl);
-TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-TABLE_FUNCTION_DECL(double2, ln_tbl);
-TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
-TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
-#endif // cl_khr_fp64
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE log2(__CLC_GENTYPE a); // prototype only; presumably expanded once per __CLC_GENTYPE by gentype.inc via __CLC_BODY — confirm

Modified: libclc/trunk/generic/lib/SOURCES
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/SOURCES?rev=243131&r1=243130&r2=243131&view=diff
==============================================================================
--- libclc/trunk/generic/lib/SOURCES (original)
+++ libclc/trunk/generic/lib/SOURCES Fri Jul 24 13:07:12 2015
@@ -93,6 +93,7 @@ math/clc_ldexp.cl
 math/ldexp.cl
 math/log10.cl
 math/log1p.cl
+math/log2.cl
 math/mad.cl
 math/native_log.cl
 math/native_log2.cl

Copied: libclc/trunk/generic/lib/math/log2.cl (from r243130, libclc/trunk/generic/lib/math/tables.h)
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/log2.cl?p2=libclc/trunk/generic/lib/math/log2.cl&p1=libclc/trunk/generic/lib/math/tables.h&r1=243130&r2=243131&rev=243131&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/tables.h (original)
+++ libclc/trunk/generic/lib/math/log2.cl Fri Jul 24 13:07:12 2015
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ * Copyright (c) 2015 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -20,33 +20,18 @@
  * THE SOFTWARE.
  */
 
-#define TABLE_SPACE __constant
-
-#define TABLE_MANGLE(NAME) __clc_##NAME
-
-#define DECLARE_TABLE(TYPE,NAME,LENGTH) \
-    TABLE_SPACE TYPE NAME [ LENGTH ]
-
-#define TABLE_FUNCTION(TYPE,TABLE,NAME) \
-    TYPE TABLE_MANGLE(NAME)(size_t idx) { \
-        return TABLE[idx]; \
-    }
-
-#define TABLE_FUNCTION_DECL(TYPE, NAME) \
-    TYPE TABLE_MANGLE(NAME)(size_t idx);
-
-#define USE_TABLE(NAME, IDX) \
-    TABLE_MANGLE(NAME)(IDX)
-
-TABLE_FUNCTION_DECL(float2, loge_tbl);
-TABLE_FUNCTION_DECL(float, log_inv_tbl);
-TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
+#include <clc/clc.h>
+#include "../clcmacro.h"
+#include "tables.h"
 
 #ifdef cl_khr_fp64
-
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-TABLE_FUNCTION_DECL(double2, ln_tbl);
-TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
-TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
 #endif // cl_khr_fp64
+
+#define COMPILING_LOG2 // selects the log2 variants inside log_base.h
+#include "log_base.h"
+#undef COMPILING_LOG2
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log2, float);
+#ifdef cl_khr_fp64 // log_base.h only defines log2(double) under this guard, so the vector wrappers need it too
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log2, double);
+#endif // cl_khr_fp64

Added: libclc/trunk/generic/lib/math/log_base.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/log_base.h?rev=243131&view=auto
==============================================================================
--- libclc/trunk/generic/lib/math/log_base.h (added)
+++ libclc/trunk/generic/lib/math/log_base.h Fri Jul 24 13:07:12 2015
@@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "math.h"
+
+/*
+   Algorithm:
+
+   Based on:
+   Ping-Tak Peter Tang
+   "Table-driven implementation of the logarithm function in IEEE
+   floating-point arithmetic"
+   ACM Transactions on Mathematical Software (TOMS)
+   Volume 16, Issue 4 (December 1990)
+
+
+   x very close to 1.0 is handled differently, for x everywhere else
+   a brief explanation is given below
+
+   x = (2^m)*A
+   x = (2^m)*(G+g) with (1 <= G < 2) and (g <= 2^(-8))
+   x = (2^m)*2*(G/2+g/2)
+   x = (2^m)*2*(F+f) with (0.5 <= F < 1) and (f <= 2^(-9))
+
+   Y = (2^(-1))*(2^(-m))*(2^m)*A
+   Now, range of Y is: 0.5 <= Y < 1
+
+   F = 0x80 + (first 7 mantissa bits) + (8th mantissa bit)
+   Now, range of F is: 128 <= F <= 256 
+   F = F / 256 
+   Now, range of F is: 0.5 <= F <= 1
+
+   f = -(Y-F), with (f <= 2^(-9))
+
+   log(x) = m*log(2) + log(2) + log(F-f)
+   log(x) = m*log(2) + log(2) + log(F) + log(1-(f/F))
+   log(x) = m*log(2) + log(2*F) + log(1-r)
+
+   r = (f/F), with (r <= 2^(-8))
+   r = f*(1/F) with (1/F) precomputed to avoid division
+
+   log(x) = m*log(2) + log(G) - poly
+
+   log(G) is precomputed
+   poly = (r + (r^2)/2 + (r^3)/3 + (r^4)/4 + (r^5)/5)
+
+   log(2) and log(G) need to be maintained in extra precision
+   to avoid losing precision in the calculations
+
+
+   For x close to 1.0, we employ the following technique to
+   ensure faster convergence.
+
+   log(x) = log((1+s)/(1-s)) = 2*s + (2/3)*s^3 + (2/5)*s^5 + (2/7)*s^7
+   x = ((1+s)/(1-s)) 
+   x = 1 + r
+   s = r/(2+r)
+
+*/
+
+_CLC_OVERLOAD _CLC_DEF float // single-precision logarithm; the includer selects the base with COMPILING_LOG2 / COMPILING_LOG10 (natural log otherwise)
+#if defined(COMPILING_LOG2)
+log2(float x)
+#elif defined(COMPILING_LOG10)
+log10(float x)
+#else
+log(float x)
+#endif
+{
+    // Both the near-1 series and the table-driven result are computed for every x; the selects at the end pick one.
+#if defined(COMPILING_LOG2)
+    const float LOG2E = 0x1.715476p+0f;      // log2(e) = 1.4426950408889634
+    const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375, leading part of log2(e)
+    const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072, LOG2E - LOG2E_HEAD
+#elif defined(COMPILING_LOG10)
+    USE_TABLE(float2, p_log, LOG10_TBL); // NOTE(review): three arguments here, but USE_TABLE is called as (name, index) further down — confirm before enabling COMPILING_LOG10
+    const float LOG10E = 0x1.bcb7b2p-2f;        // 0.43429448190325182
+    const float LOG10E_HEAD = 0x1.bc0000p-2f;   // 0.43359375
+    const float LOG10E_TAIL = 0x1.6f62a4p-11f;  // 0.0007007319
+    const float LOG10_2_HEAD = 0x1.340000p-2f;  // 0.30078125
+    const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
+#else
+    USE_TABLE(float2, p_log, LOGE_TBL); // NOTE(review): same three-argument USE_TABLE form as the log10 branch — confirm before building the natural-log variant
+    const float LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
+    const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
+#endif
+
+    uint xi = as_uint(x); // raw bit pattern of x
+    uint ax = xi & EXSIGNBIT_SP32; // presumably the pattern of |x| (sign bit masked off); the mask comes from math.h
+
+    // Calculations for |x-1| < 2^-4: with u = 2r/(2+r), log(1+r) ~= u + u^3/12 + u^5/80
+    float r = x - 1.0f;
+    int near1 = fabs(r) < 0x1.0p-4f;
+    float u2 = MATH_DIVIDE(r, 2.0f + r); // r/(2+r), i.e. s in the header comment
+    float corr = u2 * r; // r^2/(2+r), which equals r - u
+    float u = u2 + u2;
+    float v = u * u;
+    float znear1, z1, z2;
+
+    // z2 = u^5 * 2/(5 * 2^5) + u^3 * 2/(3 * 2^3) - corr, so that log(1+r) ~= r + z2
+    z2 = mad(u, mad(v, 0x1.99999ap-7f, 0x1.555556p-4f)*v, -corr);
+
+#if defined(COMPILING_LOG2)
+    z1 = as_float(as_int(r) & 0xffff0000); // upper 16 bits of r; presumably so z1 * LOG2E_HEAD loses no bits
+    z2 = z2 + (r - z1); // z2 now also carries the low bits of r
+    znear1 = mad(z1, LOG2E_HEAD, mad(z2, LOG2E_HEAD, mad(z1, LOG2E_TAIL, z2*LOG2E_TAIL))); // (z1 + z2) * (HEAD + TAIL), smallest terms innermost
+#elif defined(COMPILING_LOG10)
+    z1 = as_float(as_int(r) & 0xffff0000); // same head/tail split as the log2 branch
+    z2 = z2 + (r - z1);
+    znear1 = mad(z1, LOG10E_HEAD, mad(z2, LOG10E_HEAD, mad(z1, LOG10E_TAIL, z2*LOG10E_TAIL)));
+#else
+    znear1 = z2 + r;
+#endif
+
+    // Calculations for x not near 1
+    int m = (int)(xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; // exponent field minus bias (macros from math.h)
+
+    // Normalize subnormal: OR in the exponent bits of 1.0f (0x3f800000) and subtract 1.0f so the mantissa becomes a normalized float
+    uint xis = as_uint(as_float(xi | 0x3f800000) - 1.0f);
+    int ms = (int)(xis >> EXPSHIFTBITS_SP32) - 253; // presumably 253 = 127 (bias) + 126 (scale introduced by the step above) — confirm
+    int c = m == -127; // true when the exponent field was zero, assuming EXPBIAS_SP32 is 127
+    m = c ? ms : m;
+    uint xin = c ? xis : xi;
+
+    float mf = (float)m;
+    uint indx = (xin & 0x007f0000) + ((xin & 0x00008000) << 1); // top 7 mantissa bits, rounded up by the 8th bit
+
+    // F - Y, both rebuilt with exponent bits 0x3f000000
+    float f = as_float(0x3f000000 | indx) - as_float(0x3f000000 | (xin & MANTBITS_SP32));
+
+    indx = indx >> 16; // table index in 0..128
+    r = f * USE_TABLE(log_inv_tbl, indx); // r = f * (1/F); log_inv_tbl presumably holds the precomputed reciprocals
+
+    // poly = r + r^2/2 + r^3/3 (coefficients 1/3, 1/2)
+    float poly = mad(mad(r, 0x1.555556p-2f, 0.5f), r*r, r);
+
+#if defined(COMPILING_LOG2)
+    float2 tv = USE_TABLE(log2_tbl, indx);
+    z1 = tv.s0 + mf; // first table component plus the exponent
+    z2 = mad(poly, -LOG2E, tv.s1); // second table component minus poly * log2(e)
+#elif defined(COMPILING_LOG10)
+    float2 tv = p_log[indx];
+    z1 = mad(mf, LOG10_2_HEAD, tv.s0);
+    z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
+#else
+    float2 tv = p_log[indx];
+    z1 = mad(mf, LOG2_HEAD, tv.s0);
+    z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
+#endif
+
+    float z = z1 + z2;
+    z = near1 ? znear1 : z;
+
+    // Corner cases; later lines override earlier ones
+    z = ax >= PINFBITPATT_SP32 ? x : z; // presumably inf or NaN input: x itself is returned
+    z = xi != ax ? as_float(QNANBITPATT_SP32) : z; // masking changed the bits (sign bit set, if the mask is as assumed): QNAN pattern
+    z = ax == 0 ? as_float(NINFBITPATT_SP32) : z; // masked bits are all zero: NINF pattern
+
+    return z;
+}
+
+#ifdef cl_khr_fp64
+
+_CLC_OVERLOAD _CLC_DEF double // double-precision logarithm; the includer selects the base with COMPILING_LOG2 / COMPILING_LOG10 (natural log otherwise)
+#if defined(COMPILING_LOG2)
+log2(double x)
+#elif defined(COMPILING_LOG10)
+log10(double x)
+#else
+log(double x)
+#endif
+{
+    // Both the near-1 and the far-from-1 results are computed for every x; the selects at the end pick one.
+#ifndef COMPILING_LOG2
+    // log2_lead and log2_tail sum to an extra-precise version of ln(2)
+    const double log2_lead = 6.93147122859954833984e-01; /* 0x3fe62e42e0000000 */
+    const double log2_tail = 5.76999904754328540596e-08; /* 0x3e6efa39ef35793c */
+#endif
+
+#if defined(COMPILING_LOG10)
+    // log10e_lead and log10e_tail sum to an extra-precision version of log10(e) (19 bits in lead)
+    const double log10e_lead = 4.34293746948242187500e-01;  /* 0x3fdbcb7800000000 */
+    const double log10e_tail = 7.3495500964015109100644e-7; /* 0x3ea8a93728719535 */
+#elif defined(COMPILING_LOG2)
+    // log2e_lead and log2e_tail sum to an extra-precision version of log2(e) (19 bits in lead)
+    const double log2e_lead = 1.44269180297851562500E+00; /* 0x3FF7154400000000 */
+    const double log2e_tail = 3.23791044778235969970E-06; /* 0x3ECB295C17F0BBBE */
+#endif
+
+    // Bounds of the interval around 1.0 that uses the near-1 code:
+    // log_thresh1 = 9.39412117004394531250e-1 = 0x3fee0faa00000000
+    // log_thresh2 = 1.06449508666992187500 = 0x3ff1082c00000000
+    const double log_thresh1 = 0x1.e0faap-1;
+    const double log_thresh2 = 0x1.1082cp+0;
+
+    int is_near = x >= log_thresh1 & x <= log_thresh2; // bitwise & of the two comparison results
+
+    // Near 1 code: with u = 2r/(2+r), log(1+r) = r + (odd series in u starting at u^3) - correction
+    double r = x - 1.0;
+    double u = r / (2.0 + r);
+    double correction = r * u; // r^2/(2+r), which equals r - 2r/(2+r)
+    u = u + u;
+    double v = u * u;
+    double r1 = r;
+
+    const double ca_1 = 8.33333333333317923934e-02; /* 0x3fb55555555554e6 */
+    const double ca_2 = 1.25000000037717509602e-02; /* 0x3f89999999bac6d4 */
+    const double ca_3 = 2.23213998791944806202e-03; /* 0x3f62492307f1519f */
+    const double ca_4 = 4.34887777707614552256e-04; /* 0x3f3c8034c85dfff0 */
+
+    double r2 = fma(u*v, fma(v, fma(v, fma(v, ca_4, ca_3), ca_2), ca_1), -correction); // u^3 * (ca_1 + ca_2 v + ca_3 v^2 + ca_4 v^3) - correction
+
+#if defined(COMPILING_LOG10)
+    r = r1;
+    r1 = as_double(as_ulong(r1) & 0xffffffff00000000); // keep only the upper 32 bits of the pattern
+    r2 = r2 + (r - r1); // r2 absorbs the bits dropped from r1
+    double ret_near = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail * r2)));
+#elif defined(COMPILING_LOG2)
+    r = r1;
+    r1 = as_double(as_ulong(r1) & 0xffffffff00000000); // keep only the upper 32 bits of the pattern
+    r2 = r2 + (r - r1); // r2 absorbs the bits dropped from r1
+    double ret_near = fma(log2e_lead, r1, fma(log2e_lead, r2, fma(log2e_tail, r1, log2e_tail*r2))); // (r1 + r2) * (lead + tail), smallest terms innermost
+#else
+    double ret_near = r1 + r2;
+#endif
+
+    // This is the far from 1 code
+
+    // Deal with subnormal: OR in exponent field 0x03d and subtract 2^-962 (the value of that field) to renormalize
+    ulong ux = as_ulong(x);
+    ulong uxs = as_ulong(as_double(0x03d0000000000000UL | ux) - 0x1.0p-962);
+    int c = ux < IMPBIT_DP64; // presumably true when the exponent field is zero — IMPBIT_DP64 comes from math.h
+    ux = c ? uxs : ux;
+    int expadjust = c ? 60 : 0; // compensates for the scaling introduced by the renormalization above
+
+    int xexp = ((as_int2(ux).hi >> 20) & 0x7ff) - EXPBIAS_DP64 - expadjust;
+    double f = as_double(HALFEXPBITS_DP64 | (ux & MANTBITS_DP64)); // mantissa of x under the exponent bits HALFEXPBITS_DP64
+    int index = as_int2(ux).hi >> 13; // presumably leaves the top 7 mantissa bits in bits 0..6
+    index = ((0x80 | (index & 0x7e)) >> 1) + (index & 0x1); // 64 + upper 6 of those bits, plus the 7th as rounding: 64..128
+
+    double2 tv = USE_TABLE(ln_tbl, index - 64);
+    double z1 = tv.s0;
+    double q = tv.s1;
+
+    double f1 = index * 0x1.0p-7; // index / 128
+    double f2 = f - f1;
+    u = f2 / fma(f2, 0.5, f1); // f2 / (f1 + f2/2)
+    v = u * u;
+
+    const double cb_1 = 8.33333333333333593622e-02; /* 0x3fb5555555555557 */
+    const double cb_2 = 1.24999999978138668903e-02; /* 0x3f89999999865ede */
+    const double cb_3 = 2.23219810758559851206e-03; /* 0x3f6249423bd94741 */
+
+    double poly = v * fma(v, fma(v, cb_3, cb_2), cb_1);
+    double z2 = q + fma(u, poly, u); // q + u + u * poly
+
+    double dxexp = (double)xexp;
+#if defined (COMPILING_LOG10)
+    // Add xexp * log(2) to z1,z2 to get log(x)
+    r1 = fma(dxexp, log2_lead, z1);
+    r2 = fma(dxexp, log2_tail, z2);
+    double ret_far = fma(log10e_lead, r1, fma(log10e_lead, r2, fma(log10e_tail, r1, log10e_tail*r2)));
+#elif defined(COMPILING_LOG2)
+    r1 = fma(log2e_lead, z1, dxexp); // exponent plus the leading product
+    r2 = fma(log2e_lead, z2, fma(log2e_tail, z1, log2e_tail*z2)); // remaining cross products
+    double ret_far = r1 + r2;
+#else
+    r1 = fma(dxexp, log2_lead, z1);
+    r2 = fma(dxexp, log2_tail, z2);
+    double ret_far = r1 + r2;
+#endif
+
+    double ret = is_near ? ret_near : ret_far;
+    // Corner cases; later lines override earlier ones
+    ret = isinf(x) ? as_double(PINFBITPATT_DP64) : ret; // a negative infinity is replaced again by the next line
+    ret = isnan(x) | (x < 0.0) ? as_double(QNANBITPATT_DP64) : ret;
+    ret = x == 0.0 ? as_double(NINFBITPATT_DP64) : ret;
+    return ret;
+}
+
+#endif // cl_khr_fp64

Modified: libclc/trunk/generic/lib/math/tables.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/tables.cl?rev=243131&r1=243130&r2=243131&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/tables.cl (original)
+++ libclc/trunk/generic/lib/math/tables.cl Fri Jul 24 13:07:12 2015
@@ -288,6 +288,137 @@ DECLARE_TABLE(float, LOG_INV_TBL, 129) =
     0x1.000000p+0f,
 };
 
+DECLARE_TABLE(float2, LOG2_TBL, 129) = { // pairs read as tv.s0 / tv.s1 by log2(float); presumably head and tail of log2(1 + i/128) for i = 0..128 (first entry 0, last entry 1.0)
+    (float2)(0x0.000000p+0f, 0x0.000000p+0f),
+    (float2)(0x1.6f8000p-7f, 0x1.942dbap-17f),
+    (float2)(0x1.6e0000p-6f, 0x1.e5a170p-16f),
+    (float2)(0x1.118000p-5f, 0x1.347544p-15f),
+    (float2)(0x1.6b8000p-5f, 0x1.69bac6p-16f),
+    (float2)(0x1.c48000p-5f, 0x1.7eae42p-15f),
+    (float2)(0x1.0e8000p-4f, 0x1.9c4fd0p-15f),
+    (float2)(0x1.3a8000p-4f, 0x1.17ee92p-15f),
+    (float2)(0x1.660000p-4f, 0x1.fb7d64p-15f),
+    (float2)(0x1.918000p-4f, 0x1.42dc8cp-17f),
+    (float2)(0x1.bc8000p-4f, 0x1.0902b6p-18f),
+    (float2)(0x1.e70000p-4f, 0x1.7608bep-15f),
+    (float2)(0x1.088000p-3f, 0x1.162336p-13f),
+    (float2)(0x1.1d8000p-3f, 0x1.3465d4p-13f),
+    (float2)(0x1.328000p-3f, 0x1.74f13cp-14f),
+    (float2)(0x1.470000p-3f, 0x1.aa7e60p-13f),
+    (float2)(0x1.5c0000p-3f, 0x1.a39fbcp-19f),
+    (float2)(0x1.700000p-3f, 0x1.d0b53ap-13f),
+    (float2)(0x1.848000p-3f, 0x1.0af40ap-13f),
+    (float2)(0x1.988000p-3f, 0x1.b741dep-13f),
+    (float2)(0x1.ac8000p-3f, 0x1.d78b6cp-13f),
+    (float2)(0x1.c08000p-3f, 0x1.6db376p-13f),
+    (float2)(0x1.d48000p-3f, 0x1.ee4c32p-15f),
+    (float2)(0x1.e80000p-3f, 0x1.02f9d2p-13f),
+    (float2)(0x1.fb8000p-3f, 0x1.05ae40p-13f),
+    (float2)(0x1.078000p-2f, 0x1.0adbb0p-14f),
+    (float2)(0x1.110000p-2f, 0x1.83ed68p-13f),
+    (float2)(0x1.1a8000p-2f, 0x1.016ca4p-12f),
+    (float2)(0x1.240000p-2f, 0x1.01eac2p-12f),
+    (float2)(0x1.2d8000p-2f, 0x1.887e26p-13f),
+    (float2)(0x1.370000p-2f, 0x1.24cea4p-14f),
+    (float2)(0x1.400000p-2f, 0x1.918ec6p-12f),
+    (float2)(0x1.498000p-2f, 0x1.3c25e6p-13f),
+    (float2)(0x1.528000p-2f, 0x1.6f7f12p-12f),
+    (float2)(0x1.5c0000p-2f, 0x1.a39fbcp-18f),
+    (float2)(0x1.650000p-2f, 0x1.8fe466p-14f),
+    (float2)(0x1.6e0000p-2f, 0x1.10e6cep-13f),
+    (float2)(0x1.770000p-2f, 0x1.d2ba7ep-14f),
+    (float2)(0x1.800000p-2f, 0x1.4ac62cp-15f),
+    (float2)(0x1.888000p-2f, 0x1.a71cb8p-12f),
+    (float2)(0x1.918000p-2f, 0x1.dd448ep-13f),
+    (float2)(0x1.9a8000p-2f, 0x1.1c8f10p-21f),
+    (float2)(0x1.a30000p-2f, 0x1.bb053ep-13f),
+    (float2)(0x1.ab8000p-2f, 0x1.861e5ep-12f),
+    (float2)(0x1.b40000p-2f, 0x1.fafdcep-12f),
+    (float2)(0x1.bd0000p-2f, 0x1.e5d3cep-15f),
+    (float2)(0x1.c58000p-2f, 0x1.2fad28p-14f),
+    (float2)(0x1.ce0000p-2f, 0x1.492474p-15f),
+    (float2)(0x1.d60000p-2f, 0x1.d4f80cp-12f),
+    (float2)(0x1.de8000p-2f, 0x1.4ff510p-12f),
+    (float2)(0x1.e70000p-2f, 0x1.3550f2p-13f),
+    (float2)(0x1.ef0000p-2f, 0x1.b59ccap-12f),
+    (float2)(0x1.f78000p-2f, 0x1.42b464p-13f),
+    (float2)(0x1.ff8000p-2f, 0x1.5e66a0p-12f),
+    (float2)(0x1.038000p-1f, 0x1.f6a2e4p-11f),
+    (float2)(0x1.080000p-1f, 0x1.39e4fep-14f),
+    (float2)(0x1.0c0000p-1f, 0x1.0500d6p-13f),
+    (float2)(0x1.100000p-1f, 0x1.13b152p-13f),
+    (float2)(0x1.140000p-1f, 0x1.93f542p-14f),
+    (float2)(0x1.180000p-1f, 0x1.467b94p-16f),
+    (float2)(0x1.1b8000p-1f, 0x1.cc47a4p-11f),
+    (float2)(0x1.1f8000p-1f, 0x1.78f4c2p-11f),
+    (float2)(0x1.238000p-1f, 0x1.107508p-11f),
+    (float2)(0x1.278000p-1f, 0x1.2602c2p-12f),
+    (float2)(0x1.2b8000p-1f, 0x1.a39fbcp-20f),
+    (float2)(0x1.2f0000p-1f, 0x1.5a1d7ap-11f),
+    (float2)(0x1.330000p-1f, 0x1.3e355ap-12f),
+    (float2)(0x1.368000p-1f, 0x1.cffedap-11f),
+    (float2)(0x1.3a8000p-1f, 0x1.d9fd50p-12f),
+    (float2)(0x1.3e0000p-1f, 0x1.f64de6p-11f),
+    (float2)(0x1.420000p-1f, 0x1.d83f4cp-12f),
+    (float2)(0x1.458000p-1f, 0x1.cea628p-11f),
+    (float2)(0x1.498000p-1f, 0x1.3c25e6p-12f),
+    (float2)(0x1.4d0000p-1f, 0x1.5a96ccp-11f),
+    (float2)(0x1.510000p-1f, 0x1.18708ap-17f),
+    (float2)(0x1.548000p-1f, 0x1.374652p-12f),
+    (float2)(0x1.580000p-1f, 0x1.2089a6p-11f),
+    (float2)(0x1.5b8000p-1f, 0x1.93432cp-11f),
+    (float2)(0x1.5f0000p-1f, 0x1.f3fd06p-11f),
+    (float2)(0x1.630000p-1f, 0x1.0b8f54p-13f),
+    (float2)(0x1.668000p-1f, 0x1.004722p-12f),
+    (float2)(0x1.6a0000p-1f, 0x1.57cf2cp-12f),
+    (float2)(0x1.6d8000p-1f, 0x1.8cb53ap-12f),
+    (float2)(0x1.710000p-1f, 0x1.9f4d8ap-12f),
+    (float2)(0x1.748000p-1f, 0x1.8feb26p-12f),
+    (float2)(0x1.780000p-1f, 0x1.5edfeep-12f),
+    (float2)(0x1.7b8000p-1f, 0x1.0c7c9ap-12f),
+    (float2)(0x1.7f0000p-1f, 0x1.322182p-13f),
+    (float2)(0x1.828000p-1f, 0x1.3ab7cep-18f),
+    (float2)(0x1.858000p-1f, 0x1.a82c2cp-11f),
+    (float2)(0x1.890000p-1f, 0x1.3dd2c0p-11f),
+    (float2)(0x1.8c8000p-1f, 0x1.871da4p-12f),
+    (float2)(0x1.900000p-1f, 0x1.cc2c00p-14f),
+    (float2)(0x1.930000p-1f, 0x1.9fdb68p-11f),
+    (float2)(0x1.968000p-1f, 0x1.ed6956p-12f),
+    (float2)(0x1.9a0000p-1f, 0x1.f1a760p-14f),
+    (float2)(0x1.9d0000p-1f, 0x1.767f54p-11f),
+    (float2)(0x1.a08000p-1f, 0x1.3f6d26p-12f),
+    (float2)(0x1.a38000p-1f, 0x1.b9fce2p-11f),
+    (float2)(0x1.a70000p-1f, 0x1.8ae816p-12f),
+    (float2)(0x1.aa0000p-1f, 0x1.c23d60p-11f),
+    (float2)(0x1.ad8000p-1f, 0x1.60f388p-12f),
+    (float2)(0x1.b08000p-1f, 0x1.9049aep-11f),
+    (float2)(0x1.b40000p-1f, 0x1.8734a8p-13f),
+    (float2)(0x1.b70000p-1f, 0x1.2523d4p-11f),
+    (float2)(0x1.ba0000p-1f, 0x1.da6ce6p-11f),
+    (float2)(0x1.bd8000p-1f, 0x1.038e62p-12f),
+    (float2)(0x1.c08000p-1f, 0x1.1b511ep-11f),
+    (float2)(0x1.c38000p-1f, 0x1.a728b8p-11f),
+    (float2)(0x1.c70000p-1f, 0x1.2b5d22p-14f),
+    (float2)(0x1.ca0000p-1f, 0x1.2c6e54p-12f),
+    (float2)(0x1.cd0000p-1f, 0x1.f35064p-12f),
+    (float2)(0x1.d00000p-1f, 0x1.4fdb48p-11f),
+    (float2)(0x1.d30000p-1f, 0x1.98ec9ep-11f),
+    (float2)(0x1.d60000p-1f, 0x1.d4f80cp-11f),
+    (float2)(0x1.d98000p-1f, 0x1.0643d6p-17f),
+    (float2)(0x1.dc8000p-1f, 0x1.33567ep-14f),
+    (float2)(0x1.df8000p-1f, 0x1.e0410cp-14f),
+    (float2)(0x1.e28000p-1f, 0x1.142e0ep-13f),
+    (float2)(0x1.e58000p-1f, 0x1.063c88p-13f),
+    (float2)(0x1.e88000p-1f, 0x1.8d66c4p-14f),
+    (float2)(0x1.eb8000p-1f, 0x1.57e32ap-15f),
+    (float2)(0x1.ee0000p-1f, 0x1.ed1c6cp-11f),
+    (float2)(0x1.f10000p-1f, 0x1.b8a076p-11f),
+    (float2)(0x1.f40000p-1f, 0x1.7822f2p-11f),
+    (float2)(0x1.f70000p-1f, 0x1.2bbc3ap-11f),
+    (float2)(0x1.fa0000p-1f, 0x1.a708bap-12f),
+    (float2)(0x1.fd0000p-1f, 0x1.be4c7ep-13f),
+    (float2)(0x1.000000p+0f, 0x0.000000p+0f)
+};
 
 DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
     224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
@@ -306,6 +437,7 @@ DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
 
 TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
 TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
+TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
 
 uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
     return *(__constant uint4 *)(PIBITS_TBL + idx);

Modified: libclc/trunk/generic/lib/math/tables.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/math/tables.h?rev=243131&r1=243130&r2=243131&view=diff
==============================================================================
--- libclc/trunk/generic/lib/math/tables.h (original)
+++ libclc/trunk/generic/lib/math/tables.h Fri Jul 24 13:07:12 2015
@@ -40,6 +40,7 @@
 
 TABLE_FUNCTION_DECL(float2, loge_tbl);
 TABLE_FUNCTION_DECL(float, log_inv_tbl);
+TABLE_FUNCTION_DECL(float2, log2_tbl);
 TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
 
 #ifdef cl_khr_fp64





More information about the cfe-commits mailing list