[Libclc-dev] [PATCH 1/1] log10: Use sw implementation from amd builtins

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Thu Apr 19 19:27:50 PDT 2018


I reproduced the failures before applying this patch, and confirmed that the CTS log10 tests pass with it applied on my RX580.

Tested-By: Aaron Watry <awatry at gmail.com>

--Aaron

On Thu, Apr 19, 2018 at 12:06 PM, Jan Vesely via Libclc-dev
<libclc-dev at lists.llvm.org> wrote:
> Add missing table.
> Fixes log10D CTS on carrizo.
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  generic/lib/math/log10.cl   |  39 ++++++++++++-
>  generic/lib/math/log10.inc  |  13 -----
>  generic/lib/math/log_base.h |   6 +-
>  generic/lib/math/tables.cl  | 133 ++++++++++++++++++++++++++++++++++++++++++++
>  generic/lib/math/tables.h   |   1 +
>  5 files changed, 173 insertions(+), 19 deletions(-)
>  delete mode 100644 generic/lib/math/log10.inc
>
> diff --git a/generic/lib/math/log10.cl b/generic/lib/math/log10.cl
> index 8216f9b..35a53a1 100644
> --- a/generic/lib/math/log10.cl
> +++ b/generic/lib/math/log10.cl
> @@ -1,4 +1,39 @@
> +/*
> + * Copyright (c) 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
>  #include <clc/clc.h>
> +#include "../clcmacro.h"
> +#include "tables.h"
> +
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +#endif // cl_khr_fp64
> +
> +#define COMPILING_LOG10
> +#include "log_base.h"
> +#undef COMPILING_LOG10
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float);
>
> -#define __CLC_BODY <log10.inc>
> -#include <clc/math/gentype.inc>
> +#ifdef cl_khr_fp64
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double);
> +#endif // cl_khr_fp64
> diff --git a/generic/lib/math/log10.inc b/generic/lib/math/log10.inc
> deleted file mode 100644
> index 423308a..0000000
> --- a/generic/lib/math/log10.inc
> +++ /dev/null
> @@ -1,13 +0,0 @@
> -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) {
> -  // log10(x) = log2(x) / log2(10)
> -  // 1 / log2(10) = 0.30102999566 = log10(2)
> -  // SP representation is 0.30103 (0x1.344136p-2)
> -  // DP representation is 0.301029995659999993762312442414(0x1.34413509E61D8p-2)
> -#if __CLC_FPSIZE == 32
> -  return log2(val) * 0x1.344136p-2f;
> -#elif __CLC_FPSIZE == 64
> -  return log2(val) * 0x1.34413509E61D8p-2;
> -#else
> -#error unknown _CLC_FPSIZE
> -#endif
> -}
> diff --git a/generic/lib/math/log_base.h b/generic/lib/math/log_base.h
> index bf2f82b..f5b6f1c 100644
> --- a/generic/lib/math/log_base.h
> +++ b/generic/lib/math/log_base.h
> @@ -92,14 +92,12 @@ log(float x)
>      const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375
>      const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
>  #elif defined(COMPILING_LOG10)
> -    USE_TABLE(float2, p_log, LOG10_TBL);
>      const float LOG10E = 0x1.bcb7b2p-2f;        // 0.43429448190325182
>      const float LOG10E_HEAD = 0x1.bc0000p-2f;   // 0.43359375
>      const float LOG10E_TAIL = 0x1.6f62a4p-11f;  // 0.0007007319
>      const float LOG10_2_HEAD = 0x1.340000p-2f;  // 0.30078125
>      const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
>  #else
> -    USE_TABLE(float2, p_log, LOGE_TBL);
>      const float LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
>      const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
>  #endif
> @@ -158,11 +156,11 @@ log(float x)
>      z1 = tv.s0 + mf;
>      z2 = mad(poly, -LOG2E, tv.s1);
>  #elif defined(COMPILING_LOG10)
> -    float2 tv = p_log[indx];
> +    float2 tv = USE_TABLE(log10_tbl, indx);
>      z1 = mad(mf, LOG10_2_HEAD, tv.s0);
>      z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
>  #else
> -    float2 tv = p_log[indx];
> +    float2 tv = USE_TABLE(log_tbl, indx);
>      z1 = mad(mf, LOG2_HEAD, tv.s0);
>      z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
>  #endif
> diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
> index b72fddd..596487c 100644
> --- a/generic/lib/math/tables.cl
> +++ b/generic/lib/math/tables.cl
> @@ -552,6 +552,138 @@ DECLARE_TABLE(float2, LOG2_TBL, 129) = {
>      (float2)(0x1.000000p+0f, 0x0.000000p+0f)
>  };
>
> +DECLARE_TABLE(float2, LOG10_TBL, 129) = {
> +    (float2)(0x0.000000p+0f, 0x0.000000p+0f),
> +    (float2)(0x1.ba8000p-9f, 0x1.f51c88p-19f),
> +    (float2)(0x1.b90000p-8f, 0x1.1da93ep-18f),
> +    (float2)(0x1.498000p-7f, 0x1.8428a2p-18f),
> +    (float2)(0x1.b58000p-7f, 0x1.a423acp-17f),
> +    (float2)(0x1.108000p-6f, 0x1.41d422p-17f),
> +    (float2)(0x1.458000p-6f, 0x1.d3d6b2p-16f),
> +    (float2)(0x1.7a8000p-6f, 0x1.70f7cep-16f),
> +    (float2)(0x1.af0000p-6f, 0x1.7e4ac0p-16f),
> +    (float2)(0x1.e38000p-6f, 0x1.ab2f40p-24f),
> +    (float2)(0x1.0b8000p-5f, 0x1.00d40ap-16f),
> +    (float2)(0x1.250000p-5f, 0x1.40b03ep-15f),
> +    (float2)(0x1.3e8000p-5f, 0x1.446668p-15f),
> +    (float2)(0x1.580000p-5f, 0x1.1c7758p-16f),
> +    (float2)(0x1.710000p-5f, 0x1.20d09ep-15f),
> +    (float2)(0x1.8a0000p-5f, 0x1.fd6f5cp-16f),
> +    (float2)(0x1.a30000p-5f, 0x1.53ac12p-18f),
> +    (float2)(0x1.bb8000p-5f, 0x1.4d02c6p-16f),
> +    (float2)(0x1.d40000p-5f, 0x1.d5164ep-17f),
> +    (float2)(0x1.ec0000p-5f, 0x1.991facp-15f),
> +    (float2)(0x1.020000p-4f, 0x1.0a307cp-14f),
> +    (float2)(0x1.0e0000p-4f, 0x1.e94ec0p-15f),
> +    (float2)(0x1.1a0000p-4f, 0x1.1a22a8p-15f),
> +    (float2)(0x1.258000p-4f, 0x1.d4857ap-14f),
> +    (float2)(0x1.318000p-4f, 0x1.982ae2p-15f),
> +    (float2)(0x1.3d0000p-4f, 0x1.74cd70p-14f),
> +    (float2)(0x1.488000p-4f, 0x1.cfb476p-14f),
> +    (float2)(0x1.540000p-4f, 0x1.ddcc64p-14f),
> +    (float2)(0x1.5f8000p-4f, 0x1.a01222p-14f),
> +    (float2)(0x1.6b0000p-4f, 0x1.177dbcp-14f),
> +    (float2)(0x1.768000p-4f, 0x1.140a24p-16f),
> +    (float2)(0x1.818000p-4f, 0x1.298f40p-14f),
> +    (float2)(0x1.8c8000p-4f, 0x1.c60e20p-14f),
> +    (float2)(0x1.980000p-4f, 0x1.b65052p-18f),
> +    (float2)(0x1.a30000p-4f, 0x1.53ac12p-17f),
> +    (float2)(0x1.ad8000p-4f, 0x1.f41d04p-14f),
> +    (float2)(0x1.b88000p-4f, 0x1.7934eap-14f),
> +    (float2)(0x1.c38000p-4f, 0x1.75252ep-15f),
> +    (float2)(0x1.ce0000p-4f, 0x1.b90790p-14f),
> +    (float2)(0x1.d90000p-4f, 0x1.d5866ap-16f),
> +    (float2)(0x1.e38000p-4f, 0x1.e0d586p-15f),
> +    (float2)(0x1.ee0000p-4f, 0x1.2ae984p-14f),
> +    (float2)(0x1.f88000p-4f, 0x1.25a0d0p-14f),
> +    (float2)(0x1.018000p-3f, 0x1.c2a064p-15f),
> +    (float2)(0x1.068000p-3f, 0x1.2f59e8p-13f),
> +    (float2)(0x1.0b8000p-3f, 0x1.cf424cp-13f),
> +    (float2)(0x1.110000p-3f, 0x1.42f080p-15f),
> +    (float2)(0x1.160000p-3f, 0x1.684156p-14f),
> +    (float2)(0x1.1b0000p-3f, 0x1.f38f64p-14f),
> +    (float2)(0x1.200000p-3f, 0x1.22077ap-13f),
> +    (float2)(0x1.250000p-3f, 0x1.2d34d6p-13f),
> +    (float2)(0x1.2a0000p-3f, 0x1.1ba328p-13f),
> +    (float2)(0x1.2f0000p-3f, 0x1.db48e2p-14f),
> +    (float2)(0x1.340000p-3f, 0x1.4712a0p-14f),
> +    (float2)(0x1.390000p-3f, 0x1.ed0894p-16f),
> +    (float2)(0x1.3d8000p-3f, 0x1.bc39b6p-13f),
> +    (float2)(0x1.428000p-3f, 0x1.1f9ff8p-13f),
> +    (float2)(0x1.478000p-3f, 0x1.a07d3ap-15f),
> +    (float2)(0x1.4c0000p-3f, 0x1.9601fap-13f),
> +    (float2)(0x1.510000p-3f, 0x1.532214p-14f),
> +    (float2)(0x1.558000p-3f, 0x1.a31462p-13f),
> +    (float2)(0x1.5a8000p-3f, 0x1.05a584p-14f),
> +    (float2)(0x1.5f0000p-3f, 0x1.4911c8p-13f),
> +    (float2)(0x1.638000p-3f, 0x1.f615fep-13f),
> +    (float2)(0x1.688000p-3f, 0x1.1445b0p-14f),
> +    (float2)(0x1.6d0000p-3f, 0x1.057abcp-13f),
> +    (float2)(0x1.718000p-3f, 0x1.685f0ap-13f),
> +    (float2)(0x1.760000p-3f, 0x1.b31022p-13f),
> +    (float2)(0x1.7a8000p-3f, 0x1.e5cd62p-13f),
> +    (float2)(0x1.7f8000p-3f, 0x1.aa6ca8p-22f),
> +    (float2)(0x1.840000p-3f, 0x1.1944bcp-19f),
> +    (float2)(0x1.880000p-3f, 0x1.f0b980p-13f),
> +    (float2)(0x1.8c8000p-3f, 0x1.c60e20p-13f),
> +    (float2)(0x1.910000p-3f, 0x1.849daep-13f),
> +    (float2)(0x1.958000p-3f, 0x1.2ca202p-13f),
> +    (float2)(0x1.9a0000p-3f, 0x1.7ca842p-14f),
> +    (float2)(0x1.9e8000p-3f, 0x1.cf6180p-16f),
> +    (float2)(0x1.a28000p-3f, 0x1.9fa186p-13f),
> +    (float2)(0x1.a70000p-3f, 0x1.df5554p-14f),
> +    (float2)(0x1.ab8000p-3f, 0x1.51eaccp-16f),
> +    (float2)(0x1.af8000p-3f, 0x1.4f8e88p-13f),
> +    (float2)(0x1.b40000p-3f, 0x1.7f49aap-15f),
> +    (float2)(0x1.b80000p-3f, 0x1.5b3c72p-13f),
> +    (float2)(0x1.bc8000p-3f, 0x1.07fd5cp-15f),
> +    (float2)(0x1.c08000p-3f, 0x1.144d18p-13f),
> +    (float2)(0x1.c48000p-3f, 0x1.d25700p-13f),
> +    (float2)(0x1.c90000p-3f, 0x1.f1369ep-15f),
> +    (float2)(0x1.cd0000p-3f, 0x1.1260fap-13f),
> +    (float2)(0x1.d10000p-3f, 0x1.94c038p-13f),
> +    (float2)(0x1.d58000p-3f, 0x1.ccfdb8p-20f),
> +    (float2)(0x1.d98000p-3f, 0x1.7c70dap-15f),
> +    (float2)(0x1.dd8000p-3f, 0x1.4ee87ap-14f),
> +    (float2)(0x1.e18000p-3f, 0x1.b99d86p-14f),
> +    (float2)(0x1.e58000p-3f, 0x1.feafc0p-14f),
> +    (float2)(0x1.e98000p-3f, 0x1.0f3b16p-13f),
> +    (float2)(0x1.ed8000p-3f, 0x1.0ca34cp-13f),
> +    (float2)(0x1.f18000p-3f, 0x1.ef75b2p-14f),
> +    (float2)(0x1.f58000p-3f, 0x1.a15704p-14f),
> +    (float2)(0x1.f98000p-3f, 0x1.2f3cfap-14f),
> +    (float2)(0x1.fd8000p-3f, 0x1.32f1dcp-15f),
> +    (float2)(0x1.008000p-2f, 0x1.f02d90p-13f),
> +    (float2)(0x1.028000p-2f, 0x1.821964p-13f),
> +    (float2)(0x1.048000p-2f, 0x1.02a708p-13f),
> +    (float2)(0x1.068000p-2f, 0x1.c7f450p-15f),
> +    (float2)(0x1.080000p-2f, 0x1.e820cap-12f),
> +    (float2)(0x1.0a0000p-2f, 0x1.8ecd14p-12f),
> +    (float2)(0x1.0c0000p-2f, 0x1.2d15f4p-12f),
> +    (float2)(0x1.0e0000p-2f, 0x1.861b72p-13f),
> +    (float2)(0x1.100000p-2f, 0x1.4319e6p-14f),
> +    (float2)(0x1.118000p-2f, 0x1.d6520ep-12f),
> +    (float2)(0x1.138000p-2f, 0x1.53c218p-12f),
> +    (float2)(0x1.158000p-2f, 0x1.925000p-13f),
> +    (float2)(0x1.178000p-2f, 0x1.b4a7a2p-15f),
> +    (float2)(0x1.190000p-2f, 0x1.9c19eep-12f),
> +    (float2)(0x1.1b0000p-2f, 0x1.f38f64p-13f),
> +    (float2)(0x1.1d0000p-2f, 0x1.3ebb32p-14f),
> +    (float2)(0x1.1e8000p-2f, 0x1.9ddf96p-12f),
> +    (float2)(0x1.208000p-2f, 0x1.c8d472p-13f),
> +    (float2)(0x1.228000p-2f, 0x1.1af536p-15f),
> +    (float2)(0x1.240000p-2f, 0x1.5acca0p-12f),
> +    (float2)(0x1.260000p-2f, 0x1.158770p-13f),
> +    (float2)(0x1.278000p-2f, 0x1.b35350p-12f),
> +    (float2)(0x1.298000p-2f, 0x1.a91532p-13f),
> +    (float2)(0x1.2b0000p-2f, 0x1.ee7896p-12f),
> +    (float2)(0x1.2d0000p-2f, 0x1.012c1cp-12f),
> +    (float2)(0x1.2f0000p-2f, 0x1.967ab4p-17f),
> +    (float2)(0x1.308000p-2f, 0x1.111e3cp-12f),
> +    (float2)(0x1.328000p-2f, 0x1.cf340ep-17f),
> +    (float2)(0x1.340000p-2f, 0x1.04d426p-12f),
> +};
> +
>  DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
>      224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
>      169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
> @@ -880,6 +1012,7 @@ TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
>  TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
>  TABLE_FUNCTION(float2, LOG_INV_TBL_EP, log_inv_tbl_ep);
>  TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
> +TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
>
>  uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
>      return *(__constant uint4 *)(PIBITS_TBL + idx);
> diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
> index 8e1d773..8045242 100644
> --- a/generic/lib/math/tables.h
> +++ b/generic/lib/math/tables.h
> @@ -42,6 +42,7 @@ TABLE_FUNCTION_DECL(float2, loge_tbl);
>  TABLE_FUNCTION_DECL(float, log_inv_tbl);
>  TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep);
>  TABLE_FUNCTION_DECL(float2, log2_tbl);
> +TABLE_FUNCTION_DECL(float2, log10_tbl);
>  TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
>  TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
>  TABLE_FUNCTION_DECL(float2, cbrt_tbl);
> --
> 2.14.3
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev


More information about the Libclc-dev mailing list