[Libclc-dev] [PATCH 1/1] log10: Use sw implementation from amd builtins

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sat Apr 21 12:58:09 PDT 2018


On Sat, Apr 21, 2018, 2:19 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:

> On Thu, 2018-04-19 at 21:27 -0500, Aaron Watry via Libclc-dev wrote:
> > Reproduced the failures before, and confirmed CTS passes log10 after, on my RX580
>
> It should also improve worst case ULP for the single precision log10
> test.
>
> > Tested-By: Aaron Watry <awatry at gmail.com>
>
> thanks. do you plan to take a closer look for review later? or should I
> go ahead and push this patch?
>

Just go ahead and push it.  As far as I'm concerned it's fine.  It builds
and passes CTS, so the only quibbles I'd likely find with it would be
stylistic given the table-based nature of the implementation.

>
> thanks,
> Jan
> >
> > --Aaron
> >
> > On Thu, Apr 19, 2018 at 12:06 PM, Jan Vesely via Libclc-dev
> > <libclc-dev at lists.llvm.org> wrote:
> > > Add missing table.
> > > Fixes log10D CTS on carrizo.
> > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > ---
> > >  generic/lib/math/log10.cl   |  39 ++++++++++++-
> > >  generic/lib/math/log10.inc  |  13 -----
> > >  generic/lib/math/log_base.h |   6 +-
> > > >  generic/lib/math/tables.cl  | 133 ++++++++++++++++++++++++++++++++++++++++++++
> > >  generic/lib/math/tables.h   |   1 +
> > >  5 files changed, 173 insertions(+), 19 deletions(-)
> > >  delete mode 100644 generic/lib/math/log10.inc
> > >
> > > diff --git a/generic/lib/math/log10.cl b/generic/lib/math/log10.cl
> > > index 8216f9b..35a53a1 100644
> > > --- a/generic/lib/math/log10.cl
> > > +++ b/generic/lib/math/log10.cl
> > > @@ -1,4 +1,39 @@
> > > +/*
> > > + * Copyright (c) 2015 Advanced Micro Devices, Inc.
> > > + *
> > > > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > > > + * of this software and associated documentation files (the "Software"), to deal
> > > > + * in the Software without restriction, including without limitation the rights
> > > > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> > > > + * copies of the Software, and to permit persons to whom the Software is
> > > > + * furnished to do so, subject to the following conditions:
> > > > + *
> > > > + * The above copyright notice and this permission notice shall be included in
> > > > + * all copies or substantial portions of the Software.
> > > > + *
> > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> > > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> > > > + * THE SOFTWARE.
> > > + */
> > > +
> > >  #include <clc/clc.h>
> > > +#include "../clcmacro.h"
> > > +#include "tables.h"
> > > +
> > > +#ifdef cl_khr_fp64
> > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > +#endif // cl_khr_fp64
> > > +
> > > +#define COMPILING_LOG10
> > > +#include "log_base.h"
> > > +#undef COMPILING_LOG10
> > > +
> > > +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float);
> > >
> > > -#define __CLC_BODY <log10.inc>
> > > -#include <clc/math/gentype.inc>
> > > +#ifdef cl_khr_fp64
> > > +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double);
> > > +#endif // cl_khr_fp64
> > > diff --git a/generic/lib/math/log10.inc b/generic/lib/math/log10.inc
> > > deleted file mode 100644
> > > index 423308a..0000000
> > > --- a/generic/lib/math/log10.inc
> > > +++ /dev/null
> > > @@ -1,13 +0,0 @@
> > > -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) {
> > > -  // log10(x) = log2(x) / log2(10)
> > > -  // 1 / log2(10) = 0.30102999566 = log10(2)
> > > -  // SP representation is 0.30103 (0x1.344136p-2)
> > > > -  // DP representation is 0.301029995659999993762312442414(0x1.34413509E61D8p-2)
> > > -#if __CLC_FPSIZE == 32
> > > -  return log2(val) * 0x1.344136p-2f;
> > > -#elif __CLC_FPSIZE == 64
> > > -  return log2(val) * 0x1.34413509E61D8p-2;
> > > -#else
> > > -#error unknown _CLC_FPSIZE
> > > -#endif
> > > -}
> > > diff --git a/generic/lib/math/log_base.h b/generic/lib/math/log_base.h
> > > index bf2f82b..f5b6f1c 100644
> > > --- a/generic/lib/math/log_base.h
> > > +++ b/generic/lib/math/log_base.h
> > > @@ -92,14 +92,12 @@ log(float x)
> > >      const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375
> > >      const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
> > >  #elif defined(COMPILING_LOG10)
> > > -    USE_TABLE(float2, p_log, LOG10_TBL);
> > >      const float LOG10E = 0x1.bcb7b2p-2f;        // 0.43429448190325182
> > >      const float LOG10E_HEAD = 0x1.bc0000p-2f;   // 0.43359375
> > >      const float LOG10E_TAIL = 0x1.6f62a4p-11f;  // 0.0007007319
> > >      const float LOG10_2_HEAD = 0x1.340000p-2f;  // 0.30078125
> > >      const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
> > >  #else
> > > -    USE_TABLE(float2, p_log, LOGE_TBL);
> > >      const float LOG2_HEAD = 0x1.62e000p-1f;  // 0.693115234
> > >      const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
> > >  #endif
> > > @@ -158,11 +156,11 @@ log(float x)
> > >      z1 = tv.s0 + mf;
> > >      z2 = mad(poly, -LOG2E, tv.s1);
> > >  #elif defined(COMPILING_LOG10)
> > > -    float2 tv = p_log[indx];
> > > +    float2 tv = USE_TABLE(log10_tbl, indx);
> > >      z1 = mad(mf, LOG10_2_HEAD, tv.s0);
> > >      z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
> > >  #else
> > > -    float2 tv = p_log[indx];
> > > +    float2 tv = USE_TABLE(log_tbl, indx);
> > >      z1 = mad(mf, LOG2_HEAD, tv.s0);
> > >      z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
> > >  #endif
> > > diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
> > > index b72fddd..596487c 100644
> > > --- a/generic/lib/math/tables.cl
> > > +++ b/generic/lib/math/tables.cl
> > > @@ -552,6 +552,138 @@ DECLARE_TABLE(float2, LOG2_TBL, 129) = {
> > >      (float2)(0x1.000000p+0f, 0x0.000000p+0f)
> > >  };
> > >
> > > +DECLARE_TABLE(float2, LOG10_TBL, 129) = {
> > > +    (float2)(0x0.000000p+0f, 0x0.000000p+0f),
> > > +    (float2)(0x1.ba8000p-9f, 0x1.f51c88p-19f),
> > > +    (float2)(0x1.b90000p-8f, 0x1.1da93ep-18f),
> > > +    (float2)(0x1.498000p-7f, 0x1.8428a2p-18f),
> > > +    (float2)(0x1.b58000p-7f, 0x1.a423acp-17f),
> > > +    (float2)(0x1.108000p-6f, 0x1.41d422p-17f),
> > > +    (float2)(0x1.458000p-6f, 0x1.d3d6b2p-16f),
> > > +    (float2)(0x1.7a8000p-6f, 0x1.70f7cep-16f),
> > > +    (float2)(0x1.af0000p-6f, 0x1.7e4ac0p-16f),
> > > +    (float2)(0x1.e38000p-6f, 0x1.ab2f40p-24f),
> > > +    (float2)(0x1.0b8000p-5f, 0x1.00d40ap-16f),
> > > +    (float2)(0x1.250000p-5f, 0x1.40b03ep-15f),
> > > +    (float2)(0x1.3e8000p-5f, 0x1.446668p-15f),
> > > +    (float2)(0x1.580000p-5f, 0x1.1c7758p-16f),
> > > +    (float2)(0x1.710000p-5f, 0x1.20d09ep-15f),
> > > +    (float2)(0x1.8a0000p-5f, 0x1.fd6f5cp-16f),
> > > +    (float2)(0x1.a30000p-5f, 0x1.53ac12p-18f),
> > > +    (float2)(0x1.bb8000p-5f, 0x1.4d02c6p-16f),
> > > +    (float2)(0x1.d40000p-5f, 0x1.d5164ep-17f),
> > > +    (float2)(0x1.ec0000p-5f, 0x1.991facp-15f),
> > > +    (float2)(0x1.020000p-4f, 0x1.0a307cp-14f),
> > > +    (float2)(0x1.0e0000p-4f, 0x1.e94ec0p-15f),
> > > +    (float2)(0x1.1a0000p-4f, 0x1.1a22a8p-15f),
> > > +    (float2)(0x1.258000p-4f, 0x1.d4857ap-14f),
> > > +    (float2)(0x1.318000p-4f, 0x1.982ae2p-15f),
> > > +    (float2)(0x1.3d0000p-4f, 0x1.74cd70p-14f),
> > > +    (float2)(0x1.488000p-4f, 0x1.cfb476p-14f),
> > > +    (float2)(0x1.540000p-4f, 0x1.ddcc64p-14f),
> > > +    (float2)(0x1.5f8000p-4f, 0x1.a01222p-14f),
> > > +    (float2)(0x1.6b0000p-4f, 0x1.177dbcp-14f),
> > > +    (float2)(0x1.768000p-4f, 0x1.140a24p-16f),
> > > +    (float2)(0x1.818000p-4f, 0x1.298f40p-14f),
> > > +    (float2)(0x1.8c8000p-4f, 0x1.c60e20p-14f),
> > > +    (float2)(0x1.980000p-4f, 0x1.b65052p-18f),
> > > +    (float2)(0x1.a30000p-4f, 0x1.53ac12p-17f),
> > > +    (float2)(0x1.ad8000p-4f, 0x1.f41d04p-14f),
> > > +    (float2)(0x1.b88000p-4f, 0x1.7934eap-14f),
> > > +    (float2)(0x1.c38000p-4f, 0x1.75252ep-15f),
> > > +    (float2)(0x1.ce0000p-4f, 0x1.b90790p-14f),
> > > +    (float2)(0x1.d90000p-4f, 0x1.d5866ap-16f),
> > > +    (float2)(0x1.e38000p-4f, 0x1.e0d586p-15f),
> > > +    (float2)(0x1.ee0000p-4f, 0x1.2ae984p-14f),
> > > +    (float2)(0x1.f88000p-4f, 0x1.25a0d0p-14f),
> > > +    (float2)(0x1.018000p-3f, 0x1.c2a064p-15f),
> > > +    (float2)(0x1.068000p-3f, 0x1.2f59e8p-13f),
> > > +    (float2)(0x1.0b8000p-3f, 0x1.cf424cp-13f),
> > > +    (float2)(0x1.110000p-3f, 0x1.42f080p-15f),
> > > +    (float2)(0x1.160000p-3f, 0x1.684156p-14f),
> > > +    (float2)(0x1.1b0000p-3f, 0x1.f38f64p-14f),
> > > +    (float2)(0x1.200000p-3f, 0x1.22077ap-13f),
> > > +    (float2)(0x1.250000p-3f, 0x1.2d34d6p-13f),
> > > +    (float2)(0x1.2a0000p-3f, 0x1.1ba328p-13f),
> > > +    (float2)(0x1.2f0000p-3f, 0x1.db48e2p-14f),
> > > +    (float2)(0x1.340000p-3f, 0x1.4712a0p-14f),
> > > +    (float2)(0x1.390000p-3f, 0x1.ed0894p-16f),
> > > +    (float2)(0x1.3d8000p-3f, 0x1.bc39b6p-13f),
> > > +    (float2)(0x1.428000p-3f, 0x1.1f9ff8p-13f),
> > > +    (float2)(0x1.478000p-3f, 0x1.a07d3ap-15f),
> > > +    (float2)(0x1.4c0000p-3f, 0x1.9601fap-13f),
> > > +    (float2)(0x1.510000p-3f, 0x1.532214p-14f),
> > > +    (float2)(0x1.558000p-3f, 0x1.a31462p-13f),
> > > +    (float2)(0x1.5a8000p-3f, 0x1.05a584p-14f),
> > > +    (float2)(0x1.5f0000p-3f, 0x1.4911c8p-13f),
> > > +    (float2)(0x1.638000p-3f, 0x1.f615fep-13f),
> > > +    (float2)(0x1.688000p-3f, 0x1.1445b0p-14f),
> > > +    (float2)(0x1.6d0000p-3f, 0x1.057abcp-13f),
> > > +    (float2)(0x1.718000p-3f, 0x1.685f0ap-13f),
> > > +    (float2)(0x1.760000p-3f, 0x1.b31022p-13f),
> > > +    (float2)(0x1.7a8000p-3f, 0x1.e5cd62p-13f),
> > > +    (float2)(0x1.7f8000p-3f, 0x1.aa6ca8p-22f),
> > > +    (float2)(0x1.840000p-3f, 0x1.1944bcp-19f),
> > > +    (float2)(0x1.880000p-3f, 0x1.f0b980p-13f),
> > > +    (float2)(0x1.8c8000p-3f, 0x1.c60e20p-13f),
> > > +    (float2)(0x1.910000p-3f, 0x1.849daep-13f),
> > > +    (float2)(0x1.958000p-3f, 0x1.2ca202p-13f),
> > > +    (float2)(0x1.9a0000p-3f, 0x1.7ca842p-14f),
> > > +    (float2)(0x1.9e8000p-3f, 0x1.cf6180p-16f),
> > > +    (float2)(0x1.a28000p-3f, 0x1.9fa186p-13f),
> > > +    (float2)(0x1.a70000p-3f, 0x1.df5554p-14f),
> > > +    (float2)(0x1.ab8000p-3f, 0x1.51eaccp-16f),
> > > +    (float2)(0x1.af8000p-3f, 0x1.4f8e88p-13f),
> > > +    (float2)(0x1.b40000p-3f, 0x1.7f49aap-15f),
> > > +    (float2)(0x1.b80000p-3f, 0x1.5b3c72p-13f),
> > > +    (float2)(0x1.bc8000p-3f, 0x1.07fd5cp-15f),
> > > +    (float2)(0x1.c08000p-3f, 0x1.144d18p-13f),
> > > +    (float2)(0x1.c48000p-3f, 0x1.d25700p-13f),
> > > +    (float2)(0x1.c90000p-3f, 0x1.f1369ep-15f),
> > > +    (float2)(0x1.cd0000p-3f, 0x1.1260fap-13f),
> > > +    (float2)(0x1.d10000p-3f, 0x1.94c038p-13f),
> > > +    (float2)(0x1.d58000p-3f, 0x1.ccfdb8p-20f),
> > > +    (float2)(0x1.d98000p-3f, 0x1.7c70dap-15f),
> > > +    (float2)(0x1.dd8000p-3f, 0x1.4ee87ap-14f),
> > > +    (float2)(0x1.e18000p-3f, 0x1.b99d86p-14f),
> > > +    (float2)(0x1.e58000p-3f, 0x1.feafc0p-14f),
> > > +    (float2)(0x1.e98000p-3f, 0x1.0f3b16p-13f),
> > > +    (float2)(0x1.ed8000p-3f, 0x1.0ca34cp-13f),
> > > +    (float2)(0x1.f18000p-3f, 0x1.ef75b2p-14f),
> > > +    (float2)(0x1.f58000p-3f, 0x1.a15704p-14f),
> > > +    (float2)(0x1.f98000p-3f, 0x1.2f3cfap-14f),
> > > +    (float2)(0x1.fd8000p-3f, 0x1.32f1dcp-15f),
> > > +    (float2)(0x1.008000p-2f, 0x1.f02d90p-13f),
> > > +    (float2)(0x1.028000p-2f, 0x1.821964p-13f),
> > > +    (float2)(0x1.048000p-2f, 0x1.02a708p-13f),
> > > +    (float2)(0x1.068000p-2f, 0x1.c7f450p-15f),
> > > +    (float2)(0x1.080000p-2f, 0x1.e820cap-12f),
> > > +    (float2)(0x1.0a0000p-2f, 0x1.8ecd14p-12f),
> > > +    (float2)(0x1.0c0000p-2f, 0x1.2d15f4p-12f),
> > > +    (float2)(0x1.0e0000p-2f, 0x1.861b72p-13f),
> > > +    (float2)(0x1.100000p-2f, 0x1.4319e6p-14f),
> > > +    (float2)(0x1.118000p-2f, 0x1.d6520ep-12f),
> > > +    (float2)(0x1.138000p-2f, 0x1.53c218p-12f),
> > > +    (float2)(0x1.158000p-2f, 0x1.925000p-13f),
> > > +    (float2)(0x1.178000p-2f, 0x1.b4a7a2p-15f),
> > > +    (float2)(0x1.190000p-2f, 0x1.9c19eep-12f),
> > > +    (float2)(0x1.1b0000p-2f, 0x1.f38f64p-13f),
> > > +    (float2)(0x1.1d0000p-2f, 0x1.3ebb32p-14f),
> > > +    (float2)(0x1.1e8000p-2f, 0x1.9ddf96p-12f),
> > > +    (float2)(0x1.208000p-2f, 0x1.c8d472p-13f),
> > > +    (float2)(0x1.228000p-2f, 0x1.1af536p-15f),
> > > +    (float2)(0x1.240000p-2f, 0x1.5acca0p-12f),
> > > +    (float2)(0x1.260000p-2f, 0x1.158770p-13f),
> > > +    (float2)(0x1.278000p-2f, 0x1.b35350p-12f),
> > > +    (float2)(0x1.298000p-2f, 0x1.a91532p-13f),
> > > +    (float2)(0x1.2b0000p-2f, 0x1.ee7896p-12f),
> > > +    (float2)(0x1.2d0000p-2f, 0x1.012c1cp-12f),
> > > +    (float2)(0x1.2f0000p-2f, 0x1.967ab4p-17f),
> > > +    (float2)(0x1.308000p-2f, 0x1.111e3cp-12f),
> > > +    (float2)(0x1.328000p-2f, 0x1.cf340ep-17f),
> > > +    (float2)(0x1.340000p-2f, 0x1.04d426p-12f),
> > > +};
> > > +
> > >  DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
> > >      224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
> > >      169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
> > > @@ -880,6 +1012,7 @@ TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
> > >  TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
> > >  TABLE_FUNCTION(float2, LOG_INV_TBL_EP, log_inv_tbl_ep);
> > >  TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
> > > +TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
> > >
> > >  uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
> > >      return *(__constant uint4 *)(PIBITS_TBL + idx);
> > > diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
> > > index 8e1d773..8045242 100644
> > > --- a/generic/lib/math/tables.h
> > > +++ b/generic/lib/math/tables.h
> > > @@ -42,6 +42,7 @@ TABLE_FUNCTION_DECL(float2, loge_tbl);
> > >  TABLE_FUNCTION_DECL(float, log_inv_tbl);
> > >  TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep);
> > >  TABLE_FUNCTION_DECL(float2, log2_tbl);
> > > +TABLE_FUNCTION_DECL(float2, log10_tbl);
> > >  TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
> > >  TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
> > >  TABLE_FUNCTION_DECL(float2, cbrt_tbl);
> > > --
> > > 2.14.3
> > >
> > > _______________________________________________
> > > Libclc-dev mailing list
> > > Libclc-dev at lists.llvm.org
> > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
> >
> > _______________________________________________
> > Libclc-dev mailing list
> > Libclc-dev at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
>
> --
> Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20180421/2a1a1222/attachment-0001.html>


More information about the Libclc-dev mailing list