[Libclc-dev] [PATCH 1/1] log10: Use sw implementation from amd builtins
Jan Vesely via Libclc-dev
libclc-dev at lists.llvm.org
Mon Apr 23 11:16:20 PDT 2018
On Sat, 2018-04-21 at 19:58 +0000, Aaron Watry via Libclc-dev wrote:
> On Sat, Apr 21, 2018, 2:19 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:
>
> > On Thu, 2018-04-19 at 21:27 -0500, Aaron Watry via Libclc-dev wrote:
> > > Reproduced the failures before, confirms CTS passes log10 after on my
> >
> > RX580
> >
> > It should also improve worst case ULP for the single precision log10
> > test.
> >
> > > Tested-By: Aaron Watry <awatry at gmail.com>
> >
> > thanks. do you plan to take a closer look for review later? or should I
> > go ahead and push this patch?
> >
>
> Just go ahead and push it. As far as I'm concerned it's fine. It builds
> and passes CTS, so the only quibbles I'd likely find with it would be
> stylistic given the table-based nature of the implementation.
Can I consider this an Acked-by? I don't want to set a precedent of
unreviewed/unacked commits now that we are close to clc 1.1
completion.
thanks,
Jan
>
> >
> > thanks,
> > Jan
> > >
> > > --Aaron
> > >
> > > On Thu, Apr 19, 2018 at 12:06 PM, Jan Vesely via Libclc-dev
> > > <libclc-dev at lists.llvm.org> wrote:
> > > > Add missing table.
> > > > Fixes log10D CTS on carrizo.
> > > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > > ---
> > > > generic/lib/math/log10.cl | 39 ++++++++++++-
> > > > generic/lib/math/log10.inc | 13 -----
> > > > generic/lib/math/log_base.h | 6 +-
> > > > generic/lib/math/tables.cl | 133
> >
> > ++++++++++++++++++++++++++++++++++++++++++++
> > > > generic/lib/math/tables.h | 1 +
> > > > 5 files changed, 173 insertions(+), 19 deletions(-)
> > > > delete mode 100644 generic/lib/math/log10.inc
> > > >
> > > > diff --git a/generic/lib/math/log10.cl b/generic/lib/math/log10.cl
> > > > index 8216f9b..35a53a1 100644
> > > > --- a/generic/lib/math/log10.cl
> > > > +++ b/generic/lib/math/log10.cl
> > > > @@ -1,4 +1,39 @@
> > > > +/*
> > > > + * Copyright (c) 2015 Advanced Micro Devices, Inc.
> > > > + *
> > > > + * Permission is hereby granted, free of charge, to any person
> >
> > obtaining a copy
> > > > + * of this software and associated documentation files (the
> >
> > "Software"), to deal
> > > > + * in the Software without restriction, including without limitation
> >
> > the rights
> > > > + * to use, copy, modify, merge, publish, distribute, sublicense,
> >
> > and/or sell
> > > > + * copies of the Software, and to permit persons to whom the Software
> >
> > is
> > > > + * furnished to do so, subject to the following conditions:
> > > > + *
> > > > + * The above copyright notice and this permission notice shall be
> >
> > included in
> > > > + * all copies or substantial portions of the Software.
> > > > + *
> > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> >
> > EXPRESS OR
> > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> >
> > MERCHANTABILITY,
> > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> >
> > SHALL THE
> > > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> >
> > OTHER
> > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> >
> > ARISING FROM,
> > > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> >
> > DEALINGS IN
> > > > + * THE SOFTWARE.
> > > > + */
> > > > +
> > > > #include <clc/clc.h>
> > > > +#include "../clcmacro.h"
> > > > +#include "tables.h"
> > > > +
> > > > +#ifdef cl_khr_fp64
> > > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > > +#endif // cl_khr_fp64
> > > > +
> > > > +#define COMPILING_LOG10
> > > > +#include "log_base.h"
> > > > +#undef COMPILING_LOG10
> > > > +
> > > > +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, log10, float);
> > > >
> > > > -#define __CLC_BODY <log10.inc>
> > > > -#include <clc/math/gentype.inc>
> > > > +#ifdef cl_khr_fp64
> > > > +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, log10, double);
> > > > +#endif // cl_khr_fp64
> > > > diff --git a/generic/lib/math/log10.inc b/generic/lib/math/log10.inc
> > > > deleted file mode 100644
> > > > index 423308a..0000000
> > > > --- a/generic/lib/math/log10.inc
> > > > +++ /dev/null
> > > > @@ -1,13 +0,0 @@
> > > > -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE log10(__CLC_GENTYPE val) {
> > > > - // log10(x) = log2(x) / log2(10)
> > > > - // 1 / log2(10) = 0.30102999566 = log10(2)
> > > > - // SP representation is 0.30103 (0x1.344136p-2)
> > > > - // DP representation is
> >
> > 0.301029995659999993762312442414(0x1.34413509E61D8p-2)
> > > > -#if __CLC_FPSIZE == 32
> > > > - return log2(val) * 0x1.344136p-2f;
> > > > -#elif __CLC_FPSIZE == 64
> > > > - return log2(val) * 0x1.34413509E61D8p-2;
> > > > -#else
> > > > -#error unknown _CLC_FPSIZE
> > > > -#endif
> > > > -}
> > > > diff --git a/generic/lib/math/log_base.h b/generic/lib/math/log_base.h
> > > > index bf2f82b..f5b6f1c 100644
> > > > --- a/generic/lib/math/log_base.h
> > > > +++ b/generic/lib/math/log_base.h
> > > > @@ -92,14 +92,12 @@ log(float x)
> > > > const float LOG2E_HEAD = 0x1.700000p+0f; // 1.4375
> > > > const float LOG2E_TAIL = 0x1.547652p-8f; // 0.00519504072
> > > > #elif defined(COMPILING_LOG10)
> > > > - USE_TABLE(float2, p_log, LOG10_TBL);
> > > > const float LOG10E = 0x1.bcb7b2p-2f; // 0.43429448190325182
> > > > const float LOG10E_HEAD = 0x1.bc0000p-2f; // 0.43359375
> > > > const float LOG10E_TAIL = 0x1.6f62a4p-11f; // 0.0007007319
> > > > const float LOG10_2_HEAD = 0x1.340000p-2f; // 0.30078125
> > > > const float LOG10_2_TAIL = 0x1.04d426p-12f; // 0.000248745637
> > > > #else
> > > > - USE_TABLE(float2, p_log, LOGE_TBL);
> > > > const float LOG2_HEAD = 0x1.62e000p-1f; // 0.693115234
> > > > const float LOG2_TAIL = 0x1.0bfbe8p-15f; // 0.0000319461833
> > > > #endif
> > > > @@ -158,11 +156,11 @@ log(float x)
> > > > z1 = tv.s0 + mf;
> > > > z2 = mad(poly, -LOG2E, tv.s1);
> > > > #elif defined(COMPILING_LOG10)
> > > > - float2 tv = p_log[indx];
> > > > + float2 tv = USE_TABLE(log10_tbl, indx);
> > > > z1 = mad(mf, LOG10_2_HEAD, tv.s0);
> > > > z2 = mad(poly, -LOG10E, mf*LOG10_2_TAIL) + tv.s1;
> > > > #else
> > > > - float2 tv = p_log[indx];
> > > > + float2 tv = USE_TABLE(log_tbl, indx);
> > > > z1 = mad(mf, LOG2_HEAD, tv.s0);
> > > > z2 = mad(mf, LOG2_TAIL, -poly) + tv.s1;
> > > > #endif
> > > > diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
> > > > index b72fddd..596487c 100644
> > > > --- a/generic/lib/math/tables.cl
> > > > +++ b/generic/lib/math/tables.cl
> > > > @@ -552,6 +552,138 @@ DECLARE_TABLE(float2, LOG2_TBL, 129) = {
> > > > (float2)(0x1.000000p+0f, 0x0.000000p+0f)
> > > > };
> > > >
> > > > +DECLARE_TABLE(float2, LOG10_TBL, 129) = {
> > > > + (float2)(0x0.000000p+0f, 0x0.000000p+0f),
> > > > + (float2)(0x1.ba8000p-9f, 0x1.f51c88p-19f),
> > > > + (float2)(0x1.b90000p-8f, 0x1.1da93ep-18f),
> > > > + (float2)(0x1.498000p-7f, 0x1.8428a2p-18f),
> > > > + (float2)(0x1.b58000p-7f, 0x1.a423acp-17f),
> > > > + (float2)(0x1.108000p-6f, 0x1.41d422p-17f),
> > > > + (float2)(0x1.458000p-6f, 0x1.d3d6b2p-16f),
> > > > + (float2)(0x1.7a8000p-6f, 0x1.70f7cep-16f),
> > > > + (float2)(0x1.af0000p-6f, 0x1.7e4ac0p-16f),
> > > > + (float2)(0x1.e38000p-6f, 0x1.ab2f40p-24f),
> > > > + (float2)(0x1.0b8000p-5f, 0x1.00d40ap-16f),
> > > > + (float2)(0x1.250000p-5f, 0x1.40b03ep-15f),
> > > > + (float2)(0x1.3e8000p-5f, 0x1.446668p-15f),
> > > > + (float2)(0x1.580000p-5f, 0x1.1c7758p-16f),
> > > > + (float2)(0x1.710000p-5f, 0x1.20d09ep-15f),
> > > > + (float2)(0x1.8a0000p-5f, 0x1.fd6f5cp-16f),
> > > > + (float2)(0x1.a30000p-5f, 0x1.53ac12p-18f),
> > > > + (float2)(0x1.bb8000p-5f, 0x1.4d02c6p-16f),
> > > > + (float2)(0x1.d40000p-5f, 0x1.d5164ep-17f),
> > > > + (float2)(0x1.ec0000p-5f, 0x1.991facp-15f),
> > > > + (float2)(0x1.020000p-4f, 0x1.0a307cp-14f),
> > > > + (float2)(0x1.0e0000p-4f, 0x1.e94ec0p-15f),
> > > > + (float2)(0x1.1a0000p-4f, 0x1.1a22a8p-15f),
> > > > + (float2)(0x1.258000p-4f, 0x1.d4857ap-14f),
> > > > + (float2)(0x1.318000p-4f, 0x1.982ae2p-15f),
> > > > + (float2)(0x1.3d0000p-4f, 0x1.74cd70p-14f),
> > > > + (float2)(0x1.488000p-4f, 0x1.cfb476p-14f),
> > > > + (float2)(0x1.540000p-4f, 0x1.ddcc64p-14f),
> > > > + (float2)(0x1.5f8000p-4f, 0x1.a01222p-14f),
> > > > + (float2)(0x1.6b0000p-4f, 0x1.177dbcp-14f),
> > > > + (float2)(0x1.768000p-4f, 0x1.140a24p-16f),
> > > > + (float2)(0x1.818000p-4f, 0x1.298f40p-14f),
> > > > + (float2)(0x1.8c8000p-4f, 0x1.c60e20p-14f),
> > > > + (float2)(0x1.980000p-4f, 0x1.b65052p-18f),
> > > > + (float2)(0x1.a30000p-4f, 0x1.53ac12p-17f),
> > > > + (float2)(0x1.ad8000p-4f, 0x1.f41d04p-14f),
> > > > + (float2)(0x1.b88000p-4f, 0x1.7934eap-14f),
> > > > + (float2)(0x1.c38000p-4f, 0x1.75252ep-15f),
> > > > + (float2)(0x1.ce0000p-4f, 0x1.b90790p-14f),
> > > > + (float2)(0x1.d90000p-4f, 0x1.d5866ap-16f),
> > > > + (float2)(0x1.e38000p-4f, 0x1.e0d586p-15f),
> > > > + (float2)(0x1.ee0000p-4f, 0x1.2ae984p-14f),
> > > > + (float2)(0x1.f88000p-4f, 0x1.25a0d0p-14f),
> > > > + (float2)(0x1.018000p-3f, 0x1.c2a064p-15f),
> > > > + (float2)(0x1.068000p-3f, 0x1.2f59e8p-13f),
> > > > + (float2)(0x1.0b8000p-3f, 0x1.cf424cp-13f),
> > > > + (float2)(0x1.110000p-3f, 0x1.42f080p-15f),
> > > > + (float2)(0x1.160000p-3f, 0x1.684156p-14f),
> > > > + (float2)(0x1.1b0000p-3f, 0x1.f38f64p-14f),
> > > > + (float2)(0x1.200000p-3f, 0x1.22077ap-13f),
> > > > + (float2)(0x1.250000p-3f, 0x1.2d34d6p-13f),
> > > > + (float2)(0x1.2a0000p-3f, 0x1.1ba328p-13f),
> > > > + (float2)(0x1.2f0000p-3f, 0x1.db48e2p-14f),
> > > > + (float2)(0x1.340000p-3f, 0x1.4712a0p-14f),
> > > > + (float2)(0x1.390000p-3f, 0x1.ed0894p-16f),
> > > > + (float2)(0x1.3d8000p-3f, 0x1.bc39b6p-13f),
> > > > + (float2)(0x1.428000p-3f, 0x1.1f9ff8p-13f),
> > > > + (float2)(0x1.478000p-3f, 0x1.a07d3ap-15f),
> > > > + (float2)(0x1.4c0000p-3f, 0x1.9601fap-13f),
> > > > + (float2)(0x1.510000p-3f, 0x1.532214p-14f),
> > > > + (float2)(0x1.558000p-3f, 0x1.a31462p-13f),
> > > > + (float2)(0x1.5a8000p-3f, 0x1.05a584p-14f),
> > > > + (float2)(0x1.5f0000p-3f, 0x1.4911c8p-13f),
> > > > + (float2)(0x1.638000p-3f, 0x1.f615fep-13f),
> > > > + (float2)(0x1.688000p-3f, 0x1.1445b0p-14f),
> > > > + (float2)(0x1.6d0000p-3f, 0x1.057abcp-13f),
> > > > + (float2)(0x1.718000p-3f, 0x1.685f0ap-13f),
> > > > + (float2)(0x1.760000p-3f, 0x1.b31022p-13f),
> > > > + (float2)(0x1.7a8000p-3f, 0x1.e5cd62p-13f),
> > > > + (float2)(0x1.7f8000p-3f, 0x1.aa6ca8p-22f),
> > > > + (float2)(0x1.840000p-3f, 0x1.1944bcp-19f),
> > > > + (float2)(0x1.880000p-3f, 0x1.f0b980p-13f),
> > > > + (float2)(0x1.8c8000p-3f, 0x1.c60e20p-13f),
> > > > + (float2)(0x1.910000p-3f, 0x1.849daep-13f),
> > > > + (float2)(0x1.958000p-3f, 0x1.2ca202p-13f),
> > > > + (float2)(0x1.9a0000p-3f, 0x1.7ca842p-14f),
> > > > + (float2)(0x1.9e8000p-3f, 0x1.cf6180p-16f),
> > > > + (float2)(0x1.a28000p-3f, 0x1.9fa186p-13f),
> > > > + (float2)(0x1.a70000p-3f, 0x1.df5554p-14f),
> > > > + (float2)(0x1.ab8000p-3f, 0x1.51eaccp-16f),
> > > > + (float2)(0x1.af8000p-3f, 0x1.4f8e88p-13f),
> > > > + (float2)(0x1.b40000p-3f, 0x1.7f49aap-15f),
> > > > + (float2)(0x1.b80000p-3f, 0x1.5b3c72p-13f),
> > > > + (float2)(0x1.bc8000p-3f, 0x1.07fd5cp-15f),
> > > > + (float2)(0x1.c08000p-3f, 0x1.144d18p-13f),
> > > > + (float2)(0x1.c48000p-3f, 0x1.d25700p-13f),
> > > > + (float2)(0x1.c90000p-3f, 0x1.f1369ep-15f),
> > > > + (float2)(0x1.cd0000p-3f, 0x1.1260fap-13f),
> > > > + (float2)(0x1.d10000p-3f, 0x1.94c038p-13f),
> > > > + (float2)(0x1.d58000p-3f, 0x1.ccfdb8p-20f),
> > > > + (float2)(0x1.d98000p-3f, 0x1.7c70dap-15f),
> > > > + (float2)(0x1.dd8000p-3f, 0x1.4ee87ap-14f),
> > > > + (float2)(0x1.e18000p-3f, 0x1.b99d86p-14f),
> > > > + (float2)(0x1.e58000p-3f, 0x1.feafc0p-14f),
> > > > + (float2)(0x1.e98000p-3f, 0x1.0f3b16p-13f),
> > > > + (float2)(0x1.ed8000p-3f, 0x1.0ca34cp-13f),
> > > > + (float2)(0x1.f18000p-3f, 0x1.ef75b2p-14f),
> > > > + (float2)(0x1.f58000p-3f, 0x1.a15704p-14f),
> > > > + (float2)(0x1.f98000p-3f, 0x1.2f3cfap-14f),
> > > > + (float2)(0x1.fd8000p-3f, 0x1.32f1dcp-15f),
> > > > + (float2)(0x1.008000p-2f, 0x1.f02d90p-13f),
> > > > + (float2)(0x1.028000p-2f, 0x1.821964p-13f),
> > > > + (float2)(0x1.048000p-2f, 0x1.02a708p-13f),
> > > > + (float2)(0x1.068000p-2f, 0x1.c7f450p-15f),
> > > > + (float2)(0x1.080000p-2f, 0x1.e820cap-12f),
> > > > + (float2)(0x1.0a0000p-2f, 0x1.8ecd14p-12f),
> > > > + (float2)(0x1.0c0000p-2f, 0x1.2d15f4p-12f),
> > > > + (float2)(0x1.0e0000p-2f, 0x1.861b72p-13f),
> > > > + (float2)(0x1.100000p-2f, 0x1.4319e6p-14f),
> > > > + (float2)(0x1.118000p-2f, 0x1.d6520ep-12f),
> > > > + (float2)(0x1.138000p-2f, 0x1.53c218p-12f),
> > > > + (float2)(0x1.158000p-2f, 0x1.925000p-13f),
> > > > + (float2)(0x1.178000p-2f, 0x1.b4a7a2p-15f),
> > > > + (float2)(0x1.190000p-2f, 0x1.9c19eep-12f),
> > > > + (float2)(0x1.1b0000p-2f, 0x1.f38f64p-13f),
> > > > + (float2)(0x1.1d0000p-2f, 0x1.3ebb32p-14f),
> > > > + (float2)(0x1.1e8000p-2f, 0x1.9ddf96p-12f),
> > > > + (float2)(0x1.208000p-2f, 0x1.c8d472p-13f),
> > > > + (float2)(0x1.228000p-2f, 0x1.1af536p-15f),
> > > > + (float2)(0x1.240000p-2f, 0x1.5acca0p-12f),
> > > > + (float2)(0x1.260000p-2f, 0x1.158770p-13f),
> > > > + (float2)(0x1.278000p-2f, 0x1.b35350p-12f),
> > > > + (float2)(0x1.298000p-2f, 0x1.a91532p-13f),
> > > > + (float2)(0x1.2b0000p-2f, 0x1.ee7896p-12f),
> > > > + (float2)(0x1.2d0000p-2f, 0x1.012c1cp-12f),
> > > > + (float2)(0x1.2f0000p-2f, 0x1.967ab4p-17f),
> > > > + (float2)(0x1.308000p-2f, 0x1.111e3cp-12f),
> > > > + (float2)(0x1.328000p-2f, 0x1.cf340ep-17f),
> > > > + (float2)(0x1.340000p-2f, 0x1.04d426p-12f),
> > > > +};
> > > > +
> > > > DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
> > > > 224, 241, 27, 193, 12, 88, 33, 116, 53, 126, 196, 126, 237, 175,
> > > > 169, 75, 74, 41, 222, 231, 28, 244, 236, 197, 151, 175, 31,
> > > > @@ -880,6 +1012,7 @@ TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
> > > > TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
> > > > TABLE_FUNCTION(float2, LOG_INV_TBL_EP, log_inv_tbl_ep);
> > > > TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
> > > > +TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
> > > >
> > > > uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
> > > > return *(__constant uint4 *)(PIBITS_TBL + idx);
> > > > diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
> > > > index 8e1d773..8045242 100644
> > > > --- a/generic/lib/math/tables.h
> > > > +++ b/generic/lib/math/tables.h
> > > > @@ -42,6 +42,7 @@ TABLE_FUNCTION_DECL(float2, loge_tbl);
> > > > TABLE_FUNCTION_DECL(float, log_inv_tbl);
> > > > TABLE_FUNCTION_DECL(float2, log_inv_tbl_ep);
> > > > TABLE_FUNCTION_DECL(float2, log2_tbl);
> > > > +TABLE_FUNCTION_DECL(float2, log10_tbl);
> > > > TABLE_FUNCTION_DECL(uint4, pibits_tbl);
> > > > TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
> > > > TABLE_FUNCTION_DECL(float2, cbrt_tbl);
> > > > --
> > > > 2.14.3
> > > >
> > > > _______________________________________________
> > > > Libclc-dev mailing list
> > > > Libclc-dev at lists.llvm.org
> > > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
> > >
> > > _______________________________________________
> > > Libclc-dev mailing list
> > > Libclc-dev at lists.llvm.org
> > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
> >
> > --
> > Jan Vesely <jan.vesely at rutgers.edu>
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20180423/1403e1e2/attachment.sig>
More information about the Libclc-dev
mailing list