[Libclc-dev] [PATCH 13/15] Use a more accurate implementation for exp
Jan Vesely
jan.vesely at rutgers.edu
Thu Apr 9 15:38:39 PDT 2015
I assume the original implementation fails the new piglit on SI.
The new test still passes on r600, I guess I should look for siimlar
breaking case.
LGTM
On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> Using exp2(x * M_LOG2E_F) does not give us accurate enough results for
> OpenCL. If you look at the new exp implementation you'll see that
> it does multiply the input by M_LOG2E_F, but it still uses the original
> input in part of the calculation.
>
> This exp implementation was ported from the AMD builtin library
> and has been tested with piglit, OpenCV, and the ocl conformance tests.
> ---
> generic/lib/math/exp.cl | 88 ++++++++++++++++++++++++++++++++++++++++++++++--
> generic/lib/math/exp.inc | 10 ------
> 2 files changed, 85 insertions(+), 13 deletions(-)
> delete mode 100644 generic/lib/math/exp.inc
>
> diff --git a/generic/lib/math/exp.cl b/generic/lib/math/exp.cl
> index dbf4a93..37f693c 100644
> --- a/generic/lib/math/exp.cl
> +++ b/generic/lib/math/exp.cl
> @@ -1,8 +1,90 @@
> +/*
> + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> #include <clc/clc.h>
>
> +#include "math.h"
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF float exp(float x) {
> +
> + // Reduce x
> + const float ln2HI = 0x1.62e300p-1f;
> + const float ln2LO = 0x1.2fefa2p-17f;
> + const float invln2 = 0x1.715476p+0f;
> +
> + float fhalF = x < 0.0f ? -0.5f : 0.5f;
> + int p = mad(x, invln2, fhalF);
> + float fp = (float)p;
> + float hi = mad(fp, -ln2HI, x); // t*ln2HI is exact here
> + float lo = -fp*ln2LO;
> +
> + // Evaluate poly
> + float t = hi + lo;
> + float tt = t*t;
> + float v = mad(tt,
> + -mad(tt,
> + mad(tt,
> + mad(tt,
> + mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
> + 0x1.1566aap-14f),
> + -0x1.6c16c2p-9f),
> + 0x1.555556p-3f),
> + t);
> +
> + float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
> +
> + // Scale by 2^p
> + float r = as_float(as_int(y) + (p << 23));
> +
> + const float ulim = 0x1.62e430p+6f; // ln(largest_normal) = 88.72283905206835305366
> + const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657
> +
> + r = x < llim ? 0.0f : r;
> + r = x < ulim ? r : as_float(0x7f800000);
> + return isnan(x) ? x : r;
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float)
> +
> #ifdef cl_khr_fp64
> +
> +#include "exp_helper.h"
> +
> #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> -#endif
>
> -#define __CLC_BODY <exp.inc>
> -#include <clc/math/gentype.inc>
> +_CLC_OVERLOAD _CLC_DEF double exp(double x) {
> +
> + const double X_MIN = -0x1.74910d52d3051p+9; // -1075*ln(2)
> + const double X_MAX = 0x1.62e42fefa39efp+9; // 1024*ln(2)
> + const double R_64_BY_LOG2 = 0x1.71547652b82fep+6; // 64/ln(2)
> + const double R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64
> + const double R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64
> +
> + int n = convert_int(x * R_64_BY_LOG2);
> + double r = fma(-R_LOG2_BY_64_TL, (double)n, fma(-R_LOG2_BY_64_LD, (double)n, x));
> + return __clc_exp_helper(x, X_MIN, X_MAX, r, n);
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
> +
> +#endif
> diff --git a/generic/lib/math/exp.inc b/generic/lib/math/exp.inc
> deleted file mode 100644
> index 525fb59..0000000
> --- a/generic/lib/math/exp.inc
> +++ /dev/null
> @@ -1,10 +0,0 @@
> -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE exp(__CLC_GENTYPE val) {
> - // exp(x) = exp2(x * log2(e))
> -#if __CLC_FPSIZE == 32
> - return exp2(val * M_LOG2E_F);
> -#elif __CLC_FPSIZE == 64
> - return exp2(val * M_LOG2E);
> -#else
> -#error unknown _CLC_FPSIZE
> -#endif
> -}
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20150409/aa8118be/attachment.sig>
More information about the Libclc-dev
mailing list