[Libclc-dev] [PATCH 13/15] Use a more accurate implementation for exp

Thu Apr 9 15:38:39 PDT 2015

I assume the original implementation fails the new piglit on SI.
The new test still passes on r600, I guess I should look for siimlar
breaking case.

LGTM

On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> Using exp2(x * M_LOG2E_F) does not give us accurate enough results for
> OpenCL.  If you look at the new exp implementation you'll see that
> it does multiply the input by M_LOG2E_F, but it still uses the original
> input in part of the calculation.
> 
> This exp implementation was ported from the AMD builtin library
> and has been tested with piglit, OpenCV, and the ocl conformance tests.
> ---
>  generic/lib/math/exp.cl  | 88 ++++++++++++++++++++++++++++++++++++++++++++++--
>  generic/lib/math/exp.inc | 10 ------
>  2 files changed, 85 insertions(+), 13 deletions(-)
>  delete mode 100644 generic/lib/math/exp.inc
> 
> diff --git a/generic/lib/math/exp.cl b/generic/lib/math/exp.cl
> index dbf4a93..37f693c 100644
> --- a/generic/lib/math/exp.cl
> +++ b/generic/lib/math/exp.cl
> @@ -1,8 +1,90 @@
> +/*
> + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
>  #include <clc/clc.h>
>  
> +#include "math.h"
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF float exp(float x) {
> +
> +    // Reduce x
> +    const float ln2HI = 0x1.62e300p-1f;
> +    const float ln2LO = 0x1.2fefa2p-17f;
> +    const float invln2 = 0x1.715476p+0f;
> +
> +    float fhalF = x < 0.0f ? -0.5f : 0.5f;
> +    int p  = mad(x, invln2, fhalF);
> +    float fp = (float)p;
> +    float hi = mad(fp, -ln2HI, x); // t*ln2HI is exact here
> +    float lo = -fp*ln2LO;
> +
> +    // Evaluate poly
> +    float t = hi + lo;
> +    float tt  = t*t;
> +    float v = mad(tt,
> +                  -mad(tt,
> +                       mad(tt,
> +                           mad(tt,
> +                               mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
> +                               0x1.1566aap-14f),
> +                           -0x1.6c16c2p-9f),
> +                       0x1.555556p-3f),
> +                  t);
> +
> +    float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
> +
> +    // Scale by 2^p
> +    float r =  as_float(as_int(y) + (p << 23));
> +
> +    const float ulim =  0x1.62e430p+6f; // ln(largest_normal) = 88.72283905206835305366
> +    const float llim = -0x1.5d589ep+6f; // ln(smallest_normal) = -87.33654475055310898657
> +
> +    r = x < llim ? 0.0f : r;
> +    r = x < ulim ? r : as_float(0x7f800000);
> +    return isnan(x) ? x : r;
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp, float)
> +
>  #ifdef cl_khr_fp64
> +
> +#include "exp_helper.h"
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> -#endif
>  
> -#define __CLC_BODY <exp.inc>
> -#include <clc/math/gentype.inc>
> +_CLC_OVERLOAD _CLC_DEF double exp(double x) {
> +
> +    const double X_MIN = -0x1.74910d52d3051p+9; // -1075*ln(2)
> +    const double X_MAX = 0x1.62e42fefa39efp+9; // 1024*ln(2)
> +    const double R_64_BY_LOG2 = 0x1.71547652b82fep+6; // 64/ln(2)
> +    const double R_LOG2_BY_64_LD = 0x1.62e42fefa0000p-7; // head ln(2)/64
> +    const double R_LOG2_BY_64_TL = 0x1.cf79abc9e3b39p-46; // tail ln(2)/64
> +
> +    int n = convert_int(x * R_64_BY_LOG2);
> +    double r = fma(-R_LOG2_BY_64_TL, (double)n, fma(-R_LOG2_BY_64_LD, (double)n, x));
> +    return __clc_exp_helper(x, X_MIN, X_MAX, r, n);
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp, double)
> +
> +#endif
> diff --git a/generic/lib/math/exp.inc b/generic/lib/math/exp.inc
> deleted file mode 100644
> index 525fb59..0000000
> --- a/generic/lib/math/exp.inc
> +++ /dev/null
> @@ -1,10 +0,0 @@
> -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE exp(__CLC_GENTYPE val) {
> -  // exp(x) = exp2(x * log2(e))
> -#if __CLC_FPSIZE == 32
> -  return exp2(val * M_LOG2E_F);
> -#elif __CLC_FPSIZE == 64
> -  return exp2(val * M_LOG2E);
> -#else
> -#error unknown _CLC_FPSIZE
> -#endif
> -}

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20150409/aa8118be/attachment.sig>