[Libclc-dev] [PATCH 12/15] Implement exp2 using OpenCL C rather than using an intrinsic
Tom Stellard
thomas.stellard at amd.com
Tue Apr 7 11:05:43 PDT 2015
Not all targets support the intrinsic, so it's better to have a
generic implementation which does not use it.
This exp2 implementation was ported from the AMD builtin library
and has been tested with piglit, OpenCV, and the ocl conformance tests.
---
generic/include/clc/math/exp2.h | 28 ++++++++++---
generic/include/clc/math/exp2.inc | 23 +++++++++++
generic/lib/SOURCES | 2 +
generic/lib/math/exp2.cl | 86 +++++++++++++++++++++++++++++++++++++++
generic/lib/math/exp_helper.cl | 69 +++++++++++++++++++++++++++++++
generic/lib/math/exp_helper.h | 29 +++++++++++++
generic/lib/math/tables.cl | 70 +++++++++++++++++++++++++++++++
generic/lib/math/tables.h | 2 +-
8 files changed, 303 insertions(+), 6 deletions(-)
create mode 100644 generic/include/clc/math/exp2.inc
create mode 100644 generic/lib/math/exp2.cl
create mode 100644 generic/lib/math/exp_helper.cl
create mode 100644 generic/lib/math/exp_helper.h
diff --git a/generic/include/clc/math/exp2.h b/generic/include/clc/math/exp2.h
index ec0dad2..14167e8 100644
--- a/generic/include/clc/math/exp2.h
+++ b/generic/include/clc/math/exp2.h
@@ -1,6 +1,24 @@
-#undef exp2
-#define exp2 __clc_exp2
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
-#define __CLC_FUNCTION __clc_exp2
-#define __CLC_INTRINSIC "llvm.exp2"
-#include <clc/math/unary_intrin.inc>
+#define __CLC_BODY <clc/math/exp2.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/include/clc/math/exp2.inc b/generic/include/clc/math/exp2.inc
new file mode 100644
index 0000000..3ecaae6
--- /dev/null
+++ b/generic/include/clc/math/exp2.inc
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x); // exp2 prototype for one __CLC_GENTYPE; presumably expanded per type by clc/math/gentype.inc via __CLC_BODY (see exp2.h)
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index 0e8c7d9..9b0986c 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -79,6 +79,8 @@ math/cospi.cl
math/ep_log.cl
math/erfc.cl
math/exp.cl
+math/exp_helper.cl
+math/exp2.cl
math/exp10.cl
math/fmax.cl
math/fmin.cl
diff --git a/generic/lib/math/exp2.cl b/generic/lib/math/exp2.cl
new file mode 100644
index 0000000..1ddccbd
--- /dev/null
+++ b/generic/lib/math/exp2.cl
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float exp2(float x) { // Single-precision 2^x, computed as 2^p * e^((x - p) * ln2) with p = rint(x)
+
+ // Reduce x: x = p + f with p = rint(x), so 2^x = 2^p * e^(f * ln(2))
+ const float ln2HI = 0x1.62e300p-1f; // high part of ln(2); ln2HI + ln2LO ~= 0.6931472
+ const float ln2LO = 0x1.2fefa2p-17f; // low-order correction part of ln(2)
+
+ float t = rint(x); // nearest integer to x, still as a float
+ int p = (int)t; // integer power of two applied at the end
+ float tt = x - t; // fractional remainder f
+ float hi = tt * ln2HI; // f * ln(2), high part
+ float lo = tt * ln2LO; // f * ln(2), low part
+
+ // Evaluate poly in t*t, where t = hi + lo is the reduced argument
+ t = hi + lo;
+ tt = t*t;
+ float v = mad(tt,
+ -mad(tt,
+ mad(tt,
+ mad(tt,
+ mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
+ 0x1.1566aap-14f),
+ -0x1.6c16c2p-9f),
+ 0x1.555556p-3f),
+ t); // v = t - tt * P(tt)
+
+ float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi); // y ~= e^t; NOTE(review): looks like the fdlibm expf-style rational reconstruction - confirm
+
+ // Scale by 2^p: add p directly into the exponent field (bits 23 and up) of y
+ float r = as_float(as_int(y) + (p << 23));
+
+ const float ulim = 128.0f; // inputs at or above this return +infinity
+ const float llim = -126.0f; // inputs below this return 0.0f
+
+ r = x < llim ? 0.0f : r; // results smaller than 2^-126 (subnormal range) are flushed to zero
+ r = x < ulim ? r : as_float(0x7f800000); // 0x7f800000 is the IEEE-754 single-precision +infinity bit pattern
+ return isnan(x) ? x : r; // NaN input is returned unchanged
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
+
+#ifdef cl_khr_fp64
+
+#include "exp_helper.h"
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double exp2(double x) { // Double-precision 2^x: reduce x to n/64 plus a remainder, then defer to __clc_exp_helper
+ const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2)
+ const double R_1_BY_64 = 1.0 / 64.0;
+
+ int n = convert_int(x * 64.0); // x in units of 1/64, converted to int
+ double r = R_LN2 * fma(-R_1_BY_64, (double)n, x); // r = ln(2) * (x - n/64), so that 2^x = 2^(n/64) * e^r
+ return __clc_exp_helper(x, -1074.0, 1024.0, r, n); // -1074.0 and 1024.0 are passed as the helper's x_min and x_max
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
+
+#endif
diff --git a/generic/lib/math/exp_helper.cl b/generic/lib/math/exp_helper.cl
new file mode 100644
index 0000000..046f306
--- /dev/null
+++ b/generic/lib/math/exp_helper.cl
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "tables.h"
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) { // Returns 2^(n/64) * e^r, replaced by x for NaN x, +inf for x > x_max, 0.0 for x < x_min
+
+ int j = n & 0x3f; // low 6 bits of n: index into the 64-entry 2^(j/64) table
+ int m = n >> 6; // remaining bits of n: integer power-of-two exponent
+
+ // 6 term tail of Taylor expansion of e^r: z2 = r + r^2/2 + r^3/6 + r^4/24 + r^5/120 + r^6/720 ~= e^r - 1
+ double z2 = r * fma(r,
+ fma(r,
+ fma(r,
+ fma(r,
+ fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
+ 0x1.5555555555555p-5),
+ 0x1.5555555555555p-3),
+ 0x1.0000000000000p-1),
+ 1.0);
+
+ double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j); // table entry for 2^(j/64): s0 is the leading part, s1 a small correction
+ z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0; // (s0 + s1) * (1 + z2) ~= 2^(j/64) * e^r, with the large term s0 added last
+
+ int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0)); // selects the two-step scaling below; presumably marks results in the subnormal range (below 2^-1022)
+
+ int n1 = m >> 2; // split the exponent: n1 + n2 == m
+ int n2 = m-n1;
+ double z3= z2 * as_double(((long)n1 + 1023) << 52); // multiply by 2^n1, built from its biased exponent bits
+ z3 *= as_double(((long)n2 + 1023) << 52); // then by 2^n2; presumably split because 2^m alone is not representable this way for such m
+
+ z2 = ldexp(z2, m); // ordinary path: scale by 2^m
+ z2 = small_value ? z3: z2;
+
+ z2 = isnan(x) ? x : z2; // NaN input is returned unchanged (the comparisons below are false for NaN)
+
+ z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2; // overflow: PINFBITPATT_DP64 is defined outside this patch, presumably the +infinity bit pattern
+ z2 = x < x_min ? 0.0 : z2; // underflow: result is 0.0
+
+ return z2;
+}
+
+#endif // cl_khr_fp64
diff --git a/generic/lib/math/exp_helper.h b/generic/lib/math/exp_helper.h
new file mode 100644
index 0000000..e6df2fd
--- /dev/null
+++ b/generic/lib/math/exp_helper.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n); // x: original input (range/NaN checks); r, n: reduced argument, result is 2^(n/64) * e^r
+
+#endif
diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
index 090e64a..5a620ec 100644
--- a/generic/lib/math/tables.cl
+++ b/generic/lib/math/tables.cl
@@ -634,6 +634,76 @@ DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
(double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
};
+DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = { // 2^(j/64) for j = 0..63; each entry is (leading part, small correction) and the two sum to the full value
+ (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
+ (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
+ (double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27),
+ (double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25),
+ (double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25),
+ (double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27),
+ (double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32),
+ (double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25),
+ (double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25),
+ (double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25),
+ (double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28),
+ (double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25),
+ (double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25),
+ (double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26),
+ (double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26),
+ (double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26),
+ (double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25),
+ (double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27),
+ (double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26),
+ (double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26),
+ (double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25),
+ (double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28),
+ (double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30),
+ (double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26),
+ (double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26),
+ (double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26),
+ (double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26),
+ (double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26),
+ (double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25),
+ (double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26),
+ (double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31),
+ (double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27),
+ (double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26),
+ (double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25),
+ (double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25),
+ (double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26),
+ (double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27),
+ (double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26),
+ (double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25),
+ (double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25),
+ (double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27),
+ (double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25),
+ (double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29),
+ (double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25),
+ (double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27),
+ (double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25),
+ (double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27),
+ (double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26),
+ (double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25),
+ (double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25),
+ (double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27),
+ (double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30),
+ (double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25),
+ (double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25),
+ (double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25),
+ (double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25),
+ (double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25),
+ (double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27),
+ (double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27),
+ (double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26),
+ (double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27),
+ (double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25),
+ (double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25),
+ (double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28)
+
+};
+
+
TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
+TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
#endif // cl_khr_fp64
diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
index d09adf1..55ff853 100644
--- a/generic/lib/math/tables.h
+++ b/generic/lib/math/tables.h
@@ -48,5 +48,5 @@ TABLE_FUNCTION_DECL(uint4, pibits_tbl);
TABLE_FUNCTION_DECL(double2, ln_tbl);
TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
-
+TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
#endif // cl_khr_fp64
--
2.0.4
More information about the Libclc-dev
mailing list