[Libclc-dev] [PATCH 1/1] math: Don't use llvm intrinsic for pow
Jan Vesely via Libclc-dev
libclc-dev at lists.llvm.org
Wed Sep 27 11:55:04 PDT 2017
The llvm.pow intrinsic does not work for fp64, and
amdgpu targets expand the fp32 intrinsic into exp2(y * log2(x)) anyway.
Fixes a crash in pow(double, double).
The fp32 version still hits the same precision failures in the CTS as the
intrinsic implementation.
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
generic/include/clc/math/pow.h | 8 ++------
generic/include/clc/math/pow.inc | 1 +
generic/lib/SOURCES | 1 +
generic/lib/math/pow.cl | 4 ++++
generic/lib/math/pow.inc | 10 ++++++++++
5 files changed, 18 insertions(+), 6 deletions(-)
create mode 100644 generic/include/clc/math/pow.inc
create mode 100644 generic/lib/math/pow.cl
create mode 100644 generic/lib/math/pow.inc
diff --git a/generic/include/clc/math/pow.h b/generic/include/clc/math/pow.h
index 320d341..a11dc05 100644
--- a/generic/include/clc/math/pow.h
+++ b/generic/include/clc/math/pow.h
@@ -1,6 +1,2 @@
-#undef pow
-#define pow __clc_pow
-
-#define __CLC_FUNCTION __clc_pow
-#define __CLC_INTRINSIC "llvm.pow"
-#include <clc/math/binary_intrin.inc>
+#define __CLC_BODY <clc/math/pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/include/clc/math/pow.inc b/generic/include/clc/math/pow.inc
new file mode 100644
index 0000000..1af72c1
--- /dev/null
+++ b/generic/include/clc/math/pow.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f919bc7..285e2f4 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -130,6 +130,7 @@ math/native_log2.cl
math/tables.cl
math/clc_nextafter.cl
math/nextafter.cl
+math/pow.cl
math/pown.cl
math/sin.cl
math/sincos.cl
diff --git a/generic/lib/math/pow.cl b/generic/lib/math/pow.cl
new file mode 100644
index 0000000..884e335
--- /dev/null
+++ b/generic/lib/math/pow.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/lib/math/pow.inc b/generic/lib/math/pow.inc
new file mode 100644
index 0000000..91df8fc
--- /dev/null
+++ b/generic/lib/math/pow.inc
@@ -0,0 +1,10 @@
+// Generic pow(x, y), evaluated as exp2(y * log2(x)). TODO: enable for fp16 when fp16 exp2 and log2 are implemented
+#if __CLC_FPSIZE > 16 // no definition is emitted for 16-bit floating-point types
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y) { // x: base, y: exponent
+#ifdef __CLC_SCALAR // the NaN early-out below is compiled only when __CLC_SCALAR is defined
+ if (isnan(x) || isnan(y))
+ return (__CLC_GENTYPE)NAN; // NOTE(review): C99 pow gives 1 for pow(x, 0) and pow(1, y) even with a NaN operand; confirm this is intended
+#endif
+ return exp2(y * log2(x)); // NOTE(review): log2 of a negative x is presumably NaN, so negative bases would yield NaN; confirm against the OpenCL pow edge cases
+}
+#endif
--
2.13.5
More information about the Libclc-dev
mailing list