[Libclc-dev] [PATCH 1/1] math: Don't use llvm intrinsic for pow

Wed Sep 27 11:55:04 PDT 2017

The llvm.pow intrinsic does not work for fp64.
The amdgpu targets expand the fp32 intrinsic into exp2(y * log2(x)) anyway.

Fixes a crash in pow(double, double).
The fp32 version still hits the same precision failures in the CTS as the
intrinsic implementation.

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
 generic/include/clc/math/pow.h   |  8 ++------
 generic/include/clc/math/pow.inc |  1 +
 generic/lib/SOURCES              |  1 +
 generic/lib/math/pow.cl          |  4 ++++
 generic/lib/math/pow.inc         | 10 ++++++++++
 5 files changed, 18 insertions(+), 6 deletions(-)
 create mode 100644 generic/include/clc/math/pow.inc
 create mode 100644 generic/lib/math/pow.cl
 create mode 100644 generic/lib/math/pow.inc

diff --git a/generic/include/clc/math/pow.h b/generic/include/clc/math/pow.h
index 320d341..a11dc05 100644
--- a/generic/include/clc/math/pow.h
+++ b/generic/include/clc/math/pow.h
@@ -1,6 +1,2 @@
-#undef pow
-#define pow __clc_pow
-
-#define __CLC_FUNCTION __clc_pow
-#define __CLC_INTRINSIC "llvm.pow"
-#include <clc/math/binary_intrin.inc>
+#define __CLC_BODY <clc/math/pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/include/clc/math/pow.inc b/generic/include/clc/math/pow.inc
new file mode 100644
index 0000000..1af72c1
--- /dev/null
+++ b/generic/include/clc/math/pow.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f919bc7..285e2f4 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -130,6 +130,7 @@ math/native_log2.cl
 math/tables.cl
 math/clc_nextafter.cl
 math/nextafter.cl
+math/pow.cl
 math/pown.cl
 math/sin.cl
 math/sincos.cl
diff --git a/generic/lib/math/pow.cl b/generic/lib/math/pow.cl
new file mode 100644
index 0000000..884e335
--- /dev/null
+++ b/generic/lib/math/pow.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/lib/math/pow.inc b/generic/lib/math/pow.inc
new file mode 100644
index 0000000..91df8fc
--- /dev/null
+++ b/generic/lib/math/pow.inc
@@ -0,0 +1,10 @@
+// pow(x, y) computed as exp2(y * log2(x)). TODO: enable for fp16 when fp16 exp2 and log2 are implemented
+#if __CLC_FPSIZE > 16 // definition is compiled out when __CLC_FPSIZE is 16 or less
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+#ifdef __CLC_SCALAR // the explicit NaN early-return is only compiled when __CLC_SCALAR is defined
+	if (isnan(x) || isnan(y))
+		return (__CLC_GENTYPE)NAN;
+#endif
+	return exp2(y * log2(x)); // NOTE(review): x < 0 presumably gives NaN via log2(x), even for integral y -- confirm
+}
+#endif
-- 
2.13.5