[Libclc-dev] [PATCH v3 1/1] math: Don't use llvm intrinsic for pow

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Wed Sep 27 13:29:01 PDT 2017


The llvm.pow intrinsic does not work for fp64, and amdgpu targets
expand the fp32 intrinsic into exp2(mul(log2)) anyway.

v2: drop leftover development code
v3: enable cl_khr_fp64 in gentype.inc

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
I really should wait for travis to complete all builds before sending
out patches.
Sorry for the noise.

Jan

 generic/include/clc/math/gentype.inc | 1 +
 generic/include/clc/math/pow.h       | 8 ++------
 generic/include/clc/math/pow.inc     | 1 +
 generic/lib/SOURCES                  | 1 +
 generic/lib/math/pow.cl              | 4 ++++
 generic/lib/math/pow.inc             | 6 ++++++
 6 files changed, 15 insertions(+), 6 deletions(-)
 create mode 100644 generic/include/clc/math/pow.inc
 create mode 100644 generic/lib/math/pow.cl
 create mode 100644 generic/lib/math/pow.inc

diff --git a/generic/include/clc/math/gentype.inc b/generic/include/clc/math/gentype.inc
index e6ffad1..37a98e6 100644
--- a/generic/include/clc/math/gentype.inc
+++ b/generic/include/clc/math/gentype.inc
@@ -54,6 +54,7 @@
 
 #ifndef __FLOAT_ONLY
 #ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #define __CLC_SCALAR_GENTYPE double
 #define __CLC_FPSIZE 64
 
diff --git a/generic/include/clc/math/pow.h b/generic/include/clc/math/pow.h
index 320d341..a11dc05 100644
--- a/generic/include/clc/math/pow.h
+++ b/generic/include/clc/math/pow.h
@@ -1,6 +1,2 @@
-#undef pow
-#define pow __clc_pow
-
-#define __CLC_FUNCTION __clc_pow
-#define __CLC_INTRINSIC "llvm.pow"
-#include <clc/math/binary_intrin.inc>
+#define __CLC_BODY <clc/math/pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/include/clc/math/pow.inc b/generic/include/clc/math/pow.inc
new file mode 100644
index 0000000..1af72c1
--- /dev/null
+++ b/generic/include/clc/math/pow.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index f919bc7..285e2f4 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -130,6 +130,7 @@ math/native_log2.cl
 math/tables.cl
 math/clc_nextafter.cl
 math/nextafter.cl
+math/pow.cl
 math/pown.cl
 math/sin.cl
 math/sincos.cl
diff --git a/generic/lib/math/pow.cl b/generic/lib/math/pow.cl
new file mode 100644
index 0000000..884e335
--- /dev/null
+++ b/generic/lib/math/pow.cl
@@ -0,0 +1,4 @@
+#include <clc/clc.h>
+
+#define __CLC_BODY <pow.inc>
+#include <clc/math/gentype.inc>
diff --git a/generic/lib/math/pow.inc b/generic/lib/math/pow.inc
new file mode 100644
index 0000000..e1030d7
--- /dev/null
+++ b/generic/lib/math/pow.inc
@@ -0,0 +1,6 @@
+// TODO: enable when fp16 exp2 and log2 are implemented
+#if __CLC_FPSIZE > 16
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pow(__CLC_GENTYPE x, __CLC_GENTYPE y) {
+	return exp2(y * log2(x));
+}
+#endif
-- 
2.13.5



More information about the Libclc-dev mailing list