[Libclc-dev] [PATCH 1/2] integer: Add popcount implementation using ctpop intrinsic
Aaron Watry via Libclc-dev
libclc-dev at lists.llvm.org
Wed Sep 6 20:22:40 PDT 2017
Also copy the unary_intrin.inc from math/ and adapt it for integer types, to
make the intrinsic declarations somewhat reusable.
Passes the CL CTS integer_ops/test_integer_ops popcount tests for CL 1.2.
Tested on GCN 1.0 (Pitcairn).
Signed-off-by: Aaron Watry <awatry at gmail.com>
---
generic/include/clc/clc.h | 1 +
generic/include/clc/integer/popcount.h | 6 ++++++
generic/include/clc/integer/unary_intrin.inc | 20 ++++++++++++++++++++
3 files changed, 27 insertions(+)
create mode 100644 generic/include/clc/integer/popcount.h
create mode 100644 generic/include/clc/integer/unary_intrin.inc
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 1200511..a93c8ef 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -126,6 +126,7 @@
#include <clc/integer/mad_sat.h>
#include <clc/integer/mul24.h>
#include <clc/integer/mul_hi.h>
+#include <clc/integer/popcount.h>
#include <clc/integer/rhadd.h>
#include <clc/integer/rotate.h>
#include <clc/integer/sub_sat.h>
diff --git a/generic/include/clc/integer/popcount.h b/generic/include/clc/integer/popcount.h
new file mode 100644
index 0000000..99e4021
--- /dev/null
+++ b/generic/include/clc/integer/popcount.h
@@ -0,0 +1,6 @@
+#undef popcount
+#define popcount __clc_popcount
+
+#define __CLC_FUNCTION __clc_popcount
+#define __CLC_INTRINSIC "llvm.ctpop"
+#include <clc/integer/unary_intrin.inc>
diff --git a/generic/include/clc/integer/unary_intrin.inc b/generic/include/clc/integer/unary_intrin.inc
new file mode 100644
index 0000000..ee9862a
--- /dev/null
+++ b/generic/include/clc/integer/unary_intrin.inc
@@ -0,0 +1,20 @@
+#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \
+_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE);
+
+__CLC_INTRINSIC_DEF(char, "8")
+__CLC_INTRINSIC_DEF(uchar, "8")
+__CLC_INTRINSIC_DEF(short, "16")
+__CLC_INTRINSIC_DEF(ushort, "16")
+__CLC_INTRINSIC_DEF(int, "32")
+__CLC_INTRINSIC_DEF(uint, "32")
+__CLC_INTRINSIC_DEF(long, "64")
+__CLC_INTRINSIC_DEF(ulong, "64")
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+#undef __CLC_INTRINSIC_DEF
--
2.11.0
More information about the Libclc-dev mailing list