[Libclc-dev] [PATCH 1/2] integer: Add popcount implementation using ctpop intrinsic

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Wed Sep 6 20:22:40 PDT 2017


Also copy unary_intrin.inc from math/ into integer/, modified for the
integer types, to make the intrinsic declaration somewhat reusable.

Passes CL CTS integer_ops/test_integer_ops popcount tests for CL 1.2

Tested on GCN 1.0 (Pitcairn)

Signed-off-by: Aaron Watry <awatry at gmail.com>
---
 generic/include/clc/clc.h                    |  1 +
 generic/include/clc/integer/popcount.h       |  6 ++++++
 generic/include/clc/integer/unary_intrin.inc | 20 ++++++++++++++++++++
 3 files changed, 27 insertions(+)
 create mode 100644 generic/include/clc/integer/popcount.h
 create mode 100644 generic/include/clc/integer/unary_intrin.inc

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 1200511..a93c8ef 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -126,6 +126,7 @@
 #include <clc/integer/mad_sat.h>
 #include <clc/integer/mul24.h>
 #include <clc/integer/mul_hi.h>
+#include <clc/integer/popcount.h>
 #include <clc/integer/rhadd.h>
 #include <clc/integer/rotate.h>
 #include <clc/integer/sub_sat.h>
diff --git a/generic/include/clc/integer/popcount.h b/generic/include/clc/integer/popcount.h
new file mode 100644
index 0000000..99e4021
--- /dev/null
+++ b/generic/include/clc/integer/popcount.h
@@ -0,0 +1,6 @@
+#undef popcount
+#define popcount __clc_popcount
+
+#define __CLC_FUNCTION __clc_popcount
+#define __CLC_INTRINSIC "llvm.ctpop"
+#include <clc/integer/unary_intrin.inc>
diff --git a/generic/include/clc/integer/unary_intrin.inc b/generic/include/clc/integer/unary_intrin.inc
new file mode 100644
index 0000000..ee9862a
--- /dev/null
+++ b/generic/include/clc/integer/unary_intrin.inc
@@ -0,0 +1,20 @@
+#define __CLC_INTRINSIC_DEF(SCALAR_TYPE, BIT_SIZE) \
+_CLC_OVERLOAD SCALAR_TYPE __CLC_FUNCTION(SCALAR_TYPE x) __asm(__CLC_INTRINSIC ".i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##2 __CLC_FUNCTION(SCALAR_TYPE##2 x) __asm(__CLC_INTRINSIC ".v2i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##3 __CLC_FUNCTION(SCALAR_TYPE##3 x) __asm(__CLC_INTRINSIC ".v3i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##4 __CLC_FUNCTION(SCALAR_TYPE##4 x) __asm(__CLC_INTRINSIC ".v4i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##8 __CLC_FUNCTION(SCALAR_TYPE##8 x) __asm(__CLC_INTRINSIC ".v8i" BIT_SIZE); \
+_CLC_OVERLOAD SCALAR_TYPE##16 __CLC_FUNCTION(SCALAR_TYPE##16 x) __asm(__CLC_INTRINSIC ".v16i" BIT_SIZE);
+
+__CLC_INTRINSIC_DEF(char, "8")
+__CLC_INTRINSIC_DEF(uchar, "8")
+__CLC_INTRINSIC_DEF(short, "16")
+__CLC_INTRINSIC_DEF(ushort, "16")
+__CLC_INTRINSIC_DEF(int, "32")
+__CLC_INTRINSIC_DEF(uint, "32")
+__CLC_INTRINSIC_DEF(long, "64")
+__CLC_INTRINSIC_DEF(ulong, "64")
+
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+#undef __CLC_INTRINSIC_DEF
-- 
2.11.0



More information about the Libclc-dev mailing list