[Libclc-dev] [PATCH] relational: Implement shuffle2 builtin

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sun Jun 11 20:30:49 PDT 2017


The shuffle2 builtin was added in OpenCL 1.1.

Tested with a Radeon HD 7850 (Pitcairn) using the CL CTS via:
test_conformance/relationals/test_relationals shuffle_built_in_dual_input

Signed-off-by: Aaron Watry <awatry at gmail.com>
---
 generic/include/clc/clc.h                 |   1 +
 generic/include/clc/relational/shuffle2.h |  44 +++++++++
 generic/lib/SOURCES                       |   1 +
 generic/lib/relational/shuffle2.cl        | 156 ++++++++++++++++++++++++++++++
 4 files changed, 202 insertions(+)
 create mode 100644 generic/include/clc/relational/shuffle2.h
 create mode 100644 generic/lib/relational/shuffle2.cl

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index ac1dab5..8ec1c3d 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -174,6 +174,7 @@
 #include <clc/relational/isunordered.h>
 #include <clc/relational/select.h>
 #include <clc/relational/shuffle.h>
+#include <clc/relational/shuffle2.h>
 #include <clc/relational/signbit.h>
 
 /* 6.11.8 Synchronization Functions */
diff --git a/generic/include/clc/relational/shuffle2.h b/generic/include/clc/relational/shuffle2.h
new file mode 100644
index 0000000..7fb9fd6
--- /dev/null
+++ b/generic/include/clc/relational/shuffle2.h
@@ -0,0 +1,44 @@
+//===-- generic/include/clc/relational/shuffle2.h ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under both the University of Illinois Open Source
+// License and the MIT license. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define _CLC_SHUFFLE2_DECL(TYPE, MASKTYPE, RETTYPE) /* declares one overload: RETTYPE shuffle2(TYPE x, TYPE y, MASKTYPE mask) */ \
+  _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle2(TYPE x, TYPE y, MASKTYPE mask);
+
+// The return type has the same base type as the inputs and the same vector size as the mask.
+// Each mask element must be the same size (number of bits) as an input element.
+// E.g. char8 ret = shuffle2(char2 x, char2 y, uchar8 mask);
+
+#define _CLC_VECTOR_SHUFFLE2_MASKSIZE(INBASE, INTYPE, MASKTYPE) /* for one input vector type INTYPE, declare an overload per mask width 2, 4, 8, 16 */ \
+  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
+  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
+  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
+  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
+
+#define _CLC_VECTOR_SHUFFLE2_INSIZE(TYPE, MASKTYPE) /* for one base TYPE, expand the mask-width overloads for input widths 2, 4, 8, 16 */ \
+  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
+
+_CLC_VECTOR_SHUFFLE2_INSIZE(char, uchar) /* arguments: (element base type, unsigned mask base type) */
+_CLC_VECTOR_SHUFFLE2_INSIZE(short, ushort)
+_CLC_VECTOR_SHUFFLE2_INSIZE(int, uint)
+_CLC_VECTOR_SHUFFLE2_INSIZE(long, ulong)
+_CLC_VECTOR_SHUFFLE2_INSIZE(uchar, uchar)
+_CLC_VECTOR_SHUFFLE2_INSIZE(ushort, ushort)
+_CLC_VECTOR_SHUFFLE2_INSIZE(uint, uint)
+_CLC_VECTOR_SHUFFLE2_INSIZE(ulong, ulong)
+_CLC_VECTOR_SHUFFLE2_INSIZE(float, uint) /* float data is paired with a uint mask */
+#ifdef cl_khr_fp64 /* double overloads are declared only when this macro is defined */
+_CLC_VECTOR_SHUFFLE2_INSIZE(double, ulong)
+#endif
+
+#undef _CLC_SHUFFLE2_DECL /* the helper macros above are private to this header */
+#undef _CLC_VECTOR_SHUFFLE2_MASKSIZE
+#undef _CLC_VECTOR_SHUFFLE2_INSIZE
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index fe0df5a..c5c0624 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -140,6 +140,7 @@ relational/isnotequal.cl
 relational/isordered.cl
 relational/isunordered.cl
 relational/shuffle.cl
+relational/shuffle2.cl
 relational/signbit.cl
 shared/clamp.cl
 shared/max.cl
diff --git a/generic/lib/relational/shuffle2.cl b/generic/lib/relational/shuffle2.cl
new file mode 100644
index 0000000..42bfc86
--- /dev/null
+++ b/generic/lib/relational/shuffle2.cl
@@ -0,0 +1,156 @@
+//===-- generic/lib/relational/shuffle2.cl ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under both the University of Illinois Open Source
+// License and the MIT license. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc.h>
+
+#define _CLC_ELEMENT_CASES2(VAR) /* switch cases 0..1, each returning the matching component of VAR */ \
+    case 0: return VAR.s0; \
+    case 1: return VAR.s1;
+
+#define _CLC_ELEMENT_CASES4(VAR) /* switch cases 0..3: the 2-element cases plus components 2 and 3 */ \
+    _CLC_ELEMENT_CASES2(VAR) \
+    case 2: return VAR.s2; \
+    case 3: return VAR.s3;
+
+#define _CLC_ELEMENT_CASES8(VAR) /* switch cases 0..7: the 4-element cases plus components 4 to 7 */ \
+    _CLC_ELEMENT_CASES4(VAR) \
+    case 4: return VAR.s4; \
+    case 5: return VAR.s5; \
+    case 6: return VAR.s6; \
+    case 7: return VAR.s7;
+
+#define _CLC_ELEMENT_CASES16(VAR) /* switch cases 0..15: the 8-element cases plus components 8 to 15 (10..15 are named sA..sF) */ \
+    _CLC_ELEMENT_CASES8(VAR) \
+    case 8: return VAR.s8; \
+    case 9: return VAR.s9; \
+    case 10: return VAR.sA; \
+    case 11: return VAR.sB; \
+    case 12: return VAR.sC; \
+    case 13: return VAR.sD; \
+    case 14: return VAR.sE; \
+    case 15: return VAR.sF;
+
+#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) /* defines a scalar helper returning element idx of x followed by y */ \
+     __attribute__((always_inline)) \
+     ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) {\
+        if (idx < ARGSIZE) /* indices below ARGSIZE select a component of x */ \
+            switch (idx){ \
+                _CLC_ELEMENT_CASES##ARGSIZE(x) \
+                default: return 0; /* any index without a case yields 0 */ \
+            } \
+        else /* remaining indices select component (idx - ARGSIZE) of y */ \
+            switch (idx - ARGSIZE){ \
+                _CLC_ELEMENT_CASES##ARGSIZE(y) \
+                default: return 0; /* any index without a case yields 0 */ \
+            } \
+    } \
+
+#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) /* fills ret_val.s0..s1; expects ret_val, x, y and mask in the expanding scope */ \
+    ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
+    ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
+
+#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) /* fills ret_val.s0..s3: the 2-element assignments plus s2 and s3 */ \
+    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
+    ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
+
+#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) /* fills ret_val.s0..s7: the 4-element assignments plus s4 to s7 */ \
+    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
+    ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
+    ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
+    ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
+
+#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) /* fills ret_val.s0..sF: the 8-element assignments plus s8 to sF */ \
+    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
+    ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
+    ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
+    ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
+    ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
+    ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
+    ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
+    ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); \
+
+#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) /* defines shuffle2 for a 2-wide mask, returning ARGTYPE##2 */ \
+_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask){ \
+    ARGTYPE##2 ret_val; \
+    mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); /* AND every index with 2*ARGSIZE-1 before the lookups */ \
+    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    return ret_val; \
+}
+
+#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) /* defines shuffle2 for a 4-wide mask, returning ARGTYPE##4 */ \
+_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask){ \
+    ARGTYPE##4 ret_val; \
+    mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); /* AND every index with 2*ARGSIZE-1 before the lookups */ \
+    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    return ret_val; \
+}
+
+#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) /* defines shuffle2 for an 8-wide mask, returning ARGTYPE##8 */ \
+_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask){ \
+    ARGTYPE##8 ret_val; \
+    mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); /* AND every index with 2*ARGSIZE-1 before the lookups */ \
+    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    return ret_val; \
+}
+
+#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) /* defines shuffle2 for a 16-wide mask, returning ARGTYPE##16 */ \
+_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask){ \
+    ARGTYPE##16 ret_val; \
+    mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); /* AND every index with 2*ARGSIZE-1 before the lookups */ \
+    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
+    return ret_val; \
+}
+
+#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) /* for one input width: the element-lookup helper plus shuffle2 for mask widths 2, 4, 8, 16 */ \
+  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
+  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
+  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
+  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
+  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
+
+#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) /* for one base TYPE, expand the definitions for input widths 2, 4, 8, 16 */ \
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
+  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
+
+
+
+_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar) /* arguments: (element base type, unsigned mask base type) */
+_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
+_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
+_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
+_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
+_CLC_VECTOR_SHUFFLE_INSIZE(float, uint) /* float data is paired with a uint mask */
+#ifdef cl_khr_fp64 /* double overloads are defined only when this macro is defined */
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
+#endif
+
+#undef _CLC_ELEMENT_CASES2 /* remove every helper macro defined in this file */
+#undef _CLC_ELEMENT_CASES4
+#undef _CLC_ELEMENT_CASES8
+#undef _CLC_ELEMENT_CASES16
+#undef _CLC_GET_ELEMENT_DEFINE
+#undef _CLC_SHUFFLE_SET_2_ELEMENTS
+#undef _CLC_SHUFFLE_SET_4_ELEMENTS
+#undef _CLC_SHUFFLE_SET_8_ELEMENTS
+#undef _CLC_SHUFFLE_SET_16_ELEMENTS
+#undef _CLC_SHUFFLE_DEFINE2
+#undef _CLC_SHUFFLE_DEFINE4
+#undef _CLC_SHUFFLE_DEFINE8
+#undef _CLC_SHUFFLE_DEFINE16
+#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
+#undef _CLC_VECTOR_SHUFFLE_INSIZE
-- 
2.11.0



More information about the Libclc-dev mailing list