[Libclc-dev] [PATCH] relational: Implement shuffle2 builtin

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Thu Aug 31 15:16:18 PDT 2017


On Sun, 2017-06-11 at 22:30 -0500, Aaron Watry via Libclc-dev wrote:
> This was added in CL 1.1
> 
> Tested with a Radeon HD 7850 (Pitcairn) using the CL CTS via:
> test_conformance/relationals/test_relationals shuffle_built_in_dual_input
> 
> Signed-off-by: Aaron Watry <awatry at gmail.com>

Similar to the other one. If you add a half/cl_khr_fp16 version and move
this to clc/misc:
Reviewed-by: Jan Vesely <jan.vesely at rutgers.edu>

This one works on Turks as well.

Jan

> ---
>  generic/include/clc/clc.h                 |   1 +
>  generic/include/clc/relational/shuffle2.h |  44 +++++++++
>  generic/lib/SOURCES                       |   1 +
>  generic/lib/relational/shuffle2.cl        | 156 ++++++++++++++++++++++++++++++
>  4 files changed, 202 insertions(+)
>  create mode 100644 generic/include/clc/relational/shuffle2.h
>  create mode 100644 generic/lib/relational/shuffle2.cl
> 
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index ac1dab5..8ec1c3d 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -174,6 +174,7 @@
>  #include <clc/relational/isunordered.h>
>  #include <clc/relational/select.h>
>  #include <clc/relational/shuffle.h>
> +#include <clc/relational/shuffle2.h>
>  #include <clc/relational/signbit.h>
>  
>  /* 6.11.8 Synchronization Functions */
> diff --git a/generic/include/clc/relational/shuffle2.h b/generic/include/clc/relational/shuffle2.h
> new file mode 100644
> index 0000000..7fb9fd6
> --- /dev/null
> +++ b/generic/include/clc/relational/shuffle2.h
> @@ -0,0 +1,44 @@
> +//===-- generic/include/clc/relational/shuffle2.h ------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is dual licensed under both the University of Illinois Open Source
> +// License and the MIT license. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define _CLC_SHUFFLE2_DECL(TYPE, MASKTYPE, RETTYPE) \
> +  _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle2(TYPE x, TYPE y, MASKTYPE mask);
> +
> +//Return type is same base type as the input type, with the same vector size as the mask.
> +//Elements in the mask must be the same size (number of bits) as the input value.
> +//E.g. char8 ret = shuffle2(char2 x, char2 y, uchar8 mask);
> +
> +#define _CLC_VECTOR_SHUFFLE2_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
> +  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
> +  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
> +  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
> +  _CLC_SHUFFLE2_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
> +
> +#define _CLC_VECTOR_SHUFFLE2_INSIZE(TYPE, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE2_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
> +
> +_CLC_VECTOR_SHUFFLE2_INSIZE(char, uchar)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(short, ushort)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(int, uint)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(long, ulong)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(uchar, uchar)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(ushort, ushort)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(uint, uint)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(ulong, ulong)
> +_CLC_VECTOR_SHUFFLE2_INSIZE(float, uint)
> +#ifdef cl_khr_fp64
> +_CLC_VECTOR_SHUFFLE2_INSIZE(double, ulong)
> +#endif
> +
> +#undef _CLC_SHUFFLE2_DECL
> +#undef _CLC_VECTOR_SHUFFLE2_MASKSIZE
> +#undef _CLC_VECTOR_SHUFFLE2_INSIZE
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index fe0df5a..c5c0624 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -140,6 +140,7 @@ relational/isnotequal.cl
>  relational/isordered.cl
>  relational/isunordered.cl
>  relational/shuffle.cl
> +relational/shuffle2.cl
>  relational/signbit.cl
>  shared/clamp.cl
>  shared/max.cl
> diff --git a/generic/lib/relational/shuffle2.cl b/generic/lib/relational/shuffle2.cl
> new file mode 100644
> index 0000000..42bfc86
> --- /dev/null
> +++ b/generic/lib/relational/shuffle2.cl
> @@ -0,0 +1,156 @@
> +//===-- generic/lib/relational/shuffle2.cl ------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is dual licensed under both the University of Illinois Open Source
> +// License and the MIT license. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include <clc/clc.h>
> +
> +#define _CLC_ELEMENT_CASES2(VAR) \
> +    case 0: return VAR.s0; \
> +    case 1: return VAR.s1;
> +
> +#define _CLC_ELEMENT_CASES4(VAR) \
> +    _CLC_ELEMENT_CASES2(VAR) \
> +    case 2: return VAR.s2; \
> +    case 3: return VAR.s3;
> +
> +#define _CLC_ELEMENT_CASES8(VAR) \
> +    _CLC_ELEMENT_CASES4(VAR) \
> +    case 4: return VAR.s4; \
> +    case 5: return VAR.s5; \
> +    case 6: return VAR.s6; \
> +    case 7: return VAR.s7;
> +
> +#define _CLC_ELEMENT_CASES16(VAR) \
> +    _CLC_ELEMENT_CASES8(VAR) \
> +    case 8: return VAR.s8; \
> +    case 9: return VAR.s9; \
> +    case 10: return VAR.sA; \
> +    case 11: return VAR.sB; \
> +    case 12: return VAR.sC; \
> +    case 13: return VAR.sD; \
> +    case 14: return VAR.sE; \
> +    case 15: return VAR.sF;
> +
> +#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
> +     __attribute__((always_inline)) \
> +     ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, IDXTYPE idx) {\
> +        if (idx < ARGSIZE) \
> +            switch (idx){ \
> +                _CLC_ELEMENT_CASES##ARGSIZE(x) \
> +                default: return 0; \
> +            } \
> +        else \
> +            switch (idx - ARGSIZE){ \
> +                _CLC_ELEMENT_CASES##ARGSIZE(y) \
> +                default: return 0; \
> +            } \
> +    } \
> +
> +#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s0); \
> +    ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s1);
> +
> +#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s2); \
> +    ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s3);
> +
> +#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s4); \
> +    ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s5); \
> +    ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s6); \
> +    ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s7);
> +
> +#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s8); \
> +    ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.s9); \
> +    ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sA); \
> +    ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sB); \
> +    ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sC); \
> +    ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sD); \
> +    ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sE); \
> +    ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, y, mask.sF); \
> +
> +#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##2 mask){ \
> +    ARGTYPE##2 ret_val; \
> +    mask &= (MASKTYPE##2)(ARGSIZE * 2 - 1); \
> +    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##4 mask){ \
> +    ARGTYPE##4 ret_val; \
> +    mask &= (MASKTYPE##4)(ARGSIZE * 2 - 1); \
> +    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##8 mask){ \
> +    ARGTYPE##8 ret_val; \
> +    mask &= (MASKTYPE##8)(ARGSIZE * 2 - 1); \
> +    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle2(ARGTYPE##ARGSIZE x, ARGTYPE##ARGSIZE y, MASKTYPE##16 mask){ \
> +    ARGTYPE##16 ret_val; \
> +    mask &= (MASKTYPE##16)(ARGSIZE * 2 - 1); \
> +    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
> +
> +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
> +
> +
> +
> +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
> +#endif
> +
> +#undef _CLC_ELEMENT_CASES2
> +#undef _CLC_ELEMENT_CASES4
> +#undef _CLC_ELEMENT_CASES8
> +#undef _CLC_ELEMENT_CASES16
> +#undef _CLC_GET_ELEMENT_DEFINE
> +#undef _CLC_SHUFFLE_SET_2_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_4_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_8_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_16_ELEMENTS
> +#undef _CLC_SHUFFLE_DEFINE2
> +#undef _CLC_SHUFFLE_DEFINE4
> +#undef _CLC_SHUFFLE_DEFINE8
> +#undef _CLC_SHUFFLE_DEFINE16
> +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
> +#undef _CLC_VECTOR_SHUFFLE_INSIZE
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20170831/59a09a58/attachment-0001.sig>


More information about the Libclc-dev mailing list