[Libclc-dev] [PATCH] relational: Implement shuffle builtin

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Thu Aug 31 08:59:45 PDT 2017


On Sun, 2017-06-11 at 22:04 -0500, Aaron Watry via Libclc-dev wrote:
> This was added in CL 1.1
> 
> Tested with a Radeon HD 7850 (Pitcairn) using the CL CTS via:
> test_conformance/relationals/test_relationals shuffle_built_in

sorry it took so long. I think there are still parts missing but we
might be able to get away with it if clang can handle mask type
conversion implicitly.
it also needs to ignore the high bits of mask elements.
see inline comments.

I haven't tested it yet. I'll try to do that and provide shuffle2
piglits asap.

> 
> Signed-off-by: Aaron Watry <awatry at gmail.com>
> ---
>  generic/include/clc/clc.h                |   1 +
>  generic/include/clc/relational/shuffle.h |  44 +++++++++
>  generic/lib/SOURCES                      |   1 +
>  generic/lib/relational/shuffle.cl        | 153 +++++++++++++++++++++++++++++++
>  4 files changed, 199 insertions(+)
>  create mode 100644 generic/include/clc/relational/shuffle.h
>  create mode 100644 generic/lib/relational/shuffle.cl
> 
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 4c29214..ac1dab5 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -173,6 +173,7 @@
>  #include <clc/relational/isordered.h>
>  #include <clc/relational/isunordered.h>
>  #include <clc/relational/select.h>
> +#include <clc/relational/shuffle.h>

Not sure why CTS puts these in relational category. specs have a misc
chapter for them, so it'd be nice to add new dir in clc.

>  #include <clc/relational/signbit.h>
>  
>  /* 6.11.8 Synchronization Functions */
> diff --git a/generic/include/clc/relational/shuffle.h b/generic/include/clc/relational/shuffle.h
> new file mode 100644
> index 0000000..e10ac5e
> --- /dev/null
> +++ b/generic/include/clc/relational/shuffle.h
> @@ -0,0 +1,44 @@
> +//===-- generic/include/clc/relational/shuffle.h ------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is dual licensed under both the University of Illinois Open Source
> +// License and the MIT license. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \
> +  _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask);
> +
> +//Return type is same base type as the input type, with the same vector size as the mask.
> +//Elements in the mask must be the same size (number of bits) as the input value.
> +//E.g. char8 ret = shuffle(char2 x, uchar8 mask);
> +
> +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
> +  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
> +  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
> +  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
> +  _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
> +
> +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
> +
> +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
> +#ifdef cl_khr_fp64
> +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
> +#endif
> +
> +#undef _CLC_SHUFFLE_DECL
> +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
> +#undef _CLC_VECTOR_SHUFFLE_INSIZE
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 9e0157b..fe0df5a 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -139,6 +139,7 @@ relational/isnormal.cl
>  relational/isnotequal.cl
>  relational/isordered.cl
>  relational/isunordered.cl
> +relational/shuffle.cl
>  relational/signbit.cl
>  shared/clamp.cl
>  shared/max.cl
> diff --git a/generic/lib/relational/shuffle.cl b/generic/lib/relational/shuffle.cl
> new file mode 100644
> index 0000000..7d96f86
> --- /dev/null
> +++ b/generic/lib/relational/shuffle.cl
> @@ -0,0 +1,153 @@
> +//===-- generic/lib/relational/shuffle.cl ------------------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is dual licensed under both the University of Illinois Open Source
> +// License and the MIT license. See LICENSE.TXT for details.
> +//
> +//===----------------------------------------------------------------------===//
> +
> +#include <clc/clc.h>
> +
> +#define _CLC_ELEMENT_CASES2(VAR) \
> +    case 0: return VAR.s0; \
> +    case 1: return VAR.s1;
> +
> +#define _CLC_ELEMENT_CASES4(VAR) \
> +    _CLC_ELEMENT_CASES2(VAR) \
> +    case 2: return VAR.s2; \
> +    case 3: return VAR.s3;
> +
> +#define _CLC_ELEMENT_CASES8(VAR) \
> +    _CLC_ELEMENT_CASES4(VAR) \
> +    case 4: return VAR.s4; \
> +    case 5: return VAR.s5; \
> +    case 6: return VAR.s6; \
> +    case 7: return VAR.s7;
> +
> +#define _CLC_ELEMENT_CASES16(VAR) \
> +    _CLC_ELEMENT_CASES8(VAR) \
> +    case 8: return VAR.s8; \
> +    case 9: return VAR.s9; \
> +    case 10: return VAR.sA; \
> +    case 11: return VAR.sB; \
> +    case 12: return VAR.sC; \
> +    case 13: return VAR.sD; \
> +    case 14: return VAR.sE; \
> +    case 15: return VAR.sF;
> +
> +#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
> +    inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) {\
> +        switch (idx){ \

I think you need "idx % #ARGSIZE" here. Specs explitcilty mention that
higher bits are ignored and the newly posted piglit tests check this.

> +            _CLC_ELEMENT_CASES##ARGSIZE(x) \
> +            default: return 0; \
> +        } \
> +    } \
> +
> +#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
> +    ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX); \
> +
> +#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
> +    ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
> +
> +#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
> +    ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
> +
> +#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
> +    ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
> +    ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
> +    ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
> +
> +#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
> +    ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
> +    ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
> +    ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
> +    ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
> +    ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
> +    ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
> +    ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); \
> +
> +#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 mask){ \
> +    ARGTYPE##2 ret_val; \
> +    mask &= (MASKTYPE##2)(ARGSIZE-1); \
> +    _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 mask){ \
> +    ARGTYPE##4 ret_val; \
> +    mask &= (MASKTYPE##4)(ARGSIZE-1); \
> +    _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 mask){ \
> +    ARGTYPE##8 ret_val; \
> +    mask &= (MASKTYPE##8)(ARGSIZE-1); \
> +    _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
> +_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 mask){ \
> +    ARGTYPE##16 ret_val; \
> +    mask &= (MASKTYPE##16)(ARGSIZE-1); \
> +    _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
> +    return ret_val; \
> +}
> +
> +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
> +  _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
> +
> +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
> +  _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
> +
> +
> +
> +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
> +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
> +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
> +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)

Mask type can be any vector of unsigned type, so I think you need all
combinations.

> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)

I think this needs other mask types.

> +#endif

add half/cl_khr_fp16 here.

thanks,
Jan

> +
> +#undef _CLC_ELEMENT_CASES2
> +#undef _CLC_ELEMENT_CASES4
> +#undef _CLC_ELEMENT_CASES8
> +#undef _CLC_ELEMENT_CASES16
> +#undef _CLC_GET_ELEMENT_DEFINE
> +#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
> +#undef _CLC_SHUFFLE_SET_2_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_4_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_8_ELEMENTS
> +#undef _CLC_SHUFFLE_SET_16_ELEMENTS
> +#undef _CLC_SHUFFLE_DEFINE2
> +#undef _CLC_SHUFFLE_DEFINE4
> +#undef _CLC_SHUFFLE_DEFINE8
> +#undef _CLC_SHUFFLE_DEFINE16
> +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
> +#undef _CLC_VECTOR_SHUFFLE_INSIZE
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20170831/b4acb087/attachment.sig>


More information about the Libclc-dev mailing list