[Libclc-dev] [PATCH] relational: Implement shuffle builtin
Aaron Watry via Libclc-dev
libclc-dev at lists.llvm.org
Fri Sep 1 12:21:32 PDT 2017
On Thu, Aug 31, 2017 at 5:14 PM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> On Thu, 2017-08-31 at 11:59 -0400, Jan Vesely wrote:
>> On Sun, 2017-06-11 at 22:04 -0500, Aaron Watry via Libclc-dev wrote:
>> > This was added in CL 1.1
>> >
>> > Tested with a Radeon HD 7850 (Pitcairn) using the CL CTS via:
>> > test_conformance/relationals/test_relationals shuffle_built_in
>>
>> sorry it took so long. I think there are still parts missing but we
>> might be able to get away with it if clang can handle mask type
>> conversion implicitly.
>> it also needs to ignore the high bits of mask elements.
>> see inline comments.
>
> looks like I was wrong on both accounts.
Yeah, I did this with an 'mask &= (MASKTYPE##N)(ARGSIZE-1)' as the
first piece of the function implementation, instead of in the switch
statement. I did test this using the CTS, which handled float/double,
but doesn't have fp16 tests. I'll see what I need to do to get this
working on my SI using your piglit tests (assuming that I can).
I'll also go ahead and move this to a new misc/ directory as
suggested. If I can manage to test the fp16 support myself, do you
want to see the new version,or would the review stand, assuming that
all I have to add is:
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
_CLC_VECTOR_SHUFFLE_INSIZE(half, ushort)
#endif
--Aaron
> If you add half version and move this to clc/misc:
> Reviewed-by: Jan Vesely <jan.vesely at rutgers.edu>
>
>>
>> I haven't tested it yet. I'll try to do that and provide shuffle2
>> piglits asap.
>
> works at least on carrizo/iceland with llvm 5.0
>
> Jan
>
>>
>> >
>> > Signed-off-by: Aaron Watry <awatry at gmail.com>
>> > ---
>> > generic/include/clc/clc.h | 1 +
>> > generic/include/clc/relational/shuffle.h | 44 +++++++++
>> > generic/lib/SOURCES | 1 +
>> > generic/lib/relational/shuffle.cl | 153 +++++++++++++++++++++++++++++++
>> > 4 files changed, 199 insertions(+)
>> > create mode 100644 generic/include/clc/relational/shuffle.h
>> > create mode 100644 generic/lib/relational/shuffle.cl
>> >
>> > diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
>> > index 4c29214..ac1dab5 100644
>> > --- a/generic/include/clc/clc.h
>> > +++ b/generic/include/clc/clc.h
>> > @@ -173,6 +173,7 @@
>> > #include <clc/relational/isordered.h>
>> > #include <clc/relational/isunordered.h>
>> > #include <clc/relational/select.h>
>> > +#include <clc/relational/shuffle.h>
>>
>> Not sure why CTS puts these in relational category. specs have a misc
>> chapter for them, so it'd be nice to add new dir in clc.
>>
>> > #include <clc/relational/signbit.h>
>> >
>> > /* 6.11.8 Synchronization Functions */
>> > diff --git a/generic/include/clc/relational/shuffle.h b/generic/include/clc/relational/shuffle.h
>> > new file mode 100644
>> > index 0000000..e10ac5e
>> > --- /dev/null
>> > +++ b/generic/include/clc/relational/shuffle.h
>> > @@ -0,0 +1,44 @@
>> > +//===-- generic/include/clc/relational/shuffle.h ------------------------------===//
>> > +//
>> > +// The LLVM Compiler Infrastructure
>> > +//
>> > +// This file is dual licensed under both the University of Illinois Open Source
>> > +// License and the MIT license. See LICENSE.TXT for details.
>> > +//
>> > +//===----------------------------------------------------------------------===//
>> > +
>> > +#define _CLC_SHUFFLE_DECL(TYPE, MASKTYPE, RETTYPE) \
>> > + _CLC_OVERLOAD _CLC_DECL RETTYPE shuffle(TYPE x, MASKTYPE mask);
>> > +
>> > +//Return type is same base type as the input type, with the same vector size as the mask.
>> > +//Elements in the mask must be the same size (number of bits) as the input value.
>> > +//E.g. char8 ret = shuffle(char2 x, uchar8 mask);
>> > +
>> > +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INBASE, INTYPE, MASKTYPE) \
>> > + _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##2, INBASE##2) \
>> > + _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##4, INBASE##4) \
>> > + _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##8, INBASE##8) \
>> > + _CLC_SHUFFLE_DECL(INTYPE, MASKTYPE##16, INBASE##16) \
>> > +
>> > +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##2, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##4, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##8, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, TYPE##16, MASKTYPE) \
>> > +
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
>> > +#ifdef cl_khr_fp64
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
>> > +#endif
>> > +
>> > +#undef _CLC_SHUFFLE_DECL
>> > +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
>> > +#undef _CLC_VECTOR_SHUFFLE_INSIZE
>> > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
>> > index 9e0157b..fe0df5a 100644
>> > --- a/generic/lib/SOURCES
>> > +++ b/generic/lib/SOURCES
>> > @@ -139,6 +139,7 @@ relational/isnormal.cl
>> > relational/isnotequal.cl
>> > relational/isordered.cl
>> > relational/isunordered.cl
>> > +relational/shuffle.cl
>> > relational/signbit.cl
>> > shared/clamp.cl
>> > shared/max.cl
>> > diff --git a/generic/lib/relational/shuffle.cl b/generic/lib/relational/shuffle.cl
>> > new file mode 100644
>> > index 0000000..7d96f86
>> > --- /dev/null
>> > +++ b/generic/lib/relational/shuffle.cl
>> > @@ -0,0 +1,153 @@
>> > +//===-- generic/lib/relational/shuffle.cl ------------------------------===//
>> > +//
>> > +// The LLVM Compiler Infrastructure
>> > +//
>> > +// This file is dual licensed under both the University of Illinois Open Source
>> > +// License and the MIT license. See LICENSE.TXT for details.
>> > +//
>> > +//===----------------------------------------------------------------------===//
>> > +
>> > +#include <clc/clc.h>
>> > +
>> > +#define _CLC_ELEMENT_CASES2(VAR) \
>> > + case 0: return VAR.s0; \
>> > + case 1: return VAR.s1;
>> > +
>> > +#define _CLC_ELEMENT_CASES4(VAR) \
>> > + _CLC_ELEMENT_CASES2(VAR) \
>> > + case 2: return VAR.s2; \
>> > + case 3: return VAR.s3;
>> > +
>> > +#define _CLC_ELEMENT_CASES8(VAR) \
>> > + _CLC_ELEMENT_CASES4(VAR) \
>> > + case 4: return VAR.s4; \
>> > + case 5: return VAR.s5; \
>> > + case 6: return VAR.s6; \
>> > + case 7: return VAR.s7;
>> > +
>> > +#define _CLC_ELEMENT_CASES16(VAR) \
>> > + _CLC_ELEMENT_CASES8(VAR) \
>> > + case 8: return VAR.s8; \
>> > + case 9: return VAR.s9; \
>> > + case 10: return VAR.sA; \
>> > + case 11: return VAR.sB; \
>> > + case 12: return VAR.sC; \
>> > + case 13: return VAR.sD; \
>> > + case 14: return VAR.sE; \
>> > + case 15: return VAR.sF;
>> > +
>> > +#define _CLC_GET_ELEMENT_DEFINE(ARGTYPE, ARGSIZE, IDXTYPE) \
>> > + inline ARGTYPE __clc_get_el_##ARGTYPE##ARGSIZE##_##IDXTYPE(ARGTYPE##ARGSIZE x, IDXTYPE idx) {\
>> > + switch (idx){ \
>>
>> I think you need "idx % #ARGSIZE" here. Specs explitcilty mention that
>> higher bits are ignored and the newly posted piglit tests check this.
>>
>> > + _CLC_ELEMENT_CASES##ARGSIZE(x) \
>> > + default: return 0; \
>> > + } \
>> > + } \
>> > +
>> > +#define _CLC_SHUFFLE_SET_ONE_ELEMENT(ARGTYPE, ARGSIZE, INDEX, MASKTYPE) \
>> > + ret_val.s##INDEX = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s##INDEX); \
>> > +
>> > +#define _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + ret_val.s0 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s0); \
>> > + ret_val.s1 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s1);
>> > +
>> > +#define _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + ret_val.s2 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s2); \
>> > + ret_val.s3 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s3);
>> > +
>> > +#define _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + ret_val.s4 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s4); \
>> > + ret_val.s5 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s5); \
>> > + ret_val.s6 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s6); \
>> > + ret_val.s7 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s7);
>> > +
>> > +#define _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + ret_val.s8 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s8); \
>> > + ret_val.s9 = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.s9); \
>> > + ret_val.sA = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sA); \
>> > + ret_val.sB = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sB); \
>> > + ret_val.sC = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sC); \
>> > + ret_val.sD = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sD); \
>> > + ret_val.sE = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sE); \
>> > + ret_val.sF = __clc_get_el_##ARGTYPE##ARGSIZE##_##MASKTYPE(x, mask.sF); \
>> > +
>> > +#define _CLC_SHUFFLE_DEFINE2(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > +_CLC_DEF _CLC_OVERLOAD ARGTYPE##2 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##2 mask){ \
>> > + ARGTYPE##2 ret_val; \
>> > + mask &= (MASKTYPE##2)(ARGSIZE-1); \
>> > + _CLC_SHUFFLE_SET_2_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + return ret_val; \
>> > +}
>> > +
>> > +#define _CLC_SHUFFLE_DEFINE4(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > +_CLC_DEF _CLC_OVERLOAD ARGTYPE##4 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##4 mask){ \
>> > + ARGTYPE##4 ret_val; \
>> > + mask &= (MASKTYPE##4)(ARGSIZE-1); \
>> > + _CLC_SHUFFLE_SET_4_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + return ret_val; \
>> > +}
>> > +
>> > +#define _CLC_SHUFFLE_DEFINE8(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > +_CLC_DEF _CLC_OVERLOAD ARGTYPE##8 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##8 mask){ \
>> > + ARGTYPE##8 ret_val; \
>> > + mask &= (MASKTYPE##8)(ARGSIZE-1); \
>> > + _CLC_SHUFFLE_SET_8_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + return ret_val; \
>> > +}
>> > +
>> > +#define _CLC_SHUFFLE_DEFINE16(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > +_CLC_DEF _CLC_OVERLOAD ARGTYPE##16 shuffle(ARGTYPE##ARGSIZE x, MASKTYPE##16 mask){ \
>> > + ARGTYPE##16 ret_val; \
>> > + mask &= (MASKTYPE##16)(ARGSIZE-1); \
>> > + _CLC_SHUFFLE_SET_16_ELEMENTS(ARGTYPE, ARGSIZE, MASKTYPE) \
>> > + return ret_val; \
>> > +}
>> > +
>> > +#define _CLC_VECTOR_SHUFFLE_MASKSIZE(INTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_GET_ELEMENT_DEFINE(INTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_DEFINE2(INTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_DEFINE4(INTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_DEFINE8(INTYPE, ARGSIZE, MASKTYPE) \
>> > + _CLC_SHUFFLE_DEFINE16(INTYPE, ARGSIZE, MASKTYPE) \
>> > +
>> > +#define _CLC_VECTOR_SHUFFLE_INSIZE(TYPE, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 2, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 4, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 8, MASKTYPE) \
>> > + _CLC_VECTOR_SHUFFLE_MASKSIZE(TYPE, 16, MASKTYPE) \
>> > +
>> > +
>> > +
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(char, uchar)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(short, ushort)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(int, uint)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(long, ulong)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(uchar, uchar)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(ushort, ushort)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(uint, uint)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(ulong, ulong)
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(float, uint)
>>
>> Mask type can be any vector of unsigned type, so I think you need all
>> combinations.
>>
>> > +#ifdef cl_khr_fp64
>> > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>> > +_CLC_VECTOR_SHUFFLE_INSIZE(double, ulong)
>>
>> I think this needs other mask types.
>>
>> > +#endif
>>
>> add half/cl_khr_fp16 here.
>>
>> thanks,
>> Jan
>>
>> > +
>> > +#undef _CLC_ELEMENT_CASES2
>> > +#undef _CLC_ELEMENT_CASES4
>> > +#undef _CLC_ELEMENT_CASES8
>> > +#undef _CLC_ELEMENT_CASES16
>> > +#undef _CLC_GET_ELEMENT_DEFINE
>> > +#undef _CLC_SHUFFLE_SET_ONE_ELEMENT
>> > +#undef _CLC_SHUFFLE_SET_2_ELEMENTS
>> > +#undef _CLC_SHUFFLE_SET_4_ELEMENTS
>> > +#undef _CLC_SHUFFLE_SET_8_ELEMENTS
>> > +#undef _CLC_SHUFFLE_SET_16_ELEMENTS
>> > +#undef _CLC_SHUFFLE_DEFINE2
>> > +#undef _CLC_SHUFFLE_DEFINE4
>> > +#undef _CLC_SHUFFLE_DEFINE8
>> > +#undef _CLC_SHUFFLE_DEFINE16
>> > +#undef _CLC_VECTOR_SHUFFLE_MASKSIZE
>> > +#undef _CLC_VECTOR_SHUFFLE_INSIZE
More information about the Libclc-dev
mailing list