[Libclc-dev] [PATCH] relational: Implement signbit

Aaron Watry awatry at gmail.com
Fri Jun 20 18:28:32 PDT 2014


FYI: I've introduced the _CLC_DEFINE_RELATIONAL_UNARY* macros here and
immediately undef'd them after they're used.  I'm planning on moving
this set of macros to clcmacro.h and then introducing a 2-arg BINARY
version as well for things like isgreaterequal and friends.

By using the _CLC_DEFINE_RELATIONAL_BINARY* macros just mentioned, I'm
able to cut the isgreaterequal LLVM assembly to about a third of its
previous size (e.g. max variable identifier of %41 vs. %138 in the
final dump for isgreaterequal(float16, float16)).  signbit was roughly
the same as well.

Yes, we should probably fix the __builtin_* scalar functions to accept
vector inputs... I don't have the necessary time at the moment to do
that, sadly.

Should I continue going down this road for now?  The code as-is works
perfectly on radeonsi; it just might be leaving some performance on
the table.  Functional now, fast later?  Or do we hold off on
implementing these built-ins until clang can handle them optimally
(and wait for distros to pick up the new version)?  You can probably
tell which option I prefer.

--Aaron

On Fri, Jun 20, 2014 at 7:41 PM, Aaron Watry <awatry at gmail.com> wrote:
> v2 Changes:
>    - use __builtin_signbit instead of shifting by hand
>    - significantly improve vector shuffling
>    - Works correctly now for signbit(float16) on radeonsi
>
> Signed-off-by: Aaron Watry <awatry at gmail.com>
> ---
>  generic/include/clc/clc.h                |  1 +
>  generic/include/clc/relational/signbit.h | 18 +++++++
>  generic/lib/SOURCES                      |  1 +
>  generic/lib/relational/signbit.cl        | 87 ++++++++++++++++++++++++++++++++
>  4 files changed, 107 insertions(+)
>  create mode 100644 generic/include/clc/relational/signbit.h
>  create mode 100644 generic/lib/relational/signbit.cl
>
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 7702e68..1c68bd5 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -113,6 +113,7 @@
>  #include <clc/relational/isnan.h>
>  #include <clc/relational/isnotequal.h>
>  #include <clc/relational/select.h>
> +#include <clc/relational/signbit.h>
>
>  /* 6.11.8 Synchronization Functions */
>  #include <clc/synchronization/cl_mem_fence_flags.h>
> diff --git a/generic/include/clc/relational/signbit.h b/generic/include/clc/relational/signbit.h
> new file mode 100644
> index 0000000..774d6e0
> --- /dev/null
> +++ b/generic/include/clc/relational/signbit.h
> @@ -0,0 +1,18 @@
> +
> +#define _CLC_SIGNBIT_DECL(TYPE, RETTYPE) \
> +  _CLC_OVERLOAD _CLC_DECL RETTYPE signbit(TYPE x);
> +
> +#define _CLC_VECTOR_SIGNBIT_DECL(TYPE, RETTYPE) \
> +  _CLC_SIGNBIT_DECL(TYPE##2, RETTYPE##2)  \
> +  _CLC_SIGNBIT_DECL(TYPE##3, RETTYPE##3)  \
> +  _CLC_SIGNBIT_DECL(TYPE##4, RETTYPE##4)  \
> +  _CLC_SIGNBIT_DECL(TYPE##8, RETTYPE##8)  \
> +  _CLC_SIGNBIT_DECL(TYPE##16, RETTYPE##16)
> +
> +_CLC_SIGNBIT_DECL(float, int)
> +_CLC_VECTOR_SIGNBIT_DECL(float, int)
> +
> +#ifdef cl_khr_fp64
> +_CLC_SIGNBIT_DECL(double, int)
> +_CLC_VECTOR_SIGNBIT_DECL(double, long)
> +#endif
> \ No newline at end of file
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 611966f..aa638d8 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -44,6 +44,7 @@ relational/isgreater.cl
>  relational/isgreaterequal.cl
>  relational/isnotequal.cl
>  relational/isnan.cl
> +relational/signbit.cl
>  shared/clamp.cl
>  shared/max.cl
>  shared/min.cl
> diff --git a/generic/lib/relational/signbit.cl b/generic/lib/relational/signbit.cl
> new file mode 100644
> index 0000000..1f496d9
> --- /dev/null
> +++ b/generic/lib/relational/signbit.cl
> @@ -0,0 +1,87 @@
> +#include <clc/clc.h>
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_NAME, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x){ \
> +       return BUILTIN_NAME(x); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)( (RET_TYPE){FUNCTION(x.lo), FUNCTION(x.hi)} != (RET_TYPE)0); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)((FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2)) != (RET_TYPE)0); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)( \
> +       ( \
> +               FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3) \
> +       ) != (RET_TYPE)0); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)( \
> +       ( \
> +               FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
> +               FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7) \
> +       ) != (RET_TYPE)0); \
> +} \
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE, FUNCTION, ARG_TYPE) \
> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG_TYPE x) { \
> +  return (RET_TYPE)( \
> +       ( \
> +               FUNCTION(x.s0), FUNCTION(x.s1), FUNCTION(x.s2), FUNCTION(x.s3), \
> +               FUNCTION(x.s4), FUNCTION(x.s5), FUNCTION(x.s6), FUNCTION(x.s7), \
> +               FUNCTION(x.s8), FUNCTION(x.s9), FUNCTION(x.sa), FUNCTION(x.sb), \
> +               FUNCTION(x.sc), FUNCTION(x.sd), FUNCTION(x.se), FUNCTION(x.sf) \
> +       ) != (RET_TYPE)0); \
> +} \
> +
> +
> +#define _CLC_DEFINE_RELATIONAL_UNARY(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
> +_CLC_DEFINE_RELATIONAL_UNARY_SCALAR(RET_TYPE, FUNCTION, BUILTIN_FUNCTION, ARG_TYPE) \
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC2(RET_TYPE##2, FUNCTION, ARG_TYPE##2) \
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC3(RET_TYPE##3, FUNCTION, ARG_TYPE##3) \
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC4(RET_TYPE##4, FUNCTION, ARG_TYPE##4) \
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC8(RET_TYPE##8, FUNCTION, ARG_TYPE##8) \
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC16(RET_TYPE##16, FUNCTION, ARG_TYPE##16) \
> +
> +_CLC_DEFINE_RELATIONAL_UNARY(int, signbit, __builtin_signbitf, float)
> +
> +#ifdef cl_khr_fp64
> +
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +
> +// The scalar version of signbit(double) returns an int, but the vector versions
> +// return long.
> +
> +_CLC_DEF _CLC_OVERLOAD int signbit(double x){
> +       return __builtin_signbit(x);
> +}
> +
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC2(long2, signbit, double2)
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC3(long3, signbit, double3)
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC4(long4, signbit, double4)
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC8(long8, signbit, double8)
> +_CLC_DEFINE_RELATIONAL_UNARY_VEC16(long16, signbit, double16)
> +
> +#endif
> +
> +#undef _CLC_DEFINE_RELATIONAL_UNARY
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_SCALAR
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC2
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC3
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC4
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC8
> +#undef _CLC_DEFINE_RELATIONAL_UNARY_VEC16
> \ No newline at end of file
> --
> 1.9.1
>




More information about the Libclc-dev mailing list