[Libclc-dev] [PATCH 2/4] relational: Implement isgreaterequal

Wed Jun 11 16:10:44 PDT 2014

Unknown.  Many of the relational functions that I've been working with
today just wrap a comparison of two expressions in a function
call (about as simple as it can get).  I honestly don't know if
there's much appeal to calling __builtin_something over just doing the
comparison directly and letting it get parsed/lowered.  Would we be
accepting more things that can go wrong as a tradeoff for maybe lower
compile time?

I've been doing it this way because my LLVM-fu needs improvement and
I'm more comfortable with writing the C version.  If we can come up
with a simple template to fix the builtin functions, then I can take a
crack at doing that.

That being said, would it make sense to do that as a follow-on fix so
that these changes can work with existing versions of llvm/clang?

--Aaron

On Wed, Jun 11, 2014 at 6:03 PM, Jeroen Ketema <j.ketema at imperial.ac.uk> wrote:
>
> I’m wondering: Wouldn’t it be better to extend the builtins to handle vectors properly, or would that break other things?
>
> Jeroen
>
> On 11 Jun 2014, at 20:15, Aaron Watry <awatry at gmail.com> wrote:
>
>> Signed-off-by: Aaron Watry <awatry at gmail.com>
>> ---
>> generic/include/clc/clc.h                       |  3 ++-
>> generic/include/clc/relational/isgreaterequal.h | 18 +++++++++++++
>> generic/lib/SOURCES                             |  1 +
>> generic/lib/relational/isgreaterequal.cl        | 36 +++++++++++++++++++++++++
>> 4 files changed, 57 insertions(+), 1 deletion(-)
>> create mode 100644 generic/include/clc/relational/isgreaterequal.h
>> create mode 100644 generic/lib/relational/isgreaterequal.cl
>>
>> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
>> index 040ed2c..a220da5 100644
>> --- a/generic/include/clc/clc.h
>> +++ b/generic/include/clc/clc.h
>> @@ -104,9 +104,10 @@
>> /* 6.11.6 Relational Functions */
>> #include <clc/relational/any.h>
>> #include <clc/relational/bitselect.h>
>> +#include <clc/relational/isgreater.h>
>> +#include <clc/relational/isgreaterequal.h>
>> #include <clc/relational/isnan.h>
>> #include <clc/relational/select.h>
>> -#include <clc/relational/isgreater.h>
>>
>> /* 6.11.8 Synchronization Functions */
>> #include <clc/synchronization/cl_mem_fence_flags.h>
>> diff --git a/generic/include/clc/relational/isgreaterequal.h b/generic/include/clc/relational/isgreaterequal.h
>> new file mode 100644
>> index 0000000..5274714
>> --- /dev/null
>> +++ b/generic/include/clc/relational/isgreaterequal.h
>> @@ -0,0 +1,18 @@
>> +
>> +#define _CLC_ISGREATEREQUAL_DECL(TYPE, RETTYPE) \
>> +  _CLC_OVERLOAD _CLC_DECL RETTYPE isgreaterequal(TYPE x, TYPE y);
>> +
>> +#define _CLC_VECTOR_ISGREATEREQUAL_DECL(TYPE, RETTYPE) \
>> +  _CLC_ISGREATEREQUAL_DECL(TYPE##2, RETTYPE##2)  \
>> +  _CLC_ISGREATEREQUAL_DECL(TYPE##3, RETTYPE##3)  \
>> +  _CLC_ISGREATEREQUAL_DECL(TYPE##4, RETTYPE##4)  \
>> +  _CLC_ISGREATEREQUAL_DECL(TYPE##8, RETTYPE##8)  \
>> +  _CLC_ISGREATEREQUAL_DECL(TYPE##16, RETTYPE##16)
>> +
>> +_CLC_ISGREATEREQUAL_DECL(float, int)
>> +_CLC_VECTOR_ISGREATEREQUAL_DECL(float, int)
>> +
>> +#ifdef cl_khr_fp64
>> +_CLC_ISGREATEREQUAL_DECL(double, int)
>> +_CLC_VECTOR_ISGREATEREQUAL_DECL(double, long)
>> +#endif
>> \ No newline at end of file
>> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
>> index 6479b53..9b8cf89 100644
>> --- a/generic/lib/SOURCES
>> +++ b/generic/lib/SOURCES
>> @@ -36,6 +36,7 @@ math/nextafter.cl
>> math/sincos.cl
>> relational/any.cl
>> relational/isgreater.cl
>> +relational/isgreaterequal.cl
>> relational/isnan.cl
>> shared/clamp.cl
>> shared/max.cl
>> diff --git a/generic/lib/relational/isgreaterequal.cl b/generic/lib/relational/isgreaterequal.cl
>> new file mode 100644
>> index 0000000..da47aa3
>> --- /dev/null
>> +++ b/generic/lib/relational/isgreaterequal.cl
>> @@ -0,0 +1,36 @@
>> +#include <clc/clc.h>
>> +
>> +//Note: It would be nice to use __builtin_isgreaterequal, but that seems to only take scalar values as input, which will
>> +//      produce incorrect output for vector input types (true is 1 for scalars, but -1 per component for vectors).
>> +//
>> +//      For the same reason (1 vs -1 output), we can't use the _CLC_DEFINE_BINARY_BUILTIN macro here as that expands
>> +//      all vector operations to multiple scalar operations
>> +
>> +#define _CLC_DEFINE_ISGREATEREQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
>> +_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
>> +  return (x >= y); \
>> +} \
>> +
>> +_CLC_DEFINE_ISGREATEREQUAL(int, isgreaterequal, float, float)
>> +_CLC_DEFINE_ISGREATEREQUAL(int2, isgreaterequal, float2, float2)
>> +_CLC_DEFINE_ISGREATEREQUAL(int3, isgreaterequal, float3, float3)
>> +_CLC_DEFINE_ISGREATEREQUAL(int4, isgreaterequal, float4, float4)
>> +_CLC_DEFINE_ISGREATEREQUAL(int8, isgreaterequal, float8, float8)
>> +_CLC_DEFINE_ISGREATEREQUAL(int16, isgreaterequal, float16, float16)
>> +
>> +#ifdef cl_khr_fp64
>> +
>> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>> +
>> +// The scalar version of isgreaterequal(double) returns an int, but the vector versions
>> +// return long.
>> +_CLC_DEFINE_ISGREATEREQUAL(int, isgreaterequal, double, double)
>> +_CLC_DEFINE_ISGREATEREQUAL(long2, isgreaterequal, double2, double2)
>> +_CLC_DEFINE_ISGREATEREQUAL(long3, isgreaterequal, double3, double3)
>> +_CLC_DEFINE_ISGREATEREQUAL(long4, isgreaterequal, double4, double4)
>> +_CLC_DEFINE_ISGREATEREQUAL(long8, isgreaterequal, double8, double8)
>> +_CLC_DEFINE_ISGREATEREQUAL(long16, isgreaterequal, double16, double16)
>> +
>> +#endif
>> +
>> +#undef _CLC_DEFINE_ISGREATEREQUAL
>> \ No newline at end of file
>> --
>> 1.9.1
>>
>>
>> _______________________________________________
>> Libclc-dev mailing list
>> Libclc-dev at pcc.me.uk
>> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
>