[Libclc-dev] [PATCH 7/7] Add vstore_half_rte implementation
Jeroen Ketema via Libclc-dev
libclc-dev at lists.llvm.org
Tue Feb 6 10:38:32 PST 2018
> On 6 Feb 2018, at 19:37, Jan Vesely <jan.vesely at rutgers.edu> wrote:
>
> On Wed, 2018-01-31 at 22:35 -0500, Jan Vesely wrote:
>> On Wed, 2018-01-31 at 23:11 +0100, Jeroen Ketema via Libclc-dev wrote:
>>> The comments on this patch set look a bit funny to me (multiline with
>>> stars as the left margin, don’t see much of that in LLVM).
>>
>> These follow other longer comments (such as in erf.cl/erfc.cl). LLVM is
>> in C++ so it prefers C++ style comments. I agree that libclc codestyle
>> is inconsistent and can use cleanup, but that's for another day.
>>
>>> Otherwise, LGTM,
>>
>> Thanks.
>>
>>> although I cannot possibly comment on the implementations of the
>>> rounding functions (I assume these are pretty much standard?).
>>
>> It's my own implementation, _rtz and _rti round to the nearest half
>> representable float in that direction (except for high order _rti which
>> relies on half conversion to convert to Inf).
>>
>> _rtn/_rtp then just choose from the above two based on sign.
>>
>> _rte pretty much replicates most of the work, and last/grs bits follow
>> the standard (hence extra comments). I assume that the compiler is able
>> to remove most of the duplication.
>
> Hi,
>
> is the clarification good enough, or do you want to see some changes
> before I push the series?
Was good enough.
Jeroen
> thanks,
> Jan
>
>>
>> regards,
>> Jan
>>
>>>
>>> Jeroen
>>>
>>>> On 29 Jan 2018, at 01:07, Jan Vesely via Libclc-dev <libclc-dev at lists.llvm.org> wrote:
>>>>
>>>> Passes CTS on carrizo
>>>> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
>>>> ---
>>>> generic/include/clc/shared/vstore.h | 2 ++
>>>> generic/lib/shared/vstore.cl | 45 ++++++++++++++++++++++++++++++++++++-
>>>> 2 files changed, 46 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
>>>> index b510e0a..ebad330 100644
>>>> --- a/generic/include/clc/shared/vstore.h
>>>> +++ b/generic/include/clc/shared/vstore.h
>>>> @@ -40,6 +40,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float,)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtz)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtn)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
>>>> +_CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rte)
>>>>
>>>> #ifdef cl_khr_fp64
>>>> _CLC_VECTOR_VSTORE_PRIM1(double)
>>>> @@ -47,6 +48,7 @@ _CLC_VECTOR_VSTORE_HALF_PRIM1(float, _rtp)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtz)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtn)
>>>> _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rtp)
>>>> + _CLC_VECTOR_VSTORE_HALF_PRIM1(double, _rte)
>>>> #endif
>>>>
>>>> #ifdef cl_khr_fp16
>>>> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
>>>> index 2bfb369..c035095 100644
>>>> --- a/generic/lib/shared/vstore.cl
>>>> +++ b/generic/lib/shared/vstore.cl
>>>> @@ -147,6 +147,27 @@ _CLC_DEF _CLC_OVERLOAD float __clc_rtp(float x)
>>>> {
>>>> return ((as_uint(x) & 0x80000000) == 0) ? __clc_rti(x) : __clc_rtz(x);
>>>> }
>>>> +_CLC_DEF _CLC_OVERLOAD float __clc_rte(float x)
>>>> +{
>>>> + /* Mantisa + implicit bit */
>>>> + const uint mantissa = (as_uint(x) & 0x7fffff) | (1u << 23);
>>>> + const int exp = (as_uint(x) >> 23 & 0xff) - 127;
>>>> + int shift = 13;
>>>> + if (exp < -14) {
>>>> + /* The default assumes lower 13 bits are rounded,
>>>> + * but it might be more for denormals.
>>>> + * Shifting beyond last == 0b, and qr == 00b is not necessary */
>>>> + shift += min(-(exp + 14), 15);
>>>> + }
>>>> + int mask = (1 << shift) - 1;
>>>> + const uint grs = mantissa & mask;
>>>> + const uint last = mantissa & (1 << shift);
>>>> + /* IEEE round up rule is: grs > 101b or grs == 100b and last == 1.
>>>> + * exp > 15 should round to inf. */
>>>> + bool roundup = (grs > (1 << (shift - 1))) ||
>>>> + (grs == (1 << (shift - 1)) && last != 0) || (exp > 15);
>>>> + return roundup ? __clc_rti(x) : __clc_rtz(x);
>>>> +}
>>>>
>>>> #ifdef cl_khr_fp64
>>>> _CLC_DEF _CLC_OVERLOAD double __clc_noop(double x)
>>>> @@ -192,13 +213,35 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rtp(double x)
>>>> {
>>>> return ((as_ulong(x) & 0x8000000000000000UL) == 0) ? __clc_rti(x) : __clc_rtz(x);
>>>> }
>>>> +_CLC_DEF _CLC_OVERLOAD double __clc_rte(double x)
>>>> +{
>>>> + /* Mantisa + implicit bit */
>>>> + const ulong mantissa = (as_ulong(x) & 0xfffffffffffff) | (1UL << 52);
>>>> + const int exp = (as_ulong(x) >> 52 & 0x7ff) - 1023;
>>>> + int shift = 42;
>>>> + if (exp < -14) {
>>>> + /* The default assumes lower 13 bits are rounded,
>>>> + * but it might be more for denormals.
>>>> + * Shifting beyond last == 0b, and qr == 00b is not necessary */
>>>> + shift += min(-(exp + 14), 15);
>>>> + }
>>>> + ulong mask = (1UL << shift) - 1UL;
>>>> + const ulong grs = mantissa & mask;
>>>> + const ulong last = mantissa & (1UL << shift);
>>>> + /* IEEE round up rule is: grs > 101b or grs == 100b and last == 1.
>>>> + * exp > 15 should round to inf. */
>>>> + bool roundup = (grs > (1UL << (shift - 1UL))) ||
>>>> + (grs == (1UL << (shift - 1UL)) && last != 0) || (exp > 15);
>>>> + return roundup ? __clc_rti(x) : __clc_rtz(x);
>>>> +}
>>>> #endif
>>>>
>>>> #define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
>>>> __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
>>>> __FUNC(SUFFIX ## _rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
>>>> __FUNC(SUFFIX ## _rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \
>>>> - __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp)
>>>> + __FUNC(SUFFIX ## _rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \
>>>> + __FUNC(SUFFIX ## _rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte)
>>>>
>>>> #define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
>>>> __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
>>>> --
>>>> 2.14.3
>>>>
>>>> _______________________________________________
>>>> Libclc-dev mailing list
>>>> Libclc-dev at lists.llvm.org
>>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
>>>
>>> _______________________________________________
>>> Libclc-dev mailing list
>>> Libclc-dev at lists.llvm.org
>>> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
>>
>>
>
> --
> Jan Vesely <jan.vesely at rutgers.edu>
More information about the Libclc-dev
mailing list