From libclc-dev at lists.llvm.org Wed Jun 6 18:25:16 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 06 Jun 2018 18:25:16 -0700 Subject: [Libclc-dev] [PATCH 1/1] math/fma: Add fp32 software implementation In-Reply-To: <6ac7201e1c92674cb3a6300dd37a78e236d1ad86.camel@rutgers.edu> References: <20180519234641.17360-1-jan.vesely@rutgers.edu> <6ac7201e1c92674cb3a6300dd37a78e236d1ad86.camel@rutgers.edu> Message-ID: <1528334716.5228.0.camel@rutgers.edu> On Tue, 2018-05-29 at 16:09 -0400, Jan Vesely wrote: > On Sat, 2018-05-19 at 19:46 -0400, Jan Vesely wrote: > > Passes CTS on carrizo (when forced to use sw fma) and turks. > > Signed-off-by: Jan Vesely > > --- > > generic/include/clc/math/fma.h | 11 +- > > generic/include/clc/math/ternary_decl.inc | 1 + > > generic/include/math/clc_fma.h | 11 ++ > > generic/lib/SOURCES | 2 + > > generic/lib/math/clc_fma.cl | 158 ++++++++++++++++++++++ > > generic/lib/math/fma.cl | 7 + > > generic/lib/math/fma.inc | 7 + > > 7 files changed, 192 insertions(+), 5 deletions(-) > > create mode 100644 generic/include/clc/math/ternary_decl.inc > > create mode 100644 generic/include/math/clc_fma.h > > create mode 100644 generic/lib/math/clc_fma.cl > > create mode 100644 generic/lib/math/fma.cl > > create mode 100644 generic/lib/math/fma.inc > > > > diff --git a/generic/include/clc/math/fma.h b/generic/include/clc/math/fma.h > > index 02d39f6..c7c23eb 100644 > > --- a/generic/include/clc/math/fma.h > > +++ b/generic/include/clc/math/fma.h > > @@ -1,6 +1,7 @@ > > -#undef fma > > -#define fma __clc_fma > > +#define __CLC_BODY > > +#define __CLC_FUNCTION fma > > > > -#define __CLC_FUNCTION __clc_fma > > -#define __CLC_INTRINSIC "llvm.fma" > > -#include > > +#include > > + > > +#undef __CLC_BODY > > +#undef __CLC_FUNCTION > > diff --git a/generic/include/clc/math/ternary_decl.inc b/generic/include/clc/math/ternary_decl.inc > > new file mode 100644 > > index 0000000..0598684 > > --- /dev/null > > +++ 
b/generic/include/clc/math/ternary_decl.inc > > @@ -0,0 +1 @@ > > +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); > > diff --git a/generic/include/math/clc_fma.h b/generic/include/math/clc_fma.h > > new file mode 100644 > > index 0000000..3ae6a74 > > --- /dev/null > > +++ b/generic/include/math/clc_fma.h > > @@ -0,0 +1,11 @@ > > +#define __CLC_FUNCTION __clc_fma > > +#define __CLC_INTRINSIC "llvm.fma" > > +#include > > + > > +#define __FLOAT_ONLY > > +#define __CLC_FUNCTION __clc_sw_fma > > +#define __CLC_BODY > > +#include > > +#undef __CLC_BODY > > +#undef __CLC_FUNCTION > > +#undef __FLOAT_ONLY > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES > > index 1f1a427..565750e 100644 > > --- a/generic/lib/SOURCES > > +++ b/generic/lib/SOURCES > > @@ -101,6 +101,8 @@ math/exp2.cl > > math/clc_exp10.cl > > math/exp10.cl > > math/fdim.cl > > +math/clc_fma.cl > > +math/fma.cl > > math/fmax.cl > > math/fmin.cl > > math/clc_fmod.cl > > diff --git a/generic/lib/math/clc_fma.cl b/generic/lib/math/clc_fma.cl > > new file mode 100644 > > index 0000000..dee90e9 > > --- /dev/null > > +++ b/generic/lib/math/clc_fma.cl > > @@ -0,0 +1,158 @@ > > +/* > > + * Copyright (c) 2014 Advanced Micro Devices, Inc. > > + * > > + * Permission is hereby granted, free of charge, to any person obtaining a copy > > + * of this software and associated documentation files (the "Software"), to deal > > + * in the Software without restriction, including without limitation the rights > > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > > + * copies of the Software, and to permit persons to whom the Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be included in > > + * all copies or substantial portions of the Software. 
> > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > > + * THE SOFTWARE. > > + */ > > + > > +#include > > + > > +#include "config.h" > > +#include "math.h" > > +#include "../clcmacro.h" > > + > > +struct fp { > > + ulong mantissa; > > + int exponent; > > + uint sign; > > +}; > > + > > +_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) > > +{ > > + /* special cases */ > > + if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b)) > > + return mad(a, b, c); > > + > > + /* If only c is inf, and both a,b are regular numbers, the result is c*/ > > + if (isinf(c)) > > + return c; > > + > > + a = __clc_flush_denormal_if_not_supported(a); > > + b = __clc_flush_denormal_if_not_supported(b); > > + c = __clc_flush_denormal_if_not_supported(c); > > + > > + if (c == 0) > > + return a * b; > > + > > + struct fp st_a, st_b, st_c; > > + > > + st_a.exponent = a == .0f ? 0 : ((as_uint(a) & 0x7f800000) >> 23) - 127; > > + st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127; > > + st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127; > > + > > + st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000; > > + st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000; > > + st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000; > > + > > + st_a.sign = as_uint(a) & 0x80000000; > > + st_b.sign = as_uint(b) & 0x80000000; > > + st_c.sign = as_uint(c) & 0x80000000; > > + > > + // Multiplication. 
> > + // Move the product to the highest bits to maximize precision > > + // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. > > + // Add one bit for future addition overflow, > > + // add another bit to detect subtraction underflow > > + struct fp st_mul; > > + st_mul.sign = st_a.sign ^ st_b.sign; > > + st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul; > > + st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0; > > + > > + // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel > > + if (st_mul.exponent == 0 && st_mul.mantissa == 0) > > + return c; > > + > > +// Mantissa is 23 fractional bits, shift it the same way as product mantissa > > +#define C_ADJUST 37ul > > + > > + // both exponents are bias adjusted > > + int exp_diff = st_mul.exponent - st_c.exponent; > > + > > + st_c.mantissa <<= C_ADJUST; > > + ulong cutoff_bits = 0; > > + ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul; > > + if (exp_diff > 0) { > > + cutoff_bits = exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask); > > + st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff); > > + } else { > > + cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask); > > + st_mul.mantissa = -exp_diff >= 64 ? 0 : (st_mul.mantissa >> -exp_diff); > > + } > > + > > + struct fp st_fma; > > + st_fma.sign = st_mul.sign; > > + st_fma.exponent = max(st_mul.exponent, st_c.exponent); > > + if (st_c.sign == st_mul.sign) { > > + st_fma.mantissa = st_mul.mantissa + st_c.mantissa; > > + } else { > > + // cutoff bits borrow one > > + st_fma.mantissa = st_mul.mantissa - st_c.mantissa - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 
1 : 0); > > + } > > + > > + // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign > > + if (st_fma.mantissa > LONG_MAX) { > > + st_fma.mantissa = 0 - st_fma.mantissa; > > + st_fma.sign = st_mul.sign ^ 0x80000000; > > + } > > + > > + // detect overflow/underflow > > + int overflow_bits = 3 - clz(st_fma.mantissa); > > + > > + // adjust exponent > > + st_fma.exponent += overflow_bits; > > + > > + // handle underflow > > + if (overflow_bits < 0) { > > + st_fma.mantissa <<= -overflow_bits; > > + overflow_bits = 0; > > + } > > + > > + // rounding > > + ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1; > > + ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0); > > + ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits)); > > + ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits)); > > + > > + // round to nearest even > > + if ((trunc_bits > grs_bits) || > > + (trunc_bits == grs_bits && last_bit != 0)) > > + st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits)); > > + > > + // Shift mantissa back to bit 23 > > + st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits)); > > + > > + // Detect rounding overflow > > + if (st_fma.mantissa > 0xffffff) { > > + ++st_fma.exponent; > > + st_fma.mantissa >>= 1; > > + } > > + > > + if (st_fma.mantissa == 0) > > + return .0f; > > + > > + // Flating point range limit > > + if (st_fma.exponent > 127) > > + return as_float(as_uint(INFINITY) | st_fma.sign); > > + > > + // Flush denormals > > + if (st_fma.exponent <= -127) > > + return as_float(st_fma.sign); > > + > > + return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | ((uint)st_fma.mantissa & 0x7fffff)); > > +} > > +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, float, float) > > diff --git a/generic/lib/math/fma.cl b/generic/lib/math/fma.cl > > new file mode 100644 > > index 0000000..9ad81be > > --- /dev/null > > +++ b/generic/lib/math/fma.cl > > @@ -0,0 +1,7 @@ > > 
+#include > > + > > +#include "math.h" > > +#include "math/clc_fma.h" > > + > > +#define __CLC_BODY > > +#include > > diff --git a/generic/lib/math/fma.inc b/generic/lib/math/fma.inc > > new file mode 100644 > > index 0000000..654208f > > --- /dev/null > > +++ b/generic/lib/math/fma.inc > > @@ -0,0 +1,7 @@ > > +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { > > +#if __CLC_FPSIZE == 32 && HAVE_HW_FMA32() == 0 > > + return __clc_sw_fma(a, b, c); > > +#else > > + return __clc_fma(a, b, c); > > +#endif > > +} > > ping. ping2. Jan > > Jan -- Jan Vesely -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: This is a digitally signed message part URL: From libclc-dev at lists.llvm.org Wed Jun 6 18:25:43 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 06 Jun 2018 18:25:43 -0700 Subject: [Libclc-dev] [PATCH 2/2] r600/fmin: Flush denormals before calling builtin. In-Reply-To: <5fa96e44b7a6d04581d5570338da27a5b958696a.camel@rutgers.edu> References: <20180521153647.12124-1-jan.vesely@rutgers.edu> <20180521153647.12124-2-jan.vesely@rutgers.edu> <5fa96e44b7a6d04581d5570338da27a5b958696a.camel@rutgers.edu> Message-ID: <1528334743.5228.1.camel@rutgers.edu> On Tue, 2018-05-29 at 16:10 -0400, Jan Vesely wrote: > On Mon, 2018-05-21 at 11:36 -0400, Jan Vesely wrote: > > Same reason as amdgcn. > > Fixes fmin, minmag CTS on turks. 
> > Signed-off-by: Jan Vesely > > --- > > r600/lib/SOURCES | 1 + > > r600/lib/math/fmin.cl | 30 ++++++++++++++++++++++++++++++ > > 2 files changed, 31 insertions(+) > > create mode 100644 r600/lib/math/fmin.cl > > > > diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES > > index 132b151..e69be4a 100644 > > --- a/r600/lib/SOURCES > > +++ b/r600/lib/SOURCES > > @@ -1,4 +1,5 @@ > > math/fmax.cl > > +math/fmin.cl > > synchronization/barrier_impl.ll > > workitem/get_global_offset.cl > > workitem/get_group_id.cl > > diff --git a/r600/lib/math/fmin.cl b/r600/lib/math/fmin.cl > > new file mode 100644 > > index 0000000..09f1e4c > > --- /dev/null > > +++ b/r600/lib/math/fmin.cl > > @@ -0,0 +1,30 @@ > > +#include > > + > > +#include "../../../generic/lib/clcmacro.h" > > +#include "../../../generic/lib/math/math.h" > > + > > +_CLC_DEF _CLC_OVERLOAD float fmin(float x, float y) > > +{ > > + /* fcanonicalize removes sNaNs and flushes denormals if not enabled. > > + * Otherwise fmin instruction flushes the values for comparison, > > + * but outputs original denormal */ > > + x = __clc_flush_denormal_if_not_supported(x); > > + y = __clc_flush_denormal_if_not_supported(y); > > + return __builtin_fminf(x, y); > > +} > > +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmin, float, float) > > + > > +#ifdef cl_khr_fp64 > > + > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable > > + > > +_CLC_DEF _CLC_OVERLOAD double fmin(double x, double y) > > +{ > > + return __builtin_fmin(x, y); > > +} > > +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double) > > + > > +#endif > > + > > +#define __CLC_BODY <../../../generic/lib/math/fmin.inc> > > +#include > > ping. ping2. Jan > > Jan -- Jan Vesely -------------- next part -------------- A non-text attachment was scrubbed... 
Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: This is a digitally signed message part URL: From libclc-dev at lists.llvm.org Wed Jun 6 18:26:20 2018 From: libclc-dev at lists.llvm.org (Tom Stellard via Libclc-dev) Date: Wed, 6 Jun 2018 18:26:20 -0700 Subject: [Libclc-dev] [PATCH 1/1] math/fma: Add fp32 software implementation In-Reply-To: <1528334716.5228.0.camel@rutgers.edu> References: <20180519234641.17360-1-jan.vesely@rutgers.edu> <6ac7201e1c92674cb3a6300dd37a78e236d1ad86.camel@rutgers.edu> <1528334716.5228.0.camel@rutgers.edu> Message-ID: <1172be87-dcdb-f1df-eaa5-3e796972623b@redhat.com> On 06/06/2018 06:25 PM, Jan Vesely via Libclc-dev wrote: > On Tue, 2018-05-29 at 16:09 -0400, Jan Vesely wrote: >> On Sat, 2018-05-19 at 19:46 -0400, Jan Vesely wrote: >>> Passes CTS on carrizo (when forced to use sw fma) and turks. >>> Signed-off-by: Jan Vesely >>> --- >>> generic/include/clc/math/fma.h | 11 +- >>> generic/include/clc/math/ternary_decl.inc | 1 + >>> generic/include/math/clc_fma.h | 11 ++ >>> generic/lib/SOURCES | 2 + >>> generic/lib/math/clc_fma.cl | 158 ++++++++++++++++++++++ >>> generic/lib/math/fma.cl | 7 + >>> generic/lib/math/fma.inc | 7 + >>> 7 files changed, 192 insertions(+), 5 deletions(-) >>> create mode 100644 generic/include/clc/math/ternary_decl.inc >>> create mode 100644 generic/include/math/clc_fma.h >>> create mode 100644 generic/lib/math/clc_fma.cl >>> create mode 100644 generic/lib/math/fma.cl >>> create mode 100644 generic/lib/math/fma.inc >>> >>> diff --git a/generic/include/clc/math/fma.h b/generic/include/clc/math/fma.h >>> index 02d39f6..c7c23eb 100644 >>> --- a/generic/include/clc/math/fma.h >>> +++ b/generic/include/clc/math/fma.h >>> @@ -1,6 +1,7 @@ >>> -#undef fma >>> -#define fma __clc_fma >>> +#define __CLC_BODY >>> +#define __CLC_FUNCTION fma >>> >>> -#define __CLC_FUNCTION __clc_fma >>> -#define __CLC_INTRINSIC "llvm.fma" >>> -#include >>> +#include >>> + >>> +#undef __CLC_BODY >>> +#undef 
__CLC_FUNCTION >>> diff --git a/generic/include/clc/math/ternary_decl.inc b/generic/include/clc/math/ternary_decl.inc >>> new file mode 100644 >>> index 0000000..0598684 >>> --- /dev/null >>> +++ b/generic/include/clc/math/ternary_decl.inc >>> @@ -0,0 +1 @@ >>> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c); >>> diff --git a/generic/include/math/clc_fma.h b/generic/include/math/clc_fma.h >>> new file mode 100644 >>> index 0000000..3ae6a74 >>> --- /dev/null >>> +++ b/generic/include/math/clc_fma.h >>> @@ -0,0 +1,11 @@ >>> +#define __CLC_FUNCTION __clc_fma >>> +#define __CLC_INTRINSIC "llvm.fma" >>> +#include >>> + >>> +#define __FLOAT_ONLY >>> +#define __CLC_FUNCTION __clc_sw_fma >>> +#define __CLC_BODY >>> +#include >>> +#undef __CLC_BODY >>> +#undef __CLC_FUNCTION >>> +#undef __FLOAT_ONLY >>> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES >>> index 1f1a427..565750e 100644 >>> --- a/generic/lib/SOURCES >>> +++ b/generic/lib/SOURCES >>> @@ -101,6 +101,8 @@ math/exp2.cl >>> math/clc_exp10.cl >>> math/exp10.cl >>> math/fdim.cl >>> +math/clc_fma.cl >>> +math/fma.cl >>> math/fmax.cl >>> math/fmin.cl >>> math/clc_fmod.cl >>> diff --git a/generic/lib/math/clc_fma.cl b/generic/lib/math/clc_fma.cl >>> new file mode 100644 >>> index 0000000..dee90e9 >>> --- /dev/null >>> +++ b/generic/lib/math/clc_fma.cl >>> @@ -0,0 +1,158 @@ >>> +/* >>> + * Copyright (c) 2014 Advanced Micro Devices, Inc. 
>>> + * >>> + * Permission is hereby granted, free of charge, to any person obtaining a copy >>> + * of this software and associated documentation files (the "Software"), to deal >>> + * in the Software without restriction, including without limitation the rights >>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell >>> + * copies of the Software, and to permit persons to whom the Software is >>> + * furnished to do so, subject to the following conditions: >>> + * >>> + * The above copyright notice and this permission notice shall be included in >>> + * all copies or substantial portions of the Software. >>> + * >>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE >>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER >>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, >>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >>> + * THE SOFTWARE. >>> + */ >>> + >>> +#include >>> + >>> +#include "config.h" >>> +#include "math.h" >>> +#include "../clcmacro.h" >>> + >>> +struct fp { >>> + ulong mantissa; >>> + int exponent; >>> + uint sign; >>> +}; >>> + >>> +_CLC_DEF _CLC_OVERLOAD float __clc_sw_fma(float a, float b, float c) >>> +{ >>> + /* special cases */ >>> + if (isnan(a) || isnan(b) || isnan(c) || isinf(a) || isinf(b)) >>> + return mad(a, b, c); >>> + >>> + /* If only c is inf, and both a,b are regular numbers, the result is c*/ >>> + if (isinf(c)) >>> + return c; >>> + >>> + a = __clc_flush_denormal_if_not_supported(a); >>> + b = __clc_flush_denormal_if_not_supported(b); >>> + c = __clc_flush_denormal_if_not_supported(c); >>> + >>> + if (c == 0) >>> + return a * b; >>> + >>> + struct fp st_a, st_b, st_c; >>> + >>> + st_a.exponent = a == .0f ? 
0 : ((as_uint(a) & 0x7f800000) >> 23) - 127; >>> + st_b.exponent = b == .0f ? 0 : ((as_uint(b) & 0x7f800000) >> 23) - 127; >>> + st_c.exponent = c == .0f ? 0 : ((as_uint(c) & 0x7f800000) >> 23) - 127; >>> + >>> + st_a.mantissa = a == .0f ? 0 : (as_uint(a) & 0x7fffff) | 0x800000; >>> + st_b.mantissa = b == .0f ? 0 : (as_uint(b) & 0x7fffff) | 0x800000; >>> + st_c.mantissa = c == .0f ? 0 : (as_uint(c) & 0x7fffff) | 0x800000; >>> + >>> + st_a.sign = as_uint(a) & 0x80000000; >>> + st_b.sign = as_uint(b) & 0x80000000; >>> + st_c.sign = as_uint(c) & 0x80000000; >>> + >>> + // Multiplication. >>> + // Move the product to the highest bits to maximize precision >>> + // mantissa is 24 bits => product is 48 bits, 2bits non-fraction. >>> + // Add one bit for future addition overflow, >>> + // add another bit to detect subtraction underflow >>> + struct fp st_mul; >>> + st_mul.sign = st_a.sign ^ st_b.sign; >>> + st_mul.mantissa = (st_a.mantissa * st_b.mantissa) << 14ul; >>> + st_mul.exponent = st_mul.mantissa ? st_a.exponent + st_b.exponent : 0; >>> + >>> + // FIXME: Detecting a == 0 || b == 0 above crashed GCN isel >>> + if (st_mul.exponent == 0 && st_mul.mantissa == 0) >>> + return c; >>> + >>> +// Mantissa is 23 fractional bits, shift it the same way as product mantissa >>> +#define C_ADJUST 37ul >>> + >>> + // both exponents are bias adjusted >>> + int exp_diff = st_mul.exponent - st_c.exponent; >>> + >>> + st_c.mantissa <<= C_ADJUST; >>> + ulong cutoff_bits = 0; >>> + ulong cutoff_mask = (1ul << abs(exp_diff)) - 1ul; >>> + if (exp_diff > 0) { >>> + cutoff_bits = exp_diff >= 64 ? st_c.mantissa : (st_c.mantissa & cutoff_mask); >>> + st_c.mantissa = exp_diff >= 64 ? 0 : (st_c.mantissa >> exp_diff); >>> + } else { >>> + cutoff_bits = -exp_diff >= 64 ? st_mul.mantissa : (st_mul.mantissa & cutoff_mask); >>> + st_mul.mantissa = -exp_diff >= 64 ? 
0 : (st_mul.mantissa >> -exp_diff); >>> + } >>> + >>> + struct fp st_fma; >>> + st_fma.sign = st_mul.sign; >>> + st_fma.exponent = max(st_mul.exponent, st_c.exponent); >>> + if (st_c.sign == st_mul.sign) { >>> + st_fma.mantissa = st_mul.mantissa + st_c.mantissa; >>> + } else { >>> + // cutoff bits borrow one >>> + st_fma.mantissa = st_mul.mantissa - st_c.mantissa - (cutoff_bits && (st_mul.exponent > st_c.exponent) ? 1 : 0); >>> + } >>> + >>> + // underflow: st_c.sign != st_mul.sign, and magnitude switches the sign >>> + if (st_fma.mantissa > LONG_MAX) { >>> + st_fma.mantissa = 0 - st_fma.mantissa; >>> + st_fma.sign = st_mul.sign ^ 0x80000000; >>> + } >>> + >>> + // detect overflow/underflow >>> + int overflow_bits = 3 - clz(st_fma.mantissa); >>> + >>> + // adjust exponent >>> + st_fma.exponent += overflow_bits; >>> + >>> + // handle underflow >>> + if (overflow_bits < 0) { >>> + st_fma.mantissa <<= -overflow_bits; >>> + overflow_bits = 0; >>> + } >>> + >>> + // rounding >>> + ulong trunc_mask = (1ul << (C_ADJUST + overflow_bits)) - 1; >>> + ulong trunc_bits = (st_fma.mantissa & trunc_mask) | (cutoff_bits != 0); >>> + ulong last_bit = st_fma.mantissa & (1ul << (C_ADJUST + overflow_bits)); >>> + ulong grs_bits = (0x4ul << (C_ADJUST - 3 + overflow_bits)); >>> + >>> + // round to nearest even >>> + if ((trunc_bits > grs_bits) || >>> + (trunc_bits == grs_bits && last_bit != 0)) >>> + st_fma.mantissa += (1ul << (C_ADJUST + overflow_bits)); >>> + >>> + // Shift mantissa back to bit 23 >>> + st_fma.mantissa = (st_fma.mantissa >> (C_ADJUST + overflow_bits)); >>> + >>> + // Detect rounding overflow >>> + if (st_fma.mantissa > 0xffffff) { >>> + ++st_fma.exponent; >>> + st_fma.mantissa >>= 1; >>> + } >>> + >>> + if (st_fma.mantissa == 0) >>> + return .0f; >>> + >>> + // Flating point range limit >>> + if (st_fma.exponent > 127) >>> + return as_float(as_uint(INFINITY) | st_fma.sign); >>> + >>> + // Flush denormals >>> + if (st_fma.exponent <= -127) >>> + return 
as_float(st_fma.sign); >>> + >>> + return as_float(st_fma.sign | ((st_fma.exponent + 127) << 23) | ((uint)st_fma.mantissa & 0x7fffff)); >>> +} >>> +_CLC_TERNARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, float, __clc_sw_fma, float, float, float) >>> diff --git a/generic/lib/math/fma.cl b/generic/lib/math/fma.cl >>> new file mode 100644 >>> index 0000000..9ad81be >>> --- /dev/null >>> +++ b/generic/lib/math/fma.cl >>> @@ -0,0 +1,7 @@ >>> +#include >>> + >>> +#include "math.h" >>> +#include "math/clc_fma.h" >>> + >>> +#define __CLC_BODY >>> +#include >>> diff --git a/generic/lib/math/fma.inc b/generic/lib/math/fma.inc >>> new file mode 100644 >>> index 0000000..654208f >>> --- /dev/null >>> +++ b/generic/lib/math/fma.inc >>> @@ -0,0 +1,7 @@ >>> +_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE fma(__CLC_GENTYPE a, __CLC_GENTYPE b, __CLC_GENTYPE c) { >>> +#if __CLC_FPSIZE == 32 && HAVE_HW_FMA32() == 0 >>> + return __clc_sw_fma(a, b, c); >>> +#else >>> + return __clc_fma(a, b, c); >>> +#endif >>> +} >> >> ping. > > ping2. > LGTM. > Jan > >> >> Jan > > > > _______________________________________________ > Libclc-dev mailing list > Libclc-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev > From libclc-dev at lists.llvm.org Wed Jun 6 18:27:03 2018 From: libclc-dev at lists.llvm.org (Tom Stellard via Libclc-dev) Date: Wed, 6 Jun 2018 18:27:03 -0700 Subject: [Libclc-dev] [PATCH 2/2] r600/fmin: Flush denormals before calling builtin. In-Reply-To: <1528334743.5228.1.camel@rutgers.edu> References: <20180521153647.12124-1-jan.vesely@rutgers.edu> <20180521153647.12124-2-jan.vesely@rutgers.edu> <5fa96e44b7a6d04581d5570338da27a5b958696a.camel@rutgers.edu> <1528334743.5228.1.camel@rutgers.edu> Message-ID: On 06/06/2018 06:25 PM, Jan Vesely via Libclc-dev wrote: > On Tue, 2018-05-29 at 16:10 -0400, Jan Vesely wrote: >> On Mon, 2018-05-21 at 11:36 -0400, Jan Vesely wrote: >>> Same reason as amdgcn. >>> Fixes fmin, minmag CTS on turks. 
>>> Signed-off-by: Jan Vesely >>> --- >>> r600/lib/SOURCES | 1 + >>> r600/lib/math/fmin.cl | 30 ++++++++++++++++++++++++++++++ >>> 2 files changed, 31 insertions(+) >>> create mode 100644 r600/lib/math/fmin.cl >>> >>> diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES >>> index 132b151..e69be4a 100644 >>> --- a/r600/lib/SOURCES >>> +++ b/r600/lib/SOURCES >>> @@ -1,4 +1,5 @@ >>> math/fmax.cl >>> +math/fmin.cl >>> synchronization/barrier_impl.ll >>> workitem/get_global_offset.cl >>> workitem/get_group_id.cl >>> diff --git a/r600/lib/math/fmin.cl b/r600/lib/math/fmin.cl >>> new file mode 100644 >>> index 0000000..09f1e4c >>> --- /dev/null >>> +++ b/r600/lib/math/fmin.cl >>> @@ -0,0 +1,30 @@ >>> +#include >>> + >>> +#include "../../../generic/lib/clcmacro.h" >>> +#include "../../../generic/lib/math/math.h" >>> + >>> +_CLC_DEF _CLC_OVERLOAD float fmin(float x, float y) >>> +{ >>> + /* fcanonicalize removes sNaNs and flushes denormals if not enabled. >>> + * Otherwise fmin instruction flushes the values for comparison, >>> + * but outputs original denormal */ >>> + x = __clc_flush_denormal_if_not_supported(x); >>> + y = __clc_flush_denormal_if_not_supported(y); >>> + return __builtin_fminf(x, y); >>> +} >>> +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, fmin, float, float) >>> + >>> +#ifdef cl_khr_fp64 >>> + >>> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable >>> + >>> +_CLC_DEF _CLC_OVERLOAD double fmin(double x, double y) >>> +{ >>> + return __builtin_fmin(x, y); >>> +} >>> +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double) >>> + >>> +#endif >>> + >>> +#define __CLC_BODY <../../../generic/lib/math/fmin.inc> >>> +#include >> >> ping. > > ping2. > LGTM for both. 
> Jan > >> >> Jan > > > > _______________________________________________ > Libclc-dev mailing list > Libclc-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev > From libclc-dev at lists.llvm.org Wed Jun 20 13:34:57 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:34:57 -0400 Subject: [Libclc-dev] [PATCH 0/7] Atomics cleanup and volatile change Message-ID: <20180620203504.5123-1-jan.vesely@rutgers.edu> This series cleans up atomic declarations and implementation. It also switches 32bit atom_* functions to accept volatile pointers. The reasoning is included in the 7/7 commit message. I know I've spoken against this change in the past, but all things considered I'm leaning towards this being an oversight in the specs. thanks, Jan Jan Vesely (7): atomic: Move define cleanup to shared include atomic: Cleanup atomic_cmpxchg header atom: Consolidate cl_khr_{local,global}_int32_{base,extended}_atomics declarations atom: Consolidate cl_khr_int64_{base,extended}_atomics declarations atomic: Provide function implementation of atomic_{dec,inc} atom: Consolidate cl_khr_{local,global}_int32_{base,extended}_atomics implementation atom: Use volatile pointers for cl_khr_{global,local}_int32_{base,extended}_atomics generic/include/clc/atom_decl_int32.inc | 10 ++++++++++ generic/include/clc/atom_decl_int64.inc | 11 +++++++++++ generic/include/clc/atomic/atomic_add.h | 3 --- generic/include/clc/atomic/atomic_and.h | 3 --- generic/include/clc/atomic/atomic_cmpxchg.h | 19 ++++--------------- generic/include/clc/atomic/atomic_dec.h | 5 ++++- generic/include/clc/atomic/atomic_decl.inc | 6 +++++- generic/include/clc/atomic/atomic_inc.h | 5 ++++- generic/include/clc/atomic/atomic_max.h | 3 --- generic/include/clc/atomic/atomic_min.h | 3 --- generic/include/clc/atomic/atomic_or.h | 3 --- generic/include/clc/atomic/atomic_sub.h | 3 --- generic/include/clc/atomic/atomic_xchg.h | 7 +++---- 
generic/include/clc/atomic/atomic_xor.h | 3 --- .../clc/cl_khr_global_int32_base_atomics/atom_add.h | 5 +++-- .../cl_khr_global_int32_base_atomics/atom_cmpxchg.h | 4 ++-- .../clc/cl_khr_global_int32_base_atomics/atom_dec.h | 4 ++-- .../clc/cl_khr_global_int32_base_atomics/atom_inc.h | 4 ++-- .../clc/cl_khr_global_int32_base_atomics/atom_sub.h | 5 +++-- .../clc/cl_khr_global_int32_base_atomics/atom_xchg.h | 5 +++-- .../cl_khr_global_int32_extended_atomics/atom_and.h | 5 +++-- .../cl_khr_global_int32_extended_atomics/atom_max.h | 5 +++-- .../cl_khr_global_int32_extended_atomics/atom_min.h | 5 +++-- .../cl_khr_global_int32_extended_atomics/atom_or.h | 5 +++-- .../cl_khr_global_int32_extended_atomics/atom_xor.h | 5 +++-- .../include/clc/cl_khr_int64_base_atomics/atom_add.h | 6 ++---- .../include/clc/cl_khr_int64_base_atomics/atom_sub.h | 6 ++---- .../include/clc/cl_khr_int64_base_atomics/atom_xchg.h | 6 ++---- .../clc/cl_khr_int64_extended_atomics/atom_and.h | 6 ++---- .../clc/cl_khr_int64_extended_atomics/atom_max.h | 6 ++---- .../clc/cl_khr_int64_extended_atomics/atom_min.h | 6 ++---- .../clc/cl_khr_int64_extended_atomics/atom_or.h | 6 ++---- .../clc/cl_khr_int64_extended_atomics/atom_xor.h | 6 ++---- .../clc/cl_khr_local_int32_base_atomics/atom_add.h | 5 +++-- .../cl_khr_local_int32_base_atomics/atom_cmpxchg.h | 4 ++-- .../clc/cl_khr_local_int32_base_atomics/atom_dec.h | 4 ++-- .../clc/cl_khr_local_int32_base_atomics/atom_inc.h | 4 ++-- .../clc/cl_khr_local_int32_base_atomics/atom_sub.h | 5 +++-- .../clc/cl_khr_local_int32_base_atomics/atom_xchg.h | 5 +++-- .../cl_khr_local_int32_extended_atomics/atom_and.h | 5 +++-- .../cl_khr_local_int32_extended_atomics/atom_max.h | 5 +++-- .../cl_khr_local_int32_extended_atomics/atom_min.h | 5 +++-- .../clc/cl_khr_local_int32_extended_atomics/atom_or.h | 5 +++-- .../cl_khr_local_int32_extended_atomics/atom_xor.h | 5 +++-- generic/lib/SOURCES | 2 ++ generic/lib/atom_int32_binary.inc | 14 ++++++++++++++ 
generic/lib/atomic/atomic_dec.cl | 12 ++++++++++++ generic/lib/atomic/atomic_inc.cl | 12 ++++++++++++ .../lib/cl_khr_global_int32_base_atomics/atom_add.cl | 12 +++--------- .../cl_khr_global_int32_base_atomics/atom_cmpxchg.cl | 2 +- .../lib/cl_khr_global_int32_base_atomics/atom_dec.cl | 4 ++-- .../lib/cl_khr_global_int32_base_atomics/atom_inc.cl | 4 ++-- .../lib/cl_khr_global_int32_base_atomics/atom_sub.cl | 12 +++--------- .../lib/cl_khr_global_int32_base_atomics/atom_xchg.cl | 12 +++--------- .../cl_khr_global_int32_extended_atomics/atom_and.cl | 12 +++--------- .../cl_khr_global_int32_extended_atomics/atom_max.cl | 12 +++--------- .../cl_khr_global_int32_extended_atomics/atom_min.cl | 12 +++--------- .../cl_khr_global_int32_extended_atomics/atom_or.cl | 12 +++--------- .../cl_khr_global_int32_extended_atomics/atom_xor.cl | 12 +++--------- .../lib/cl_khr_local_int32_base_atomics/atom_add.cl | 12 +++--------- .../cl_khr_local_int32_base_atomics/atom_cmpxchg.cl | 2 +- .../lib/cl_khr_local_int32_base_atomics/atom_dec.cl | 4 ++-- .../lib/cl_khr_local_int32_base_atomics/atom_inc.cl | 4 ++-- .../lib/cl_khr_local_int32_base_atomics/atom_sub.cl | 12 +++--------- .../lib/cl_khr_local_int32_base_atomics/atom_xchg.cl | 12 +++--------- .../cl_khr_local_int32_extended_atomics/atom_and.cl | 12 +++--------- .../cl_khr_local_int32_extended_atomics/atom_max.cl | 12 +++--------- .../cl_khr_local_int32_extended_atomics/atom_min.cl | 12 +++--------- .../cl_khr_local_int32_extended_atomics/atom_or.cl | 12 +++--------- .../cl_khr_local_int32_extended_atomics/atom_xor.cl | 12 +++--------- 70 files changed, 215 insertions(+), 273 deletions(-) create mode 100644 generic/include/clc/atom_decl_int32.inc create mode 100644 generic/include/clc/atom_decl_int64.inc create mode 100644 generic/lib/atom_int32_binary.inc create mode 100644 generic/lib/atomic/atomic_dec.cl create mode 100644 generic/lib/atomic/atomic_inc.cl -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:34:58 2018 From: 
libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:34:58 -0400 Subject: [Libclc-dev] [PATCH 1/7] atomic: Move define cleanup to shared include In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-2-jan.vesely@rutgers.edu> Signed-off-by: Jan Vesely --- generic/include/clc/atomic/atomic_add.h | 3 --- generic/include/clc/atomic/atomic_and.h | 3 --- generic/include/clc/atomic/atomic_decl.inc | 6 +++++- generic/include/clc/atomic/atomic_max.h | 3 --- generic/include/clc/atomic/atomic_min.h | 3 --- generic/include/clc/atomic/atomic_or.h | 3 --- generic/include/clc/atomic/atomic_sub.h | 3 --- generic/include/clc/atomic/atomic_xchg.h | 7 +++---- generic/include/clc/atomic/atomic_xor.h | 3 --- 9 files changed, 8 insertions(+), 26 deletions(-) diff --git a/generic/include/clc/atomic/atomic_add.h b/generic/include/clc/atomic/atomic_add.h index 7dd4fd3..302e4fb 100644 --- a/generic/include/clc/atomic/atomic_add.h +++ b/generic/include/clc/atomic/atomic_add.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_add #include -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_and.h b/generic/include/clc/atomic/atomic_and.h index a198c46..4759b9f 100644 --- a/generic/include/clc/atomic/atomic_and.h +++ b/generic/include/clc/atomic/atomic_and.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_and #include -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_decl.inc b/generic/include/clc/atomic/atomic_decl.inc index 49ccde2..4d81ac1 100644 --- a/generic/include/clc/atomic/atomic_decl.inc +++ b/generic/include/clc/atomic/atomic_decl.inc @@ -1,4 +1,3 @@ - #define __CLC_DECLARE_ATOMIC(ADDRSPACE, TYPE) \ _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); @@ -8,3 +7,8 @@ 
__CLC_DECLARE_ATOMIC_ADDRSPACE(int) __CLC_DECLARE_ATOMIC_ADDRSPACE(uint) + +#undef __CLC_DECLARE_ATOMIC_ADDRSPACE +#undef __CLC_DECLARE_ATOMIC + +#undef __CLC_FUNCTION diff --git a/generic/include/clc/atomic/atomic_max.h b/generic/include/clc/atomic/atomic_max.h index ed09ec9..f6803c4 100644 --- a/generic/include/clc/atomic/atomic_max.h +++ b/generic/include/clc/atomic/atomic_max.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_max #include <clc/atomic/atomic_decl.inc> -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_min.h b/generic/include/clc/atomic/atomic_min.h index 6a46af4..dfa8179 100644 --- a/generic/include/clc/atomic/atomic_min.h +++ b/generic/include/clc/atomic/atomic_min.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_min #include <clc/atomic/atomic_decl.inc> -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_or.h b/generic/include/clc/atomic/atomic_or.h index 2369d81..64ce311 100644 --- a/generic/include/clc/atomic/atomic_or.h +++ b/generic/include/clc/atomic/atomic_or.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_or #include <clc/atomic/atomic_decl.inc> -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_sub.h b/generic/include/clc/atomic/atomic_sub.h index 993e995..0d2f99e 100644 --- a/generic/include/clc/atomic/atomic_sub.h +++ b/generic/include/clc/atomic/atomic_sub.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_sub #include <clc/atomic/atomic_decl.inc> -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_xchg.h b/generic/include/clc/atomic/atomic_xchg.h index ebe0d9a..59a9572 100644 --- a/generic/include/clc/atomic/atomic_xchg.h +++ b/generic/include/clc/atomic/atomic_xchg.h @@ -1,6 +1,5 @@ #define __CLC_FUNCTION atomic_xchg + +_CLC_OVERLOAD _CLC_DECL float __CLC_FUNCTION (volatile local float *, float); +_CLC_OVERLOAD 
_CLC_DECL float __CLC_FUNCTION (volatile global float *, float); #include <clc/atomic/atomic_decl.inc> -__CLC_DECLARE_ATOMIC_ADDRSPACE(float); -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE diff --git a/generic/include/clc/atomic/atomic_xor.h b/generic/include/clc/atomic/atomic_xor.h index 2cb7480..1318bf2 100644 --- a/generic/include/clc/atomic/atomic_xor.h +++ b/generic/include/clc/atomic/atomic_xor.h @@ -1,5 +1,2 @@ #define __CLC_FUNCTION atomic_xor #include <clc/atomic/atomic_decl.inc> -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC -#undef __CLC_DECLARE_ATOMIC_ADDRSPACE -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:34:59 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:34:59 -0400 Subject: [Libclc-dev] [PATCH 2/7] atomic: Cleanup atomic_cmpxchg header In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-3-jan.vesely@rutgers.edu> It's easier to just list the four function declarations Signed-off-by: Jan Vesely --- generic/include/clc/atomic/atomic_cmpxchg.h | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/generic/include/clc/atomic/atomic_cmpxchg.h b/generic/include/clc/atomic/atomic_cmpxchg.h index 2e4f1c2..15bfdab 100644 --- a/generic/include/clc/atomic/atomic_cmpxchg.h +++ b/generic/include/clc/atomic/atomic_cmpxchg.h @@ -1,15 +1,4 @@ -#define __CLC_FUNCTION atomic_cmpxchg - -#define __CLC_DECLARE_ATOMIC_3_ARG(ADDRSPACE, TYPE) \ - _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE, TYPE); - -#define __CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(TYPE) \ - __CLC_DECLARE_ATOMIC_3_ARG(global, TYPE) \ - __CLC_DECLARE_ATOMIC_3_ARG(local, TYPE) - -__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(int) -__CLC_DECLARE_ATOMIC_ADDRSPACE_3_ARG(uint) - -#undef __CLC_FUNCTION -#undef __CLC_DECLARE_ATOMIC_3_ARG -#undef __CLC_DECLARE_ATOMIC_ADDRESS_SPACE_3_ARG +_CLC_OVERLOAD _CLC_DECL int 
atomic_cmpxchg (volatile local int *, int, int); +_CLC_OVERLOAD _CLC_DECL int atomic_cmpxchg (volatile global int *, int, int); +_CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile local uint *, uint, uint); +_CLC_OVERLOAD _CLC_DECL uint atomic_cmpxchg (volatile global uint *, uint, uint); -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:35:00 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:35:00 -0400 Subject: [Libclc-dev] [PATCH 3/7] atom: Consolidate cl_khr_{local, global}_int32_{base, extended}_atomics declarations In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-4-jan.vesely@rutgers.edu> Signed-off-by: Jan Vesely --- generic/include/clc/atom_decl_int32.inc | 10 ++++++++++ .../include/clc/cl_khr_global_int32_base_atomics/atom_add.h | 5 +++-- .../include/clc/cl_khr_global_int32_base_atomics/atom_sub.h | 5 +++-- .../include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h | 5 +++-- .../clc/cl_khr_global_int32_extended_atomics/atom_and.h | 5 +++-- .../clc/cl_khr_global_int32_extended_atomics/atom_max.h | 5 +++-- .../clc/cl_khr_global_int32_extended_atomics/atom_min.h | 5 +++-- .../include/clc/cl_khr_global_int32_extended_atomics/atom_or.h | 5 +++-- .../clc/cl_khr_global_int32_extended_atomics/atom_xor.h | 5 +++-- generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h | 5 +++-- generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h | 5 +++-- .../include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h | 5 +++-- .../include/clc/cl_khr_local_int32_extended_atomics/atom_and.h | 5 +++-- .../include/clc/cl_khr_local_int32_extended_atomics/atom_max.h | 5 +++-- .../include/clc/cl_khr_local_int32_extended_atomics/atom_min.h | 5 +++-- .../include/clc/cl_khr_local_int32_extended_atomics/atom_or.h | 5 +++-- .../include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h | 5 +++-- 17 files changed, 58 
insertions(+), 32 deletions(-) create mode 100644 generic/include/clc/atom_decl_int32.inc diff --git a/generic/include/clc/atom_decl_int32.inc b/generic/include/clc/atom_decl_int32.inc new file mode 100644 index 0000000..dc76356 --- /dev/null +++ b/generic/include/clc/atom_decl_int32.inc @@ -0,0 +1,10 @@ +#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ + _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (ADDRSPACE TYPE *, TYPE); + +__CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int) +__CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint) + +#undef __CLC_DECLARE_ATOM + +#undef __CLC_FUNCTION +#undef __CLC_ADDRESS_SPACE diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h index 9740b3d..244e2b2 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_add(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_add(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_add +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h index c435c72..311ffb4 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_sub(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_sub +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h index 6a18e9e..c09eb49 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h +++ 
b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_xchg(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_xchg +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h index 19df7d6..4a9dd46 100644 --- a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h +++ b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_and(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_and(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_and +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h index b46ce29..e57b4d7 100644 --- a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h +++ b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_max(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_max(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_max +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h index 0e458eb..bd62f5d 100644 --- a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h +++ b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_min(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_min(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION 
atom_min +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h index 91cde56..1a04055 100644 --- a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h +++ b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_or(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_or(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_or +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h index f787849..217c04b 100644 --- a/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h +++ b/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_xor(global int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_xor(global unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_xor +#define __CLC_ADDRESS_SPACE global +#include diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h index 096d011..e60ffe8 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_add(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_add(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_add +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h index 6363780..9735071 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h +++ 
b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_sub(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_sub(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_sub +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h index c5a1f09..16aeafa 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_xchg(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_xchg(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_xchg +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h index 96d7b1a..710e202 100644 --- a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h +++ b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_and(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_and(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_and +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h index 7d6b17d..f675e28 100644 --- a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h +++ b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_max(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_max(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_max +#define __CLC_ADDRESS_SPACE 
local +#include diff --git a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h index ddb6cf3..d2476c1 100644 --- a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h +++ b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_min(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_min(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_min +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h index 518c256..1532f61 100644 --- a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h +++ b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_or(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_or(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_or +#define __CLC_ADDRESS_SPACE local +#include diff --git a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h index e6c9f2f..808a147 100644 --- a/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h +++ b/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h @@ -1,2 +1,3 @@ -_CLC_OVERLOAD _CLC_DECL int atom_xor(local int *p, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_xor(local unsigned int *p, unsigned int val); +#define __CLC_FUNCTION atom_xor +#define __CLC_ADDRESS_SPACE local +#include -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:35:01 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:35:01 -0400 Subject: [Libclc-dev] [PATCH 4/7] atom: Consolidate cl_khr_int64_{base, extended}_atomics declarations 
In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-5-jan.vesely@rutgers.edu> Signed-off-by: Jan Vesely --- generic/include/clc/atom_decl_int64.inc | 11 +++++++++++ generic/include/clc/cl_khr_int64_base_atomics/atom_add.h | 6 ++---- generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h | 6 ++---- generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h | 6 ++---- generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h | 6 ++---- generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h | 6 ++---- generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h | 6 ++---- generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h | 6 ++---- generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h | 6 ++---- 9 files changed, 27 insertions(+), 32 deletions(-) create mode 100644 generic/include/clc/atom_decl_int64.inc diff --git a/generic/include/clc/atom_decl_int64.inc b/generic/include/clc/atom_decl_int64.inc new file mode 100644 index 0000000..1e0cc32 --- /dev/null +++ b/generic/include/clc/atom_decl_int64.inc @@ -0,0 +1,11 @@ +#define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ + _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); + +__CLC_DECLARE_ATOM(local, long) +__CLC_DECLARE_ATOM(local, ulong) +__CLC_DECLARE_ATOM(global, long) +__CLC_DECLARE_ATOM(global, ulong) + +#undef __CLC_DECLARE_ATOM + +#undef __CLC_FUNCTION diff --git a/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h b/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h index 5addc13..6f6adad 100644 --- a/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h +++ b/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_add(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_add(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long 
atom_add(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_add(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_add +#include diff --git a/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h b/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h index 2186ec9..e0431dd 100644 --- a/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h +++ b/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_sub(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_sub(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_sub(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_sub(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_sub +#include diff --git a/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h b/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h index 3627af1..cac9688 100644 --- a/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h +++ b/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_xchg(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_xchg(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_xchg(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_xchg(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_xchg +#include diff --git a/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h b/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h index 388df1d..b8e6104 100644 --- a/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h +++ b/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_and(volatile global long *p, long val); -_CLC_OVERLOAD 
_CLC_DECL unsigned long atom_and(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_and(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_and(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_and +#include diff --git a/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h b/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h index b84b5a0..b9cff19 100644 --- a/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h +++ b/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_max(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_max(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_max(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_max(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_max +#include diff --git a/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h b/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h index bd70b0b..0a19d21 100644 --- a/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h +++ b/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_min(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_min(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_min(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_min(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_min +#include diff --git a/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h b/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h index e307822..f4baa36 100644 --- a/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h +++ 
b/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_or(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_or(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_or(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_or(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_or +#include diff --git a/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h b/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h index 54eb492..d64a342 100644 --- a/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h +++ b/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h @@ -1,4 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL long atom_xor(volatile global long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_xor(volatile global unsigned long *p, unsigned long val); -_CLC_OVERLOAD _CLC_DECL long atom_xor(volatile local long *p, long val); -_CLC_OVERLOAD _CLC_DECL unsigned long atom_xor(volatile local unsigned long *p, unsigned long val); +#define __CLC_FUNCTION atom_xor +#include -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:35:02 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:35:02 -0400 Subject: [Libclc-dev] [PATCH 5/7] atomic: Provide function implementation of atomic_{dec, inc} In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-6-jan.vesely@rutgers.edu> Signed-off-by: Jan Vesely --- generic/include/clc/atomic/atomic_dec.h | 5 ++++- generic/include/clc/atomic/atomic_inc.h | 5 ++++- generic/lib/SOURCES | 2 ++ generic/lib/atomic/atomic_dec.cl | 12 ++++++++++++ generic/lib/atomic/atomic_inc.cl | 12 ++++++++++++ 5 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 generic/lib/atomic/atomic_dec.cl create mode 
100644 generic/lib/atomic/atomic_inc.cl diff --git a/generic/include/clc/atomic/atomic_dec.h b/generic/include/clc/atomic/atomic_dec.h index 15d0588..a2d82bb 100644 --- a/generic/include/clc/atomic/atomic_dec.h +++ b/generic/include/clc/atomic/atomic_dec.h @@ -1 +1,4 @@ -#define atomic_dec(p) atomic_sub(p, 1) +_CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile local int *); +_CLC_OVERLOAD _CLC_DECL int atomic_dec (volatile global int *); +_CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile local uint *); +_CLC_OVERLOAD _CLC_DECL uint atomic_dec (volatile global uint *); diff --git a/generic/include/clc/atomic/atomic_inc.h b/generic/include/clc/atomic/atomic_inc.h index d8bc342..950a1e1 100644 --- a/generic/include/clc/atomic/atomic_inc.h +++ b/generic/include/clc/atomic/atomic_inc.h @@ -1 +1,4 @@ -#define atomic_inc(p) atomic_add(p, 1) +_CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile local int *); +_CLC_OVERLOAD _CLC_DECL int atomic_inc (volatile global int *); +_CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile local uint *); +_CLC_OVERLOAD _CLC_DECL uint atomic_inc (volatile global uint *); diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES index 565750e..788aee6 100644 --- a/generic/lib/SOURCES +++ b/generic/lib/SOURCES @@ -7,6 +7,8 @@ async/wait_group_events.cl atomic/atomic_add.cl atomic/atomic_and.cl atomic/atomic_cmpxchg.cl +atomic/atomic_dec.cl +atomic/atomic_inc.cl atomic/atomic_max.cl atomic/atomic_min.cl atomic/atomic_or.cl diff --git a/generic/lib/atomic/atomic_dec.cl b/generic/lib/atomic/atomic_dec.cl new file mode 100644 index 0000000..829aff4 --- /dev/null +++ b/generic/lib/atomic/atomic_dec.cl @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_dec(volatile AS TYPE *p) { \ + return __sync_fetch_and_sub(p, (TYPE)1); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL diff --git a/generic/lib/atomic/atomic_inc.cl b/generic/lib/atomic/atomic_inc.cl new 
file mode 100644 index 0000000..67a7e8d --- /dev/null +++ b/generic/lib/atomic/atomic_inc.cl @@ -0,0 +1,12 @@ +#include + +#define IMPL(TYPE, AS) \ +_CLC_OVERLOAD _CLC_DEF TYPE atomic_inc(volatile AS TYPE *p) { \ + return __sync_fetch_and_add(p, (TYPE)1); \ +} + +IMPL(int, global) +IMPL(unsigned int, global) +IMPL(int, local) +IMPL(unsigned int, local) +#undef IMPL -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:35:03 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:35:03 -0400 Subject: [Libclc-dev] [PATCH 6/7] atom: Consolidate cl_khr_{local, global}_int32_{base, extended}_atomics implementation In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-7-jan.vesely@rutgers.edu> These are just atomic_* wrappers. Switch inc,dec to use atomic_* wrappers as well. Signed-off-by: Jan Vesely --- generic/lib/atom_int32_binary.inc | 14 ++++++++++++++ generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl | 12 +++--------- generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl | 2 +- generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl | 2 +- generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl | 12 +++--------- generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl | 12 +++--------- .../lib/cl_khr_global_int32_extended_atomics/atom_and.cl | 12 +++--------- .../lib/cl_khr_global_int32_extended_atomics/atom_max.cl | 12 +++--------- .../lib/cl_khr_global_int32_extended_atomics/atom_min.cl | 12 +++--------- .../lib/cl_khr_global_int32_extended_atomics/atom_or.cl | 12 +++--------- .../lib/cl_khr_global_int32_extended_atomics/atom_xor.cl | 12 +++--------- generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl | 12 +++--------- generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl | 2 +- generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl | 2 +- generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl | 12 
+++--------- generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl | 12 +++--------- .../lib/cl_khr_local_int32_extended_atomics/atom_and.cl | 12 +++--------- .../lib/cl_khr_local_int32_extended_atomics/atom_max.cl | 12 +++--------- .../lib/cl_khr_local_int32_extended_atomics/atom_min.cl | 12 +++--------- generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl | 12 +++--------- .../lib/cl_khr_local_int32_extended_atomics/atom_xor.cl | 12 +++--------- 21 files changed, 66 insertions(+), 148 deletions(-) create mode 100644 generic/lib/atom_int32_binary.inc diff --git a/generic/lib/atom_int32_binary.inc b/generic/lib/atom_int32_binary.inc new file mode 100644 index 0000000..185144c --- /dev/null +++ b/generic/lib/atom_int32_binary.inc @@ -0,0 +1,14 @@ +#include +#include "utils.h" + +#define __CLC_ATOM_IMPL(AS, TYPE) \ +_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (AS TYPE *p, TYPE val) { \ + return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \ +} + +__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, int) +__CLC_ATOM_IMPL(__CLC_ATOMIC_ADDRESS_SPACE, uint) + +#undef __CLC_ATOM_IMPL +#undef __CLC_ATOMIC_OP +#undef __CLC_ATOMIC_ADDRESS_SPACE diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl index 9151b0c..69f1875 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_add.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_add(global TYPE *p, TYPE val) { \ - return atomic_add(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP add +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl index cc24d2f..290bec2 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl +++ 
b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl @@ -2,7 +2,7 @@ #define IMPL(TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \ - return atom_sub(p, (TYPE)1); \ + return atomic_dec(p); \ } IMPL(int) diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl index 9193ae3..422bb78 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl @@ -2,7 +2,7 @@ #define IMPL(TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \ - return atom_add(p, (TYPE)1); \ + return atomic_inc(p); \ } IMPL(int) diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl index 7faa3cc..e3ca935 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_sub.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(global TYPE *p, TYPE val) { \ - return atomic_sub(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP sub +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl index 9c77db1..3389c86 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_xchg.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(global TYPE *p, TYPE val) { \ - return atomic_xchg(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP xchg +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl b/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl index e587969..da002d5 
100644 --- a/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl +++ b/generic/lib/cl_khr_global_int32_extended_atomics/atom_and.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_and(global TYPE *p, TYPE val) { \ - return atomic_and(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) \ No newline at end of file +#define __CLC_ATOMIC_OP and +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl b/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl index 09177ed..9b7975f 100644 --- a/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl +++ b/generic/lib/cl_khr_global_int32_extended_atomics/atom_max.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_max(global TYPE *p, TYPE val) { \ - return atomic_max(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP max +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl b/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl index 277c41b..04b5f75 100644 --- a/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl +++ b/generic/lib/cl_khr_global_int32_extended_atomics/atom_min.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_min(global TYPE *p, TYPE val) { \ - return atomic_min(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP min +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl b/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl index a936a8e..dbf4e15 100644 --- a/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl +++ b/generic/lib/cl_khr_global_int32_extended_atomics/atom_or.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ 
-_CLC_OVERLOAD _CLC_DEF TYPE atom_or(global TYPE *p, TYPE val) { \ - return atomic_or(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP or +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl b/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl index 1a8e350..40a403d 100644 --- a/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl +++ b/generic/lib/cl_khr_global_int32_extended_atomics/atom_xor.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(global TYPE *p, TYPE val) { \ - return atomic_xor(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP xor +#define __CLC_ATOMIC_ADDRESS_SPACE global +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl index a5dea18..55dd78c 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_add.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_add(local TYPE *p, TYPE val) { \ - return atomic_add(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP add +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl index cfb3d80..d708a6d 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl @@ -2,7 +2,7 @@ #define IMPL(TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \ - return atom_sub(p, (TYPE)1); \ + return atomic_dec(p); \ } IMPL(int) diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl index 
8ea4738..8878673 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl @@ -2,7 +2,7 @@ #define IMPL(TYPE) \ _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \ - return atom_add(p, (TYPE)1); \ + return atomic_inc(p); \ } IMPL(int) diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl index c96696a..c0af334 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_sub.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_sub(local TYPE *p, TYPE val) { \ - return atomic_sub(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP sub +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl index 7d4bcca..7120068 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_xchg.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(local TYPE *p, TYPE val) { \ - return atomic_xchg(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP xchg +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl b/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl index 180103a..62b6ae7 100644 --- a/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl +++ b/generic/lib/cl_khr_local_int32_extended_atomics/atom_and.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_and(local TYPE *p, TYPE val) { \ - return atomic_and(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) \ No newline at end of file +#define __CLC_ATOMIC_OP 
and +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl b/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl index b90301b..8f03b9d 100644 --- a/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl +++ b/generic/lib/cl_khr_local_int32_extended_atomics/atom_max.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_max(local TYPE *p, TYPE val) { \ - return atomic_max(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP max +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl b/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl index 3acedd8..59a3252 100644 --- a/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl +++ b/generic/lib/cl_khr_local_int32_extended_atomics/atom_min.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_min(local TYPE *p, TYPE val) { \ - return atomic_min(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP min +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl b/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl index 338ff2c..8ea408e 100644 --- a/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl +++ b/generic/lib/cl_khr_local_int32_extended_atomics/atom_or.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_or(local TYPE *p, TYPE val) { \ - return atomic_or(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP or +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" diff --git a/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl b/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl index 
51ae3c0..8c28662 100644 --- a/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl +++ b/generic/lib/cl_khr_local_int32_extended_atomics/atom_xor.cl @@ -1,9 +1,3 @@ -#include - -#define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_xor(local TYPE *p, TYPE val) { \ - return atomic_xor(p, val); \ -} - -IMPL(int) -IMPL(unsigned int) +#define __CLC_ATOMIC_OP xor +#define __CLC_ATOMIC_ADDRESS_SPACE local +#include "../atom_int32_binary.inc" -- 2.16.4 From libclc-dev at lists.llvm.org Wed Jun 20 13:35:04 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Wed, 20 Jun 2018 16:35:04 -0400 Subject: [Libclc-dev] [PATCH 7/7] atom: Use volatile pointers for cl_khr_{global, local}_int32_{base, extended}_atomics In-Reply-To: <20180620203504.5123-1-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> Message-ID: <20180620203504.5123-8-jan.vesely@rutgers.edu> The int64 versions were switched to volatile pointers in CL 1.1. CL 1.1 also renamed the atom_ functions to atomic_, which use volatile pointers. CTS and applications use volatile pointers. Passes CTS on carrizo; the no-return piglit tests still pass on turks.
Signed-off-by: Jan Vesely --- generic/include/clc/atom_decl_int32.inc | 2 +- generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h | 4 ++-- generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h | 4 ++-- generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h | 4 ++-- generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h | 4 ++-- generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h | 4 ++-- generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h | 4 ++-- generic/lib/atom_int32_binary.inc | 2 +- generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl | 2 +- generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl | 2 +- generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl | 2 +- generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl | 2 +- generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl | 2 +- generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl | 2 +- 14 files changed, 20 insertions(+), 20 deletions(-) diff --git a/generic/include/clc/atom_decl_int32.inc b/generic/include/clc/atom_decl_int32.inc index dc76356..989ecfb 100644 --- a/generic/include/clc/atom_decl_int32.inc +++ b/generic/include/clc/atom_decl_int32.inc @@ -1,5 +1,5 @@ #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ - _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (ADDRSPACE TYPE *, TYPE); + _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int) __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint) diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h index 168f423..d84a9eb 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(global int *p, int cmp, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int 
atom_cmpxchg(global unsigned int *p, unsigned int cmp, unsigned int val); +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile global int *p, int cmp, int val); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile global unsigned int *p, unsigned int cmp, unsigned int val); diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h index bbc872c..d3c1c45 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p); +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile global int *p); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile global unsigned int *p); diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h index 050747c..712374d 100644 --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p); +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile global int *p); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile global unsigned int *p); diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h index e10a84f..c967d36 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(local int *p, int cmp, int val); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(local unsigned int *p, unsigned int cmp, unsigned int 
val); +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile local int *p, int cmp, int val); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile local unsigned int *p, unsigned int cmp, unsigned int val); diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h index e74d8fc..7bb33c9 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_dec(local int *p); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(local unsigned int *p); +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile local int *p); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile local unsigned int *p); diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h index 718f1f2..96e6f30 100644 --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h @@ -1,2 +1,2 @@ -_CLC_OVERLOAD _CLC_DECL int atom_inc(local int *p); -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(local unsigned int *p); +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile local int *p); +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile local unsigned int *p); diff --git a/generic/lib/atom_int32_binary.inc b/generic/lib/atom_int32_binary.inc index 185144c..3af4c4b 100644 --- a/generic/lib/atom_int32_binary.inc +++ b/generic/lib/atom_int32_binary.inc @@ -2,7 +2,7 @@ #include "utils.h" #define __CLC_ATOM_IMPL(AS, TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (AS TYPE *p, TYPE val) { \ +_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \ return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \ } diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl 
b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl index 7647740..becf767 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(global TYPE *p, TYPE cmp, TYPE val) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile global TYPE *p, TYPE cmp, TYPE val) { \ return atomic_cmpxchg(p, cmp, val); \ } diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl index 290bec2..22aeaf9 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile global TYPE *p) { \ return atomic_dec(p); \ } diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl index 422bb78..2fd7102 100644 --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile global TYPE *p) { \ return atomic_inc(p); \ } diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl index 16e9579..ea648f0 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(local TYPE *p, TYPE cmp, TYPE val) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile local TYPE *p, TYPE cmp, TYPE val) { \ return atomic_cmpxchg(p, cmp, val); 
\ } diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl index d708a6d..33772a0 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile local TYPE *p) { \ return atomic_dec(p); \ } diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl index 8878673..d91ff65 100644 --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl @@ -1,7 +1,7 @@ #include #define IMPL(TYPE) \ -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \ +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile local TYPE *p) { \ return atomic_inc(p); \ } -- 2.16.4 From libclc-dev at lists.llvm.org Thu Jun 21 08:04:08 2018 From: libclc-dev at lists.llvm.org (Aaron Watry via Libclc-dev) Date: Thu, 21 Jun 2018 10:04:08 -0500 Subject: [Libclc-dev] [PATCH 7/7] atom: Use volatile pointers for cl_khr_{global, local}_int32_{base, extended}_atomics In-Reply-To: <20180620203504.5123-8-jan.vesely@rutgers.edu> References: <20180620203504.5123-1-jan.vesely@rutgers.edu> <20180620203504.5123-8-jan.vesely@rutgers.edu> Message-ID: Series is: Tested-By/Reviewed-By: Aaron Watry I agree that the volatile pointer mismatch between spec versions is probably just a bug/oversight in the spec document, which is why I had sent the previous series to change this a while back. If the CL 1.1 document had clarified this situation beyond just saying the functions were renamed (while also silently adding the volatile keyword), this would've been a bit easier. 
--Aaron On Wed, Jun 20, 2018 at 3:35 PM, Jan Vesely via Libclc-dev wrote: > int64 versions were switched to volatile pointers in cl1.1 > cl1.1 also renamed atom_ functions to atomic_ that use volatile pointers. > CTS and applications use volatile pointers. > > Passes CTS on carrizo > no return piglit tests still pass on turks. > > Signed-off-by: Jan Vesely > --- > generic/include/clc/atom_decl_int32.inc | 2 +- > generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h | 4 ++-- > generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h | 4 ++-- > generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h | 4 ++-- > generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h | 4 ++-- > generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h | 4 ++-- > generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h | 4 ++-- > generic/lib/atom_int32_binary.inc | 2 +- > generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl | 2 +- > generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl | 2 +- > generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl | 2 +- > generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl | 2 +- > generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl | 2 +- > generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl | 2 +- > 14 files changed, 20 insertions(+), 20 deletions(-) > > diff --git a/generic/include/clc/atom_decl_int32.inc b/generic/include/clc/atom_decl_int32.inc > index dc76356..989ecfb 100644 > --- a/generic/include/clc/atom_decl_int32.inc > +++ b/generic/include/clc/atom_decl_int32.inc > @@ -1,5 +1,5 @@ > #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ > - _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (ADDRSPACE TYPE *, TYPE); > + _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); > > __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int) > __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint) > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h 
b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > index 168f423..d84a9eb 100644 > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(global int *p, int cmp, int val); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(global unsigned int *p, unsigned int cmp, unsigned int val); > +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile global int *p, int cmp, int val); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile global unsigned int *p, unsigned int cmp, unsigned int val); > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > index bbc872c..d3c1c45 100644 > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p); > +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile global int *p); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile global unsigned int *p); > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > index 050747c..712374d 100644 > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p); > +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile global int *p); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile global unsigned int *p); > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h 
b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > index e10a84f..c967d36 100644 > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(local int *p, int cmp, int val); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(local unsigned int *p, unsigned int cmp, unsigned int val); > +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile local int *p, int cmp, int val); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile local unsigned int *p, unsigned int cmp, unsigned int val); > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > index e74d8fc..7bb33c9 100644 > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_dec(local int *p); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(local unsigned int *p); > +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile local int *p); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile local unsigned int *p); > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > index 718f1f2..96e6f30 100644 > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > @@ -1,2 +1,2 @@ > -_CLC_OVERLOAD _CLC_DECL int atom_inc(local int *p); > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(local unsigned int *p); > +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile local int *p); > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile local unsigned int *p); > diff --git a/generic/lib/atom_int32_binary.inc b/generic/lib/atom_int32_binary.inc > index 185144c..3af4c4b 
100644 > --- a/generic/lib/atom_int32_binary.inc > +++ b/generic/lib/atom_int32_binary.inc > @@ -2,7 +2,7 @@ > #include "utils.h" > > #define __CLC_ATOM_IMPL(AS, TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (AS TYPE *p, TYPE val) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \ > return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \ > } > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > index 7647740..becf767 100644 > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(global TYPE *p, TYPE cmp, TYPE val) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile global TYPE *p, TYPE cmp, TYPE val) { \ > return atomic_cmpxchg(p, cmp, val); \ > } > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > index 290bec2..22aeaf9 100644 > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile global TYPE *p) { \ > return atomic_dec(p); \ > } > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > index 422bb78..2fd7102 100644 > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile global TYPE 
*p) { \ > return atomic_inc(p); \ > } > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > index 16e9579..ea648f0 100644 > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(local TYPE *p, TYPE cmp, TYPE val) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile local TYPE *p, TYPE cmp, TYPE val) { \ > return atomic_cmpxchg(p, cmp, val); \ > } > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > index d708a6d..33772a0 100644 > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile local TYPE *p) { \ > return atomic_dec(p); \ > } > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > index 8878673..d91ff65 100644 > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > @@ -1,7 +1,7 @@ > #include > > #define IMPL(TYPE) \ > -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \ > +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile local TYPE *p) { \ > return atomic_inc(p); \ > } > > -- > 2.16.4 > > _______________________________________________ > Libclc-dev mailing list > Libclc-dev at lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev From libclc-dev at lists.llvm.org Thu Jun 21 13:02:08 2018 From: libclc-dev at lists.llvm.org (Jan Vesely via Libclc-dev) Date: Thu, 21 Jun 2018 16:02:08 -0400 Subject: [Libclc-dev] [PATCH 
7/7] atom: Use volatile pointers for cl_khr_{global, local}_int32_{base, extended}_atomics In-Reply-To: References: <20180620203504.5123-1-jan.vesely@rutgers.edu> <20180620203504.5123-8-jan.vesely@rutgers.edu> Message-ID: <1529611328.21309.3.camel@rutgers.edu> On Thu, 2018-06-21 at 10:04 -0500, Aaron Watry via Libclc-dev wrote: > Series is: Tested-By/Reviewed-By: Aaron Watry Thanks. > I agree that the volatile pointer mismatch between spec versions is > probably just a bug/oversight in the spec document, which is why I had > sent the previous series to change this a while back. > > If the CL 1.1 document had clarified this situation beyond just saying > the functions were renamed (while also silently adding the volatile > keyword), this would've been a bit easier. Yeah, sorry it took me so long to see the light :) I tried using Tom's old patch, but it did not apply cleanly, and I thought a bit of cleanup wouldn't hurt. Jan > > --Aaron > > On Wed, Jun 20, 2018 at 3:35 PM, Jan Vesely via Libclc-dev > wrote: > > int64 versions were switched to volatile pointers in cl1.1 > > cl1.1 also renamed atom_ functions to atomic_ that use volatile pointers. > > CTS and applications use volatile pointers. > > > > Passes CTS on carrizo > > no return piglit tests still pass on turks.
> > > > Signed-off-by: Jan Vesely > > --- > > generic/include/clc/atom_decl_int32.inc | 2 +- > > generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h | 4 ++-- > > generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h | 4 ++-- > > generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h | 4 ++-- > > generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h | 4 ++-- > > generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h | 4 ++-- > > generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h | 4 ++-- > > generic/lib/atom_int32_binary.inc | 2 +- > > generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl | 2 +- > > generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl | 2 +- > > generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl | 2 +- > > generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl | 2 +- > > generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl | 2 +- > > generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl | 2 +- > > 14 files changed, 20 insertions(+), 20 deletions(-) > > > > diff --git a/generic/include/clc/atom_decl_int32.inc b/generic/include/clc/atom_decl_int32.inc > > index dc76356..989ecfb 100644 > > --- a/generic/include/clc/atom_decl_int32.inc > > +++ b/generic/include/clc/atom_decl_int32.inc > > @@ -1,5 +1,5 @@ > > #define __CLC_DECLARE_ATOM(ADDRSPACE, TYPE) \ > > - _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (ADDRSPACE TYPE *, TYPE); > > + _CLC_OVERLOAD _CLC_DECL TYPE __CLC_FUNCTION (volatile ADDRSPACE TYPE *, TYPE); > > > > __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, int) > > __CLC_DECLARE_ATOM(__CLC_ADDRESS_SPACE, uint) > > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > > index 168f423..d84a9eb 100644 > > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_cmpxchg.h > > @@ 
-1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(global int *p, int cmp, int val); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(global unsigned int *p, unsigned int cmp, unsigned int val); > > +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile global int *p, int cmp, int val); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile global unsigned int *p, unsigned int cmp, unsigned int val); > > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > > index bbc872c..d3c1c45 100644 > > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h > > @@ -1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_dec(global int *p); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(global unsigned int *p); > > +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile global int *p); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile global unsigned int *p); > > diff --git a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > > index 050747c..712374d 100644 > > --- a/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > > +++ b/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h > > @@ -1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_inc(global int *p); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(global unsigned int *p); > > +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile global int *p); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile global unsigned int *p); > > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > > index e10a84f..c967d36 100644 > > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > > +++ 
b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_cmpxchg.h > > @@ -1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(local int *p, int cmp, int val); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(local unsigned int *p, unsigned int cmp, unsigned int val); > > +_CLC_OVERLOAD _CLC_DECL int atom_cmpxchg(volatile local int *p, int cmp, int val); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_cmpxchg(volatile local unsigned int *p, unsigned int cmp, unsigned int val); > > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > > index e74d8fc..7bb33c9 100644 > > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > > +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h > > @@ -1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_dec(local int *p); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(local unsigned int *p); > > +_CLC_OVERLOAD _CLC_DECL int atom_dec(volatile local int *p); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_dec(volatile local unsigned int *p); > > diff --git a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > > index 718f1f2..96e6f30 100644 > > --- a/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > > +++ b/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h > > @@ -1,2 +1,2 @@ > > -_CLC_OVERLOAD _CLC_DECL int atom_inc(local int *p); > > -_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(local unsigned int *p); > > +_CLC_OVERLOAD _CLC_DECL int atom_inc(volatile local int *p); > > +_CLC_OVERLOAD _CLC_DECL unsigned int atom_inc(volatile local unsigned int *p); > > diff --git a/generic/lib/atom_int32_binary.inc b/generic/lib/atom_int32_binary.inc > > index 185144c..3af4c4b 100644 > > --- a/generic/lib/atom_int32_binary.inc > > +++ b/generic/lib/atom_int32_binary.inc > > @@ -2,7 +2,7 @@ > > #include 
"utils.h" > > > > #define __CLC_ATOM_IMPL(AS, TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (AS TYPE *p, TYPE val) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE __CLC_XCONCAT(atom_, __CLC_ATOMIC_OP) (volatile AS TYPE *p, TYPE val) { \ > > return __CLC_XCONCAT(atomic_, __CLC_ATOMIC_OP) (p, val); \ > > } > > > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > > index 7647740..becf767 100644 > > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_cmpxchg.cl > > @@ -1,7 +1,7 @@ > > #include > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(global TYPE *p, TYPE cmp, TYPE val) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile global TYPE *p, TYPE cmp, TYPE val) { \ > > return atomic_cmpxchg(p, cmp, val); \ > > } > > > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > > index 290bec2..22aeaf9 100644 > > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_dec.cl > > @@ -1,7 +1,7 @@ > > #include > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(global TYPE *p) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile global TYPE *p) { \ > > return atomic_dec(p); \ > > } > > > > diff --git a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > > index 422bb78..2fd7102 100644 > > --- a/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > > +++ b/generic/lib/cl_khr_global_int32_base_atomics/atom_inc.cl > > @@ -1,7 +1,7 @@ > > #include > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(global TYPE *p) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile global TYPE *p) { \ > > return atomic_inc(p); 
\ > > } > > > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > > index 16e9579..ea648f0 100644 > > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_cmpxchg.cl > > @@ -1,7 +1,7 @@ > > #include <clc/clc.h> > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(local TYPE *p, TYPE cmp, TYPE val) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile local TYPE *p, TYPE cmp, TYPE val) { \ > > return atomic_cmpxchg(p, cmp, val); \ > > } > > > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > > index d708a6d..33772a0 100644 > > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_dec.cl > > @@ -1,7 +1,7 @@ > > #include <clc/clc.h> > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(local TYPE *p) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile local TYPE *p) { \ > > return atomic_dec(p); \ > > } > > > > diff --git a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > > index 8878673..d91ff65 100644 > > --- a/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > > +++ b/generic/lib/cl_khr_local_int32_base_atomics/atom_inc.cl > > @@ -1,7 +1,7 @@ > > #include <clc/clc.h> > > > > #define IMPL(TYPE) \ > > -_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(local TYPE *p) { \ > > +_CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile local TYPE *p) { \ > > return atomic_inc(p); \ > > } > > > > -- > > 2.16.4 > > > > _______________________________________________ > > Libclc-dev mailing list > > Libclc-dev at lists.llvm.org > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev > > _______________________________________________ > Libclc-dev mailing list > Libclc-dev at lists.llvm.org >
http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev -- Jan Vesely -------------- next part -------------- A non-text attachment was scrubbed... Name: signature.asc Type: application/pgp-signature Size: 833 bytes Desc: This is a digitally signed message part URL: