[Libclc-dev] [PATCH 1/1] Implement generic mad_sat
Jan Vesely
jan.vesely at rutgers.edu
Tue Sep 2 11:34:55 PDT 2014
On Tue, 2014-09-02 at 13:25 -0500, Aaron Watry wrote:
> With Tom's LLVM patch applied, the mad_sat long/ulong tests pass for
> me on both evergreen (cedar) and SI (pitcairn).
>
> I think we're good here.
Thanks to both of you. I pushed the patch.
jan
>
> --Aaron
>
> On Tue, Sep 2, 2014 at 12:23 PM, Tom Stellard <tom at stellard.net> wrote:
> > On Tue, Sep 02, 2014 at 12:12:13PM -0500, Aaron Watry wrote:
> >> I'm still getting:
> >>
> >> LLVM ERROR: Cannot select: 0x1b1a5b0: i64 = and 0x1b1af90, 0x115bbc0
> >> [ORD=19] [ID=83]
> >> 0x1b1af90: i64 = any_extend 0x1b1bae8 [ORD=18] [ID=82]
> >> 0x1b1bae8: i32 = and 0x11609d0, 0x115ce78 [ORD=18] [ID=80]
> >> 0x11609d0: i32 = mul 0x1161cf8, 0x1b16508 [ORD=10] [ID=51]
> >> 0x1161cf8: i32 = truncate 0x1b198b8 [ORD=10] [ID=37]
> >> 0x1b198b8: i64,ch = CopyFromReg 0x1b1abe0:1, 0x1b19288 [ORD=4] [ID=32]
> >> 0x1b19288: i64 = Register %vreg3 [ID=8]
> >> 0x1b16508: i32 = truncate 0x115e098 [ORD=9] [ID=43]
> >> 0x115e098: i64 = sra 0x1164e40, 0x115fff0 [ORD=5] [ID=35]
> >> <snip>
> >>
> >
> > This is a bug in the backend. I have a local patch I will send to llvm-commits soon.
> >
> > -Tom
> >
> >> Let me at least do a full clean + rebuild and then try again before pushing.
> >>
> >> --Aaron
> >>
> >> On Tue, Sep 2, 2014 at 12:05 PM, Tom Stellard <tom at stellard.net> wrote:
> >> > On Mon, Sep 01, 2014 at 03:10:04PM -0400, Jan Vesely wrote:
> >> >> v2: Fix trailing whitespace
> >> >> Fix signed long overflow
> >> >> improve comment
> >> >>
> >> >> v3: fix typo
> >> >>
> >> >> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> >> >> ---
> >> >>
> >> >> Hi Aaron,
> >> >> any luck running these on SI?
> >> >
> >> > I just tested on SI. LGTM.
> >> >
> >> > -Tom
> >> >
> >> >>
> >> >> thanks,
> >> >> jan
> >> >>
> >> >> generic/include/clc/clc.h | 1 +
> >> >> generic/include/clc/integer/mad_sat.h | 3 ++
> >> >> generic/include/clc/integer/mad_sat.inc | 1 +
> >> >> generic/lib/SOURCES | 1 +
> >> >> generic/lib/clcmacro.h | 22 ++++++++++
> >> >> generic/lib/integer/mad_sat.cl | 72 +++++++++++++++++++++++++++++++++
> >> >> 6 files changed, 100 insertions(+)
> >> >> create mode 100644 generic/include/clc/integer/mad_sat.h
> >> >> create mode 100644 generic/include/clc/integer/mad_sat.inc
> >> >> create mode 100644 generic/lib/integer/mad_sat.cl
> >> >>
> >> >> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> >> >> index 84b25ac..20664f9 100644
> >> >> --- a/generic/include/clc/clc.h
> >> >> +++ b/generic/include/clc/clc.h
> >> >> @@ -82,6 +82,7 @@
> >> >> #include <clc/integer/hadd.h>
> >> >> #include <clc/integer/mad24.h>
> >> >> #include <clc/integer/mad_hi.h>
> >> >> +#include <clc/integer/mad_sat.h>
> >> >> #include <clc/integer/mul24.h>
> >> >> #include <clc/integer/mul_hi.h>
> >> >> #include <clc/integer/rhadd.h>
> >> >> diff --git a/generic/include/clc/integer/mad_sat.h b/generic/include/clc/integer/mad_sat.h
> >> >> new file mode 100644
> >> >> index 0000000..3e92372
> >> >> --- /dev/null
> >> >> +++ b/generic/include/clc/integer/mad_sat.h
> >> >> @@ -0,0 +1,3 @@
> >> >> +#define __CLC_BODY <clc/integer/mad_sat.inc>
> >> >> +#include <clc/integer/gentype.inc>
> >> >> +#undef __CLC_BODY
> >> >> diff --git a/generic/include/clc/integer/mad_sat.inc b/generic/include/clc/integer/mad_sat.inc
> >> >> new file mode 100644
> >> >> index 0000000..5da2bdf
> >> >> --- /dev/null
> >> >> +++ b/generic/include/clc/integer/mad_sat.inc
> >> >> @@ -0,0 +1 @@
> >> >> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
> >> >> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> >> >> index 21fc7ca..5ad823d 100644
> >> >> --- a/generic/lib/SOURCES
> >> >> +++ b/generic/lib/SOURCES
> >> >> @@ -20,6 +20,7 @@ integer/clz_if.ll
> >> >> integer/clz_impl.ll
> >> >> integer/hadd.cl
> >> >> integer/mad24.cl
> >> >> +integer/mad_sat.cl
> >> >> integer/mul24.cl
> >> >> integer/mul_hi.cl
> >> >> integer/rhadd.cl
> >> >> diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h
> >> >> index 730073a..ef102ea 100644
> >> >> --- a/generic/lib/clcmacro.h
> >> >> +++ b/generic/lib/clcmacro.h
> >> >> @@ -41,6 +41,28 @@
> >> >> return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
> >> >> }
> >> >>
> >> >> +#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
> >> >> + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
> >> >> + return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
> >> >> + } \
> >> >> +\
> >> >> + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
> >> >> + return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
> >> >> + FUNCTION(x.z, y.z, z.z)); \
> >> >> + } \
> >> >> +\
> >> >> + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
> >> >> + return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> >> >> + } \
> >> >> +\
> >> >> + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
> >> >> + return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> >> >> + } \
> >> >> +\
> >> >> + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
> >> >> + return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> >> >> + }
> >> >> +
> >> >> #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
> >> >> _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
> >> >> return BUILTIN(x, y); \
> >> >> diff --git a/generic/lib/integer/mad_sat.cl b/generic/lib/integer/mad_sat.cl
> >> >> new file mode 100644
> >> >> index 0000000..1708b29
> >> >> --- /dev/null
> >> >> +++ b/generic/lib/integer/mad_sat.cl
> >> >> @@ -0,0 +1,72 @@
> >> >> +#include <clc/clc.h>
> >> >> +#include "../clcmacro.h"
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
> >> >> + return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
> >> >> + return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
> >> >> + return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
> >> >> + return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
> >> >> + int mhi = mul_hi(x, y);
> >> >> + uint mlo = x * y;
> >> >> + long m = upsample(mhi, mlo);
> >> >> + m += z;
> >> >> + if (m > INT_MAX)
> >> >> + return INT_MAX;
> >> >> + if (m < INT_MIN)
> >> >> + return INT_MIN;
> >> >> + return m;
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
> >> >> + if (mul_hi(x, y) != 0)
> >> >> + return UINT_MAX;
> >> >> + return add_sat(x * y, z);
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
> >> >> + long hi = mul_hi(x, y);
> >> >> + ulong ulo = x * y;
> >> >> + long slo = x * y;
> >> >> + /* Big overflow of more than 2 bits, add can't fix this */
> >> >> + if (((x < 0) == (y < 0)) && hi != 0)
> >> >> + return LONG_MAX;
> >> >> + /* Low overflow in mul and z not neg enough to correct it */
> >> >> + if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
> >> >> + return LONG_MAX;
> >> >> + /* Big overflow of more than 2 bits, add can't fix this */
> >> >> + if (((x < 0) != (y < 0)) && hi != -1)
> >> >> + return LONG_MIN;
> >> >> + /* Low overflow in mul and z not pos enough to correct it */
> >> >> + if (hi == -1 && ulo <= ((ulong)LONG_MAX + 1UL) && (z < 0 || z < (LONG_MAX - ulo)))
> >> >> + return LONG_MIN;
> >> >> + /* We have checked all conditions, any overflow in addition returns
> >> >> + * the correct value */
> >> >> + return ulo + z;
> >> >> +}
> >> >> +
> >> >> +_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) {
> >> >> + if (mul_hi(x, y) != 0)
> >> >> + return ULONG_MAX;
> >> >> + return add_sat(x * y, z);
> >> >> +}
> >> >> +
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long)
> >> >> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong)
> >> >> --
> >> >> 1.9.3
> >> >>
> >> >>
> >> >> _______________________________________________
> >> >> Libclc-dev mailing list
> >> >> Libclc-dev at pcc.me.uk
> >> >> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
> >> >
> >> > _______________________________________________
> >> > Libclc-dev mailing list
> >> > Libclc-dev at pcc.me.uk
> >> > http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20140902/6cfd242b/attachment.sig>
More information about the Libclc-dev mailing list