[Libclc-dev] [PATCH v2 1/1] Implement generic mad_sat

Tue Aug 5 12:51:30 PDT 2014

On Tue, Aug 5, 2014 at 10:06 AM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> v2: Fix trailing whitespace
>     Fix signed long overflow
>     improve comment
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>
> NOTE: Use http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140804/229312.html to avoid llvm segfault
>
>  generic/include/clc/clc.h               |  1 +
>  generic/include/clc/integer/mad_sat.h   |  3 ++
>  generic/include/clc/integer/mad_sat.inc |  1 +
>  generic/lib/SOURCES                     |  1 +
>  generic/lib/clcmacro.h                  | 22 ++++++++++
>  generic/lib/integer/mad_sat.cl          | 72 +++++++++++++++++++++++++++++++++
>  6 files changed, 100 insertions(+)
>  create mode 100644 generic/include/clc/integer/mad_sat.h
>  create mode 100644 generic/include/clc/integer/mad_sat.inc
>  create mode 100644 generic/lib/integer/mad_sat.cl
>
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 9815c56..aca9b53 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -82,6 +82,7 @@
>  #include <clc/integer/hadd.h>
>  #include <clc/integer/mad24.h>
>  #include <clc/integer/mad_hi.h>
> +#include <clc/integer/mad_sat.h>
>  #include <clc/integer/mul24.h>
>  #include <clc/integer/mul_hi.h>
>  #include <clc/integer/rhadd.h>
> diff --git a/generic/include/clc/integer/mad_sat.h b/generic/include/clc/integer/mad_sat.h
> new file mode 100644
> index 0000000..3e92372
> --- /dev/null
> +++ b/generic/include/clc/integer/mad_sat.h
> @@ -0,0 +1,3 @@
> +#define __CLC_BODY <clc/integer/mad_sat.inc>
> +#include <clc/integer/gentype.inc>
> +#undef __CLC_BODY
> diff --git a/generic/include/clc/integer/mad_sat.inc b/generic/include/clc/integer/mad_sat.inc
> new file mode 100644
> index 0000000..5da2bdf
> --- /dev/null
> +++ b/generic/include/clc/integer/mad_sat.inc
> @@ -0,0 +1 @@
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index bfdec7b..7d3fa6b 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -19,6 +19,7 @@ integer/clz_if.ll
>  integer/clz_impl.ll
>  integer/hadd.cl
>  integer/mad24.cl
> +integer/mad_sat.cl
>  integer/mul24.cl
>  integer/mul_hi.cl
>  integer/rhadd.cl
> diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h
> index 730073a..ef102ea 100644
> --- a/generic/lib/clcmacro.h
> +++ b/generic/lib/clcmacro.h
> @@ -41,6 +41,28 @@
>      return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
>    }
>
> +#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
> +  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
> +    return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
> +  } \
> +\
> +  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
> +    return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
> +                         FUNCTION(x.z, y.z, z.z)); \
> +  } \
> +\
> +  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
> +    return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> +  } \
> +\
> +  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
> +    return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> +  } \
> +\
> +  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
> +    return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> +  }
> +
>  #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
>  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
>    return BUILTIN(x, y); \
> diff --git a/generic/lib/integer/mad_sat.cl b/generic/lib/integer/mad_sat.cl
> new file mode 100644
> index 0000000..7048931
> --- /dev/null
> +++ b/generic/lib/integer/mad_sat.cl
> @@ -0,0 +1,72 @@
> +#include <clc/clc.h>
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
> +  return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
> +  return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
> +  return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
> +  return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
> +  int mhi = mul_hi(x, y);
> +  uint mlo = x * y;
> +  long m = upsample(mhi, mlo);
> +  m += z;
> +  if (m > INT_MAX)
> +    return INT_MAX;
> +  if (m < INT_MIN)
> +    return INT_MIN;
> +  return m;
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
> +  if (mul_hi(x, y) != 0)
> +    return UINT_MAX;
> +  return add_sat(x * y, z);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
> +  long hi = mul_hi(x, y);
> +  ulong ulo = x * y;
> +  long  slo = x * y;
> +  /* Big overflow of more than 2 bits, add can't fix this */
> +  if (((x < 0) == (y < 0)) && hi != 0)
> +    return LONG_MAX;
> +  /* Low overflow in mul and z not neg enough to correct it */
> +  if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
> +    return LONG_MAX;
> +  /* Big overflow of more than 2 bits, add can't fix this */
> +  if (((x < 0) != (y < 0)) && hi != -1)
> +    return LONG_MIN;
> +  /* Low overflow in mul and z not pos enough to correct it */
> +  if (hi == -1 && ulo <= ((ulong)LONG_MAX + 1UL) && (z < 0 || z < (LONG_MAX - ulo)))
> +    return LONG_MIN;
> +  /* We have checked all conditions, any overflow in addtion returns

s/addtion/addition/

The code passes all of the piglit unit tests for mad_sat (all data
types) on evergreen after I applied your LLVM workaround.

Otherwise, I'm trying to find out whether there's any redundant boolean
logic in the checks above. In particular, I'm wondering whether we need
to keep testing if hi is greater than, less than, equal to, or not
equal to 0 and -1 in each condition, or whether we can pull that part
out into a quick overflow test at the beginning.

Either way, I've tested this version of the code with your LLVM
FlattenCFG.cpp patch, and the unit tests pass on CEDAR (Radeon 5400).
I believe that radeonsi will probably still fail due to the ulong
instruction selection issue that I noted yesterday (unless the
FlattenCFG change also affects this in a slightly different way), but
that seems to be an issue with the back-end rather than with this
patch.

--Aaron

> +   * the correct value */
> +  return ulo + z;
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF ulong mad_sat(ulong x, ulong y, ulong z) {
> +  if (mul_hi(x, y) != 0)
> +    return ULONG_MAX;
> +  return add_sat(x * y, z);
> +}
> +
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, char, mad_sat, char, char, char)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uchar, mad_sat, uchar, uchar, uchar)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, short, mad_sat, short, short, short)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ushort, mad_sat, ushort, ushort, ushort)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, int, mad_sat, int, int, int)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, uint, mad_sat, uint, uint, uint)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, long, mad_sat, long, long, long)
> +_CLC_TERNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, ulong, mad_sat, ulong, ulong, ulong)
> --
> 1.9.3
>