[Libclc-dev] [PATCH 1/1] Add initial support for half precision builtins

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Thu May 10 11:44:35 PDT 2018


Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
I plan to implement the remaining routines by promoting to float and
back, but convert routines need to be added first (and the failing ones
fixed).
This should be enough to fix
https://bugs.freedesktop.org/show_bug.cgi?id=96897

 amdgcn/lib/math/fmax.cl                     | 15 +++++
 amdgcn/lib/math/fmin.cl                     | 15 +++++
 amdgpu/lib/math/nextafter.cl                |  5 ++
 amdgpu/lib/math/sqrt.cl                     |  7 +++
 generic/include/clc/as_type.h               |  9 +++
 generic/include/clc/async/gentype.inc       | 25 +++++++++
 generic/include/clc/float/definitions.h     |  5 ++
 generic/include/clc/geometric/floatn.inc    | 31 +++++++++++
 generic/include/clc/math/binary_intrin.inc  | 10 ++++
 generic/include/clc/math/gentype.inc        | 62 ++++++++++++++++++++-
 generic/include/clc/math/nan.inc            |  4 +-
 generic/include/clc/math/ternary_intrin.inc | 11 ++++
 generic/include/clc/math/unary_intrin.inc   | 10 ++++
 generic/include/clc/relational/floatn.inc   | 41 ++++++++++++++
 generic/include/clc/relational/isequal.h    |  4 ++
 generic/include/clc/relational/isinf.h      |  5 ++
 generic/include/clc/relational/isnan.h      |  5 ++
 generic/include/math/clc_ldexp.h            |  4 ++
 generic/lib/geometric/dot.cl                | 22 ++++++++
 generic/lib/geometric/length.cl             | 42 +++++++++++++-
 generic/lib/math/acos.inc                   |  9 ++-
 generic/lib/math/asin.inc                   | 10 +++-
 generic/lib/math/clc_nextafter.cl           |  7 +++
 generic/lib/math/clc_sqrt_impl.inc          |  3 +
 generic/lib/math/clc_sw_binary.inc          |  2 +
 generic/lib/math/clc_sw_unary.inc           |  2 +
 generic/lib/math/copysign.cl                | 15 +++++
 generic/lib/math/fmax.cl                    | 16 ++++++
 generic/lib/math/fmax.inc                   | 10 ++++
 generic/lib/math/fmin.cl                    | 15 +++++
 generic/lib/math/fmin.inc                   | 10 ++++
 generic/lib/math/fract.inc                  | 10 +++-
 generic/lib/math/ldexp.cl                   |  6 ++
 generic/lib/math/ldexp.inc                  |  5 ++
 generic/lib/math/lgamma_r.inc               |  4 +-
 generic/lib/math/modf.inc                   |  2 +-
 generic/lib/math/nan.inc                    |  9 ++-
 generic/lib/math/pown.inc                   |  2 +
 generic/lib/math/remquo.inc                 |  2 +
 generic/lib/math/rootn.inc                  |  2 +
 generic/lib/math/sincos.inc                 |  3 +
 generic/lib/math/sqrt.cl                    |  8 +++
 generic/lib/relational/isequal.cl           | 16 +++++-
 generic/lib/relational/isfinite.cl          | 13 +++++
 generic/lib/relational/isgreater.cl         | 15 +++++
 generic/lib/relational/isgreaterequal.cl    | 14 +++++
 generic/lib/relational/isinf.cl             | 12 ++++
 generic/lib/relational/isless.cl            | 14 +++++
 generic/lib/relational/islessequal.cl       | 14 +++++
 generic/lib/relational/islessgreater.cl     | 14 +++++
 generic/lib/relational/isnan.cl             | 14 +++++
 generic/lib/relational/isnormal.cl          | 13 +++++
 generic/lib/relational/isnotequal.cl        | 10 ++++
 generic/lib/relational/isordered.cl         | 10 ++++
 generic/lib/relational/isunordered.cl       | 14 +++++
 generic/lib/relational/signbit.cl           | 14 +++++
 generic/lib/shared/vstore_half.inc          |  2 +
 57 files changed, 660 insertions(+), 13 deletions(-)

diff --git a/amdgcn/lib/math/fmax.cl b/amdgcn/lib/math/fmax.cl
index c1e6cb9..226d48b 100644
--- a/amdgcn/lib/math/fmax.cl
+++ b/amdgcn/lib/math/fmax.cl
@@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmax(double x, double y)
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmax, double, double)
 
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
+{
+   if (isnan(x))   /* a NaN operand yields the other operand */
+      return y;
+   if (isnan(y))
+      return x;
+   return (x < y) ? y : x;   /* larger operand; (y < x) ? y : x would be fmin */
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
+
 #endif
 
 #define __CLC_BODY <../../../generic/lib/math/fmax.inc>
diff --git a/amdgcn/lib/math/fmin.cl b/amdgcn/lib/math/fmin.cl
index 6bc2fa5..35dea8b 100644
--- a/amdgcn/lib/math/fmin.cl
+++ b/amdgcn/lib/math/fmin.cl
@@ -25,6 +25,21 @@ _CLC_DEF _CLC_OVERLOAD double fmin(double x, double y)
 }
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, fmin, double, double)
 
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
+{
+   if (isnan(x))
+      return y;
+   if (isnan(y))
+      return x;
+   return (y < x) ? y : x;
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
+
 #endif
 
 #define __CLC_BODY <../../../generic/lib/math/fmin.inc>
diff --git a/amdgpu/lib/math/nextafter.cl b/amdgpu/lib/math/nextafter.cl
index 5b4521d..b290da0 100644
--- a/amdgpu/lib/math/nextafter.cl
+++ b/amdgpu/lib/math/nextafter.cl
@@ -8,3 +8,8 @@ _CLC_DEFINE_BINARY_BUILTIN(float, nextafter, __clc_nextafter, float, float)
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 _CLC_DEFINE_BINARY_BUILTIN(double, nextafter, __clc_nextafter, double, double)
 #endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEFINE_BINARY_BUILTIN(half, nextafter, __clc_nextafter, half, half)
+#endif
diff --git a/amdgpu/lib/math/sqrt.cl b/amdgpu/lib/math/sqrt.cl
index 395a3f9..5562600 100644
--- a/amdgpu/lib/math/sqrt.cl
+++ b/amdgpu/lib/math/sqrt.cl
@@ -26,6 +26,13 @@
 
 _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
+
+#endif
+
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
diff --git a/generic/include/clc/as_type.h b/generic/include/clc/as_type.h
index 0bb9ee2..1bc76b0 100644
--- a/generic/include/clc/as_type.h
+++ b/generic/include/clc/as_type.h
@@ -66,3 +66,12 @@
 #define as_double8(x) __builtin_astype(x, double8)
 #define as_double16(x) __builtin_astype(x, double16)
 #endif
+
+#ifdef cl_khr_fp16
+#define as_half(x) __builtin_astype(x, half)
+#define as_half2(x) __builtin_astype(x, half2)
+#define as_half3(x) __builtin_astype(x, half3)
+#define as_half4(x) __builtin_astype(x, half4)
+#define as_half8(x) __builtin_astype(x, half8)
+#define as_half16(x) __builtin_astype(x, half16)
+#endif
diff --git a/generic/include/clc/async/gentype.inc b/generic/include/clc/async/gentype.inc
index c08bb6f..09d465f 100644
--- a/generic/include/clc/async/gentype.inc
+++ b/generic/include/clc/async/gentype.inc
@@ -204,4 +204,29 @@
 
 #endif
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+
+#define __CLC_GENTYPE half
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE half2
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE half4
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE half8
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#define __CLC_GENTYPE half16
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+
+#endif
+
 #undef __CLC_BODY
diff --git a/generic/include/clc/float/definitions.h b/generic/include/clc/float/definitions.h
index 84a102e..7141ba8 100644
--- a/generic/include/clc/float/definitions.h
+++ b/generic/include/clc/float/definitions.h
@@ -80,6 +80,11 @@
 #define HALF_MIN_10_EXP -4
 #define HALF_MIN_EXP    -13
 
+#define HALF_RADIX 	2
+#define HALF_MAX 	0x1.ffcp15h
+#define HALF_MIN 	0x1.0p-14h
+#define HALF_EPSILON 	0x1.0p-10h
+
 #endif
 
 #endif
diff --git a/generic/include/clc/geometric/floatn.inc b/generic/include/clc/geometric/floatn.inc
index f1ec38c..49c797f 100644
--- a/generic/include/clc/geometric/floatn.inc
+++ b/generic/include/clc/geometric/floatn.inc
@@ -53,4 +53,35 @@
 #endif
 #endif
 
+#ifndef __FLOAT_ONLY
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define __CLC_FLOAT half
+#define __CLC_FPSIZE 16
+
+#define __CLC_FLOATN half
+#define __CLC_SCALAR
+#include __CLC_BODY
+#undef __CLC_FLOATN
+#undef __CLC_SCALAR
+
+#define __CLC_FLOATN half2
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half3
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half4
+#include __CLC_BODY
+#undef __CLC_FLOATN
+
+#undef __CLC_FLOAT
+#undef __CLC_FPSIZE
+
+#endif
+#endif
+
 #undef __CLC_BODY
diff --git a/generic/include/clc/math/binary_intrin.inc b/generic/include/clc/math/binary_intrin.inc
index dd4440a..d1a3cae 100644
--- a/generic/include/clc/math/binary_intrin.inc
+++ b/generic/include/clc/math/binary_intrin.inc
@@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8) __asm(__CLC_INTRINSIC ".v
 _CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
 #endif
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+_CLC_OVERLOAD half __CLC_FUNCTION(half, half) __asm(__CLC_INTRINSIC ".f16");
+_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
+_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
+_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
+_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
+_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
+#endif
+
 #undef __CLC_FUNCTION
 #undef __CLC_INTRINSIC
diff --git a/generic/include/clc/math/gentype.inc b/generic/include/clc/math/gentype.inc
index 954cd00..966b426 100644
--- a/generic/include/clc/math/gentype.inc
+++ b/generic/include/clc/math/gentype.inc
@@ -110,6 +110,66 @@
 #undef __CLC_FPSIZE
 #undef __CLC_SCALAR_GENTYPE
 #endif
+#endif
 
-#undef __CLC_BODY
+#ifndef __FLOAT_ONLY
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define __CLC_SCALAR_GENTYPE half
+#define __CLC_FPSIZE 16
+
+#define __CLC_SCALAR
+#define __CLC_GENTYPE half
+#define __CLC_INTN int
+#include __CLC_BODY
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+#undef __CLC_SCALAR
+
+#define __CLC_GENTYPE half2
+#define __CLC_INTN int2
+#define __CLC_VECSIZE 2
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE half3
+#define __CLC_INTN int3
+#define __CLC_VECSIZE 3
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE half4
+#define __CLC_INTN int4
+#define __CLC_VECSIZE 4
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE half8
+#define __CLC_INTN int8
+#define __CLC_VECSIZE 8
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#define __CLC_GENTYPE half16
+#define __CLC_INTN int16
+#define __CLC_VECSIZE 16
+#include __CLC_BODY
+#undef __CLC_VECSIZE
+#undef __CLC_GENTYPE
+#undef __CLC_INTN
+
+#undef __CLC_FPSIZE
+#undef __CLC_SCALAR_GENTYPE
 #endif
+#endif
+
+#undef __CLC_BODY
diff --git a/generic/include/clc/math/nan.inc b/generic/include/clc/math/nan.inc
index bf36ed2..20199ed 100644
--- a/generic/include/clc/math/nan.inc
+++ b/generic/include/clc/math/nan.inc
@@ -4,8 +4,10 @@
 
 #if __CLC_FPSIZE == 64
 #define __CLC_NATN __CLC_XCONCAT(ulong, __CLC_VECSIZE)
-#else
+#elif __CLC_FPSIZE == 32
 #define __CLC_NATN __CLC_XCONCAT(uint, __CLC_VECSIZE)
+#else
+#define __CLC_NATN __CLC_XCONCAT(ushort, __CLC_VECSIZE)
 #endif
 
 _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE nan(__CLC_NATN code);
diff --git a/generic/include/clc/math/ternary_intrin.inc b/generic/include/clc/math/ternary_intrin.inc
index 58ce36b..b384b26 100644
--- a/generic/include/clc/math/ternary_intrin.inc
+++ b/generic/include/clc/math/ternary_intrin.inc
@@ -15,5 +15,16 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8, double8, double8) __asm(__CLC_INTR
 _CLC_OVERLOAD double16 __CLC_FUNCTION(double16, double16, double16) __asm(__CLC_INTRINSIC ".v16f64");
 #endif
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+_CLC_OVERLOAD half __CLC_FUNCTION(half, half, half) __asm(__CLC_INTRINSIC ".f16");
+_CLC_OVERLOAD half2 __CLC_FUNCTION(half2, half2, half2) __asm(__CLC_INTRINSIC ".v2f16");
+_CLC_OVERLOAD half3 __CLC_FUNCTION(half3, half3, half3) __asm(__CLC_INTRINSIC ".v3f16");
+_CLC_OVERLOAD half4 __CLC_FUNCTION(half4, half4, half4) __asm(__CLC_INTRINSIC ".v4f16");
+_CLC_OVERLOAD half8 __CLC_FUNCTION(half8, half8, half8) __asm(__CLC_INTRINSIC ".v8f16");
+_CLC_OVERLOAD half16 __CLC_FUNCTION(half16, half16, half16) __asm(__CLC_INTRINSIC ".v16f16");
+#endif
+
+
 #undef __CLC_FUNCTION
 #undef __CLC_INTRINSIC
diff --git a/generic/include/clc/math/unary_intrin.inc b/generic/include/clc/math/unary_intrin.inc
index 8e42109..532bb1f 100644
--- a/generic/include/clc/math/unary_intrin.inc
+++ b/generic/include/clc/math/unary_intrin.inc
@@ -15,5 +15,15 @@ _CLC_OVERLOAD double8 __CLC_FUNCTION(double8 d) __asm(__CLC_INTRINSIC ".v8f64");
 _CLC_OVERLOAD double16 __CLC_FUNCTION(double16 d) __asm(__CLC_INTRINSIC ".v16f64");
 #endif
 
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+_CLC_OVERLOAD half __CLC_FUNCTION(half d) __asm(__CLC_INTRINSIC ".f16");
+_CLC_OVERLOAD half2 __CLC_FUNCTION(half2 d) __asm(__CLC_INTRINSIC ".v2f16");
+_CLC_OVERLOAD half3 __CLC_FUNCTION(half3 d) __asm(__CLC_INTRINSIC ".v3f16");
+_CLC_OVERLOAD half4 __CLC_FUNCTION(half4 d) __asm(__CLC_INTRINSIC ".v4f16");
+_CLC_OVERLOAD half8 __CLC_FUNCTION(half8 d) __asm(__CLC_INTRINSIC ".v8f16");
+_CLC_OVERLOAD half16 __CLC_FUNCTION(half16 d) __asm(__CLC_INTRINSIC ".v16f16");
+#endif
+
 #undef __CLC_FUNCTION
 #undef __CLC_INTRINSIC
diff --git a/generic/include/clc/relational/floatn.inc b/generic/include/clc/relational/floatn.inc
index 8d7fd52..fc0d687 100644
--- a/generic/include/clc/relational/floatn.inc
+++ b/generic/include/clc/relational/floatn.inc
@@ -39,6 +39,7 @@
 #undef __CLC_INT
 
 #ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 #define __CLC_FLOATN double
 #define __CLC_INTN int
@@ -76,6 +77,46 @@
 #undef __CLC_INTN
 #undef __CLC_FLOATN
 
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+#define __CLC_FLOATN half
+#define __CLC_INTN int
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half2
+#define __CLC_INTN short2
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half3
+#define __CLC_INTN short3
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half4
+#define __CLC_INTN short4
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half8
+#define __CLC_INTN short8
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
+#define __CLC_FLOATN half16
+#define __CLC_INTN short16
+#include __CLC_BODY
+#undef __CLC_INTN
+#undef __CLC_FLOATN
+
 #endif
 
 #undef __CLC_BODY
diff --git a/generic/include/clc/relational/isequal.h b/generic/include/clc/relational/isequal.h
index c28a985..23d3d7e 100644
--- a/generic/include/clc/relational/isequal.h
+++ b/generic/include/clc/relational/isequal.h
@@ -15,6 +15,10 @@ _CLC_VECTOR_ISEQUAL_DECL(float, int)
 _CLC_ISEQUAL_DECL(double, int)
 _CLC_VECTOR_ISEQUAL_DECL(double, long)
 #endif
+#ifdef cl_khr_fp16
+_CLC_ISEQUAL_DECL(half, int)
+_CLC_VECTOR_ISEQUAL_DECL(half, short)
+#endif
 
 #undef _CLC_ISEQUAL_DECL
 #undef _CLC_VECTOR_ISEQUAL_DEC
diff --git a/generic/include/clc/relational/isinf.h b/generic/include/clc/relational/isinf.h
index 869f0c8..751132d 100644
--- a/generic/include/clc/relational/isinf.h
+++ b/generic/include/clc/relational/isinf.h
@@ -17,5 +17,10 @@ _CLC_ISINF_DECL(int, double)
 _CLC_VECTOR_ISINF_DECL(long, double)
 #endif
 
+#ifdef cl_khr_fp16
+_CLC_ISINF_DECL(int, half)
+_CLC_VECTOR_ISINF_DECL(short, half)
+#endif
+
 #undef _CLC_ISINF_DECL
 #undef _CLC_VECTOR_ISINF_DECL
diff --git a/generic/include/clc/relational/isnan.h b/generic/include/clc/relational/isnan.h
index 93eb9df..f193be4 100644
--- a/generic/include/clc/relational/isnan.h
+++ b/generic/include/clc/relational/isnan.h
@@ -17,5 +17,10 @@ _CLC_ISNAN_DECL(int, double)
 _CLC_VECTOR_ISNAN_DECL(long, double)
 #endif
 
+#ifdef cl_khr_fp16
+_CLC_ISNAN_DECL(int, half)
+_CLC_VECTOR_ISNAN_DECL(short, half)
+#endif
+
 #undef _CLC_ISNAN_DECL
 #undef _CLC_VECTOR_ISNAN_DECL
diff --git a/generic/include/math/clc_ldexp.h b/generic/include/math/clc_ldexp.h
index 23ac25e..dff07a8 100644
--- a/generic/include/math/clc_ldexp.h
+++ b/generic/include/math/clc_ldexp.h
@@ -4,3 +4,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_ldexp(float, int);
   #pragma OPENCL EXTENSION cl_khr_fp64 : enable
   _CLC_DEF _CLC_OVERLOAD double __clc_ldexp(double, int);
 #endif
+#ifdef cl_khr_fp16
+  #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+  _CLC_DEF _CLC_OVERLOAD half __clc_ldexp(half, int); /* returns half, like the float/double overloads return their own type */
+#endif
diff --git a/generic/lib/geometric/dot.cl b/generic/lib/geometric/dot.cl
index 0d6fe6c..e58bc26 100644
--- a/generic/lib/geometric/dot.cl
+++ b/generic/lib/geometric/dot.cl
@@ -37,3 +37,25 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
 }
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
+  return p0*p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
+  return p0.x*p1.x + p0.y*p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
+  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
+  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+}
+
+#endif
diff --git a/generic/lib/geometric/length.cl b/generic/lib/geometric/length.cl
index e7f31b4..170ec58 100644
--- a/generic/lib/geometric/length.cl
+++ b/generic/lib/geometric/length.cl
@@ -79,9 +79,47 @@ _CLC_OVERLOAD _CLC_DEF double length(double3 p) {
   V_DLENGTH(p);
 }
 
-_CLC_OVERLOAD _CLC_DEF double
-length(double4 p) {
+_CLC_OVERLOAD _CLC_DEF double length(double4 p) {
   V_DLENGTH(p);
 }
 
 #endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half length(half p){
+  return fabs(p);
+}
+
+// Only available in CLC1.2
+#ifndef HALF_MIN
+#define HALF_MIN   0x1.0p-14h
+#endif
+
+#define V_HLENGTH(p)                       \
+  half l2 = dot(p, p);                     \
+                                           \
+  if (l2 < HALF_MIN) {                     \
+      p *= 0x1.0p+12h;                     \
+      return sqrt(dot(p, p)) * 0x1.0p-12h; \
+  } else if (l2 == INFINITY) {             \
+      p *= 0x1.0p-7h;                      \
+      return sqrt(dot(p, p)) * 0x1.0p+7h;  \
+  }                                        \
+                                           \
+  return sqrt(l2);
+
+_CLC_OVERLOAD _CLC_DEF half length(half2 p) {
+  V_HLENGTH(p);
+}
+
+_CLC_OVERLOAD _CLC_DEF half length(half3 p) {
+  V_HLENGTH(p);
+}
+
+_CLC_OVERLOAD _CLC_DEF half length(half4 p) {
+  V_HLENGTH(p);
+}
+
+#endif
diff --git a/generic/lib/math/acos.inc b/generic/lib/math/acos.inc
index cac9499..3b915c8 100644
--- a/generic/lib/math/acos.inc
+++ b/generic/lib/math/acos.inc
@@ -11,10 +11,15 @@
  * precision of #4 may be better.
  */
 
+// TODO: Enable half precision when atan2 is implemented
+#if __CLC_FPSIZE > 16
+
 #if __CLC_FPSIZE == 32
 #define __CLC_CONST(x) x ## f
-#else
+#elif __CLC_FPSIZE == 64
 #define __CLC_CONST(x) x
+#else
+#define __CLC_CONST(x) x ## h /* paste the half-literal suffix; plain "xh" would be an unrelated identifier */
 #endif
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
@@ -27,3 +32,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) {
 }
 
 #undef __CLC_CONST
+
+#endif
diff --git a/generic/lib/math/asin.inc b/generic/lib/math/asin.inc
index 4643cf8..08cc4a0 100644
--- a/generic/lib/math/asin.inc
+++ b/generic/lib/math/asin.inc
@@ -1,12 +1,18 @@
+// TODO: Enable half precision when atan2 is implemented
+#if __CLC_FPSIZE > 16
 
 #if __CLC_FPSIZE == 32
 #define __CLC_CONST(x) x ## f
-#else
+#elif __CLC_FPSIZE == 64
 #define __CLC_CONST(x) x
+#else
+#define __CLC_CONST(x) x ## h
 #endif
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE asin(__CLC_GENTYPE x) {
-  return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) -(x*x) ));
+  return atan2(x, sqrt( (__CLC_GENTYPE)__CLC_CONST(1.0) - (x*x) ));
 }
 
 #undef __CLC_CONST
+
+#endif
diff --git a/generic/lib/math/clc_nextafter.cl b/generic/lib/math/clc_nextafter.cl
index d5c0af0..d32ef70 100644
--- a/generic/lib/math/clc_nextafter.cl
+++ b/generic/lib/math/clc_nextafter.cl
@@ -37,3 +37,10 @@ _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_nextafter, float, flo
 NEXTAFTER(double, ulong, long)
 _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_nextafter, double, double)
 #endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+NEXTAFTER(half, ushort, short)
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_nextafter, half, half)
+#endif
diff --git a/generic/lib/math/clc_sqrt_impl.inc b/generic/lib/math/clc_sqrt_impl.inc
index e97b540..67f2ca9 100644
--- a/generic/lib/math/clc_sqrt_impl.inc
+++ b/generic/lib/math/clc_sqrt_impl.inc
@@ -26,6 +26,9 @@
 #elif __CLC_FPSIZE == 64
 #define __CLC_NAN __builtin_nan("")
 #define ZERO 0.0
+#elif __CLC_FPSIZE == 16
+#define __CLC_NAN (half)NAN
+#define ZERO 0.0h
 #else
 #error "Invalid value for __CLC_FPSIZE"
 #endif
diff --git a/generic/lib/math/clc_sw_binary.inc b/generic/lib/math/clc_sw_binary.inc
index 0fe1524..bbedcf5 100644
--- a/generic/lib/math/clc_sw_binary.inc
+++ b/generic/lib/math/clc_sw_binary.inc
@@ -2,8 +2,10 @@
 
 #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x, __CLC_GENTYPE y) {
   return __CLC_SW_FUNC(__CLC_FUNC)(x, y);
 }
+#endif
 
 #undef __CLC_SW_FUNC
diff --git a/generic/lib/math/clc_sw_unary.inc b/generic/lib/math/clc_sw_unary.inc
index 0f8467c..3183c26 100644
--- a/generic/lib/math/clc_sw_unary.inc
+++ b/generic/lib/math/clc_sw_unary.inc
@@ -2,8 +2,10 @@
 
 #define __CLC_SW_FUNC(x) __CLC_CONCAT(__clc_, x)
 
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNC(__CLC_GENTYPE x) {
   return __CLC_SW_FUNC(__CLC_FUNC)(x);
 }
+#endif
 
 #undef __CLC_SW_FUNC
diff --git a/generic/lib/math/copysign.cl b/generic/lib/math/copysign.cl
index 4e0c51b..df65e9d 100644
--- a/generic/lib/math/copysign.cl
+++ b/generic/lib/math/copysign.cl
@@ -10,3 +10,18 @@ _CLC_DEFINE_BINARY_BUILTIN(float, copysign, __builtin_copysignf, float, float)
 _CLC_DEFINE_BINARY_BUILTIN(double, copysign, __builtin_copysign, double, double)
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half copysign(half x, half y)
+{
+   ushort magnitude_x = as_ushort(x) & 0x7fffu; /* exponent and mantissa bits of x */
+   ushort sign_y = as_ushort(y) & 0x8000u;      /* sign bit of y */
+
+   return as_half((ushort)(magnitude_x | sign_y)); /* x's magnitude carrying y's sign */
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, copysign, half, half)
+
+#endif
diff --git a/generic/lib/math/fmax.cl b/generic/lib/math/fmax.cl
index 239da3d..5c269ce 100644
--- a/generic/lib/math/fmax.cl
+++ b/generic/lib/math/fmax.cl
@@ -12,5 +12,21 @@ _CLC_DEFINE_BINARY_BUILTIN(double, fmax, __builtin_fmax, double, double);
 
 #endif
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmax(half x, half y)
+{
+   if (isnan(x))
+      return y;
+   if (isnan(y))
+      return x;
+   return (x < y) ? y : x;
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmax, half, half)
+
+#endif
+
 #define __CLC_BODY <fmax.inc>
 #include <clc/math/gentype.inc>
diff --git a/generic/lib/math/fmax.inc b/generic/lib/math/fmax.inc
index 8315c5f..a91ad6b 100644
--- a/generic/lib/math/fmax.inc
+++ b/generic/lib/math/fmax.inc
@@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, double y) {
 
 #endif // ifdef cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmax(__CLC_GENTYPE x, half y) {
+  return fmax(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
+}
+
+#endif // ifdef cl_khr_fp16
+
 #endif // !defined(__CLC_SCALAR)
diff --git a/generic/lib/math/fmin.cl b/generic/lib/math/fmin.cl
index 28c7d01..45c112d 100644
--- a/generic/lib/math/fmin.cl
+++ b/generic/lib/math/fmin.cl
@@ -10,6 +10,21 @@ _CLC_DEFINE_BINARY_BUILTIN(float, fmin, __builtin_fminf, float, float);
 
 _CLC_DEFINE_BINARY_BUILTIN(double, fmin, __builtin_fmin, double, double);
 
+#endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEF _CLC_OVERLOAD half fmin(half x, half y)
+{
+   if (isnan(x))
+      return y;
+   if (isnan(y))
+      return x;
+   return (y < x) ? y : x;
+}
+_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, fmin, half, half)
+
 #endif
 
 #define __CLC_BODY <fmin.inc>
diff --git a/generic/lib/math/fmin.inc b/generic/lib/math/fmin.inc
index d4b5ac2..9875678 100644
--- a/generic/lib/math/fmin.inc
+++ b/generic/lib/math/fmin.inc
@@ -15,4 +15,14 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, double y) {
 
 #endif // ifdef cl_khr_fp64
 
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fmin(__CLC_GENTYPE x, half y) {
+  return fmin(x, (__CLC_GENTYPE)((__CLC_SCALAR_GENTYPE)y));
+}
+
+#endif // ifdef cl_khr_fp16
+
 #endif // !defined(__CLC_SCALAR)
diff --git a/generic/lib/math/fract.inc b/generic/lib/math/fract.inc
index 8d2a4d7..f8bc183 100644
--- a/generic/lib/math/fract.inc
+++ b/generic/lib/math/fract.inc
@@ -22,14 +22,19 @@
 
 #if __CLC_FPSIZE == 32
 #define MIN_CONSTANT 0x1.fffffep-1f
-#else
+#define ZERO 0.0f
+#elif __CLC_FPSIZE == 64
 #define MIN_CONSTANT 0x1.fffffffffffffp-1
+#define ZERO 0.0
+#elif __CLC_FPSIZE == 16
+#define MIN_CONSTANT 0x1.ffcp-1h
+#define ZERO 0.0h
 #endif
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr) {
   *iptr = floor(x);
   __CLC_GENTYPE r = fmin(x - *iptr, MIN_CONSTANT);
-  r = isinf(x) ? 0.0f : r;
+  r = isinf(x) ? ZERO : r;
   r = isnan(x) ? x : r;
   return r;
 }
@@ -47,3 +52,4 @@ FRACT_DEF(local);
 FRACT_DEF(global);
 
 #undef MIN_CONSTANT
+#undef ZERO
diff --git a/generic/lib/math/ldexp.cl b/generic/lib/math/ldexp.cl
index 9be3127..190a4d5 100644
--- a/generic/lib/math/ldexp.cl
+++ b/generic/lib/math/ldexp.cl
@@ -33,7 +33,13 @@ _CLC_DEFINE_BINARY_BUILTIN(float, ldexp, __clc_ldexp, float, int)
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 _CLC_DEFINE_BINARY_BUILTIN(double, ldexp, __clc_ldexp, double, int)
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
+_CLC_DEFINE_BINARY_BUILTIN(half, ldexp, __clc_ldexp, half, int)
 #endif
 
 // This defines all the ldexp(GENTYPE, int) variants
diff --git a/generic/lib/math/ldexp.inc b/generic/lib/math/ldexp.inc
index 6e28fbb..d6144d7 100644
--- a/generic/lib/math/ldexp.inc
+++ b/generic/lib/math/ldexp.inc
@@ -20,6 +20,9 @@
  * THE SOFTWARE.
  */
 
+// TODO: Enable half precision when ldexp is implemented.
+#if __CLC_FPSIZE > 16
+
 #ifndef __CLC_SCALAR
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
@@ -27,3 +30,5 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE ldexp(__CLC_GENTYPE x, int n) {
 }
 
 #endif
+
+#endif
diff --git a/generic/lib/math/lgamma_r.inc b/generic/lib/math/lgamma_r.inc
index 316d4fa..0e19ba8 100644
--- a/generic/lib/math/lgamma_r.inc
+++ b/generic/lib/math/lgamma_r.inc
@@ -21,10 +21,12 @@
  * THE SOFTWARE.
  */
 
-
+// TODO: Enable half precision when the base version is implemented.
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE lgamma_r(__CLC_GENTYPE x, __CLC_ADDRSPACE __CLC_INTN *iptr) {
     __CLC_INTN private_iptr;
     __CLC_GENTYPE ret = lgamma_r(x, &private_iptr);
     *iptr = private_iptr;
     return ret;
 }
+#endif
diff --git a/generic/lib/math/modf.inc b/generic/lib/math/modf.inc
index 1486b76..bafc5dc 100644
--- a/generic/lib/math/modf.inc
+++ b/generic/lib/math/modf.inc
@@ -22,7 +22,7 @@
 
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE modf(__CLC_GENTYPE x, __CLC_GENTYPE *iptr) {
   *iptr = trunc(x);
-  return copysign(isinf(x) ? 0.0f : x - *iptr, x);
+  return copysign(isinf(x) ? (__CLC_GENTYPE)0.0f : x - *iptr, x);
 }
 
 #define MODF_DEF(addrspace) \
diff --git a/generic/lib/math/nan.inc b/generic/lib/math/nan.inc
index 6e4afc8..f6a9125 100644
--- a/generic/lib/math/nan.inc
+++ b/generic/lib/math/nan.inc
@@ -7,11 +7,18 @@ _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ulong, __CLC_VECSIZE) cod
 {
 	return __CLC_AS_GENTYPE(code | 0x7ff0000000000000ul);
 }
-#else
+#elif __CLC_FPSIZE == 32
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(uint, __CLC_VECSIZE) code)
 {
 	return __CLC_AS_GENTYPE(code | 0x7fc00000);
 }
+#else
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE nan(__CLC_XCONCAT(ushort, __CLC_VECSIZE) code)
+{
+	const ushort mask = 0x7e00;
+	const __CLC_XCONCAT(ushort, __CLC_VECSIZE) res = code | mask;
+	return __CLC_AS_GENTYPE(res);
+}
 #endif
 
 
diff --git a/generic/lib/math/pown.inc b/generic/lib/math/pown.inc
index 84729d9..10e7d75 100644
--- a/generic/lib/math/pown.inc
+++ b/generic/lib/math/pown.inc
@@ -1,3 +1,5 @@
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE pown(__CLC_GENTYPE x, __CLC_INTN y) {
   return __clc_pown(x, y);
 }
+#endif
diff --git a/generic/lib/math/remquo.inc b/generic/lib/math/remquo.inc
index c1de78a..cf9f938 100644
--- a/generic/lib/math/remquo.inc
+++ b/generic/lib/math/remquo.inc
@@ -1,6 +1,8 @@
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE remquo(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_ADDRESS_SPACE __CLC_INTN *q) {
   __CLC_INTN local_q;
   __CLC_GENTYPE ret = __clc_remquo(x, y, &local_q);
   *q = local_q;
   return ret;
 }
+#endif
diff --git a/generic/lib/math/rootn.inc b/generic/lib/math/rootn.inc
index 3f5b00c..9fb1d31 100644
--- a/generic/lib/math/rootn.inc
+++ b/generic/lib/math/rootn.inc
@@ -1,3 +1,5 @@
+#if __CLC_FPSIZE > 16
 _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE rootn(__CLC_GENTYPE x, __CLC_INTN y) {
   return __clc_rootn(x, y);
 }
+#endif
diff --git a/generic/lib/math/sincos.inc b/generic/lib/math/sincos.inc
index e97f0f9..2318ffb 100644
--- a/generic/lib/math/sincos.inc
+++ b/generic/lib/math/sincos.inc
@@ -1,3 +1,5 @@
+// TODO: Enable half precision when sin/cos is implemented
+#if __CLC_FPSIZE > 16
 #define __CLC_DECLARE_SINCOS(ADDRSPACE, TYPE) \
   _CLC_OVERLOAD _CLC_DEF TYPE sincos (TYPE x, ADDRSPACE TYPE * cosval) { \
     *cosval = cos(x); \
@@ -9,3 +11,4 @@ __CLC_DECLARE_SINCOS(local, __CLC_GENTYPE)
 __CLC_DECLARE_SINCOS(private, __CLC_GENTYPE)
 
 #undef __CLC_DECLARE_SINCOS
+#endif
diff --git a/generic/lib/math/sqrt.cl b/generic/lib/math/sqrt.cl
index 300e274..b05d6bc 100644
--- a/generic/lib/math/sqrt.cl
+++ b/generic/lib/math/sqrt.cl
@@ -33,3 +33,11 @@ _CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float)
 _CLC_DEFINE_UNARY_BUILTIN(double, sqrt, __clc_sqrt, double)
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half)
+
+#endif
diff --git a/generic/lib/relational/isequal.cl b/generic/lib/relational/isequal.cl
index 9d79ba6..3f14f94 100644
--- a/generic/lib/relational/isequal.cl
+++ b/generic/lib/relational/isequal.cl
@@ -26,5 +26,19 @@ _CLC_DEFINE_ISEQUAL(long8, isequal, double8, double8)
 _CLC_DEFINE_ISEQUAL(long16, isequal, double16, double16)
 
 #endif
+#ifdef cl_khr_fp16
 
-#undef _CLC_DEFINE_ISEQUAL
\ No newline at end of file
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isequal(half) returns an int, but the vector versions
+// return short.
+_CLC_DEFINE_ISEQUAL(int, isequal, half, half)
+_CLC_DEFINE_ISEQUAL(short2, isequal, half2, half2)
+_CLC_DEFINE_ISEQUAL(short3, isequal, half3, half3)
+_CLC_DEFINE_ISEQUAL(short4, isequal, half4, half4)
+_CLC_DEFINE_ISEQUAL(short8, isequal, half8, half8)
+_CLC_DEFINE_ISEQUAL(short16, isequal, half16, half16)
+
+#endif
+
+#undef _CLC_DEFINE_ISEQUAL
diff --git a/generic/lib/relational/isfinite.cl b/generic/lib/relational/isfinite.cl
index d0658c0..15b92fa 100644
--- a/generic/lib/relational/isfinite.cl
+++ b/generic/lib/relational/isfinite.cl
@@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isfinite(double x) {
 _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isfinite, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isfinite(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isfinite(half x) {
+  return __builtin_isfinite(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isfinite, half)
+
+#endif
diff --git a/generic/lib/relational/isgreater.cl b/generic/lib/relational/isgreater.cl
index 79456e5..167d6f2 100644
--- a/generic/lib/relational/isgreater.cl
+++ b/generic/lib/relational/isgreater.cl
@@ -20,3 +20,18 @@ _CLC_DEF _CLC_OVERLOAD int isgreater(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreater, double, double)
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isgreater(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isgreater(half x, half y){
+	return __builtin_isgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreater, half, half)
+
+#endif
diff --git a/generic/lib/relational/isgreaterequal.cl b/generic/lib/relational/isgreaterequal.cl
index 2d5ebe5..128a1d0 100644
--- a/generic/lib/relational/isgreaterequal.cl
+++ b/generic/lib/relational/isgreaterequal.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isgreaterequal(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isgreaterequal, double, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isgreaterequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isgreaterequal(half x, half y){
+	return __builtin_isgreaterequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isgreaterequal, half, half)
+
+#endif
diff --git a/generic/lib/relational/isinf.cl b/generic/lib/relational/isinf.cl
index 1452d91..96aae4a 100644
--- a/generic/lib/relational/isinf.cl
+++ b/generic/lib/relational/isinf.cl
@@ -14,5 +14,17 @@ _CLC_DEF _CLC_OVERLOAD int isinf(double x) {
 }
 
 _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isinf, double)
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isinf(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isinf(half x) {
+  return __builtin_isinf(x);
+}
 
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isinf, half)
 #endif
diff --git a/generic/lib/relational/isless.cl b/generic/lib/relational/isless.cl
index 56a3e13..1dbf767 100644
--- a/generic/lib/relational/isless.cl
+++ b/generic/lib/relational/isless.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isless(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isless, double, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isless(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isless(half x, half y){
+	return __builtin_isless(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isless, half, half)
+
+#endif
diff --git a/generic/lib/relational/islessequal.cl b/generic/lib/relational/islessequal.cl
index 259c307..db64bea 100644
--- a/generic/lib/relational/islessequal.cl
+++ b/generic/lib/relational/islessequal.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessequal(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessequal, double, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of islessequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int islessequal(half x, half y){
+	return __builtin_islessequal(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessequal, half, half)
+
+#endif
diff --git a/generic/lib/relational/islessgreater.cl b/generic/lib/relational/islessgreater.cl
index fc029f3..9e9b11e 100644
--- a/generic/lib/relational/islessgreater.cl
+++ b/generic/lib/relational/islessgreater.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int islessgreater(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, islessgreater, double, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of islessgreater(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int islessgreater(half x, half y){
+	return __builtin_islessgreater(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, islessgreater, half, half)
+
+#endif
diff --git a/generic/lib/relational/isnan.cl b/generic/lib/relational/isnan.cl
index f82dc5d..3d31047 100644
--- a/generic/lib/relational/isnan.cl
+++ b/generic/lib/relational/isnan.cl
@@ -16,3 +16,17 @@ _CLC_DEF _CLC_OVERLOAD int isnan(double x) {
 _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnan, double)
 
 #endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnan(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isnan(half x) {
+  return __builtin_isnan(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnan, half)
+
+#endif
diff --git a/generic/lib/relational/isnormal.cl b/generic/lib/relational/isnormal.cl
index 2e6b42d..a3dbf66 100644
--- a/generic/lib/relational/isnormal.cl
+++ b/generic/lib/relational/isnormal.cl
@@ -16,3 +16,16 @@ _CLC_DEF _CLC_OVERLOAD int isnormal(double x) {
 _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, isnormal, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnormal(half) returns an int, but the vector versions
+// return short.
+_CLC_DEF _CLC_OVERLOAD int isnormal(half x) {
+  return __builtin_isnormal(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, isnormal, half)
+
+#endif
diff --git a/generic/lib/relational/isnotequal.cl b/generic/lib/relational/isnotequal.cl
index 787fd8d..afd293d 100644
--- a/generic/lib/relational/isnotequal.cl
+++ b/generic/lib/relational/isnotequal.cl
@@ -18,6 +18,16 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isnotequal, float, float)
 _CLC_DEFINE_ISNOTEQUAL(int, isnotequal, double, double)
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isnotequal, double, double)
 
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isnotequal(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEFINE_ISNOTEQUAL(int, isnotequal, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isnotequal, half, half)
+
 #endif
 
 #undef _CLC_DEFINE_ISNOTEQUAL
diff --git a/generic/lib/relational/isordered.cl b/generic/lib/relational/isordered.cl
index ebda2eb..cedd05f 100644
--- a/generic/lib/relational/isordered.cl
+++ b/generic/lib/relational/isordered.cl
@@ -18,6 +18,16 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, isordered, float, float)
 _CLC_DEFINE_ISORDERED(int, isordered, double, double)
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isordered, double, double)
 
+#endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isordered(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEFINE_ISORDERED(int, isordered, half, half)
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isordered, half, half)
+
 #endif
 
 #undef _CLC_DEFINE_ISORDERED
diff --git a/generic/lib/relational/isunordered.cl b/generic/lib/relational/isunordered.cl
index 8bc5e3f..9093980 100644
--- a/generic/lib/relational/isunordered.cl
+++ b/generic/lib/relational/isunordered.cl
@@ -20,3 +20,17 @@ _CLC_DEF _CLC_OVERLOAD int isunordered(double x, double y){
 _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, isunordered, double, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of isunordered(half, half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int isunordered(half x, half y){
+	return __builtin_isunordered(x, y);
+}
+
+_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, isunordered, half, half)
+
+#endif
diff --git a/generic/lib/relational/signbit.cl b/generic/lib/relational/signbit.cl
index ab37d2f..a7378d7 100644
--- a/generic/lib/relational/signbit.cl
+++ b/generic/lib/relational/signbit.cl
@@ -17,3 +17,17 @@ _CLC_DEF _CLC_OVERLOAD int signbit(double x){
 _CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(long, signbit, double)
 
 #endif
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+// The scalar version of signbit(half) returns an int, but the vector versions
+// return short.
+
+_CLC_DEF _CLC_OVERLOAD int signbit(half x){
+	return __builtin_signbit(x);
+}
+
+_CLC_DEFINE_RELATIONAL_UNARY_VEC_ALL(short, signbit, half)
+
+#endif
diff --git a/generic/lib/shared/vstore_half.inc b/generic/lib/shared/vstore_half.inc
index ee4e38b..c8d6271 100644
--- a/generic/lib/shared/vstore_half.inc
+++ b/generic/lib/shared/vstore_half.inc
@@ -1,4 +1,5 @@
 
+#if __CLC_FPSIZE > 16
 #ifdef __CLC_VECSIZE
 
 #if __CLC_VECSIZE == 3
@@ -17,3 +18,4 @@
   FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
   FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
 #endif
+#endif
-- 
2.17.0



More information about the Libclc-dev mailing list