[clang] 2254803 - [OpenCL] opencl-c.h: remove arg names for vload/vstore builtins

Sven van Haastregt via cfe-commits cfe-commits at lists.llvm.org
Wed Mar 23 04:13:01 PDT 2022


Author: Sven van Haastregt
Date: 2022-03-23T11:12:50Z
New Revision: 22548032be7646569a5eb8c39fa03f227a494062

URL: https://github.com/llvm/llvm-project/commit/22548032be7646569a5eb8c39fa03f227a494062
DIFF: https://github.com/llvm/llvm-project/commit/22548032be7646569a5eb8c39fa03f227a494062.diff

LOG: [OpenCL] opencl-c.h: remove arg names for vload/vstore builtins

This simplifies completeness comparisons against OpenCLBuiltins.td and
also makes the header no longer "claim" the identifiers "data" and
"offset".

Continues the direction set out in D119560.

Added: 
    

Modified: 
    clang/lib/Headers/opencl-c.h

Removed: 
    


################################################################################
diff  --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 9b8461bfccfda..49acac840f158 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -11201,545 +11201,545 @@ half16 __ovld __cnfn select(half16 a, half16 b, ushort16 c);
  * 64-bit aligned if gentype is long, ulong, double.
  */
 
-char2 __ovld __purefn vload2(size_t offset, const __constant char *p);
-uchar2 __ovld __purefn vload2(size_t offset, const __constant uchar *p);
-short2 __ovld __purefn vload2(size_t offset, const __constant short *p);
-ushort2 __ovld __purefn vload2(size_t offset, const __constant ushort *p);
-int2 __ovld __purefn vload2(size_t offset, const __constant int *p);
-uint2 __ovld __purefn vload2(size_t offset, const __constant uint *p);
-long2 __ovld __purefn vload2(size_t offset, const __constant long *p);
-ulong2 __ovld __purefn vload2(size_t offset, const __constant ulong *p);
-float2 __ovld __purefn vload2(size_t offset, const __constant float *p);
-char3 __ovld __purefn vload3(size_t offset, const __constant char *p);
-uchar3 __ovld __purefn vload3(size_t offset, const __constant uchar *p);
-short3 __ovld __purefn vload3(size_t offset, const __constant short *p);
-ushort3 __ovld __purefn vload3(size_t offset, const __constant ushort *p);
-int3 __ovld __purefn vload3(size_t offset, const __constant int *p);
-uint3 __ovld __purefn vload3(size_t offset, const __constant uint *p);
-long3 __ovld __purefn vload3(size_t offset, const __constant long *p);
-ulong3 __ovld __purefn vload3(size_t offset, const __constant ulong *p);
-float3 __ovld __purefn vload3(size_t offset, const __constant float *p);
-char4 __ovld __purefn vload4(size_t offset, const __constant char *p);
-uchar4 __ovld __purefn vload4(size_t offset, const __constant uchar *p);
-short4 __ovld __purefn vload4(size_t offset, const __constant short *p);
-ushort4 __ovld __purefn vload4(size_t offset, const __constant ushort *p);
-int4 __ovld __purefn vload4(size_t offset, const __constant int *p);
-uint4 __ovld __purefn vload4(size_t offset, const __constant uint *p);
-long4 __ovld __purefn vload4(size_t offset, const __constant long *p);
-ulong4 __ovld __purefn vload4(size_t offset, const __constant ulong *p);
-float4 __ovld __purefn vload4(size_t offset, const __constant float *p);
-char8 __ovld __purefn vload8(size_t offset, const __constant char *p);
-uchar8 __ovld __purefn vload8(size_t offset, const __constant uchar *p);
-short8 __ovld __purefn vload8(size_t offset, const __constant short *p);
-ushort8 __ovld __purefn vload8(size_t offset, const __constant ushort *p);
-int8 __ovld __purefn vload8(size_t offset, const __constant int *p);
-uint8 __ovld __purefn vload8(size_t offset, const __constant uint *p);
-long8 __ovld __purefn vload8(size_t offset, const __constant long *p);
-ulong8 __ovld __purefn vload8(size_t offset, const __constant ulong *p);
-float8 __ovld __purefn vload8(size_t offset, const __constant float *p);
-char16 __ovld __purefn vload16(size_t offset, const __constant char *p);
-uchar16 __ovld __purefn vload16(size_t offset, const __constant uchar *p);
-short16 __ovld __purefn vload16(size_t offset, const __constant short *p);
-ushort16 __ovld __purefn vload16(size_t offset, const __constant ushort *p);
-int16 __ovld __purefn vload16(size_t offset, const __constant int *p);
-uint16 __ovld __purefn vload16(size_t offset, const __constant uint *p);
-long16 __ovld __purefn vload16(size_t offset, const __constant long *p);
-ulong16 __ovld __purefn vload16(size_t offset, const __constant ulong *p);
-float16 __ovld __purefn vload16(size_t offset, const __constant float *p);
+char2 __ovld __purefn vload2(size_t, const __constant char *);
+uchar2 __ovld __purefn vload2(size_t, const __constant uchar *);
+short2 __ovld __purefn vload2(size_t, const __constant short *);
+ushort2 __ovld __purefn vload2(size_t, const __constant ushort *);
+int2 __ovld __purefn vload2(size_t, const __constant int *);
+uint2 __ovld __purefn vload2(size_t, const __constant uint *);
+long2 __ovld __purefn vload2(size_t, const __constant long *);
+ulong2 __ovld __purefn vload2(size_t, const __constant ulong *);
+float2 __ovld __purefn vload2(size_t, const __constant float *);
+char3 __ovld __purefn vload3(size_t, const __constant char *);
+uchar3 __ovld __purefn vload3(size_t, const __constant uchar *);
+short3 __ovld __purefn vload3(size_t, const __constant short *);
+ushort3 __ovld __purefn vload3(size_t, const __constant ushort *);
+int3 __ovld __purefn vload3(size_t, const __constant int *);
+uint3 __ovld __purefn vload3(size_t, const __constant uint *);
+long3 __ovld __purefn vload3(size_t, const __constant long *);
+ulong3 __ovld __purefn vload3(size_t, const __constant ulong *);
+float3 __ovld __purefn vload3(size_t, const __constant float *);
+char4 __ovld __purefn vload4(size_t, const __constant char *);
+uchar4 __ovld __purefn vload4(size_t, const __constant uchar *);
+short4 __ovld __purefn vload4(size_t, const __constant short *);
+ushort4 __ovld __purefn vload4(size_t, const __constant ushort *);
+int4 __ovld __purefn vload4(size_t, const __constant int *);
+uint4 __ovld __purefn vload4(size_t, const __constant uint *);
+long4 __ovld __purefn vload4(size_t, const __constant long *);
+ulong4 __ovld __purefn vload4(size_t, const __constant ulong *);
+float4 __ovld __purefn vload4(size_t, const __constant float *);
+char8 __ovld __purefn vload8(size_t, const __constant char *);
+uchar8 __ovld __purefn vload8(size_t, const __constant uchar *);
+short8 __ovld __purefn vload8(size_t, const __constant short *);
+ushort8 __ovld __purefn vload8(size_t, const __constant ushort *);
+int8 __ovld __purefn vload8(size_t, const __constant int *);
+uint8 __ovld __purefn vload8(size_t, const __constant uint *);
+long8 __ovld __purefn vload8(size_t, const __constant long *);
+ulong8 __ovld __purefn vload8(size_t, const __constant ulong *);
+float8 __ovld __purefn vload8(size_t, const __constant float *);
+char16 __ovld __purefn vload16(size_t, const __constant char *);
+uchar16 __ovld __purefn vload16(size_t, const __constant uchar *);
+short16 __ovld __purefn vload16(size_t, const __constant short *);
+ushort16 __ovld __purefn vload16(size_t, const __constant ushort *);
+int16 __ovld __purefn vload16(size_t, const __constant int *);
+uint16 __ovld __purefn vload16(size_t, const __constant uint *);
+long16 __ovld __purefn vload16(size_t, const __constant long *);
+ulong16 __ovld __purefn vload16(size_t, const __constant ulong *);
+float16 __ovld __purefn vload16(size_t, const __constant float *);
 #ifdef cl_khr_fp64
-double2 __ovld __purefn vload2(size_t offset, const __constant double *p);
-double3 __ovld __purefn vload3(size_t offset, const __constant double *p);
-double4 __ovld __purefn vload4(size_t offset, const __constant double *p);
-double8 __ovld __purefn vload8(size_t offset, const __constant double *p);
-double16 __ovld __purefn vload16(size_t offset, const __constant double *p);
+double2 __ovld __purefn vload2(size_t, const __constant double *);
+double3 __ovld __purefn vload3(size_t, const __constant double *);
+double4 __ovld __purefn vload4(size_t, const __constant double *);
+double8 __ovld __purefn vload8(size_t, const __constant double *);
+double16 __ovld __purefn vload16(size_t, const __constant double *);
 #endif //cl_khr_fp64
 
 #ifdef cl_khr_fp16
-half __ovld __purefn vload(size_t offset, const __constant half *p);
-half2 __ovld __purefn vload2(size_t offset, const __constant half *p);
-half3 __ovld __purefn vload3(size_t offset, const __constant half *p);
-half4 __ovld __purefn vload4(size_t offset, const __constant half *p);
-half8 __ovld __purefn vload8(size_t offset, const __constant half *p);
-half16 __ovld __purefn vload16(size_t offset, const __constant half *p);
+half __ovld __purefn vload(size_t, const __constant half *);
+half2 __ovld __purefn vload2(size_t, const __constant half *);
+half3 __ovld __purefn vload3(size_t, const __constant half *);
+half4 __ovld __purefn vload4(size_t, const __constant half *);
+half8 __ovld __purefn vload8(size_t, const __constant half *);
+half16 __ovld __purefn vload16(size_t, const __constant half *);
 #endif //cl_khr_fp16
 
 #if defined(__opencl_c_generic_address_space)
-char2 __ovld __purefn vload2(size_t offset, const char *p);
-uchar2 __ovld __purefn vload2(size_t offset, const uchar *p);
-short2 __ovld __purefn vload2(size_t offset, const short *p);
-ushort2 __ovld __purefn vload2(size_t offset, const ushort *p);
-int2 __ovld __purefn vload2(size_t offset, const int *p);
-uint2 __ovld __purefn vload2(size_t offset, const uint *p);
-long2 __ovld __purefn vload2(size_t offset, const long *p);
-ulong2 __ovld __purefn vload2(size_t offset, const ulong *p);
-float2 __ovld __purefn vload2(size_t offset, const float *p);
-char3 __ovld __purefn vload3(size_t offset, const char *p);
-uchar3 __ovld __purefn vload3(size_t offset, const uchar *p);
-short3 __ovld __purefn vload3(size_t offset, const short *p);
-ushort3 __ovld __purefn vload3(size_t offset, const ushort *p);
-int3 __ovld __purefn vload3(size_t offset, const int *p);
-uint3 __ovld __purefn vload3(size_t offset, const uint *p);
-long3 __ovld __purefn vload3(size_t offset, const long *p);
-ulong3 __ovld __purefn vload3(size_t offset, const ulong *p);
-float3 __ovld __purefn vload3(size_t offset, const float *p);
-char4 __ovld __purefn vload4(size_t offset, const char *p);
-uchar4 __ovld __purefn vload4(size_t offset, const uchar *p);
-short4 __ovld __purefn vload4(size_t offset, const short *p);
-ushort4 __ovld __purefn vload4(size_t offset, const ushort *p);
-int4 __ovld __purefn vload4(size_t offset, const int *p);
-uint4 __ovld __purefn vload4(size_t offset, const uint *p);
-long4 __ovld __purefn vload4(size_t offset, const long *p);
-ulong4 __ovld __purefn vload4(size_t offset, const ulong *p);
-float4 __ovld __purefn vload4(size_t offset, const float *p);
-char8 __ovld __purefn vload8(size_t offset, const char *p);
-uchar8 __ovld __purefn vload8(size_t offset, const uchar *p);
-short8 __ovld __purefn vload8(size_t offset, const short *p);
-ushort8 __ovld __purefn vload8(size_t offset, const ushort *p);
-int8 __ovld __purefn vload8(size_t offset, const int *p);
-uint8 __ovld __purefn vload8(size_t offset, const uint *p);
-long8 __ovld __purefn vload8(size_t offset, const long *p);
-ulong8 __ovld __purefn vload8(size_t offset, const ulong *p);
-float8 __ovld __purefn vload8(size_t offset, const float *p);
-char16 __ovld __purefn vload16(size_t offset, const char *p);
-uchar16 __ovld __purefn vload16(size_t offset, const uchar *p);
-short16 __ovld __purefn vload16(size_t offset, const short *p);
-ushort16 __ovld __purefn vload16(size_t offset, const ushort *p);
-int16 __ovld __purefn vload16(size_t offset, const int *p);
-uint16 __ovld __purefn vload16(size_t offset, const uint *p);
-long16 __ovld __purefn vload16(size_t offset, const long *p);
-ulong16 __ovld __purefn vload16(size_t offset, const ulong *p);
-float16 __ovld __purefn vload16(size_t offset, const float *p);
+char2 __ovld __purefn vload2(size_t, const char *);
+uchar2 __ovld __purefn vload2(size_t, const uchar *);
+short2 __ovld __purefn vload2(size_t, const short *);
+ushort2 __ovld __purefn vload2(size_t, const ushort *);
+int2 __ovld __purefn vload2(size_t, const int *);
+uint2 __ovld __purefn vload2(size_t, const uint *);
+long2 __ovld __purefn vload2(size_t, const long *);
+ulong2 __ovld __purefn vload2(size_t, const ulong *);
+float2 __ovld __purefn vload2(size_t, const float *);
+char3 __ovld __purefn vload3(size_t, const char *);
+uchar3 __ovld __purefn vload3(size_t, const uchar *);
+short3 __ovld __purefn vload3(size_t, const short *);
+ushort3 __ovld __purefn vload3(size_t, const ushort *);
+int3 __ovld __purefn vload3(size_t, const int *);
+uint3 __ovld __purefn vload3(size_t, const uint *);
+long3 __ovld __purefn vload3(size_t, const long *);
+ulong3 __ovld __purefn vload3(size_t, const ulong *);
+float3 __ovld __purefn vload3(size_t, const float *);
+char4 __ovld __purefn vload4(size_t, const char *);
+uchar4 __ovld __purefn vload4(size_t, const uchar *);
+short4 __ovld __purefn vload4(size_t, const short *);
+ushort4 __ovld __purefn vload4(size_t, const ushort *);
+int4 __ovld __purefn vload4(size_t, const int *);
+uint4 __ovld __purefn vload4(size_t, const uint *);
+long4 __ovld __purefn vload4(size_t, const long *);
+ulong4 __ovld __purefn vload4(size_t, const ulong *);
+float4 __ovld __purefn vload4(size_t, const float *);
+char8 __ovld __purefn vload8(size_t, const char *);
+uchar8 __ovld __purefn vload8(size_t, const uchar *);
+short8 __ovld __purefn vload8(size_t, const short *);
+ushort8 __ovld __purefn vload8(size_t, const ushort *);
+int8 __ovld __purefn vload8(size_t, const int *);
+uint8 __ovld __purefn vload8(size_t, const uint *);
+long8 __ovld __purefn vload8(size_t, const long *);
+ulong8 __ovld __purefn vload8(size_t, const ulong *);
+float8 __ovld __purefn vload8(size_t, const float *);
+char16 __ovld __purefn vload16(size_t, const char *);
+uchar16 __ovld __purefn vload16(size_t, const uchar *);
+short16 __ovld __purefn vload16(size_t, const short *);
+ushort16 __ovld __purefn vload16(size_t, const ushort *);
+int16 __ovld __purefn vload16(size_t, const int *);
+uint16 __ovld __purefn vload16(size_t, const uint *);
+long16 __ovld __purefn vload16(size_t, const long *);
+ulong16 __ovld __purefn vload16(size_t, const ulong *);
+float16 __ovld __purefn vload16(size_t, const float *);
 
 #ifdef cl_khr_fp64
-double2 __ovld __purefn vload2(size_t offset, const double *p);
-double3 __ovld __purefn vload3(size_t offset, const double *p);
-double4 __ovld __purefn vload4(size_t offset, const double *p);
-double8 __ovld __purefn vload8(size_t offset, const double *p);
-double16 __ovld __purefn vload16(size_t offset, const double *p);
+double2 __ovld __purefn vload2(size_t, const double *);
+double3 __ovld __purefn vload3(size_t, const double *);
+double4 __ovld __purefn vload4(size_t, const double *);
+double8 __ovld __purefn vload8(size_t, const double *);
+double16 __ovld __purefn vload16(size_t, const double *);
 #endif //cl_khr_fp64
 
 #ifdef cl_khr_fp16
-half __ovld __purefn vload(size_t offset, const half *p);
-half2 __ovld __purefn vload2(size_t offset, const half *p);
-half3 __ovld __purefn vload3(size_t offset, const half *p);
-half4 __ovld __purefn vload4(size_t offset, const half *p);
-half8 __ovld __purefn vload8(size_t offset, const half *p);
-half16 __ovld __purefn vload16(size_t offset, const half *p);
+half __ovld __purefn vload(size_t, const half *);
+half2 __ovld __purefn vload2(size_t, const half *);
+half3 __ovld __purefn vload3(size_t, const half *);
+half4 __ovld __purefn vload4(size_t, const half *);
+half8 __ovld __purefn vload8(size_t, const half *);
+half16 __ovld __purefn vload16(size_t, const half *);
 #endif //cl_khr_fp16
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-char2 __ovld __purefn vload2(size_t offset, const __global char *p);
-uchar2 __ovld __purefn vload2(size_t offset, const __global uchar *p);
-short2 __ovld __purefn vload2(size_t offset, const __global short *p);
-ushort2 __ovld __purefn vload2(size_t offset, const __global ushort *p);
-int2 __ovld __purefn vload2(size_t offset, const __global int *p);
-uint2 __ovld __purefn vload2(size_t offset, const __global uint *p);
-long2 __ovld __purefn vload2(size_t offset, const __global long *p);
-ulong2 __ovld __purefn vload2(size_t offset, const __global ulong *p);
-float2 __ovld __purefn vload2(size_t offset, const __global float *p);
-char3 __ovld __purefn vload3(size_t offset, const __global char *p);
-uchar3 __ovld __purefn vload3(size_t offset, const __global uchar *p);
-short3 __ovld __purefn vload3(size_t offset, const __global short *p);
-ushort3 __ovld __purefn vload3(size_t offset, const __global ushort *p);
-int3 __ovld __purefn vload3(size_t offset, const __global int *p);
-uint3 __ovld __purefn vload3(size_t offset, const __global uint *p);
-long3 __ovld __purefn vload3(size_t offset, const __global long *p);
-ulong3 __ovld __purefn vload3(size_t offset, const __global ulong *p);
-float3 __ovld __purefn vload3(size_t offset, const __global float *p);
-char4 __ovld __purefn vload4(size_t offset, const __global char *p);
-uchar4 __ovld __purefn vload4(size_t offset, const __global uchar *p);
-short4 __ovld __purefn vload4(size_t offset, const __global short *p);
-ushort4 __ovld __purefn vload4(size_t offset, const __global ushort *p);
-int4 __ovld __purefn vload4(size_t offset, const __global int *p);
-uint4 __ovld __purefn vload4(size_t offset, const __global uint *p);
-long4 __ovld __purefn vload4(size_t offset, const __global long *p);
-ulong4 __ovld __purefn vload4(size_t offset, const __global ulong *p);
-float4 __ovld __purefn vload4(size_t offset, const __global float *p);
-char8 __ovld __purefn vload8(size_t offset, const __global char *p);
-uchar8 __ovld __purefn vload8(size_t offset, const __global uchar *p);
-short8 __ovld __purefn vload8(size_t offset, const __global short *p);
-ushort8 __ovld __purefn vload8(size_t offset, const __global ushort *p);
-int8 __ovld __purefn vload8(size_t offset, const __global int *p);
-uint8 __ovld __purefn vload8(size_t offset, const __global uint *p);
-long8 __ovld __purefn vload8(size_t offset, const __global long *p);
-ulong8 __ovld __purefn vload8(size_t offset, const __global ulong *p);
-float8 __ovld __purefn vload8(size_t offset, const __global float *p);
-char16 __ovld __purefn vload16(size_t offset, const __global char *p);
-uchar16 __ovld __purefn vload16(size_t offset, const __global uchar *p);
-short16 __ovld __purefn vload16(size_t offset, const __global short *p);
-ushort16 __ovld __purefn vload16(size_t offset, const __global ushort *p);
-int16 __ovld __purefn vload16(size_t offset, const __global int *p);
-uint16 __ovld __purefn vload16(size_t offset, const __global uint *p);
-long16 __ovld __purefn vload16(size_t offset, const __global long *p);
-ulong16 __ovld __purefn vload16(size_t offset, const __global ulong *p);
-float16 __ovld __purefn vload16(size_t offset, const __global float *p);
-char2 __ovld __purefn vload2(size_t offset, const __local char *p);
-uchar2 __ovld __purefn vload2(size_t offset, const __local uchar *p);
-short2 __ovld __purefn vload2(size_t offset, const __local short *p);
-ushort2 __ovld __purefn vload2(size_t offset, const __local ushort *p);
-int2 __ovld __purefn vload2(size_t offset, const __local int *p);
-uint2 __ovld __purefn vload2(size_t offset, const __local uint *p);
-long2 __ovld __purefn vload2(size_t offset, const __local long *p);
-ulong2 __ovld __purefn vload2(size_t offset, const __local ulong *p);
-float2 __ovld __purefn vload2(size_t offset, const __local float *p);
-char3 __ovld __purefn vload3(size_t offset, const __local char *p);
-uchar3 __ovld __purefn vload3(size_t offset, const __local uchar *p);
-short3 __ovld __purefn vload3(size_t offset, const __local short *p);
-ushort3 __ovld __purefn vload3(size_t offset, const __local ushort *p);
-int3 __ovld __purefn vload3(size_t offset, const __local int *p);
-uint3 __ovld __purefn vload3(size_t offset, const __local uint *p);
-long3 __ovld __purefn vload3(size_t offset, const __local long *p);
-ulong3 __ovld __purefn vload3(size_t offset, const __local ulong *p);
-float3 __ovld __purefn vload3(size_t offset, const __local float *p);
-char4 __ovld __purefn vload4(size_t offset, const __local char *p);
-uchar4 __ovld __purefn vload4(size_t offset, const __local uchar *p);
-short4 __ovld __purefn vload4(size_t offset, const __local short *p);
-ushort4 __ovld __purefn vload4(size_t offset, const __local ushort *p);
-int4 __ovld __purefn vload4(size_t offset, const __local int *p);
-uint4 __ovld __purefn vload4(size_t offset, const __local uint *p);
-long4 __ovld __purefn vload4(size_t offset, const __local long *p);
-ulong4 __ovld __purefn vload4(size_t offset, const __local ulong *p);
-float4 __ovld __purefn vload4(size_t offset, const __local float *p);
-char8 __ovld __purefn vload8(size_t offset, const __local char *p);
-uchar8 __ovld __purefn vload8(size_t offset, const __local uchar *p);
-short8 __ovld __purefn vload8(size_t offset, const __local short *p);
-ushort8 __ovld __purefn vload8(size_t offset, const __local ushort *p);
-int8 __ovld __purefn vload8(size_t offset, const __local int *p);
-uint8 __ovld __purefn vload8(size_t offset, const __local uint *p);
-long8 __ovld __purefn vload8(size_t offset, const __local long *p);
-ulong8 __ovld __purefn vload8(size_t offset, const __local ulong *p);
-float8 __ovld __purefn vload8(size_t offset, const __local float *p);
-char16 __ovld __purefn vload16(size_t offset, const __local char *p);
-uchar16 __ovld __purefn vload16(size_t offset, const __local uchar *p);
-short16 __ovld __purefn vload16(size_t offset, const __local short *p);
-ushort16 __ovld __purefn vload16(size_t offset, const __local ushort *p);
-int16 __ovld __purefn vload16(size_t offset, const __local int *p);
-uint16 __ovld __purefn vload16(size_t offset, const __local uint *p);
-long16 __ovld __purefn vload16(size_t offset, const __local long *p);
-ulong16 __ovld __purefn vload16(size_t offset, const __local ulong *p);
-float16 __ovld __purefn vload16(size_t offset, const __local float *p);
-char2 __ovld __purefn vload2(size_t offset, const __private char *p);
-uchar2 __ovld __purefn vload2(size_t offset, const __private uchar *p);
-short2 __ovld __purefn vload2(size_t offset, const __private short *p);
-ushort2 __ovld __purefn vload2(size_t offset, const __private ushort *p);
-int2 __ovld __purefn vload2(size_t offset, const __private int *p);
-uint2 __ovld __purefn vload2(size_t offset, const __private uint *p);
-long2 __ovld __purefn vload2(size_t offset, const __private long *p);
-ulong2 __ovld __purefn vload2(size_t offset, const __private ulong *p);
-float2 __ovld __purefn vload2(size_t offset, const __private float *p);
-char3 __ovld __purefn vload3(size_t offset, const __private char *p);
-uchar3 __ovld __purefn vload3(size_t offset, const __private uchar *p);
-short3 __ovld __purefn vload3(size_t offset, const __private short *p);
-ushort3 __ovld __purefn vload3(size_t offset, const __private ushort *p);
-int3 __ovld __purefn vload3(size_t offset, const __private int *p);
-uint3 __ovld __purefn vload3(size_t offset, const __private uint *p);
-long3 __ovld __purefn vload3(size_t offset, const __private long *p);
-ulong3 __ovld __purefn vload3(size_t offset, const __private ulong *p);
-float3 __ovld __purefn vload3(size_t offset, const __private float *p);
-char4 __ovld __purefn vload4(size_t offset, const __private char *p);
-uchar4 __ovld __purefn vload4(size_t offset, const __private uchar *p);
-short4 __ovld __purefn vload4(size_t offset, const __private short *p);
-ushort4 __ovld __purefn vload4(size_t offset, const __private ushort *p);
-int4 __ovld __purefn vload4(size_t offset, const __private int *p);
-uint4 __ovld __purefn vload4(size_t offset, const __private uint *p);
-long4 __ovld __purefn vload4(size_t offset, const __private long *p);
-ulong4 __ovld __purefn vload4(size_t offset, const __private ulong *p);
-float4 __ovld __purefn vload4(size_t offset, const __private float *p);
-char8 __ovld __purefn vload8(size_t offset, const __private char *p);
-uchar8 __ovld __purefn vload8(size_t offset, const __private uchar *p);
-short8 __ovld __purefn vload8(size_t offset, const __private short *p);
-ushort8 __ovld __purefn vload8(size_t offset, const __private ushort *p);
-int8 __ovld __purefn vload8(size_t offset, const __private int *p);
-uint8 __ovld __purefn vload8(size_t offset, const __private uint *p);
-long8 __ovld __purefn vload8(size_t offset, const __private long *p);
-ulong8 __ovld __purefn vload8(size_t offset, const __private ulong *p);
-float8 __ovld __purefn vload8(size_t offset, const __private float *p);
-char16 __ovld __purefn vload16(size_t offset, const __private char *p);
-uchar16 __ovld __purefn vload16(size_t offset, const __private uchar *p);
-short16 __ovld __purefn vload16(size_t offset, const __private short *p);
-ushort16 __ovld __purefn vload16(size_t offset, const __private ushort *p);
-int16 __ovld __purefn vload16(size_t offset, const __private int *p);
-uint16 __ovld __purefn vload16(size_t offset, const __private uint *p);
-long16 __ovld __purefn vload16(size_t offset, const __private long *p);
-ulong16 __ovld __purefn vload16(size_t offset, const __private ulong *p);
-float16 __ovld __purefn vload16(size_t offset, const __private float *p);
+char2 __ovld __purefn vload2(size_t, const __global char *);
+uchar2 __ovld __purefn vload2(size_t, const __global uchar *);
+short2 __ovld __purefn vload2(size_t, const __global short *);
+ushort2 __ovld __purefn vload2(size_t, const __global ushort *);
+int2 __ovld __purefn vload2(size_t, const __global int *);
+uint2 __ovld __purefn vload2(size_t, const __global uint *);
+long2 __ovld __purefn vload2(size_t, const __global long *);
+ulong2 __ovld __purefn vload2(size_t, const __global ulong *);
+float2 __ovld __purefn vload2(size_t, const __global float *);
+char3 __ovld __purefn vload3(size_t, const __global char *);
+uchar3 __ovld __purefn vload3(size_t, const __global uchar *);
+short3 __ovld __purefn vload3(size_t, const __global short *);
+ushort3 __ovld __purefn vload3(size_t, const __global ushort *);
+int3 __ovld __purefn vload3(size_t, const __global int *);
+uint3 __ovld __purefn vload3(size_t, const __global uint *);
+long3 __ovld __purefn vload3(size_t, const __global long *);
+ulong3 __ovld __purefn vload3(size_t, const __global ulong *);
+float3 __ovld __purefn vload3(size_t, const __global float *);
+char4 __ovld __purefn vload4(size_t, const __global char *);
+uchar4 __ovld __purefn vload4(size_t, const __global uchar *);
+short4 __ovld __purefn vload4(size_t, const __global short *);
+ushort4 __ovld __purefn vload4(size_t, const __global ushort *);
+int4 __ovld __purefn vload4(size_t, const __global int *);
+uint4 __ovld __purefn vload4(size_t, const __global uint *);
+long4 __ovld __purefn vload4(size_t, const __global long *);
+ulong4 __ovld __purefn vload4(size_t, const __global ulong *);
+float4 __ovld __purefn vload4(size_t, const __global float *);
+char8 __ovld __purefn vload8(size_t, const __global char *);
+uchar8 __ovld __purefn vload8(size_t, const __global uchar *);
+short8 __ovld __purefn vload8(size_t, const __global short *);
+ushort8 __ovld __purefn vload8(size_t, const __global ushort *);
+int8 __ovld __purefn vload8(size_t, const __global int *);
+uint8 __ovld __purefn vload8(size_t, const __global uint *);
+long8 __ovld __purefn vload8(size_t, const __global long *);
+ulong8 __ovld __purefn vload8(size_t, const __global ulong *);
+float8 __ovld __purefn vload8(size_t, const __global float *);
+char16 __ovld __purefn vload16(size_t, const __global char *);
+uchar16 __ovld __purefn vload16(size_t, const __global uchar *);
+short16 __ovld __purefn vload16(size_t, const __global short *);
+ushort16 __ovld __purefn vload16(size_t, const __global ushort *);
+int16 __ovld __purefn vload16(size_t, const __global int *);
+uint16 __ovld __purefn vload16(size_t, const __global uint *);
+long16 __ovld __purefn vload16(size_t, const __global long *);
+ulong16 __ovld __purefn vload16(size_t, const __global ulong *);
+float16 __ovld __purefn vload16(size_t, const __global float *);
+char2 __ovld __purefn vload2(size_t, const __local char *);
+uchar2 __ovld __purefn vload2(size_t, const __local uchar *);
+short2 __ovld __purefn vload2(size_t, const __local short *);
+ushort2 __ovld __purefn vload2(size_t, const __local ushort *);
+int2 __ovld __purefn vload2(size_t, const __local int *);
+uint2 __ovld __purefn vload2(size_t, const __local uint *);
+long2 __ovld __purefn vload2(size_t, const __local long *);
+ulong2 __ovld __purefn vload2(size_t, const __local ulong *);
+float2 __ovld __purefn vload2(size_t, const __local float *);
+char3 __ovld __purefn vload3(size_t, const __local char *);
+uchar3 __ovld __purefn vload3(size_t, const __local uchar *);
+short3 __ovld __purefn vload3(size_t, const __local short *);
+ushort3 __ovld __purefn vload3(size_t, const __local ushort *);
+int3 __ovld __purefn vload3(size_t, const __local int *);
+uint3 __ovld __purefn vload3(size_t, const __local uint *);
+long3 __ovld __purefn vload3(size_t, const __local long *);
+ulong3 __ovld __purefn vload3(size_t, const __local ulong *);
+float3 __ovld __purefn vload3(size_t, const __local float *);
+char4 __ovld __purefn vload4(size_t, const __local char *);
+uchar4 __ovld __purefn vload4(size_t, const __local uchar *);
+short4 __ovld __purefn vload4(size_t, const __local short *);
+ushort4 __ovld __purefn vload4(size_t, const __local ushort *);
+int4 __ovld __purefn vload4(size_t, const __local int *);
+uint4 __ovld __purefn vload4(size_t, const __local uint *);
+long4 __ovld __purefn vload4(size_t, const __local long *);
+ulong4 __ovld __purefn vload4(size_t, const __local ulong *);
+float4 __ovld __purefn vload4(size_t, const __local float *);
+char8 __ovld __purefn vload8(size_t, const __local char *);
+uchar8 __ovld __purefn vload8(size_t, const __local uchar *);
+short8 __ovld __purefn vload8(size_t, const __local short *);
+ushort8 __ovld __purefn vload8(size_t, const __local ushort *);
+int8 __ovld __purefn vload8(size_t, const __local int *);
+uint8 __ovld __purefn vload8(size_t, const __local uint *);
+long8 __ovld __purefn vload8(size_t, const __local long *);
+ulong8 __ovld __purefn vload8(size_t, const __local ulong *);
+float8 __ovld __purefn vload8(size_t, const __local float *);
+char16 __ovld __purefn vload16(size_t, const __local char *);
+uchar16 __ovld __purefn vload16(size_t, const __local uchar *);
+short16 __ovld __purefn vload16(size_t, const __local short *);
+ushort16 __ovld __purefn vload16(size_t, const __local ushort *);
+int16 __ovld __purefn vload16(size_t, const __local int *);
+uint16 __ovld __purefn vload16(size_t, const __local uint *);
+long16 __ovld __purefn vload16(size_t, const __local long *);
+ulong16 __ovld __purefn vload16(size_t, const __local ulong *);
+float16 __ovld __purefn vload16(size_t, const __local float *);
+char2 __ovld __purefn vload2(size_t, const __private char *);
+uchar2 __ovld __purefn vload2(size_t, const __private uchar *);
+short2 __ovld __purefn vload2(size_t, const __private short *);
+ushort2 __ovld __purefn vload2(size_t, const __private ushort *);
+int2 __ovld __purefn vload2(size_t, const __private int *);
+uint2 __ovld __purefn vload2(size_t, const __private uint *);
+long2 __ovld __purefn vload2(size_t, const __private long *);
+ulong2 __ovld __purefn vload2(size_t, const __private ulong *);
+float2 __ovld __purefn vload2(size_t, const __private float *);
+char3 __ovld __purefn vload3(size_t, const __private char *);
+uchar3 __ovld __purefn vload3(size_t, const __private uchar *);
+short3 __ovld __purefn vload3(size_t, const __private short *);
+ushort3 __ovld __purefn vload3(size_t, const __private ushort *);
+int3 __ovld __purefn vload3(size_t, const __private int *);
+uint3 __ovld __purefn vload3(size_t, const __private uint *);
+long3 __ovld __purefn vload3(size_t, const __private long *);
+ulong3 __ovld __purefn vload3(size_t, const __private ulong *);
+float3 __ovld __purefn vload3(size_t, const __private float *);
+char4 __ovld __purefn vload4(size_t, const __private char *);
+uchar4 __ovld __purefn vload4(size_t, const __private uchar *);
+short4 __ovld __purefn vload4(size_t, const __private short *);
+ushort4 __ovld __purefn vload4(size_t, const __private ushort *);
+int4 __ovld __purefn vload4(size_t, const __private int *);
+uint4 __ovld __purefn vload4(size_t, const __private uint *);
+long4 __ovld __purefn vload4(size_t, const __private long *);
+ulong4 __ovld __purefn vload4(size_t, const __private ulong *);
+float4 __ovld __purefn vload4(size_t, const __private float *);
+char8 __ovld __purefn vload8(size_t, const __private char *);
+uchar8 __ovld __purefn vload8(size_t, const __private uchar *);
+short8 __ovld __purefn vload8(size_t, const __private short *);
+ushort8 __ovld __purefn vload8(size_t, const __private ushort *);
+int8 __ovld __purefn vload8(size_t, const __private int *);
+uint8 __ovld __purefn vload8(size_t, const __private uint *);
+long8 __ovld __purefn vload8(size_t, const __private long *);
+ulong8 __ovld __purefn vload8(size_t, const __private ulong *);
+float8 __ovld __purefn vload8(size_t, const __private float *);
+char16 __ovld __purefn vload16(size_t, const __private char *);
+uchar16 __ovld __purefn vload16(size_t, const __private uchar *);
+short16 __ovld __purefn vload16(size_t, const __private short *);
+ushort16 __ovld __purefn vload16(size_t, const __private ushort *);
+int16 __ovld __purefn vload16(size_t, const __private int *);
+uint16 __ovld __purefn vload16(size_t, const __private uint *);
+long16 __ovld __purefn vload16(size_t, const __private long *);
+ulong16 __ovld __purefn vload16(size_t, const __private ulong *);
+float16 __ovld __purefn vload16(size_t, const __private float *);
 
 #ifdef cl_khr_fp64
-double2 __ovld __purefn vload2(size_t offset, const __global double *p);
-double3 __ovld __purefn vload3(size_t offset, const __global double *p);
-double4 __ovld __purefn vload4(size_t offset, const __global double *p);
-double8 __ovld __purefn vload8(size_t offset, const __global double *p);
-double16 __ovld __purefn vload16(size_t offset, const __global double *p);
-double2 __ovld __purefn vload2(size_t offset, const __local double *p);
-double3 __ovld __purefn vload3(size_t offset, const __local double *p);
-double4 __ovld __purefn vload4(size_t offset, const __local double *p);
-double8 __ovld __purefn vload8(size_t offset, const __local double *p);
-double16 __ovld __purefn vload16(size_t offset, const __local double *p);
-double2 __ovld __purefn vload2(size_t offset, const __private double *p);
-double3 __ovld __purefn vload3(size_t offset, const __private double *p);
-double4 __ovld __purefn vload4(size_t offset, const __private double *p);
-double8 __ovld __purefn vload8(size_t offset, const __private double *p);
-double16 __ovld __purefn vload16(size_t offset, const __private double *p);
+double2 __ovld __purefn vload2(size_t, const __global double *);
+double3 __ovld __purefn vload3(size_t, const __global double *);
+double4 __ovld __purefn vload4(size_t, const __global double *);
+double8 __ovld __purefn vload8(size_t, const __global double *);
+double16 __ovld __purefn vload16(size_t, const __global double *);
+double2 __ovld __purefn vload2(size_t, const __local double *);
+double3 __ovld __purefn vload3(size_t, const __local double *);
+double4 __ovld __purefn vload4(size_t, const __local double *);
+double8 __ovld __purefn vload8(size_t, const __local double *);
+double16 __ovld __purefn vload16(size_t, const __local double *);
+double2 __ovld __purefn vload2(size_t, const __private double *);
+double3 __ovld __purefn vload3(size_t, const __private double *);
+double4 __ovld __purefn vload4(size_t, const __private double *);
+double8 __ovld __purefn vload8(size_t, const __private double *);
+double16 __ovld __purefn vload16(size_t, const __private double *);
 #endif //cl_khr_fp64
 
 #ifdef cl_khr_fp16
-half __ovld __purefn vload(size_t offset, const __global half *p);
-half2 __ovld __purefn vload2(size_t offset, const __global half *p);
-half3 __ovld __purefn vload3(size_t offset, const __global half *p);
-half4 __ovld __purefn vload4(size_t offset, const __global half *p);
-half8 __ovld __purefn vload8(size_t offset, const __global half *p);
-half16 __ovld __purefn vload16(size_t offset, const __global half *p);
-half __ovld __purefn vload(size_t offset, const __local half *p);
-half2 __ovld __purefn vload2(size_t offset, const __local half *p);
-half3 __ovld __purefn vload3(size_t offset, const __local half *p);
-half4 __ovld __purefn vload4(size_t offset, const __local half *p);
-half8 __ovld __purefn vload8(size_t offset, const __local half *p);
-half16 __ovld __purefn vload16(size_t offset, const __local half *p);
-half __ovld __purefn vload(size_t offset, const __private half *p);
-half2 __ovld __purefn vload2(size_t offset, const __private half *p);
-half3 __ovld __purefn vload3(size_t offset, const __private half *p);
-half4 __ovld __purefn vload4(size_t offset, const __private half *p);
-half8 __ovld __purefn vload8(size_t offset, const __private half *p);
-half16 __ovld __purefn vload16(size_t offset, const __private half *p);
+half __ovld __purefn vload(size_t, const __global half *);
+half2 __ovld __purefn vload2(size_t, const __global half *);
+half3 __ovld __purefn vload3(size_t, const __global half *);
+half4 __ovld __purefn vload4(size_t, const __global half *);
+half8 __ovld __purefn vload8(size_t, const __global half *);
+half16 __ovld __purefn vload16(size_t, const __global half *);
+half __ovld __purefn vload(size_t, const __local half *);
+half2 __ovld __purefn vload2(size_t, const __local half *);
+half3 __ovld __purefn vload3(size_t, const __local half *);
+half4 __ovld __purefn vload4(size_t, const __local half *);
+half8 __ovld __purefn vload8(size_t, const __local half *);
+half16 __ovld __purefn vload16(size_t, const __local half *);
+half __ovld __purefn vload(size_t, const __private half *);
+half2 __ovld __purefn vload2(size_t, const __private half *);
+half3 __ovld __purefn vload3(size_t, const __private half *);
+half4 __ovld __purefn vload4(size_t, const __private half *);
+half8 __ovld __purefn vload8(size_t, const __private half *);
+half16 __ovld __purefn vload16(size_t, const __private half *);
 #endif //cl_khr_fp16
 #endif //defined(__opencl_c_named_address_space_builtins)
 
 #if defined(__opencl_c_generic_address_space)
-void __ovld vstore2(char2 data, size_t offset, char *p);
-void __ovld vstore2(uchar2 data, size_t offset, uchar *p);
-void __ovld vstore2(short2 data, size_t offset, short *p);
-void __ovld vstore2(ushort2 data, size_t offset, ushort *p);
-void __ovld vstore2(int2 data, size_t offset, int *p);
-void __ovld vstore2(uint2 data, size_t offset, uint *p);
-void __ovld vstore2(long2 data, size_t offset, long *p);
-void __ovld vstore2(ulong2 data, size_t offset, ulong *p);
-void __ovld vstore2(float2 data, size_t offset, float *p);
-void __ovld vstore3(char3 data, size_t offset, char *p);
-void __ovld vstore3(uchar3 data, size_t offset, uchar *p);
-void __ovld vstore3(short3 data, size_t offset, short *p);
-void __ovld vstore3(ushort3 data, size_t offset, ushort *p);
-void __ovld vstore3(int3 data, size_t offset, int *p);
-void __ovld vstore3(uint3 data, size_t offset, uint *p);
-void __ovld vstore3(long3 data, size_t offset, long *p);
-void __ovld vstore3(ulong3 data, size_t offset, ulong *p);
-void __ovld vstore3(float3 data, size_t offset, float *p);
-void __ovld vstore4(char4 data, size_t offset, char *p);
-void __ovld vstore4(uchar4 data, size_t offset, uchar *p);
-void __ovld vstore4(short4 data, size_t offset, short *p);
-void __ovld vstore4(ushort4 data, size_t offset, ushort *p);
-void __ovld vstore4(int4 data, size_t offset, int *p);
-void __ovld vstore4(uint4 data, size_t offset, uint *p);
-void __ovld vstore4(long4 data, size_t offset, long *p);
-void __ovld vstore4(ulong4 data, size_t offset, ulong *p);
-void __ovld vstore4(float4 data, size_t offset, float *p);
-void __ovld vstore8(char8 data, size_t offset, char *p);
-void __ovld vstore8(uchar8 data, size_t offset, uchar *p);
-void __ovld vstore8(short8 data, size_t offset, short *p);
-void __ovld vstore8(ushort8 data, size_t offset, ushort *p);
-void __ovld vstore8(int8 data, size_t offset, int *p);
-void __ovld vstore8(uint8 data, size_t offset, uint *p);
-void __ovld vstore8(long8 data, size_t offset, long *p);
-void __ovld vstore8(ulong8 data, size_t offset, ulong *p);
-void __ovld vstore8(float8 data, size_t offset, float *p);
-void __ovld vstore16(char16 data, size_t offset, char *p);
-void __ovld vstore16(uchar16 data, size_t offset, uchar *p);
-void __ovld vstore16(short16 data, size_t offset, short *p);
-void __ovld vstore16(ushort16 data, size_t offset, ushort *p);
-void __ovld vstore16(int16 data, size_t offset, int *p);
-void __ovld vstore16(uint16 data, size_t offset, uint *p);
-void __ovld vstore16(long16 data, size_t offset, long *p);
-void __ovld vstore16(ulong16 data, size_t offset, ulong *p);
-void __ovld vstore16(float16 data, size_t offset, float *p);
+void __ovld vstore2(char2, size_t, char *);
+void __ovld vstore2(uchar2, size_t, uchar *);
+void __ovld vstore2(short2, size_t, short *);
+void __ovld vstore2(ushort2, size_t, ushort *);
+void __ovld vstore2(int2, size_t, int *);
+void __ovld vstore2(uint2, size_t, uint *);
+void __ovld vstore2(long2, size_t, long *);
+void __ovld vstore2(ulong2, size_t, ulong *);
+void __ovld vstore2(float2, size_t, float *);
+void __ovld vstore3(char3, size_t, char *);
+void __ovld vstore3(uchar3, size_t, uchar *);
+void __ovld vstore3(short3, size_t, short *);
+void __ovld vstore3(ushort3, size_t, ushort *);
+void __ovld vstore3(int3, size_t, int *);
+void __ovld vstore3(uint3, size_t, uint *);
+void __ovld vstore3(long3, size_t, long *);
+void __ovld vstore3(ulong3, size_t, ulong *);
+void __ovld vstore3(float3, size_t, float *);
+void __ovld vstore4(char4, size_t, char *);
+void __ovld vstore4(uchar4, size_t, uchar *);
+void __ovld vstore4(short4, size_t, short *);
+void __ovld vstore4(ushort4, size_t, ushort *);
+void __ovld vstore4(int4, size_t, int *);
+void __ovld vstore4(uint4, size_t, uint *);
+void __ovld vstore4(long4, size_t, long *);
+void __ovld vstore4(ulong4, size_t, ulong *);
+void __ovld vstore4(float4, size_t, float *);
+void __ovld vstore8(char8, size_t, char *);
+void __ovld vstore8(uchar8, size_t, uchar *);
+void __ovld vstore8(short8, size_t, short *);
+void __ovld vstore8(ushort8, size_t, ushort *);
+void __ovld vstore8(int8, size_t, int *);
+void __ovld vstore8(uint8, size_t, uint *);
+void __ovld vstore8(long8, size_t, long *);
+void __ovld vstore8(ulong8, size_t, ulong *);
+void __ovld vstore8(float8, size_t, float *);
+void __ovld vstore16(char16, size_t, char *);
+void __ovld vstore16(uchar16, size_t, uchar *);
+void __ovld vstore16(short16, size_t, short *);
+void __ovld vstore16(ushort16, size_t, ushort *);
+void __ovld vstore16(int16, size_t, int *);
+void __ovld vstore16(uint16, size_t, uint *);
+void __ovld vstore16(long16, size_t, long *);
+void __ovld vstore16(ulong16, size_t, ulong *);
+void __ovld vstore16(float16, size_t, float *);
 #ifdef cl_khr_fp64
-void __ovld vstore2(double2 data, size_t offset, double *p);
-void __ovld vstore3(double3 data, size_t offset, double *p);
-void __ovld vstore4(double4 data, size_t offset, double *p);
-void __ovld vstore8(double8 data, size_t offset, double *p);
-void __ovld vstore16(double16 data, size_t offset, double *p);
+void __ovld vstore2(double2, size_t, double *);
+void __ovld vstore3(double3, size_t, double *);
+void __ovld vstore4(double4, size_t, double *);
+void __ovld vstore8(double8, size_t, double *);
+void __ovld vstore16(double16, size_t, double *);
 #endif //cl_khr_fp64
 #ifdef cl_khr_fp16
-void __ovld vstore(half data, size_t offset, half *p);
-void __ovld vstore2(half2 data, size_t offset, half *p);
-void __ovld vstore3(half3 data, size_t offset, half *p);
-void __ovld vstore4(half4 data, size_t offset, half *p);
-void __ovld vstore8(half8 data, size_t offset, half *p);
-void __ovld vstore16(half16 data, size_t offset, half *p);
+void __ovld vstore(half, size_t, half *);
+void __ovld vstore2(half2, size_t, half *);
+void __ovld vstore3(half3, size_t, half *);
+void __ovld vstore4(half4, size_t, half *);
+void __ovld vstore8(half8, size_t, half *);
+void __ovld vstore16(half16, size_t, half *);
 #endif //cl_khr_fp16
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-void __ovld vstore2(char2 data, size_t offset, __global char *p);
-void __ovld vstore2(uchar2 data, size_t offset, __global uchar *p);
-void __ovld vstore2(short2 data, size_t offset, __global short *p);
-void __ovld vstore2(ushort2 data, size_t offset, __global ushort *p);
-void __ovld vstore2(int2 data, size_t offset, __global int *p);
-void __ovld vstore2(uint2 data, size_t offset, __global uint *p);
-void __ovld vstore2(long2 data, size_t offset, __global long *p);
-void __ovld vstore2(ulong2 data, size_t offset, __global ulong *p);
-void __ovld vstore2(float2 data, size_t offset, __global float *p);
-void __ovld vstore3(char3 data, size_t offset, __global char *p);
-void __ovld vstore3(uchar3 data, size_t offset, __global uchar *p);
-void __ovld vstore3(short3 data, size_t offset, __global short *p);
-void __ovld vstore3(ushort3 data, size_t offset, __global ushort *p);
-void __ovld vstore3(int3 data, size_t offset, __global int *p);
-void __ovld vstore3(uint3 data, size_t offset, __global uint *p);
-void __ovld vstore3(long3 data, size_t offset, __global long *p);
-void __ovld vstore3(ulong3 data, size_t offset, __global ulong *p);
-void __ovld vstore3(float3 data, size_t offset, __global float *p);
-void __ovld vstore4(char4 data, size_t offset, __global char *p);
-void __ovld vstore4(uchar4 data, size_t offset, __global uchar *p);
-void __ovld vstore4(short4 data, size_t offset, __global short *p);
-void __ovld vstore4(ushort4 data, size_t offset, __global ushort *p);
-void __ovld vstore4(int4 data, size_t offset, __global int *p);
-void __ovld vstore4(uint4 data, size_t offset, __global uint *p);
-void __ovld vstore4(long4 data, size_t offset, __global long *p);
-void __ovld vstore4(ulong4 data, size_t offset, __global ulong *p);
-void __ovld vstore4(float4 data, size_t offset, __global float *p);
-void __ovld vstore8(char8 data, size_t offset, __global char *p);
-void __ovld vstore8(uchar8 data, size_t offset, __global uchar *p);
-void __ovld vstore8(short8 data, size_t offset, __global short *p);
-void __ovld vstore8(ushort8 data, size_t offset, __global ushort *p);
-void __ovld vstore8(int8 data, size_t offset, __global int *p);
-void __ovld vstore8(uint8 data, size_t offset, __global uint *p);
-void __ovld vstore8(long8 data, size_t offset, __global long *p);
-void __ovld vstore8(ulong8 data, size_t offset, __global ulong *p);
-void __ovld vstore8(float8 data, size_t offset, __global float *p);
-void __ovld vstore16(char16 data, size_t offset, __global char *p);
-void __ovld vstore16(uchar16 data, size_t offset, __global uchar *p);
-void __ovld vstore16(short16 data, size_t offset, __global short *p);
-void __ovld vstore16(ushort16 data, size_t offset, __global ushort *p);
-void __ovld vstore16(int16 data, size_t offset, __global int *p);
-void __ovld vstore16(uint16 data, size_t offset, __global uint *p);
-void __ovld vstore16(long16 data, size_t offset, __global long *p);
-void __ovld vstore16(ulong16 data, size_t offset, __global ulong *p);
-void __ovld vstore16(float16 data, size_t offset, __global float *p);
-void __ovld vstore2(char2 data, size_t offset, __local char *p);
-void __ovld vstore2(uchar2 data, size_t offset, __local uchar *p);
-void __ovld vstore2(short2 data, size_t offset, __local short *p);
-void __ovld vstore2(ushort2 data, size_t offset, __local ushort *p);
-void __ovld vstore2(int2 data, size_t offset, __local int *p);
-void __ovld vstore2(uint2 data, size_t offset, __local uint *p);
-void __ovld vstore2(long2 data, size_t offset, __local long *p);
-void __ovld vstore2(ulong2 data, size_t offset, __local ulong *p);
-void __ovld vstore2(float2 data, size_t offset, __local float *p);
-void __ovld vstore3(char3 data, size_t offset, __local char *p);
-void __ovld vstore3(uchar3 data, size_t offset, __local uchar *p);
-void __ovld vstore3(short3 data, size_t offset, __local short *p);
-void __ovld vstore3(ushort3 data, size_t offset, __local ushort *p);
-void __ovld vstore3(int3 data, size_t offset, __local int *p);
-void __ovld vstore3(uint3 data, size_t offset, __local uint *p);
-void __ovld vstore3(long3 data, size_t offset, __local long *p);
-void __ovld vstore3(ulong3 data, size_t offset, __local ulong *p);
-void __ovld vstore3(float3 data, size_t offset, __local float *p);
-void __ovld vstore4(char4 data, size_t offset, __local char *p);
-void __ovld vstore4(uchar4 data, size_t offset, __local uchar *p);
-void __ovld vstore4(short4 data, size_t offset, __local short *p);
-void __ovld vstore4(ushort4 data, size_t offset, __local ushort *p);
-void __ovld vstore4(int4 data, size_t offset, __local int *p);
-void __ovld vstore4(uint4 data, size_t offset, __local uint *p);
-void __ovld vstore4(long4 data, size_t offset, __local long *p);
-void __ovld vstore4(ulong4 data, size_t offset, __local ulong *p);
-void __ovld vstore4(float4 data, size_t offset, __local float *p);
-void __ovld vstore8(char8 data, size_t offset, __local char *p);
-void __ovld vstore8(uchar8 data, size_t offset, __local uchar *p);
-void __ovld vstore8(short8 data, size_t offset, __local short *p);
-void __ovld vstore8(ushort8 data, size_t offset, __local ushort *p);
-void __ovld vstore8(int8 data, size_t offset, __local int *p);
-void __ovld vstore8(uint8 data, size_t offset, __local uint *p);
-void __ovld vstore8(long8 data, size_t offset, __local long *p);
-void __ovld vstore8(ulong8 data, size_t offset, __local ulong *p);
-void __ovld vstore8(float8 data, size_t offset, __local float *p);
-void __ovld vstore16(char16 data, size_t offset, __local char *p);
-void __ovld vstore16(uchar16 data, size_t offset, __local uchar *p);
-void __ovld vstore16(short16 data, size_t offset, __local short *p);
-void __ovld vstore16(ushort16 data, size_t offset, __local ushort *p);
-void __ovld vstore16(int16 data, size_t offset, __local int *p);
-void __ovld vstore16(uint16 data, size_t offset, __local uint *p);
-void __ovld vstore16(long16 data, size_t offset, __local long *p);
-void __ovld vstore16(ulong16 data, size_t offset, __local ulong *p);
-void __ovld vstore16(float16 data, size_t offset, __local float *p);
-void __ovld vstore2(char2 data, size_t offset, __private char *p);
-void __ovld vstore2(uchar2 data, size_t offset, __private uchar *p);
-void __ovld vstore2(short2 data, size_t offset, __private short *p);
-void __ovld vstore2(ushort2 data, size_t offset, __private ushort *p);
-void __ovld vstore2(int2 data, size_t offset, __private int *p);
-void __ovld vstore2(uint2 data, size_t offset, __private uint *p);
-void __ovld vstore2(long2 data, size_t offset, __private long *p);
-void __ovld vstore2(ulong2 data, size_t offset, __private ulong *p);
-void __ovld vstore2(float2 data, size_t offset, __private float *p);
-void __ovld vstore3(char3 data, size_t offset, __private char *p);
-void __ovld vstore3(uchar3 data, size_t offset, __private uchar *p);
-void __ovld vstore3(short3 data, size_t offset, __private short *p);
-void __ovld vstore3(ushort3 data, size_t offset, __private ushort *p);
-void __ovld vstore3(int3 data, size_t offset, __private int *p);
-void __ovld vstore3(uint3 data, size_t offset, __private uint *p);
-void __ovld vstore3(long3 data, size_t offset, __private long *p);
-void __ovld vstore3(ulong3 data, size_t offset, __private ulong *p);
-void __ovld vstore3(float3 data, size_t offset, __private float *p);
-void __ovld vstore4(char4 data, size_t offset, __private char *p);
-void __ovld vstore4(uchar4 data, size_t offset, __private uchar *p);
-void __ovld vstore4(short4 data, size_t offset, __private short *p);
-void __ovld vstore4(ushort4 data, size_t offset, __private ushort *p);
-void __ovld vstore4(int4 data, size_t offset, __private int *p);
-void __ovld vstore4(uint4 data, size_t offset, __private uint *p);
-void __ovld vstore4(long4 data, size_t offset, __private long *p);
-void __ovld vstore4(ulong4 data, size_t offset, __private ulong *p);
-void __ovld vstore4(float4 data, size_t offset, __private float *p);
-void __ovld vstore8(char8 data, size_t offset, __private char *p);
-void __ovld vstore8(uchar8 data, size_t offset, __private uchar *p);
-void __ovld vstore8(short8 data, size_t offset, __private short *p);
-void __ovld vstore8(ushort8 data, size_t offset, __private ushort *p);
-void __ovld vstore8(int8 data, size_t offset, __private int *p);
-void __ovld vstore8(uint8 data, size_t offset, __private uint *p);
-void __ovld vstore8(long8 data, size_t offset, __private long *p);
-void __ovld vstore8(ulong8 data, size_t offset, __private ulong *p);
-void __ovld vstore8(float8 data, size_t offset, __private float *p);
-void __ovld vstore16(char16 data, size_t offset, __private char *p);
-void __ovld vstore16(uchar16 data, size_t offset, __private uchar *p);
-void __ovld vstore16(short16 data, size_t offset, __private short *p);
-void __ovld vstore16(ushort16 data, size_t offset, __private ushort *p);
-void __ovld vstore16(int16 data, size_t offset, __private int *p);
-void __ovld vstore16(uint16 data, size_t offset, __private uint *p);
-void __ovld vstore16(long16 data, size_t offset, __private long *p);
-void __ovld vstore16(ulong16 data, size_t offset, __private ulong *p);
-void __ovld vstore16(float16 data, size_t offset, __private float *p);
+void __ovld vstore2(char2, size_t, __global char *);
+void __ovld vstore2(uchar2, size_t, __global uchar *);
+void __ovld vstore2(short2, size_t, __global short *);
+void __ovld vstore2(ushort2, size_t, __global ushort *);
+void __ovld vstore2(int2, size_t, __global int *);
+void __ovld vstore2(uint2, size_t, __global uint *);
+void __ovld vstore2(long2, size_t, __global long *);
+void __ovld vstore2(ulong2, size_t, __global ulong *);
+void __ovld vstore2(float2, size_t, __global float *);
+void __ovld vstore3(char3, size_t, __global char *);
+void __ovld vstore3(uchar3, size_t, __global uchar *);
+void __ovld vstore3(short3, size_t, __global short *);
+void __ovld vstore3(ushort3, size_t, __global ushort *);
+void __ovld vstore3(int3, size_t, __global int *);
+void __ovld vstore3(uint3, size_t, __global uint *);
+void __ovld vstore3(long3, size_t, __global long *);
+void __ovld vstore3(ulong3, size_t, __global ulong *);
+void __ovld vstore3(float3, size_t, __global float *);
+void __ovld vstore4(char4, size_t, __global char *);
+void __ovld vstore4(uchar4, size_t, __global uchar *);
+void __ovld vstore4(short4, size_t, __global short *);
+void __ovld vstore4(ushort4, size_t, __global ushort *);
+void __ovld vstore4(int4, size_t, __global int *);
+void __ovld vstore4(uint4, size_t, __global uint *);
+void __ovld vstore4(long4, size_t, __global long *);
+void __ovld vstore4(ulong4, size_t, __global ulong *);
+void __ovld vstore4(float4, size_t, __global float *);
+void __ovld vstore8(char8, size_t, __global char *);
+void __ovld vstore8(uchar8, size_t, __global uchar *);
+void __ovld vstore8(short8, size_t, __global short *);
+void __ovld vstore8(ushort8, size_t, __global ushort *);
+void __ovld vstore8(int8, size_t, __global int *);
+void __ovld vstore8(uint8, size_t, __global uint *);
+void __ovld vstore8(long8, size_t, __global long *);
+void __ovld vstore8(ulong8, size_t, __global ulong *);
+void __ovld vstore8(float8, size_t, __global float *);
+void __ovld vstore16(char16, size_t, __global char *);
+void __ovld vstore16(uchar16, size_t, __global uchar *);
+void __ovld vstore16(short16, size_t, __global short *);
+void __ovld vstore16(ushort16, size_t, __global ushort *);
+void __ovld vstore16(int16, size_t, __global int *);
+void __ovld vstore16(uint16, size_t, __global uint *);
+void __ovld vstore16(long16, size_t, __global long *);
+void __ovld vstore16(ulong16, size_t, __global ulong *);
+void __ovld vstore16(float16, size_t, __global float *);
+void __ovld vstore2(char2, size_t, __local char *);
+void __ovld vstore2(uchar2, size_t, __local uchar *);
+void __ovld vstore2(short2, size_t, __local short *);
+void __ovld vstore2(ushort2, size_t, __local ushort *);
+void __ovld vstore2(int2, size_t, __local int *);
+void __ovld vstore2(uint2, size_t, __local uint *);
+void __ovld vstore2(long2, size_t, __local long *);
+void __ovld vstore2(ulong2, size_t, __local ulong *);
+void __ovld vstore2(float2, size_t, __local float *);
+void __ovld vstore3(char3, size_t, __local char *);
+void __ovld vstore3(uchar3, size_t, __local uchar *);
+void __ovld vstore3(short3, size_t, __local short *);
+void __ovld vstore3(ushort3, size_t, __local ushort *);
+void __ovld vstore3(int3, size_t, __local int *);
+void __ovld vstore3(uint3, size_t, __local uint *);
+void __ovld vstore3(long3, size_t, __local long *);
+void __ovld vstore3(ulong3, size_t, __local ulong *);
+void __ovld vstore3(float3, size_t, __local float *);
+void __ovld vstore4(char4, size_t, __local char *);
+void __ovld vstore4(uchar4, size_t, __local uchar *);
+void __ovld vstore4(short4, size_t, __local short *);
+void __ovld vstore4(ushort4, size_t, __local ushort *);
+void __ovld vstore4(int4, size_t, __local int *);
+void __ovld vstore4(uint4, size_t, __local uint *);
+void __ovld vstore4(long4, size_t, __local long *);
+void __ovld vstore4(ulong4, size_t, __local ulong *);
+void __ovld vstore4(float4, size_t, __local float *);
+void __ovld vstore8(char8, size_t, __local char *);
+void __ovld vstore8(uchar8, size_t, __local uchar *);
+void __ovld vstore8(short8, size_t, __local short *);
+void __ovld vstore8(ushort8, size_t, __local ushort *);
+void __ovld vstore8(int8, size_t, __local int *);
+void __ovld vstore8(uint8, size_t, __local uint *);
+void __ovld vstore8(long8, size_t, __local long *);
+void __ovld vstore8(ulong8, size_t, __local ulong *);
+void __ovld vstore8(float8, size_t, __local float *);
+void __ovld vstore16(char16, size_t, __local char *);
+void __ovld vstore16(uchar16, size_t, __local uchar *);
+void __ovld vstore16(short16, size_t, __local short *);
+void __ovld vstore16(ushort16, size_t, __local ushort *);
+void __ovld vstore16(int16, size_t, __local int *);
+void __ovld vstore16(uint16, size_t, __local uint *);
+void __ovld vstore16(long16, size_t, __local long *);
+void __ovld vstore16(ulong16, size_t, __local ulong *);
+void __ovld vstore16(float16, size_t, __local float *);
+void __ovld vstore2(char2, size_t, __private char *);
+void __ovld vstore2(uchar2, size_t, __private uchar *);
+void __ovld vstore2(short2, size_t, __private short *);
+void __ovld vstore2(ushort2, size_t, __private ushort *);
+void __ovld vstore2(int2, size_t, __private int *);
+void __ovld vstore2(uint2, size_t, __private uint *);
+void __ovld vstore2(long2, size_t, __private long *);
+void __ovld vstore2(ulong2, size_t, __private ulong *);
+void __ovld vstore2(float2, size_t, __private float *);
+void __ovld vstore3(char3, size_t, __private char *);
+void __ovld vstore3(uchar3, size_t, __private uchar *);
+void __ovld vstore3(short3, size_t, __private short *);
+void __ovld vstore3(ushort3, size_t, __private ushort *);
+void __ovld vstore3(int3, size_t, __private int *);
+void __ovld vstore3(uint3, size_t, __private uint *);
+void __ovld vstore3(long3, size_t, __private long *);
+void __ovld vstore3(ulong3, size_t, __private ulong *);
+void __ovld vstore3(float3, size_t, __private float *);
+void __ovld vstore4(char4, size_t, __private char *);
+void __ovld vstore4(uchar4, size_t, __private uchar *);
+void __ovld vstore4(short4, size_t, __private short *);
+void __ovld vstore4(ushort4, size_t, __private ushort *);
+void __ovld vstore4(int4, size_t, __private int *);
+void __ovld vstore4(uint4, size_t, __private uint *);
+void __ovld vstore4(long4, size_t, __private long *);
+void __ovld vstore4(ulong4, size_t, __private ulong *);
+void __ovld vstore4(float4, size_t, __private float *);
+void __ovld vstore8(char8, size_t, __private char *);
+void __ovld vstore8(uchar8, size_t, __private uchar *);
+void __ovld vstore8(short8, size_t, __private short *);
+void __ovld vstore8(ushort8, size_t, __private ushort *);
+void __ovld vstore8(int8, size_t, __private int *);
+void __ovld vstore8(uint8, size_t, __private uint *);
+void __ovld vstore8(long8, size_t, __private long *);
+void __ovld vstore8(ulong8, size_t, __private ulong *);
+void __ovld vstore8(float8, size_t, __private float *);
+void __ovld vstore16(char16, size_t, __private char *);
+void __ovld vstore16(uchar16, size_t, __private uchar *);
+void __ovld vstore16(short16, size_t, __private short *);
+void __ovld vstore16(ushort16, size_t, __private ushort *);
+void __ovld vstore16(int16, size_t, __private int *);
+void __ovld vstore16(uint16, size_t, __private uint *);
+void __ovld vstore16(long16, size_t, __private long *);
+void __ovld vstore16(ulong16, size_t, __private ulong *);
+void __ovld vstore16(float16, size_t, __private float *);
 #ifdef cl_khr_fp64
-void __ovld vstore2(double2 data, size_t offset, __global double *p);
-void __ovld vstore3(double3 data, size_t offset, __global double *p);
-void __ovld vstore4(double4 data, size_t offset, __global double *p);
-void __ovld vstore8(double8 data, size_t offset, __global double *p);
-void __ovld vstore16(double16 data, size_t offset, __global double *p);
-void __ovld vstore2(double2 data, size_t offset, __local double *p);
-void __ovld vstore3(double3 data, size_t offset, __local double *p);
-void __ovld vstore4(double4 data, size_t offset, __local double *p);
-void __ovld vstore8(double8 data, size_t offset, __local double *p);
-void __ovld vstore16(double16 data, size_t offset, __local double *p);
-void __ovld vstore2(double2 data, size_t offset, __private double *p);
-void __ovld vstore3(double3 data, size_t offset, __private double *p);
-void __ovld vstore4(double4 data, size_t offset, __private double *p);
-void __ovld vstore8(double8 data, size_t offset, __private double *p);
-void __ovld vstore16(double16 data, size_t offset, __private double *p);
+void __ovld vstore2(double2, size_t, __global double *);
+void __ovld vstore3(double3, size_t, __global double *);
+void __ovld vstore4(double4, size_t, __global double *);
+void __ovld vstore8(double8, size_t, __global double *);
+void __ovld vstore16(double16, size_t, __global double *);
+void __ovld vstore2(double2, size_t, __local double *);
+void __ovld vstore3(double3, size_t, __local double *);
+void __ovld vstore4(double4, size_t, __local double *);
+void __ovld vstore8(double8, size_t, __local double *);
+void __ovld vstore16(double16, size_t, __local double *);
+void __ovld vstore2(double2, size_t, __private double *);
+void __ovld vstore3(double3, size_t, __private double *);
+void __ovld vstore4(double4, size_t, __private double *);
+void __ovld vstore8(double8, size_t, __private double *);
+void __ovld vstore16(double16, size_t, __private double *);
 #endif //cl_khr_fp64
 #ifdef cl_khr_fp16
-void __ovld vstore(half data, size_t offset, __global half *p);
-void __ovld vstore2(half2 data, size_t offset, __global half *p);
-void __ovld vstore3(half3 data, size_t offset, __global half *p);
-void __ovld vstore4(half4 data, size_t offset, __global half *p);
-void __ovld vstore8(half8 data, size_t offset, __global half *p);
-void __ovld vstore16(half16 data, size_t offset, __global half *p);
-void __ovld vstore(half data, size_t offset, __local half *p);
-void __ovld vstore2(half2 data, size_t offset, __local half *p);
-void __ovld vstore3(half3 data, size_t offset, __local half *p);
-void __ovld vstore4(half4 data, size_t offset, __local half *p);
-void __ovld vstore8(half8 data, size_t offset, __local half *p);
-void __ovld vstore16(half16 data, size_t offset, __local half *p);
-void __ovld vstore(half data, size_t offset, __private half *p);
-void __ovld vstore2(half2 data, size_t offset, __private half *p);
-void __ovld vstore3(half3 data, size_t offset, __private half *p);
-void __ovld vstore4(half4 data, size_t offset, __private half *p);
-void __ovld vstore8(half8 data, size_t offset, __private half *p);
-void __ovld vstore16(half16 data, size_t offset, __private half *p);
+void __ovld vstore(half, size_t, __global half *);
+void __ovld vstore2(half2, size_t, __global half *);
+void __ovld vstore3(half3, size_t, __global half *);
+void __ovld vstore4(half4, size_t, __global half *);
+void __ovld vstore8(half8, size_t, __global half *);
+void __ovld vstore16(half16, size_t, __global half *);
+void __ovld vstore(half, size_t, __local half *);
+void __ovld vstore2(half2, size_t, __local half *);
+void __ovld vstore3(half3, size_t, __local half *);
+void __ovld vstore4(half4, size_t, __local half *);
+void __ovld vstore8(half8, size_t, __local half *);
+void __ovld vstore16(half16, size_t, __local half *);
+void __ovld vstore(half, size_t, __private half *);
+void __ovld vstore2(half2, size_t, __private half *);
+void __ovld vstore3(half3, size_t, __private half *);
+void __ovld vstore4(half4, size_t, __private half *);
+void __ovld vstore8(half8, size_t, __private half *);
+void __ovld vstore16(half16, size_t, __private half *);
 #endif //cl_khr_fp16
 #endif //defined(__opencl_c_named_address_space_builtins)
 
@@ -11751,15 +11751,15 @@ void __ovld vstore16(half16 data, size_t offset, __private half *p);
  * The read address computed as (p + offset)
  * must be 16-bit aligned.
  */
-float __ovld __purefn vload_half(size_t offset, const __constant half *p);
+float __ovld __purefn vload_half(size_t, const __constant half *);
 #if defined(__opencl_c_generic_address_space)
-float __ovld __purefn vload_half(size_t offset, const half *p);
+float __ovld __purefn vload_half(size_t, const half *);
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-float __ovld __purefn vload_half(size_t offset, const __global half *p);
-float __ovld __purefn vload_half(size_t offset, const __local half *p);
-float __ovld __purefn vload_half(size_t offset, const __private half *p);
+float __ovld __purefn vload_half(size_t, const __global half *);
+float __ovld __purefn vload_half(size_t, const __local half *);
+float __ovld __purefn vload_half(size_t, const __private half *);
 #endif //defined(__opencl_c_named_address_space_builtins)
 
 /**
@@ -11770,35 +11770,35 @@ float __ovld __purefn vload_half(size_t offset, const __private half *p);
  * value is returned. The read address computed
  * as (p + (offset * n)) must be 16-bit aligned.
  */
-float2 __ovld __purefn vload_half2(size_t offset, const __constant half *p);
-float3 __ovld __purefn vload_half3(size_t offset, const __constant half *p);
-float4 __ovld __purefn vload_half4(size_t offset, const __constant half *p);
-float8 __ovld __purefn vload_half8(size_t offset, const __constant half *p);
-float16 __ovld __purefn vload_half16(size_t offset, const __constant half *p);
+float2 __ovld __purefn vload_half2(size_t, const __constant half *);
+float3 __ovld __purefn vload_half3(size_t, const __constant half *);
+float4 __ovld __purefn vload_half4(size_t, const __constant half *);
+float8 __ovld __purefn vload_half8(size_t, const __constant half *);
+float16 __ovld __purefn vload_half16(size_t, const __constant half *);
 #if defined(__opencl_c_generic_address_space)
-float2 __ovld __purefn vload_half2(size_t offset, const half *p);
-float3 __ovld __purefn vload_half3(size_t offset, const half *p);
-float4 __ovld __purefn vload_half4(size_t offset, const half *p);
-float8 __ovld __purefn vload_half8(size_t offset, const half *p);
-float16 __ovld __purefn vload_half16(size_t offset, const half *p);
+float2 __ovld __purefn vload_half2(size_t, const half *);
+float3 __ovld __purefn vload_half3(size_t, const half *);
+float4 __ovld __purefn vload_half4(size_t, const half *);
+float8 __ovld __purefn vload_half8(size_t, const half *);
+float16 __ovld __purefn vload_half16(size_t, const half *);
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-float2 __ovld __purefn vload_half2(size_t offset, const __global half *p);
-float3 __ovld __purefn vload_half3(size_t offset, const __global half *p);
-float4 __ovld __purefn vload_half4(size_t offset, const __global half *p);
-float8 __ovld __purefn vload_half8(size_t offset, const __global half *p);
-float16 __ovld __purefn vload_half16(size_t offset, const __global half *p);
-float2 __ovld __purefn vload_half2(size_t offset, const __local half *p);
-float3 __ovld __purefn vload_half3(size_t offset, const __local half *p);
-float4 __ovld __purefn vload_half4(size_t offset, const __local half *p);
-float8 __ovld __purefn vload_half8(size_t offset, const __local half *p);
-float16 __ovld __purefn vload_half16(size_t offset, const __local half *p);
-float2 __ovld __purefn vload_half2(size_t offset, const __private half *p);
-float3 __ovld __purefn vload_half3(size_t offset, const __private half *p);
-float4 __ovld __purefn vload_half4(size_t offset, const __private half *p);
-float8 __ovld __purefn vload_half8(size_t offset, const __private half *p);
-float16 __ovld __purefn vload_half16(size_t offset, const __private half *p);
+float2 __ovld __purefn vload_half2(size_t, const __global half *);
+float3 __ovld __purefn vload_half3(size_t, const __global half *);
+float4 __ovld __purefn vload_half4(size_t, const __global half *);
+float8 __ovld __purefn vload_half8(size_t, const __global half *);
+float16 __ovld __purefn vload_half16(size_t, const __global half *);
+float2 __ovld __purefn vload_half2(size_t, const __local half *);
+float3 __ovld __purefn vload_half3(size_t, const __local half *);
+float4 __ovld __purefn vload_half4(size_t, const __local half *);
+float8 __ovld __purefn vload_half8(size_t, const __local half *);
+float16 __ovld __purefn vload_half16(size_t, const __local half *);
+float2 __ovld __purefn vload_half2(size_t, const __private half *);
+float3 __ovld __purefn vload_half3(size_t, const __private half *);
+float4 __ovld __purefn vload_half4(size_t, const __private half *);
+float8 __ovld __purefn vload_half8(size_t, const __private half *);
+float16 __ovld __purefn vload_half16(size_t, const __private half *);
 #endif //defined(__opencl_c_named_address_space_builtins)
 
 /**
@@ -11813,52 +11813,52 @@ float16 __ovld __purefn vload_half16(size_t offset, const __private half *p);
  * nearest even.
  */
 #if defined(__opencl_c_generic_address_space)
-void __ovld vstore_half(float data, size_t offset, half *p);
-void __ovld vstore_half_rte(float data, size_t offset, half *p);
-void __ovld vstore_half_rtz(float data, size_t offset, half *p);
-void __ovld vstore_half_rtp(float data, size_t offset, half *p);
-void __ovld vstore_half_rtn(float data, size_t offset, half *p);
+void __ovld vstore_half(float, size_t, half *);
+void __ovld vstore_half_rte(float, size_t, half *);
+void __ovld vstore_half_rtz(float, size_t, half *);
+void __ovld vstore_half_rtp(float, size_t, half *);
+void __ovld vstore_half_rtn(float, size_t, half *);
 #ifdef cl_khr_fp64
-void __ovld vstore_half(double data, size_t offset, half *p);
-void __ovld vstore_half_rte(double data, size_t offset, half *p);
-void __ovld vstore_half_rtz(double data, size_t offset, half *p);
-void __ovld vstore_half_rtp(double data, size_t offset, half *p);
-void __ovld vstore_half_rtn(double data, size_t offset, half *p);
+void __ovld vstore_half(double, size_t, half *);
+void __ovld vstore_half_rte(double, size_t, half *);
+void __ovld vstore_half_rtz(double, size_t, half *);
+void __ovld vstore_half_rtp(double, size_t, half *);
+void __ovld vstore_half_rtn(double, size_t, half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-void __ovld vstore_half(float data, size_t offset, __global half *p);
-void __ovld vstore_half_rte(float data, size_t offset, __global half *p);
-void __ovld vstore_half_rtz(float data, size_t offset, __global half *p);
-void __ovld vstore_half_rtp(float data, size_t offset, __global half *p);
-void __ovld vstore_half_rtn(float data, size_t offset, __global half *p);
-void __ovld vstore_half(float data, size_t offset, __local half *p);
-void __ovld vstore_half_rte(float data, size_t offset, __local half *p);
-void __ovld vstore_half_rtz(float data, size_t offset, __local half *p);
-void __ovld vstore_half_rtp(float data, size_t offset, __local half *p);
-void __ovld vstore_half_rtn(float data, size_t offset, __local half *p);
-void __ovld vstore_half(float data, size_t offset, __private half *p);
-void __ovld vstore_half_rte(float data, size_t offset, __private half *p);
-void __ovld vstore_half_rtz(float data, size_t offset, __private half *p);
-void __ovld vstore_half_rtp(float data, size_t offset, __private half *p);
-void __ovld vstore_half_rtn(float data, size_t offset, __private half *p);
+void __ovld vstore_half(float, size_t, __global half *);
+void __ovld vstore_half_rte(float, size_t, __global half *);
+void __ovld vstore_half_rtz(float, size_t, __global half *);
+void __ovld vstore_half_rtp(float, size_t, __global half *);
+void __ovld vstore_half_rtn(float, size_t, __global half *);
+void __ovld vstore_half(float, size_t, __local half *);
+void __ovld vstore_half_rte(float, size_t, __local half *);
+void __ovld vstore_half_rtz(float, size_t, __local half *);
+void __ovld vstore_half_rtp(float, size_t, __local half *);
+void __ovld vstore_half_rtn(float, size_t, __local half *);
+void __ovld vstore_half(float, size_t, __private half *);
+void __ovld vstore_half_rte(float, size_t, __private half *);
+void __ovld vstore_half_rtz(float, size_t, __private half *);
+void __ovld vstore_half_rtp(float, size_t, __private half *);
+void __ovld vstore_half_rtn(float, size_t, __private half *);
 #ifdef cl_khr_fp64
-void __ovld vstore_half(double data, size_t offset, __global half *p);
-void __ovld vstore_half_rte(double data, size_t offset, __global half *p);
-void __ovld vstore_half_rtz(double data, size_t offset, __global half *p);
-void __ovld vstore_half_rtp(double data, size_t offset, __global half *p);
-void __ovld vstore_half_rtn(double data, size_t offset, __global half *p);
-void __ovld vstore_half(double data, size_t offset, __local half *p);
-void __ovld vstore_half_rte(double data, size_t offset, __local half *p);
-void __ovld vstore_half_rtz(double data, size_t offset, __local half *p);
-void __ovld vstore_half_rtp(double data, size_t offset, __local half *p);
-void __ovld vstore_half_rtn(double data, size_t offset, __local half *p);
-void __ovld vstore_half(double data, size_t offset, __private half *p);
-void __ovld vstore_half_rte(double data, size_t offset, __private half *p);
-void __ovld vstore_half_rtz(double data, size_t offset, __private half *p);
-void __ovld vstore_half_rtp(double data, size_t offset, __private half *p);
-void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
+void __ovld vstore_half(double, size_t, __global half *);
+void __ovld vstore_half_rte(double, size_t, __global half *);
+void __ovld vstore_half_rtz(double, size_t, __global half *);
+void __ovld vstore_half_rtp(double, size_t, __global half *);
+void __ovld vstore_half_rtn(double, size_t, __global half *);
+void __ovld vstore_half(double, size_t, __local half *);
+void __ovld vstore_half_rte(double, size_t, __local half *);
+void __ovld vstore_half_rtz(double, size_t, __local half *);
+void __ovld vstore_half_rtp(double, size_t, __local half *);
+void __ovld vstore_half_rtn(double, size_t, __local half *);
+void __ovld vstore_half(double, size_t, __private half *);
+void __ovld vstore_half_rte(double, size_t, __private half *);
+void __ovld vstore_half_rtz(double, size_t, __private half *);
+void __ovld vstore_half_rtp(double, size_t, __private half *);
+void __ovld vstore_half_rtn(double, size_t, __private half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_named_address_space_builtins)
 
@@ -11874,212 +11874,212 @@ void __ovld vstore_half_rtn(double data, size_t offset, __private half *p);
  * nearest even.
  */
 #if defined(__opencl_c_generic_address_space)
-void __ovld vstore_half2(float2 data, size_t offset, half *p);
-void __ovld vstore_half3(float3 data, size_t offset, half *p);
-void __ovld vstore_half4(float4 data, size_t offset, half *p);
-void __ovld vstore_half8(float8 data, size_t offset, half *p);
-void __ovld vstore_half16(float16 data, size_t offset, half *p);
-void __ovld vstore_half2_rte(float2 data, size_t offset, half *p);
-void __ovld vstore_half3_rte(float3 data, size_t offset, half *p);
-void __ovld vstore_half4_rte(float4 data, size_t offset, half *p);
-void __ovld vstore_half8_rte(float8 data, size_t offset, half *p);
-void __ovld vstore_half16_rte(float16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtz(float2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtz(float3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtz(float4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtz(float8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtz(float16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtp(float2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtp(float3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtp(float4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtp(float8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtp(float16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtn(float2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtn(float3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtn(float4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtn(float8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtn(float16 data, size_t offset, half *p);
+void __ovld vstore_half2(float2, size_t, half *);
+void __ovld vstore_half3(float3, size_t, half *);
+void __ovld vstore_half4(float4, size_t, half *);
+void __ovld vstore_half8(float8, size_t, half *);
+void __ovld vstore_half16(float16, size_t, half *);
+void __ovld vstore_half2_rte(float2, size_t, half *);
+void __ovld vstore_half3_rte(float3, size_t, half *);
+void __ovld vstore_half4_rte(float4, size_t, half *);
+void __ovld vstore_half8_rte(float8, size_t, half *);
+void __ovld vstore_half16_rte(float16, size_t, half *);
+void __ovld vstore_half2_rtz(float2, size_t, half *);
+void __ovld vstore_half3_rtz(float3, size_t, half *);
+void __ovld vstore_half4_rtz(float4, size_t, half *);
+void __ovld vstore_half8_rtz(float8, size_t, half *);
+void __ovld vstore_half16_rtz(float16, size_t, half *);
+void __ovld vstore_half2_rtp(float2, size_t, half *);
+void __ovld vstore_half3_rtp(float3, size_t, half *);
+void __ovld vstore_half4_rtp(float4, size_t, half *);
+void __ovld vstore_half8_rtp(float8, size_t, half *);
+void __ovld vstore_half16_rtp(float16, size_t, half *);
+void __ovld vstore_half2_rtn(float2, size_t, half *);
+void __ovld vstore_half3_rtn(float3, size_t, half *);
+void __ovld vstore_half4_rtn(float4, size_t, half *);
+void __ovld vstore_half8_rtn(float8, size_t, half *);
+void __ovld vstore_half16_rtn(float16, size_t, half *);
 #ifdef cl_khr_fp64
-void __ovld vstore_half2(double2 data, size_t offset, half *p);
-void __ovld vstore_half3(double3 data, size_t offset, half *p);
-void __ovld vstore_half4(double4 data, size_t offset, half *p);
-void __ovld vstore_half8(double8 data, size_t offset, half *p);
-void __ovld vstore_half16(double16 data, size_t offset, half *p);
-void __ovld vstore_half2_rte(double2 data, size_t offset, half *p);
-void __ovld vstore_half3_rte(double3 data, size_t offset, half *p);
-void __ovld vstore_half4_rte(double4 data, size_t offset, half *p);
-void __ovld vstore_half8_rte(double8 data, size_t offset, half *p);
-void __ovld vstore_half16_rte(double16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtz(double2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtz(double3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtz(double4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtz(double8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtz(double16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtp(double2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtp(double3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtp(double4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtp(double8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtp(double16 data, size_t offset, half *p);
-void __ovld vstore_half2_rtn(double2 data, size_t offset, half *p);
-void __ovld vstore_half3_rtn(double3 data, size_t offset, half *p);
-void __ovld vstore_half4_rtn(double4 data, size_t offset, half *p);
-void __ovld vstore_half8_rtn(double8 data, size_t offset, half *p);
-void __ovld vstore_half16_rtn(double16 data, size_t offset, half *p);
+void __ovld vstore_half2(double2, size_t, half *);
+void __ovld vstore_half3(double3, size_t, half *);
+void __ovld vstore_half4(double4, size_t, half *);
+void __ovld vstore_half8(double8, size_t, half *);
+void __ovld vstore_half16(double16, size_t, half *);
+void __ovld vstore_half2_rte(double2, size_t, half *);
+void __ovld vstore_half3_rte(double3, size_t, half *);
+void __ovld vstore_half4_rte(double4, size_t, half *);
+void __ovld vstore_half8_rte(double8, size_t, half *);
+void __ovld vstore_half16_rte(double16, size_t, half *);
+void __ovld vstore_half2_rtz(double2, size_t, half *);
+void __ovld vstore_half3_rtz(double3, size_t, half *);
+void __ovld vstore_half4_rtz(double4, size_t, half *);
+void __ovld vstore_half8_rtz(double8, size_t, half *);
+void __ovld vstore_half16_rtz(double16, size_t, half *);
+void __ovld vstore_half2_rtp(double2, size_t, half *);
+void __ovld vstore_half3_rtp(double3, size_t, half *);
+void __ovld vstore_half4_rtp(double4, size_t, half *);
+void __ovld vstore_half8_rtp(double8, size_t, half *);
+void __ovld vstore_half16_rtp(double16, size_t, half *);
+void __ovld vstore_half2_rtn(double2, size_t, half *);
+void __ovld vstore_half3_rtn(double3, size_t, half *);
+void __ovld vstore_half4_rtn(double4, size_t, half *);
+void __ovld vstore_half8_rtn(double8, size_t, half *);
+void __ovld vstore_half16_rtn(double16, size_t, half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-void __ovld vstore_half2(float2 data, size_t offset, __global half *p);
-void __ovld vstore_half3(float3 data, size_t offset, __global half *p);
-void __ovld vstore_half4(float4 data, size_t offset, __global half *p);
-void __ovld vstore_half8(float8 data, size_t offset, __global half *p);
-void __ovld vstore_half16(float16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rte(float2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rte(float3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rte(float4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rte(float8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rte(float16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtz(float2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtz(float3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtz(float4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtz(float8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtz(float16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtp(float2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtp(float3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtp(float4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtp(float8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtp(float16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtn(float2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtn(float3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtn(float4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtn(float8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtn(float16 data, size_t offset, __global half *p);
-void __ovld vstore_half2(float2 data, size_t offset, __local half *p);
-void __ovld vstore_half3(float3 data, size_t offset, __local half *p);
-void __ovld vstore_half4(float4 data, size_t offset, __local half *p);
-void __ovld vstore_half8(float8 data, size_t offset, __local half *p);
-void __ovld vstore_half16(float16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rte(float2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rte(float3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rte(float4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rte(float8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rte(float16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtz(float2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtz(float3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtz(float4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtz(float8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtz(float16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtp(float2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtp(float3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtp(float4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtp(float8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtp(float16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtn(float2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtn(float3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtn(float4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtn(float8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtn(float16 data, size_t offset, __local half *p);
-void __ovld vstore_half2(float2 data, size_t offset, __private half *p);
-void __ovld vstore_half3(float3 data, size_t offset, __private half *p);
-void __ovld vstore_half4(float4 data, size_t offset, __private half *p);
-void __ovld vstore_half8(float8 data, size_t offset, __private half *p);
-void __ovld vstore_half16(float16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rte(float2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rte(float3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rte(float4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rte(float8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rte(float16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtz(float2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtz(float3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtz(float4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtz(float8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtz(float16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtp(float2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtp(float3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtp(float4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtp(float8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtp(float16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtn(float2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtn(float3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtn(float4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtn(float8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtn(float16 data, size_t offset, __private half *p);
+void __ovld vstore_half2(float2, size_t, __global half *);
+void __ovld vstore_half3(float3, size_t, __global half *);
+void __ovld vstore_half4(float4, size_t, __global half *);
+void __ovld vstore_half8(float8, size_t, __global half *);
+void __ovld vstore_half16(float16, size_t, __global half *);
+void __ovld vstore_half2_rte(float2, size_t, __global half *);
+void __ovld vstore_half3_rte(float3, size_t, __global half *);
+void __ovld vstore_half4_rte(float4, size_t, __global half *);
+void __ovld vstore_half8_rte(float8, size_t, __global half *);
+void __ovld vstore_half16_rte(float16, size_t, __global half *);
+void __ovld vstore_half2_rtz(float2, size_t, __global half *);
+void __ovld vstore_half3_rtz(float3, size_t, __global half *);
+void __ovld vstore_half4_rtz(float4, size_t, __global half *);
+void __ovld vstore_half8_rtz(float8, size_t, __global half *);
+void __ovld vstore_half16_rtz(float16, size_t, __global half *);
+void __ovld vstore_half2_rtp(float2, size_t, __global half *);
+void __ovld vstore_half3_rtp(float3, size_t, __global half *);
+void __ovld vstore_half4_rtp(float4, size_t, __global half *);
+void __ovld vstore_half8_rtp(float8, size_t, __global half *);
+void __ovld vstore_half16_rtp(float16, size_t, __global half *);
+void __ovld vstore_half2_rtn(float2, size_t, __global half *);
+void __ovld vstore_half3_rtn(float3, size_t, __global half *);
+void __ovld vstore_half4_rtn(float4, size_t, __global half *);
+void __ovld vstore_half8_rtn(float8, size_t, __global half *);
+void __ovld vstore_half16_rtn(float16, size_t, __global half *);
+void __ovld vstore_half2(float2, size_t, __local half *);
+void __ovld vstore_half3(float3, size_t, __local half *);
+void __ovld vstore_half4(float4, size_t, __local half *);
+void __ovld vstore_half8(float8, size_t, __local half *);
+void __ovld vstore_half16(float16, size_t, __local half *);
+void __ovld vstore_half2_rte(float2, size_t, __local half *);
+void __ovld vstore_half3_rte(float3, size_t, __local half *);
+void __ovld vstore_half4_rte(float4, size_t, __local half *);
+void __ovld vstore_half8_rte(float8, size_t, __local half *);
+void __ovld vstore_half16_rte(float16, size_t, __local half *);
+void __ovld vstore_half2_rtz(float2, size_t, __local half *);
+void __ovld vstore_half3_rtz(float3, size_t, __local half *);
+void __ovld vstore_half4_rtz(float4, size_t, __local half *);
+void __ovld vstore_half8_rtz(float8, size_t, __local half *);
+void __ovld vstore_half16_rtz(float16, size_t, __local half *);
+void __ovld vstore_half2_rtp(float2, size_t, __local half *);
+void __ovld vstore_half3_rtp(float3, size_t, __local half *);
+void __ovld vstore_half4_rtp(float4, size_t, __local half *);
+void __ovld vstore_half8_rtp(float8, size_t, __local half *);
+void __ovld vstore_half16_rtp(float16, size_t, __local half *);
+void __ovld vstore_half2_rtn(float2, size_t, __local half *);
+void __ovld vstore_half3_rtn(float3, size_t, __local half *);
+void __ovld vstore_half4_rtn(float4, size_t, __local half *);
+void __ovld vstore_half8_rtn(float8, size_t, __local half *);
+void __ovld vstore_half16_rtn(float16, size_t, __local half *);
+void __ovld vstore_half2(float2, size_t, __private half *);
+void __ovld vstore_half3(float3, size_t, __private half *);
+void __ovld vstore_half4(float4, size_t, __private half *);
+void __ovld vstore_half8(float8, size_t, __private half *);
+void __ovld vstore_half16(float16, size_t, __private half *);
+void __ovld vstore_half2_rte(float2, size_t, __private half *);
+void __ovld vstore_half3_rte(float3, size_t, __private half *);
+void __ovld vstore_half4_rte(float4, size_t, __private half *);
+void __ovld vstore_half8_rte(float8, size_t, __private half *);
+void __ovld vstore_half16_rte(float16, size_t, __private half *);
+void __ovld vstore_half2_rtz(float2, size_t, __private half *);
+void __ovld vstore_half3_rtz(float3, size_t, __private half *);
+void __ovld vstore_half4_rtz(float4, size_t, __private half *);
+void __ovld vstore_half8_rtz(float8, size_t, __private half *);
+void __ovld vstore_half16_rtz(float16, size_t, __private half *);
+void __ovld vstore_half2_rtp(float2, size_t, __private half *);
+void __ovld vstore_half3_rtp(float3, size_t, __private half *);
+void __ovld vstore_half4_rtp(float4, size_t, __private half *);
+void __ovld vstore_half8_rtp(float8, size_t, __private half *);
+void __ovld vstore_half16_rtp(float16, size_t, __private half *);
+void __ovld vstore_half2_rtn(float2, size_t, __private half *);
+void __ovld vstore_half3_rtn(float3, size_t, __private half *);
+void __ovld vstore_half4_rtn(float4, size_t, __private half *);
+void __ovld vstore_half8_rtn(float8, size_t, __private half *);
+void __ovld vstore_half16_rtn(float16, size_t, __private half *);
 #ifdef cl_khr_fp64
-void __ovld vstore_half2(double2 data, size_t offset, __global half *p);
-void __ovld vstore_half3(double3 data, size_t offset, __global half *p);
-void __ovld vstore_half4(double4 data, size_t offset, __global half *p);
-void __ovld vstore_half8(double8 data, size_t offset, __global half *p);
-void __ovld vstore_half16(double16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rte(double2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rte(double3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rte(double4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rte(double8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rte(double16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtz(double2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtz(double3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtz(double4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtz(double8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtz(double16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtp(double2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtp(double3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtp(double4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtp(double8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtp(double16 data, size_t offset, __global half *p);
-void __ovld vstore_half2_rtn(double2 data, size_t offset, __global half *p);
-void __ovld vstore_half3_rtn(double3 data, size_t offset, __global half *p);
-void __ovld vstore_half4_rtn(double4 data, size_t offset, __global half *p);
-void __ovld vstore_half8_rtn(double8 data, size_t offset, __global half *p);
-void __ovld vstore_half16_rtn(double16 data, size_t offset, __global half *p);
-void __ovld vstore_half2(double2 data, size_t offset, __local half *p);
-void __ovld vstore_half3(double3 data, size_t offset, __local half *p);
-void __ovld vstore_half4(double4 data, size_t offset, __local half *p);
-void __ovld vstore_half8(double8 data, size_t offset, __local half *p);
-void __ovld vstore_half16(double16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rte(double2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rte(double3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rte(double4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rte(double8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rte(double16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtz(double2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtz(double3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtz(double4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtz(double8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtz(double16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtp(double2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtp(double3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtp(double4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtp(double8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtp(double16 data, size_t offset, __local half *p);
-void __ovld vstore_half2_rtn(double2 data, size_t offset, __local half *p);
-void __ovld vstore_half3_rtn(double3 data, size_t offset, __local half *p);
-void __ovld vstore_half4_rtn(double4 data, size_t offset, __local half *p);
-void __ovld vstore_half8_rtn(double8 data, size_t offset, __local half *p);
-void __ovld vstore_half16_rtn(double16 data, size_t offset, __local half *p);
-void __ovld vstore_half2(double2 data, size_t offset, __private half *p);
-void __ovld vstore_half3(double3 data, size_t offset, __private half *p);
-void __ovld vstore_half4(double4 data, size_t offset, __private half *p);
-void __ovld vstore_half8(double8 data, size_t offset, __private half *p);
-void __ovld vstore_half16(double16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rte(double2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rte(double3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rte(double4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rte(double8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rte(double16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtz(double2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtz(double3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtz(double4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtz(double8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtz(double16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtp(double2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtp(double3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtp(double4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtp(double8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtp(double16 data, size_t offset, __private half *p);
-void __ovld vstore_half2_rtn(double2 data, size_t offset, __private half *p);
-void __ovld vstore_half3_rtn(double3 data, size_t offset, __private half *p);
-void __ovld vstore_half4_rtn(double4 data, size_t offset, __private half *p);
-void __ovld vstore_half8_rtn(double8 data, size_t offset, __private half *p);
-void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
+void __ovld vstore_half2(double2, size_t, __global half *);
+void __ovld vstore_half3(double3, size_t, __global half *);
+void __ovld vstore_half4(double4, size_t, __global half *);
+void __ovld vstore_half8(double8, size_t, __global half *);
+void __ovld vstore_half16(double16, size_t, __global half *);
+void __ovld vstore_half2_rte(double2, size_t, __global half *);
+void __ovld vstore_half3_rte(double3, size_t, __global half *);
+void __ovld vstore_half4_rte(double4, size_t, __global half *);
+void __ovld vstore_half8_rte(double8, size_t, __global half *);
+void __ovld vstore_half16_rte(double16, size_t, __global half *);
+void __ovld vstore_half2_rtz(double2, size_t, __global half *);
+void __ovld vstore_half3_rtz(double3, size_t, __global half *);
+void __ovld vstore_half4_rtz(double4, size_t, __global half *);
+void __ovld vstore_half8_rtz(double8, size_t, __global half *);
+void __ovld vstore_half16_rtz(double16, size_t, __global half *);
+void __ovld vstore_half2_rtp(double2, size_t, __global half *);
+void __ovld vstore_half3_rtp(double3, size_t, __global half *);
+void __ovld vstore_half4_rtp(double4, size_t, __global half *);
+void __ovld vstore_half8_rtp(double8, size_t, __global half *);
+void __ovld vstore_half16_rtp(double16, size_t, __global half *);
+void __ovld vstore_half2_rtn(double2, size_t, __global half *);
+void __ovld vstore_half3_rtn(double3, size_t, __global half *);
+void __ovld vstore_half4_rtn(double4, size_t, __global half *);
+void __ovld vstore_half8_rtn(double8, size_t, __global half *);
+void __ovld vstore_half16_rtn(double16, size_t, __global half *);
+void __ovld vstore_half2(double2, size_t, __local half *);
+void __ovld vstore_half3(double3, size_t, __local half *);
+void __ovld vstore_half4(double4, size_t, __local half *);
+void __ovld vstore_half8(double8, size_t, __local half *);
+void __ovld vstore_half16(double16, size_t, __local half *);
+void __ovld vstore_half2_rte(double2, size_t, __local half *);
+void __ovld vstore_half3_rte(double3, size_t, __local half *);
+void __ovld vstore_half4_rte(double4, size_t, __local half *);
+void __ovld vstore_half8_rte(double8, size_t, __local half *);
+void __ovld vstore_half16_rte(double16, size_t, __local half *);
+void __ovld vstore_half2_rtz(double2, size_t, __local half *);
+void __ovld vstore_half3_rtz(double3, size_t, __local half *);
+void __ovld vstore_half4_rtz(double4, size_t, __local half *);
+void __ovld vstore_half8_rtz(double8, size_t, __local half *);
+void __ovld vstore_half16_rtz(double16, size_t, __local half *);
+void __ovld vstore_half2_rtp(double2, size_t, __local half *);
+void __ovld vstore_half3_rtp(double3, size_t, __local half *);
+void __ovld vstore_half4_rtp(double4, size_t, __local half *);
+void __ovld vstore_half8_rtp(double8, size_t, __local half *);
+void __ovld vstore_half16_rtp(double16, size_t, __local half *);
+void __ovld vstore_half2_rtn(double2, size_t, __local half *);
+void __ovld vstore_half3_rtn(double3, size_t, __local half *);
+void __ovld vstore_half4_rtn(double4, size_t, __local half *);
+void __ovld vstore_half8_rtn(double8, size_t, __local half *);
+void __ovld vstore_half16_rtn(double16, size_t, __local half *);
+void __ovld vstore_half2(double2, size_t, __private half *);
+void __ovld vstore_half3(double3, size_t, __private half *);
+void __ovld vstore_half4(double4, size_t, __private half *);
+void __ovld vstore_half8(double8, size_t, __private half *);
+void __ovld vstore_half16(double16, size_t, __private half *);
+void __ovld vstore_half2_rte(double2, size_t, __private half *);
+void __ovld vstore_half3_rte(double3, size_t, __private half *);
+void __ovld vstore_half4_rte(double4, size_t, __private half *);
+void __ovld vstore_half8_rte(double8, size_t, __private half *);
+void __ovld vstore_half16_rte(double16, size_t, __private half *);
+void __ovld vstore_half2_rtz(double2, size_t, __private half *);
+void __ovld vstore_half3_rtz(double3, size_t, __private half *);
+void __ovld vstore_half4_rtz(double4, size_t, __private half *);
+void __ovld vstore_half8_rtz(double8, size_t, __private half *);
+void __ovld vstore_half16_rtz(double16, size_t, __private half *);
+void __ovld vstore_half2_rtp(double2, size_t, __private half *);
+void __ovld vstore_half3_rtp(double3, size_t, __private half *);
+void __ovld vstore_half4_rtp(double4, size_t, __private half *);
+void __ovld vstore_half8_rtp(double8, size_t, __private half *);
+void __ovld vstore_half16_rtp(double16, size_t, __private half *);
+void __ovld vstore_half2_rtn(double2, size_t, __private half *);
+void __ovld vstore_half3_rtn(double3, size_t, __private half *);
+void __ovld vstore_half4_rtn(double4, size_t, __private half *);
+void __ovld vstore_half8_rtn(double8, size_t, __private half *);
+void __ovld vstore_half16_rtn(double16, size_t, __private half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_named_address_space_builtins)
 
@@ -12096,35 +12096,35 @@ void __ovld vstore_half16_rtn(double16 data, size_t offset, __private half *p);
  * The address computed as (p + (offset * 4))
  * must be aligned to sizeof (half) * 4 bytes.
  */
-float2 __ovld __purefn vloada_half2(size_t offset, const __constant half *p);
-float3 __ovld __purefn vloada_half3(size_t offset, const __constant half *p);
-float4 __ovld __purefn vloada_half4(size_t offset, const __constant half *p);
-float8 __ovld __purefn vloada_half8(size_t offset, const __constant half *p);
-float16 __ovld __purefn vloada_half16(size_t offset, const __constant half *p);
+float2 __ovld __purefn vloada_half2(size_t, const __constant half *);
+float3 __ovld __purefn vloada_half3(size_t, const __constant half *);
+float4 __ovld __purefn vloada_half4(size_t, const __constant half *);
+float8 __ovld __purefn vloada_half8(size_t, const __constant half *);
+float16 __ovld __purefn vloada_half16(size_t, const __constant half *);
 #if defined(__opencl_c_generic_address_space)
-float2 __ovld __purefn vloada_half2(size_t offset, const half *p);
-float3 __ovld __purefn vloada_half3(size_t offset, const half *p);
-float4 __ovld __purefn vloada_half4(size_t offset, const half *p);
-float8 __ovld __purefn vloada_half8(size_t offset, const half *p);
-float16 __ovld __purefn vloada_half16(size_t offset, const half *p);
+float2 __ovld __purefn vloada_half2(size_t, const half *);
+float3 __ovld __purefn vloada_half3(size_t, const half *);
+float4 __ovld __purefn vloada_half4(size_t, const half *);
+float8 __ovld __purefn vloada_half8(size_t, const half *);
+float16 __ovld __purefn vloada_half16(size_t, const half *);
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-float2 __ovld __purefn vloada_half2(size_t offset, const __global half *p);
-float3 __ovld __purefn vloada_half3(size_t offset, const __global half *p);
-float4 __ovld __purefn vloada_half4(size_t offset, const __global half *p);
-float8 __ovld __purefn vloada_half8(size_t offset, const __global half *p);
-float16 __ovld __purefn vloada_half16(size_t offset, const __global half *p);
-float2 __ovld __purefn vloada_half2(size_t offset, const __local half *p);
-float3 __ovld __purefn vloada_half3(size_t offset, const __local half *p);
-float4 __ovld __purefn vloada_half4(size_t offset, const __local half *p);
-float8 __ovld __purefn vloada_half8(size_t offset, const __local half *p);
-float16 __ovld __purefn vloada_half16(size_t offset, const __local half *p);
-float2 __ovld __purefn vloada_half2(size_t offset, const __private half *p);
-float3 __ovld __purefn vloada_half3(size_t offset, const __private half *p);
-float4 __ovld __purefn vloada_half4(size_t offset, const __private half *p);
-float8 __ovld __purefn vloada_half8(size_t offset, const __private half *p);
-float16 __ovld __purefn vloada_half16(size_t offset, const __private half *p);
+float2 __ovld __purefn vloada_half2(size_t, const __global half *);
+float3 __ovld __purefn vloada_half3(size_t, const __global half *);
+float4 __ovld __purefn vloada_half4(size_t, const __global half *);
+float8 __ovld __purefn vloada_half8(size_t, const __global half *);
+float16 __ovld __purefn vloada_half16(size_t, const __global half *);
+float2 __ovld __purefn vloada_half2(size_t, const __local half *);
+float3 __ovld __purefn vloada_half3(size_t, const __local half *);
+float4 __ovld __purefn vloada_half4(size_t, const __local half *);
+float8 __ovld __purefn vloada_half8(size_t, const __local half *);
+float16 __ovld __purefn vloada_half16(size_t, const __local half *);
+float2 __ovld __purefn vloada_half2(size_t, const __private half *);
+float3 __ovld __purefn vloada_half3(size_t, const __private half *);
+float4 __ovld __purefn vloada_half4(size_t, const __private half *);
+float8 __ovld __purefn vloada_half8(size_t, const __private half *);
+float16 __ovld __purefn vloada_half16(size_t, const __private half *);
 #endif //defined(__opencl_c_named_address_space_builtins)
 
 /**
@@ -12144,250 +12144,250 @@ float16 __ovld __purefn vloada_half16(size_t offset, const __private half *p);
  * round to nearest even.
  */
 #if defined(__opencl_c_generic_address_space)
-void __ovld vstorea_half2(float2 data, size_t offset, half *p);
-void __ovld vstorea_half3(float3 data, size_t offset, half *p);
-void __ovld vstorea_half4(float4 data, size_t offset, half *p);
-void __ovld vstorea_half8(float8 data, size_t offset, half *p);
-void __ovld vstorea_half16(float16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rte(float2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rte(float3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rte(float4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rte(float8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rte(float16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtz(float2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtz(float3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtz(float4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtz(float8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtz(float16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtp(float2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtp(float3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtp(float4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtp(float8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtp(float16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtn(float2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtn(float3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtn(float4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtn(float8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtn(float16 data, size_t offset, half *p);
+void __ovld vstorea_half2(float2, size_t, half *);
+void __ovld vstorea_half3(float3, size_t, half *);
+void __ovld vstorea_half4(float4, size_t, half *);
+void __ovld vstorea_half8(float8, size_t, half *);
+void __ovld vstorea_half16(float16, size_t, half *);
+
+void __ovld vstorea_half2_rte(float2, size_t, half *);
+void __ovld vstorea_half3_rte(float3, size_t, half *);
+void __ovld vstorea_half4_rte(float4, size_t, half *);
+void __ovld vstorea_half8_rte(float8, size_t, half *);
+void __ovld vstorea_half16_rte(float16, size_t, half *);
+
+void __ovld vstorea_half2_rtz(float2, size_t, half *);
+void __ovld vstorea_half3_rtz(float3, size_t, half *);
+void __ovld vstorea_half4_rtz(float4, size_t, half *);
+void __ovld vstorea_half8_rtz(float8, size_t, half *);
+void __ovld vstorea_half16_rtz(float16, size_t, half *);
+
+void __ovld vstorea_half2_rtp(float2, size_t, half *);
+void __ovld vstorea_half3_rtp(float3, size_t, half *);
+void __ovld vstorea_half4_rtp(float4, size_t, half *);
+void __ovld vstorea_half8_rtp(float8, size_t, half *);
+void __ovld vstorea_half16_rtp(float16, size_t, half *);
+
+void __ovld vstorea_half2_rtn(float2, size_t, half *);
+void __ovld vstorea_half3_rtn(float3, size_t, half *);
+void __ovld vstorea_half4_rtn(float4, size_t, half *);
+void __ovld vstorea_half8_rtn(float8, size_t, half *);
+void __ovld vstorea_half16_rtn(float16, size_t, half *);
 
 #ifdef cl_khr_fp64
-void __ovld vstorea_half2(double2 data, size_t offset, half *p);
-void __ovld vstorea_half3(double3 data, size_t offset, half *p);
-void __ovld vstorea_half4(double4 data, size_t offset, half *p);
-void __ovld vstorea_half8(double8 data, size_t offset, half *p);
-void __ovld vstorea_half16(double16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rte(double2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rte(double3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rte(double4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rte(double8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rte(double16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtz(double2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtz(double3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtz(double4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtz(double8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtz(double16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtp(double2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtp(double3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtp(double4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtp(double8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtp(double16 data, size_t offset, half *p);
-
-void __ovld vstorea_half2_rtn(double2 data, size_t offset, half *p);
-void __ovld vstorea_half3_rtn(double3 data, size_t offset, half *p);
-void __ovld vstorea_half4_rtn(double4 data, size_t offset, half *p);
-void __ovld vstorea_half8_rtn(double8 data, size_t offset, half *p);
-void __ovld vstorea_half16_rtn(double16 data, size_t offset, half *p);
+void __ovld vstorea_half2(double2, size_t, half *);
+void __ovld vstorea_half3(double3, size_t, half *);
+void __ovld vstorea_half4(double4, size_t, half *);
+void __ovld vstorea_half8(double8, size_t, half *);
+void __ovld vstorea_half16(double16, size_t, half *);
+
+void __ovld vstorea_half2_rte(double2, size_t, half *);
+void __ovld vstorea_half3_rte(double3, size_t, half *);
+void __ovld vstorea_half4_rte(double4, size_t, half *);
+void __ovld vstorea_half8_rte(double8, size_t, half *);
+void __ovld vstorea_half16_rte(double16, size_t, half *);
+
+void __ovld vstorea_half2_rtz(double2, size_t, half *);
+void __ovld vstorea_half3_rtz(double3, size_t, half *);
+void __ovld vstorea_half4_rtz(double4, size_t, half *);
+void __ovld vstorea_half8_rtz(double8, size_t, half *);
+void __ovld vstorea_half16_rtz(double16, size_t, half *);
+
+void __ovld vstorea_half2_rtp(double2, size_t, half *);
+void __ovld vstorea_half3_rtp(double3, size_t, half *);
+void __ovld vstorea_half4_rtp(double4, size_t, half *);
+void __ovld vstorea_half8_rtp(double8, size_t, half *);
+void __ovld vstorea_half16_rtp(double16, size_t, half *);
+
+void __ovld vstorea_half2_rtn(double2, size_t, half *);
+void __ovld vstorea_half3_rtn(double3, size_t, half *);
+void __ovld vstorea_half4_rtn(double4, size_t, half *);
+void __ovld vstorea_half8_rtn(double8, size_t, half *);
+void __ovld vstorea_half16_rtn(double16, size_t, half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_generic_address_space)
 
 #if defined(__opencl_c_named_address_space_builtins)
-void __ovld vstorea_half2(float2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3(float3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4(float4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8(float8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16(float16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rte(float2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rte(float3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rte(float4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rte(float8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rte(float16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtz(float2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtz(float3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtz(float4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtz(float8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtz(float16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtp(float2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtp(float3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtp(float4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtp(float8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtp(float16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtn(float2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtn(float3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtn(float4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtn(float8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtn(float16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2(float2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3(float3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4(float4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8(float8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16(float16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rte(float2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rte(float3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rte(float4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rte(float8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rte(float16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtz(float2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtz(float3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtz(float4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtz(float8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtz(float16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtp(float2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtp(float3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtp(float4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtp(float8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtp(float16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtn(float2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtn(float3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtn(float4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtn(float8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtn(float16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2(float2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3(float3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4(float4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8(float8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16(float16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rte(float2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rte(float3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rte(float4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rte(float8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rte(float16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtz(float2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rtz(float3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rtz(float4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rtz(float8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rtz(float16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtp(float2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rtp(float3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rtp(float4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rtp(float8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rtp(float16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtn(float2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rtn(float3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rtn(float4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rtn(float8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rtn(float16 data, size_t offset, __private half *p);
+void __ovld vstorea_half2(float2, size_t, __global half *);
+void __ovld vstorea_half3(float3, size_t, __global half *);
+void __ovld vstorea_half4(float4, size_t, __global half *);
+void __ovld vstorea_half8(float8, size_t, __global half *);
+void __ovld vstorea_half16(float16, size_t, __global half *);
+
+void __ovld vstorea_half2_rte(float2, size_t, __global half *);
+void __ovld vstorea_half3_rte(float3, size_t, __global half *);
+void __ovld vstorea_half4_rte(float4, size_t, __global half *);
+void __ovld vstorea_half8_rte(float8, size_t, __global half *);
+void __ovld vstorea_half16_rte(float16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtz(float2, size_t, __global half *);
+void __ovld vstorea_half3_rtz(float3, size_t, __global half *);
+void __ovld vstorea_half4_rtz(float4, size_t, __global half *);
+void __ovld vstorea_half8_rtz(float8, size_t, __global half *);
+void __ovld vstorea_half16_rtz(float16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtp(float2, size_t, __global half *);
+void __ovld vstorea_half3_rtp(float3, size_t, __global half *);
+void __ovld vstorea_half4_rtp(float4, size_t, __global half *);
+void __ovld vstorea_half8_rtp(float8, size_t, __global half *);
+void __ovld vstorea_half16_rtp(float16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtn(float2, size_t, __global half *);
+void __ovld vstorea_half3_rtn(float3, size_t, __global half *);
+void __ovld vstorea_half4_rtn(float4, size_t, __global half *);
+void __ovld vstorea_half8_rtn(float8, size_t, __global half *);
+void __ovld vstorea_half16_rtn(float16, size_t, __global half *);
+
+void __ovld vstorea_half2(float2, size_t, __local half *);
+void __ovld vstorea_half3(float3, size_t, __local half *);
+void __ovld vstorea_half4(float4, size_t, __local half *);
+void __ovld vstorea_half8(float8, size_t, __local half *);
+void __ovld vstorea_half16(float16, size_t, __local half *);
+
+void __ovld vstorea_half2_rte(float2, size_t, __local half *);
+void __ovld vstorea_half3_rte(float3, size_t, __local half *);
+void __ovld vstorea_half4_rte(float4, size_t, __local half *);
+void __ovld vstorea_half8_rte(float8, size_t, __local half *);
+void __ovld vstorea_half16_rte(float16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtz(float2, size_t, __local half *);
+void __ovld vstorea_half3_rtz(float3, size_t, __local half *);
+void __ovld vstorea_half4_rtz(float4, size_t, __local half *);
+void __ovld vstorea_half8_rtz(float8, size_t, __local half *);
+void __ovld vstorea_half16_rtz(float16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtp(float2, size_t, __local half *);
+void __ovld vstorea_half3_rtp(float3, size_t, __local half *);
+void __ovld vstorea_half4_rtp(float4, size_t, __local half *);
+void __ovld vstorea_half8_rtp(float8, size_t, __local half *);
+void __ovld vstorea_half16_rtp(float16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtn(float2, size_t, __local half *);
+void __ovld vstorea_half3_rtn(float3, size_t, __local half *);
+void __ovld vstorea_half4_rtn(float4, size_t, __local half *);
+void __ovld vstorea_half8_rtn(float8, size_t, __local half *);
+void __ovld vstorea_half16_rtn(float16, size_t, __local half *);
+
+void __ovld vstorea_half2(float2, size_t, __private half *);
+void __ovld vstorea_half3(float3, size_t, __private half *);
+void __ovld vstorea_half4(float4, size_t, __private half *);
+void __ovld vstorea_half8(float8, size_t, __private half *);
+void __ovld vstorea_half16(float16, size_t, __private half *);
+
+void __ovld vstorea_half2_rte(float2, size_t, __private half *);
+void __ovld vstorea_half3_rte(float3, size_t, __private half *);
+void __ovld vstorea_half4_rte(float4, size_t, __private half *);
+void __ovld vstorea_half8_rte(float8, size_t, __private half *);
+void __ovld vstorea_half16_rte(float16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtz(float2, size_t, __private half *);
+void __ovld vstorea_half3_rtz(float3, size_t, __private half *);
+void __ovld vstorea_half4_rtz(float4, size_t, __private half *);
+void __ovld vstorea_half8_rtz(float8, size_t, __private half *);
+void __ovld vstorea_half16_rtz(float16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtp(float2, size_t, __private half *);
+void __ovld vstorea_half3_rtp(float3, size_t, __private half *);
+void __ovld vstorea_half4_rtp(float4, size_t, __private half *);
+void __ovld vstorea_half8_rtp(float8, size_t, __private half *);
+void __ovld vstorea_half16_rtp(float16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtn(float2, size_t, __private half *);
+void __ovld vstorea_half3_rtn(float3, size_t, __private half *);
+void __ovld vstorea_half4_rtn(float4, size_t, __private half *);
+void __ovld vstorea_half8_rtn(float8, size_t, __private half *);
+void __ovld vstorea_half16_rtn(float16, size_t, __private half *);
 
 #ifdef cl_khr_fp64
-void __ovld vstorea_half2(double2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3(double3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4(double4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8(double8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16(double16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rte(double2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rte(double3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rte(double4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rte(double8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rte(double16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtz(double2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtz(double3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtz(double4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtz(double8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtz(double16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtp(double2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtp(double3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtp(double4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtp(double8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtp(double16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2_rtn(double2 data, size_t offset, __global half *p);
-void __ovld vstorea_half3_rtn(double3 data, size_t offset, __global half *p);
-void __ovld vstorea_half4_rtn(double4 data, size_t offset, __global half *p);
-void __ovld vstorea_half8_rtn(double8 data, size_t offset, __global half *p);
-void __ovld vstorea_half16_rtn(double16 data, size_t offset, __global half *p);
-
-void __ovld vstorea_half2(double2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3(double3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4(double4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8(double8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16(double16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rte(double2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rte(double3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rte(double4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rte(double8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rte(double16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtz(double2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtz(double3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtz(double4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtz(double8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtz(double16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtp(double2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtp(double3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtp(double4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtp(double8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtp(double16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2_rtn(double2 data, size_t offset, __local half *p);
-void __ovld vstorea_half3_rtn(double3 data, size_t offset, __local half *p);
-void __ovld vstorea_half4_rtn(double4 data, size_t offset, __local half *p);
-void __ovld vstorea_half8_rtn(double8 data, size_t offset, __local half *p);
-void __ovld vstorea_half16_rtn(double16 data, size_t offset, __local half *p);
-
-void __ovld vstorea_half2(double2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3(double3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4(double4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8(double8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16(double16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rte(double2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rte(double3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rte(double4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rte(double8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rte(double16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtz(double2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rtz(double3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rtz(double4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rtz(double8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rtz(double16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtp(double2 data, size_t offset, __private half *p);
-void __ovld vstorea_half3_rtp(double3 data, size_t offset, __private half *p);
-void __ovld vstorea_half4_rtp(double4 data, size_t offset, __private half *p);
-void __ovld vstorea_half8_rtp(double8 data, size_t offset, __private half *p);
-void __ovld vstorea_half16_rtp(double16 data, size_t offset, __private half *p);
-
-void __ovld vstorea_half2_rtn(double2 data,size_t offset, __private half *p);
-void __ovld vstorea_half3_rtn(double3 data,size_t offset, __private half *p);
-void __ovld vstorea_half4_rtn(double4 data,size_t offset, __private half *p);
-void __ovld vstorea_half8_rtn(double8 data,size_t offset, __private half *p);
-void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p);
+void __ovld vstorea_half2(double2, size_t, __global half *);
+void __ovld vstorea_half3(double3, size_t, __global half *);
+void __ovld vstorea_half4(double4, size_t, __global half *);
+void __ovld vstorea_half8(double8, size_t, __global half *);
+void __ovld vstorea_half16(double16, size_t, __global half *);
+
+void __ovld vstorea_half2_rte(double2, size_t, __global half *);
+void __ovld vstorea_half3_rte(double3, size_t, __global half *);
+void __ovld vstorea_half4_rte(double4, size_t, __global half *);
+void __ovld vstorea_half8_rte(double8, size_t, __global half *);
+void __ovld vstorea_half16_rte(double16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtz(double2, size_t, __global half *);
+void __ovld vstorea_half3_rtz(double3, size_t, __global half *);
+void __ovld vstorea_half4_rtz(double4, size_t, __global half *);
+void __ovld vstorea_half8_rtz(double8, size_t, __global half *);
+void __ovld vstorea_half16_rtz(double16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtp(double2, size_t, __global half *);
+void __ovld vstorea_half3_rtp(double3, size_t, __global half *);
+void __ovld vstorea_half4_rtp(double4, size_t, __global half *);
+void __ovld vstorea_half8_rtp(double8, size_t, __global half *);
+void __ovld vstorea_half16_rtp(double16, size_t, __global half *);
+
+void __ovld vstorea_half2_rtn(double2, size_t, __global half *);
+void __ovld vstorea_half3_rtn(double3, size_t, __global half *);
+void __ovld vstorea_half4_rtn(double4, size_t, __global half *);
+void __ovld vstorea_half8_rtn(double8, size_t, __global half *);
+void __ovld vstorea_half16_rtn(double16, size_t, __global half *);
+
+void __ovld vstorea_half2(double2, size_t, __local half *);
+void __ovld vstorea_half3(double3, size_t, __local half *);
+void __ovld vstorea_half4(double4, size_t, __local half *);
+void __ovld vstorea_half8(double8, size_t, __local half *);
+void __ovld vstorea_half16(double16, size_t, __local half *);
+
+void __ovld vstorea_half2_rte(double2, size_t, __local half *);
+void __ovld vstorea_half3_rte(double3, size_t, __local half *);
+void __ovld vstorea_half4_rte(double4, size_t, __local half *);
+void __ovld vstorea_half8_rte(double8, size_t, __local half *);
+void __ovld vstorea_half16_rte(double16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtz(double2, size_t, __local half *);
+void __ovld vstorea_half3_rtz(double3, size_t, __local half *);
+void __ovld vstorea_half4_rtz(double4, size_t, __local half *);
+void __ovld vstorea_half8_rtz(double8, size_t, __local half *);
+void __ovld vstorea_half16_rtz(double16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtp(double2, size_t, __local half *);
+void __ovld vstorea_half3_rtp(double3, size_t, __local half *);
+void __ovld vstorea_half4_rtp(double4, size_t, __local half *);
+void __ovld vstorea_half8_rtp(double8, size_t, __local half *);
+void __ovld vstorea_half16_rtp(double16, size_t, __local half *);
+
+void __ovld vstorea_half2_rtn(double2, size_t, __local half *);
+void __ovld vstorea_half3_rtn(double3, size_t, __local half *);
+void __ovld vstorea_half4_rtn(double4, size_t, __local half *);
+void __ovld vstorea_half8_rtn(double8, size_t, __local half *);
+void __ovld vstorea_half16_rtn(double16, size_t, __local half *);
+
+void __ovld vstorea_half2(double2, size_t, __private half *);
+void __ovld vstorea_half3(double3, size_t, __private half *);
+void __ovld vstorea_half4(double4, size_t, __private half *);
+void __ovld vstorea_half8(double8, size_t, __private half *);
+void __ovld vstorea_half16(double16, size_t, __private half *);
+
+void __ovld vstorea_half2_rte(double2, size_t, __private half *);
+void __ovld vstorea_half3_rte(double3, size_t, __private half *);
+void __ovld vstorea_half4_rte(double4, size_t, __private half *);
+void __ovld vstorea_half8_rte(double8, size_t, __private half *);
+void __ovld vstorea_half16_rte(double16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtz(double2, size_t, __private half *);
+void __ovld vstorea_half3_rtz(double3, size_t, __private half *);
+void __ovld vstorea_half4_rtz(double4, size_t, __private half *);
+void __ovld vstorea_half8_rtz(double8, size_t, __private half *);
+void __ovld vstorea_half16_rtz(double16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtp(double2, size_t, __private half *);
+void __ovld vstorea_half3_rtp(double3, size_t, __private half *);
+void __ovld vstorea_half4_rtp(double4, size_t, __private half *);
+void __ovld vstorea_half8_rtp(double8, size_t, __private half *);
+void __ovld vstorea_half16_rtp(double16, size_t, __private half *);
+
+void __ovld vstorea_half2_rtn(double2, size_t, __private half *);
+void __ovld vstorea_half3_rtn(double3, size_t, __private half *);
+void __ovld vstorea_half4_rtn(double4, size_t, __private half *);
+void __ovld vstorea_half8_rtn(double8, size_t, __private half *);
+void __ovld vstorea_half16_rtn(double16, size_t, __private half *);
 #endif //cl_khr_fp64
 #endif //defined(__opencl_c_named_address_space_builtins)
 
@@ -13012,29 +13012,29 @@ ulong __ovld atom_xchg(volatile __local ulong *p, ulong val);
  * (old + 1) and store result at location
  * pointed by p. The function returns old.
  */
-int __ovld atomic_inc(volatile __global int *p);
-uint __ovld atomic_inc(volatile __global uint *p);
-int __ovld atomic_inc(volatile __local int *p);
-uint __ovld atomic_inc(volatile __local uint *p);
+int __ovld atomic_inc(volatile __global int *);
+uint __ovld atomic_inc(volatile __global uint *);
+int __ovld atomic_inc(volatile __local int *);
+uint __ovld atomic_inc(volatile __local uint *);
 #ifdef __OPENCL_CPP_VERSION__
-int __ovld atomic_inc(volatile int *p);
-uint __ovld atomic_inc(volatile uint *p);
+int __ovld atomic_inc(volatile int *);
+uint __ovld atomic_inc(volatile uint *);
 #endif
 
 #if defined(cl_khr_global_int32_base_atomics)
-int __ovld atom_inc(volatile __global int *p);
-uint __ovld atom_inc(volatile __global uint *p);
+int __ovld atom_inc(volatile __global int *);
+uint __ovld atom_inc(volatile __global uint *);
 #endif
 #if defined(cl_khr_local_int32_base_atomics)
-int __ovld atom_inc(volatile __local int *p);
-uint __ovld atom_inc(volatile __local uint *p);
+int __ovld atom_inc(volatile __local int *);
+uint __ovld atom_inc(volatile __local uint *);
 #endif
 
 #if defined(cl_khr_int64_base_atomics)
-long __ovld atom_inc(volatile __global long *p);
-ulong __ovld atom_inc(volatile __global ulong *p);
-long __ovld atom_inc(volatile __local long *p);
-ulong __ovld atom_inc(volatile __local ulong *p);
+long __ovld atom_inc(volatile __global long *);
+ulong __ovld atom_inc(volatile __global ulong *);
+long __ovld atom_inc(volatile __local long *);
+ulong __ovld atom_inc(volatile __local ulong *);
 #endif
 
 /**
@@ -13043,29 +13043,29 @@ ulong __ovld atom_inc(volatile __local ulong *p);
  * (old - 1) and store result at location
  * pointed by p. The function returns old.
  */
-int __ovld atomic_dec(volatile __global int *p);
-uint __ovld atomic_dec(volatile __global uint *p);
-int __ovld atomic_dec(volatile __local int *p);
-uint __ovld atomic_dec(volatile __local uint *p);
+int __ovld atomic_dec(volatile __global int *);
+uint __ovld atomic_dec(volatile __global uint *);
+int __ovld atomic_dec(volatile __local int *);
+uint __ovld atomic_dec(volatile __local uint *);
 #ifdef __OPENCL_CPP_VERSION__
-int __ovld atomic_dec(volatile int *p);
-uint __ovld atomic_dec(volatile uint *p);
+int __ovld atomic_dec(volatile int *);
+uint __ovld atomic_dec(volatile uint *);
 #endif
 
 #if defined(cl_khr_global_int32_base_atomics)
-int __ovld atom_dec(volatile __global int *p);
-uint __ovld atom_dec(volatile __global uint *p);
+int __ovld atom_dec(volatile __global int *);
+uint __ovld atom_dec(volatile __global uint *);
 #endif
 #if defined(cl_khr_local_int32_base_atomics)
-int __ovld atom_dec(volatile __local int *p);
-uint __ovld atom_dec(volatile __local uint *p);
+int __ovld atom_dec(volatile __local int *);
+uint __ovld atom_dec(volatile __local uint *);
 #endif
 
 #if defined(cl_khr_int64_base_atomics)
-long __ovld atom_dec(volatile __global long *p);
-ulong __ovld atom_dec(volatile __global ulong *p);
-long __ovld atom_dec(volatile __local long *p);
-ulong __ovld atom_dec(volatile __local ulong *p);
+long __ovld atom_dec(volatile __global long *);
+ulong __ovld atom_dec(volatile __global ulong *);
+long __ovld atom_dec(volatile __local long *);
+ulong __ovld atom_dec(volatile __local ulong *);
 #endif
 
 /**


        


More information about the cfe-commits mailing list