I haven't forgotten about these two... I'm just trying to figure out some ambiguity in the 1.2/2.0/2.2 specs related to whether a scalar version (vec-size 1 with no numeric suffix) is needed. The CTS checks that vloada_half is supported, while the specs' language changes a bit between versions and never gets to what I'd call a consistent state.<div><br></div><div>--Aaron<br><br><div class="gmail_quote"><div dir="ltr">On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <<a href="mailto:jan.vesely@rutgers.edu">jan.vesely@rutgers.edu</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Passes newly posted piglits on Turks.<br>
<br>
Signed-off-by: Jan Vesely <<a href="mailto:jan.vesely@rutgers.edu" target="_blank">jan.vesely@rutgers.edu</a>><br>
---<br>
 generic/include/clc/shared/vload.h | 30 +++++++++++++++---------------<br>
 generic/lib/shared/<a href="http://vload.cl" rel="noreferrer" target="_blank">vload.cl</a>        | 10 ++++++++--<br>
 generic/lib/shared/vload_half.inc  | 26 ++++++++++++++++++--------<br>
 3 files changed, 41 insertions(+), 25 deletions(-)<br>
<br>
diff --git a/generic/include/clc/shared/vload.h b/generic/include/clc/shared/vload.h<br>
index 8c262dd..f6ae917 100644<br>
--- a/generic/include/clc/shared/vload.h<br>
+++ b/generic/include/clc/shared/vload.h<br>
@@ -12,22 +12,24 @@<br>
   _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \<br>
   _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \<br>
   _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \<br>
-  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \<br>
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)<br>
<br>
 #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \<br>
-  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \<br>
+  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)<br>
<br>
-#define _CLC_VECTOR_VLOAD_PRIM() \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(char) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(uchar) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(short) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(ushort) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(int) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(uint) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(long) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(ulong) \<br>
-    _CLC_VECTOR_VLOAD_PRIM1(float) \<br>
-    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)<br>
+// Declare vector load prototypes<br>
+_CLC_VECTOR_VLOAD_PRIM1(char)<br>
+_CLC_VECTOR_VLOAD_PRIM1(uchar)<br>
+_CLC_VECTOR_VLOAD_PRIM1(short)<br>
+_CLC_VECTOR_VLOAD_PRIM1(ushort)<br>
+_CLC_VECTOR_VLOAD_PRIM1(int)<br>
+_CLC_VECTOR_VLOAD_PRIM1(uint)<br>
+_CLC_VECTOR_VLOAD_PRIM1(long)<br>
+_CLC_VECTOR_VLOAD_PRIM1(ulong)<br>
+_CLC_VECTOR_VLOAD_PRIM1(float)<br>
+_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)<br>
+// Use suffix to declare aligned vloada_halfN<br>
+_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)<br>
<br>
 #ifdef cl_khr_fp64<br>
 #pragma OPENCL EXTENSION cl_khr_fp64: enable<br>
@@ -38,7 +40,6 @@<br>
   _CLC_VECTOR_VLOAD_PRIM1(half)<br>
 #endif<br>
<br>
-_CLC_VECTOR_VLOAD_PRIM()<br>
 // Plain vload_half also needs to be declared<br>
 _CLC_VLOAD_DECL(_half, half, float, , __constant)<br>
 _CLC_VLOAD_DECL(_half, half, float, , __global)<br>
@@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)<br>
 #undef _CLC_VECTOR_VLOAD_DECL<br>
 #undef _CLC_VECTOR_VLOAD_PRIM3<br>
 #undef _CLC_VECTOR_VLOAD_PRIM1<br>
-#undef _CLC_VECTOR_VLOAD_PRIM<br>
diff --git a/generic/lib/shared/<a href="http://vload.cl" rel="noreferrer" target="_blank">vload.cl</a> b/generic/lib/shared/<a href="http://vload.cl" rel="noreferrer" target="_blank">vload.cl</a><br>
index 0892270..9c37fcf 100644<br>
--- a/generic/lib/shared/<a href="http://vload.cl" rel="noreferrer" target="_blank">vload.cl</a><br>
+++ b/generic/lib/shared/<a href="http://vload.cl" rel="noreferrer" target="_blank">vload.cl</a><br>
@@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const __private half *);<br>
        VEC_LOAD8(val.lo, AS) \<br>
        VEC_LOAD8(val.hi, AS)<br>
<br>
-#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \<br>
+#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \<br>
   _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \<br>
     offset *= VEC_SIZE; \<br>
     TYPE __tmp; \<br>
     VEC_LOAD##VEC_SIZE(__tmp, AS) \<br>
     return __tmp; \<br>
+  } \<br>
+  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS half *mem) { \<br>
+    offset *= OFFSET_SIZE; \<br>
+    TYPE __tmp; \<br>
+    VEC_LOAD##VEC_SIZE(__tmp, AS) \<br>
+    return __tmp; \<br>
   }<br>
<br>
-#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)<br>
+#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)<br>
<br>
 #define __CLC_BODY "vload_half.inc"<br>
 #include <clc/math/gentype.inc><br>
diff --git a/generic/lib/shared/vload_half.inc b/generic/lib/shared/vload_half.inc<br>
index 00dae8a..11b2bf7 100644<br>
--- a/generic/lib/shared/vload_half.inc<br>
+++ b/generic/lib/shared/vload_half.inc<br>
@@ -1,13 +1,23 @@<br>
 #if __CLC_FPSIZE == 32<br>
+<br>
 #ifdef __CLC_VECSIZE<br>
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);<br>
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);<br>
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);<br>
-  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);<br>
+<br>
+#if __CLC_VECSIZE == 3<br>
+#  define __CLC_OFFSET 4<br>
 #else<br>
-  FUNC(, 1, __CLC_GENTYPE, __private);<br>
-  FUNC(, 1, __CLC_GENTYPE, __local);<br>
-  FUNC(, 1, __CLC_GENTYPE, __global);<br>
-  FUNC(, 1, __CLC_GENTYPE, __constant);<br>
+#  define __CLC_OFFSET __CLC_VECSIZE<br>
+#endif<br>
+<br>
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private);<br>
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local);<br>
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global);<br>
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __constant);<br>
+<br>
+#undef __CLC_OFFSET<br>
+#else<br>
+  FUNC(, 1, 1, __CLC_GENTYPE, __private);<br>
+  FUNC(, 1, 1, __CLC_GENTYPE, __local);<br>
+  FUNC(, 1, 1, __CLC_GENTYPE, __global);<br>
+  FUNC(, 1, 1, __CLC_GENTYPE, __constant);<br>
 #endif<br>
 #endif<br>
--<br>
2.13.5<br>
<br>
</blockquote></div></div>