[libclc] r312839 - Implement vload_half{,n} and vload(half)

Jan Vesely via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 8 16:59:00 PDT 2017


Author: jvesely
Date: Fri Sep  8 16:59:00 2017
New Revision: 312839

URL: http://llvm.org/viewvc/llvm-project?rev=312839&view=rev
Log:
Implement vload_half{,n} and vload(half)

v2: add vload(half) as well
    make helpers amdgpu-specific (NVPTX uses a different private address space (AS) numbering)
    use clang builtin on clang >= 6

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
Reviewed-by: Tom Stellard <tstellar at redhat.com>

Added:
    libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll
    libclc/trunk/generic/lib/shared/vload_half.inc
Modified:
    libclc/trunk/amdgpu/lib/SOURCES_4.0
    libclc/trunk/amdgpu/lib/SOURCES_5.0
    libclc/trunk/generic/include/clc/shared/vload.h
    libclc/trunk/generic/lib/shared/vload.cl

Modified: libclc/trunk/amdgpu/lib/SOURCES_4.0
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/SOURCES_4.0?rev=312839&r1=312838&r2=312839&view=diff
==============================================================================
--- libclc/trunk/amdgpu/lib/SOURCES_4.0 (original)
+++ libclc/trunk/amdgpu/lib/SOURCES_4.0 Fri Sep  8 16:59:00 2017
@@ -1 +1,2 @@
+shared/vload_half_helpers.ll
 shared/vstore_half_helpers.ll

Modified: libclc/trunk/amdgpu/lib/SOURCES_5.0
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/SOURCES_5.0?rev=312839&r1=312838&r2=312839&view=diff
==============================================================================
--- libclc/trunk/amdgpu/lib/SOURCES_5.0 (original)
+++ libclc/trunk/amdgpu/lib/SOURCES_5.0 Fri Sep  8 16:59:00 2017
@@ -1 +1,2 @@
+shared/vload_half_helpers.ll
 shared/vstore_half_helpers.ll

Added: libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll?rev=312839&view=auto
==============================================================================
--- libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll (added)
+++ libclc/trunk/amdgpu/lib/shared/vload_half_helpers.ll Fri Sep  8 16:59:00 2017
@@ -0,0 +1,23 @@
+define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { ; Load one half through an addrspace(0) pointer and return it widened to float.
+  %data = load half, half addrspace(0)* %ptr ; read the half value stored at %ptr
+  %res = fpext half %data to float ; fpext widens the half to float
+  ret float %res
+}
+
+define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { ; Load one half through an addrspace(1) pointer and return it widened to float.
+  %data = load half, half addrspace(1)* %ptr ; read the half value stored at %ptr
+  %res = fpext half %data to float ; fpext widens the half to float
+  ret float %res
+}
+
+define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { ; Load one half through an addrspace(3) pointer and return it widened to float.
+  %data = load half, half addrspace(3)* %ptr ; read the half value stored at %ptr
+  %res = fpext half %data to float ; fpext widens the half to float
+  ret float %res
+}
+
+define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline { ; Load one half through an addrspace(2) pointer and return it widened to float.
+  %data = load half, half addrspace(2)* %ptr ; read the half value stored at %ptr
+  %res = fpext half %data to float ; fpext widens the half to float
+  ret float %res
+}

Modified: libclc/trunk/generic/include/clc/shared/vload.h
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/include/clc/shared/vload.h?rev=312839&r1=312838&r2=312839&view=diff
==============================================================================
--- libclc/trunk/generic/include/clc/shared/vload.h (original)
+++ libclc/trunk/generic/include/clc/shared/vload.h Fri Sep  8 16:59:00 2017
@@ -1,18 +1,21 @@
-#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
-  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
+#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) /* declares one overload named vload<SUFFIX><WIDTH> */ \
+  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x); /* returns VEC_TYPE; the pointer is to MEM_TYPE in ADDR_SPACE, so the in-memory type may differ from the result type */
 
-#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
-  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) /* declares the width 2, 3, 4, 8 and 16 overloads for one address space */ \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) /* result type is PRIM_TYPE with the width pasted on */ \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
+  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
+
+#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) /* repeats the vector declarations for each of the four address-space qualifiers */ \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
+  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
 
 #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
-  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
+  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
 
 #define _CLC_VECTOR_VLOAD_PRIM() \
     _CLC_VECTOR_VLOAD_PRIM1(char) \
@@ -24,14 +27,26 @@
     _CLC_VECTOR_VLOAD_PRIM1(long) \
     _CLC_VECTOR_VLOAD_PRIM1(ulong) \
     _CLC_VECTOR_VLOAD_PRIM1(float) \
-        
+    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
+
 #ifdef cl_khr_fp64
-#define _CLC_VECTOR_VLOAD() \
-  _CLC_VECTOR_VLOAD_PRIM1(double) \
-  _CLC_VECTOR_VLOAD_PRIM()
-#else
-#define _CLC_VECTOR_VLOAD() \
-  _CLC_VECTOR_VLOAD_PRIM()
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+  _CLC_VECTOR_VLOAD_PRIM1(double)
 #endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16: enable
+  _CLC_VECTOR_VLOAD_PRIM1(half)
+#endif
+
+_CLC_VECTOR_VLOAD_PRIM()
+// Plain vload_half also needs to be declared
+_CLC_VLOAD_DECL(_half, half, float, , __constant)
+_CLC_VLOAD_DECL(_half, half, float, , __global)
+_CLC_VLOAD_DECL(_half, half, float, , __local)
+_CLC_VLOAD_DECL(_half, half, float, , __private)
 
-_CLC_VECTOR_VLOAD()
+#undef _CLC_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_DECL
+#undef _CLC_VECTOR_VLOAD_PRIM3
+#undef _CLC_VECTOR_VLOAD_PRIM1
+#undef _CLC_VECTOR_VLOAD_PRIM

Modified: libclc/trunk/generic/lib/shared/vload.cl
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vload.cl?rev=312839&r1=312838&r2=312839&view=diff
==============================================================================
--- libclc/trunk/generic/lib/shared/vload.cl (original)
+++ libclc/trunk/generic/lib/shared/vload.cl Fri Sep  8 16:59:00 2017
@@ -50,3 +50,62 @@ VLOAD_TYPES()
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
     VLOAD_ADDR_SPACES(double)
 #endif
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+    VLOAD_ADDR_SPACES(half)
+#endif
+
+/* The vload_half functions are legal even without cl_khr_fp16. */
+/* There is no vload_half variant returning double. */
+#if __clang_major__ < 6 /* before clang 6: go through the per-address-space helper functions declared below */
+float __clc_vload_half_float_helper__constant(const __constant half *); /* one prototype per address-space qualifier; each takes a half pointer and returns float */
+float __clc_vload_half_float_helper__global(const __global half *);
+float __clc_vload_half_float_helper__local(const __local half *);
+float __clc_vload_half_float_helper__private(const __private half *);
+
+#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]); /* AS (e.g. __global) is pasted onto the helper name; reads mem[offset], then advances offset */
+#else
+#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]); /* clang >= 6: use the compiler builtin instead; AS is not used in this variant */
+#endif
+
+#define VEC_LOAD2(val, AS) /* two VEC_LOAD1 expansions: val.lo first, then val.hi */ \
+	VEC_LOAD1(val.lo, AS) \
+	VEC_LOAD1(val.hi, AS)
+#define VEC_LOAD3(val, AS) /* three VEC_LOAD1 expansions: .s0, .s1, .s2 in that order */ \
+	VEC_LOAD1(val.s0, AS) \
+	VEC_LOAD1(val.s1, AS) \
+	VEC_LOAD1(val.s2, AS)
+#define VEC_LOAD4(val, AS) /* built from two VEC_LOAD2 expansions on val.lo and val.hi */ \
+	VEC_LOAD2(val.lo, AS) \
+	VEC_LOAD2(val.hi, AS)
+#define VEC_LOAD8(val, AS) /* built from two VEC_LOAD4 expansions on val.lo and val.hi */ \
+	VEC_LOAD4(val.lo, AS) \
+	VEC_LOAD4(val.hi, AS)
+#define VEC_LOAD16(val, AS) /* built from two VEC_LOAD8 expansions on val.lo and val.hi */ \
+	VEC_LOAD8(val.lo, AS) \
+	VEC_LOAD8(val.hi, AS)
+
+#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) /* defines the vload_half<SUFFIX> overload that reads from an AS-qualified half pointer and returns TYPE */ \
+  _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
+    offset *= VEC_SIZE; /* scale the vector index to an element index into mem */ \
+    TYPE __tmp; \
+    VEC_LOAD##VEC_SIZE(__tmp, AS) /* selects VEC_LOAD1/2/3/4/8/16; each VEC_LOAD1 inside assigns one element and advances offset */ \
+    return __tmp; \
+  }
+
+#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) /* extra macro level so arguments that are themselves macros get expanded before the ## pasting in __FUNC */
+
+#define __CLC_BODY "vload_half.inc" /* NOTE(review): presumably consumed by the include on the next line; confirm in gentype.inc */
+#include <clc/math/gentype.inc> /* NOTE(review): presumably includes __CLC_BODY once per generic type; gentype.inc is not visible here */
+#undef __CLC_BODY
+#undef FUNC /* from here on: remove the helper macros defined above */
+#undef __FUNC
+#undef VEC_LOAD16
+#undef VEC_LOAD8
+#undef VEC_LOAD4
+#undef VEC_LOAD3
+#undef VEC_LOAD2
+#undef VEC_LOAD1
+#undef VLOAD_TYPES /* NOTE(review): this and the next two macros are defined outside the lines shown in this hunk */
+#undef VLOAD_ADDR_SPACES
+#undef VLOAD_VECTORIZE

Added: libclc/trunk/generic/lib/shared/vload_half.inc
URL: http://llvm.org/viewvc/llvm-project/libclc/trunk/generic/lib/shared/vload_half.inc?rev=312839&view=auto
==============================================================================
--- libclc/trunk/generic/lib/shared/vload_half.inc (added)
+++ libclc/trunk/generic/lib/shared/vload_half.inc Fri Sep  8 16:59:00 2017
@@ -0,0 +1,13 @@
+#if __CLC_FPSIZE == 32 /* emit definitions only when __CLC_FPSIZE is 32 */
+#ifdef __CLC_VECSIZE /* vector case: __CLC_VECSIZE is passed as both the name suffix and the element count */
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
+  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
+#else /* scalar case: empty suffix and an element count of 1 */
+  FUNC(, 1, __CLC_GENTYPE, __private);
+  FUNC(, 1, __CLC_GENTYPE, __local);
+  FUNC(, 1, __CLC_GENTYPE, __global);
+  FUNC(, 1, __CLC_GENTYPE, __constant);
+#endif
+#endif




More information about the cfe-commits mailing list