[libclc] 3d21fa5 - libclc: Make all built-ins overloadable

Tom Stellard via cfe-commits cfe-commits at lists.llvm.org
Mon Aug 17 13:56:21 PDT 2020


Author: Daniel Stone
Date: 2020-08-17T13:55:48-07:00
New Revision: 3d21fa56f5f5afbbf16b35b199480af71e1189a3

URL: https://github.com/llvm/llvm-project/commit/3d21fa56f5f5afbbf16b35b199480af71e1189a3
DIFF: https://github.com/llvm/llvm-project/commit/3d21fa56f5f5afbbf16b35b199480af71e1189a3.diff

LOG: libclc: Make all built-ins overloadable

The SPIR spec states that all OpenCL built-in functions should be
overloadable and mangled, to ensure consistency.

Add the overload attribute to functions which were missing them:
work dimensions, memory barriers and fences, and events.

Reviewed By: tstellar, jenatali

Differential Revision: https://reviews.llvm.org/D82078

Added: 
    

Modified: 
    libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
    libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
    libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
    libclc/amdgcn/lib/mem_fence/fence.cl
    libclc/amdgcn/lib/synchronization/barrier.cl
    libclc/amdgcn/lib/workitem/get_global_offset.cl
    libclc/amdgcn/lib/workitem/get_global_size.cl
    libclc/amdgcn/lib/workitem/get_group_id.cl
    libclc/amdgcn/lib/workitem/get_local_id.cl
    libclc/amdgcn/lib/workitem/get_local_size.cl
    libclc/amdgcn/lib/workitem/get_num_groups.cl
    libclc/amdgcn/lib/workitem/get_work_dim.cl
    libclc/generic/include/clc/async/wait_group_events.h
    libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
    libclc/generic/include/clc/synchronization/barrier.h
    libclc/generic/include/clc/workitem/get_global_id.h
    libclc/generic/include/clc/workitem/get_global_offset.h
    libclc/generic/include/clc/workitem/get_global_size.h
    libclc/generic/include/clc/workitem/get_group_id.h
    libclc/generic/include/clc/workitem/get_local_id.h
    libclc/generic/include/clc/workitem/get_local_size.h
    libclc/generic/include/clc/workitem/get_num_groups.h
    libclc/generic/include/clc/workitem/get_work_dim.h
    libclc/generic/lib/async/wait_group_events.cl
    libclc/generic/lib/workitem/get_global_id.cl
    libclc/generic/lib/workitem/get_global_size.cl
    libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
    libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
    libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
    libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
    libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
    libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
    libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
    libclc/r600/lib/synchronization/barrier.cl
    libclc/r600/lib/workitem/get_global_offset.cl
    libclc/r600/lib/workitem/get_global_size.cl
    libclc/r600/lib/workitem/get_group_id.cl
    libclc/r600/lib/workitem/get_local_id.cl
    libclc/r600/lib/workitem/get_local_size.cl
    libclc/r600/lib/workitem/get_num_groups.cl
    libclc/r600/lib/workitem/get_work_dim.cl

Removed: 
    


################################################################################
diff  --git a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
index 2f95f9916b2c..62bd2ba28352 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_global_size.cl
@@ -15,10 +15,9 @@
 CONST_AS uchar * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
 #endif
 
-_CLC_DEF size_t get_global_size(uint dim)
-{
-	CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
-	if (dim < 3)
-		return ptr[3 + dim];
-	return 1;
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+  CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
+  if (dim < 3)
+    return ptr[3 + dim];
+  return 1;
 }

diff  --git a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
index 9f208d8aea77..9f09fd5a16ec 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_local_size.cl
@@ -15,16 +15,15 @@
 CONST_AS char * __clc_amdgcn_dispatch_ptr(void) __asm("llvm.amdgcn.dispatch.ptr");
 #endif
 
-_CLC_DEF size_t get_local_size(uint dim)
-{
-	CONST_AS uint * ptr = (CONST_AS uint *) __dispatch_ptr();
-	switch (dim) {
-	case 0:
-		return ptr[1] & 0xffffu;
-	case 1:
-		return ptr[1] >> 16;
-	case 2:
-		return ptr[2] & 0xffffu;
-	}
-	return 1;
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+  CONST_AS uint *ptr = (CONST_AS uint *)__dispatch_ptr();
+  switch (dim) {
+  case 0:
+    return ptr[1] & 0xffffu;
+  case 1:
+    return ptr[1] >> 16;
+  case 2:
+    return ptr[2] & 0xffffu;
+  }
+  return 1;
 }

diff  --git a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
index 946b526fdb68..35dc22188521 100644
--- a/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
+++ b/libclc/amdgcn-amdhsa/lib/workitem/get_num_groups.cl
@@ -1,7 +1,7 @@
 
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_num_groups(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
   size_t global_size = get_global_size(dim);
   size_t local_size = get_local_size(dim);
   size_t num_groups = global_size / local_size;

diff  --git a/libclc/amdgcn/lib/mem_fence/fence.cl b/libclc/amdgcn/lib/mem_fence/fence.cl
index b85baf755b85..c7a10bb0238a 100644
--- a/libclc/amdgcn/lib/mem_fence/fence.cl
+++ b/libclc/amdgcn/lib/mem_fence/fence.cl
@@ -17,24 +17,21 @@ void __clc_amdgcn_s_waitcnt(unsigned flags);
 _CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned)  __asm("llvm.amdgcn.s.waitcnt");
 #endif
 
-_CLC_DEF void mem_fence(cl_mem_fence_flags flags)
-{
-	if (flags & CLK_GLOBAL_MEM_FENCE) {
-		// scalar loads are counted with LGKM but we don't know whether
-		// the compiler turned any loads to scalar
-		__waitcnt(0);
-	} else if (flags & CLK_LOCAL_MEM_FENCE)
-		__waitcnt(0xff); // LGKM is [12:8]
+_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
+  if (flags & CLK_GLOBAL_MEM_FENCE) {
+    // scalar loads are counted with LGKM but we don't know whether
+    // the compiler turned any loads to scalar
+    __waitcnt(0);
+  } else if (flags & CLK_LOCAL_MEM_FENCE)
+    __waitcnt(0xff); // LGKM is [12:8]
 }
 #undef __waitcnt
 
 // We don't have separate mechanism for read and write fences
-_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags)
-{
-	mem_fence(flags);
+_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
+  mem_fence(flags);
 }
 
-_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags)
-{
-	mem_fence(flags);
+_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
+  mem_fence(flags);
 }

diff  --git a/libclc/amdgcn/lib/synchronization/barrier.cl b/libclc/amdgcn/lib/synchronization/barrier.cl
index e2f3c1369bbe..82bbd4b530f3 100644
--- a/libclc/amdgcn/lib/synchronization/barrier.cl
+++ b/libclc/amdgcn/lib/synchronization/barrier.cl
@@ -1,7 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF void barrier(cl_mem_fence_flags flags)
-{
-	mem_fence(flags);
-	__builtin_amdgcn_s_barrier();
+_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
+  mem_fence(flags);
+  __builtin_amdgcn_s_barrier();
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_global_offset.cl b/libclc/amdgcn/lib/workitem/get_global_offset.cl
index 0a87cd23f1f8..73d5694523ac 100644
--- a/libclc/amdgcn/lib/workitem/get_global_offset.cl
+++ b/libclc/amdgcn/lib/workitem/get_global_offset.cl
@@ -8,11 +8,9 @@
 #define CONST_AS __attribute__((address_space(2)))
 #endif
 
-_CLC_DEF size_t get_global_offset(uint dim)
-{
-	CONST_AS uint * ptr =
-		(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
-	if (dim < 3)
-		return ptr[dim + 1];
-	return 0;
+_CLC_DEF _CLC_OVERLOAD size_t get_global_offset(uint dim) {
+  CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
+  if (dim < 3)
+    return ptr[dim + 1];
+  return 0;
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_global_size.cl b/libclc/amdgcn/lib/workitem/get_global_size.cl
index c1e3894e4c87..2f28ca606665 100644
--- a/libclc/amdgcn/lib/workitem/get_global_size.cl
+++ b/libclc/amdgcn/lib/workitem/get_global_size.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
 uint __clc_amdgcn_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
 uint __clc_amdgcn_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
 
-_CLC_DEF size_t get_global_size(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_amdgcn_get_global_size_x();
-	case 1: return __clc_amdgcn_get_global_size_y();
-	case 2: return __clc_amdgcn_get_global_size_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_amdgcn_get_global_size_x();
+  case 1:
+    return __clc_amdgcn_get_global_size_y();
+  case 2:
+    return __clc_amdgcn_get_global_size_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_group_id.cl b/libclc/amdgcn/lib/workitem/get_group_id.cl
index eb57b3e2584a..211c86eea10d 100644
--- a/libclc/amdgcn/lib/workitem/get_group_id.cl
+++ b/libclc/amdgcn/lib/workitem/get_group_id.cl
@@ -1,11 +1,14 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_group_id(uint dim)
-{
-	switch(dim) {
-	case 0: return __builtin_amdgcn_workgroup_id_x();
-	case 1: return __builtin_amdgcn_workgroup_id_y();
-	case 2: return __builtin_amdgcn_workgroup_id_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
+  switch (dim) {
+  case 0:
+    return __builtin_amdgcn_workgroup_id_x();
+  case 1:
+    return __builtin_amdgcn_workgroup_id_y();
+  case 2:
+    return __builtin_amdgcn_workgroup_id_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_local_id.cl b/libclc/amdgcn/lib/workitem/get_local_id.cl
index 9f666dea3400..073ecfa40ab4 100644
--- a/libclc/amdgcn/lib/workitem/get_local_id.cl
+++ b/libclc/amdgcn/lib/workitem/get_local_id.cl
@@ -1,11 +1,14 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_local_id(uint dim)
-{
-	switch(dim) {
-	case 0: return __builtin_amdgcn_workitem_id_x();
-	case 1: return __builtin_amdgcn_workitem_id_y();
-	case 2: return __builtin_amdgcn_workitem_id_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
+  switch (dim) {
+  case 0:
+    return __builtin_amdgcn_workitem_id_x();
+  case 1:
+    return __builtin_amdgcn_workitem_id_y();
+  case 2:
+    return __builtin_amdgcn_workitem_id_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_local_size.cl b/libclc/amdgcn/lib/workitem/get_local_size.cl
index 9b19f6b35412..c398b7eb5a9d 100644
--- a/libclc/amdgcn/lib/workitem/get_local_size.cl
+++ b/libclc/amdgcn/lib/workitem/get_local_size.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
 uint __clc_amdgcn_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
 uint __clc_amdgcn_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
 
-_CLC_DEF size_t get_local_size(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_amdgcn_get_local_size_x();
-	case 1: return __clc_amdgcn_get_local_size_y();
-	case 2: return __clc_amdgcn_get_local_size_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_amdgcn_get_local_size_x();
+  case 1:
+    return __clc_amdgcn_get_local_size_y();
+  case 2:
+    return __clc_amdgcn_get_local_size_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_num_groups.cl b/libclc/amdgcn/lib/workitem/get_num_groups.cl
index f921414acc2c..020741e49cb7 100644
--- a/libclc/amdgcn/lib/workitem/get_num_groups.cl
+++ b/libclc/amdgcn/lib/workitem/get_num_groups.cl
@@ -4,12 +4,15 @@ uint __clc_amdgcn_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
 uint __clc_amdgcn_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
 uint __clc_amdgcn_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
 
-_CLC_DEF size_t get_num_groups(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_amdgcn_get_num_groups_x();
-	case 1: return __clc_amdgcn_get_num_groups_y();
-	case 2: return __clc_amdgcn_get_num_groups_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_amdgcn_get_num_groups_x();
+  case 1:
+    return __clc_amdgcn_get_num_groups_y();
+  case 2:
+    return __clc_amdgcn_get_num_groups_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/amdgcn/lib/workitem/get_work_dim.cl b/libclc/amdgcn/lib/workitem/get_work_dim.cl
index 3add9b64f057..cb8cf83a220c 100644
--- a/libclc/amdgcn/lib/workitem/get_work_dim.cl
+++ b/libclc/amdgcn/lib/workitem/get_work_dim.cl
@@ -8,9 +8,7 @@
 #define CONST_AS __attribute__((address_space(2)))
 #endif
 
-_CLC_DEF uint get_work_dim(void)
-{
-	CONST_AS uint * ptr =
-		(CONST_AS uint *) __builtin_amdgcn_implicitarg_ptr();
-	return ptr[0];
+_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
+  CONST_AS uint *ptr = (CONST_AS uint *)__builtin_amdgcn_implicitarg_ptr();
+  return ptr[0];
 }

diff  --git a/libclc/generic/include/clc/async/wait_group_events.h b/libclc/generic/include/clc/async/wait_group_events.h
index 799efa0a791c..d707f4c68a20 100644
--- a/libclc/generic/include/clc/async/wait_group_events.h
+++ b/libclc/generic/include/clc/async/wait_group_events.h
@@ -1 +1,2 @@
-void wait_group_events(int num_events, event_t *event_list);
+_CLC_DECL _CLC_OVERLOAD void wait_group_events(int num_events,
+                                               event_t *event_list);

diff  --git a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
index 8e046b1225de..05c6d7939549 100644
--- a/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
+++ b/libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h
@@ -1,3 +1,3 @@
-_CLC_DECL void mem_fence(cl_mem_fence_flags flags);
-_CLC_DECL void read_mem_fence(cl_mem_fence_flags flags);
-_CLC_DECL void write_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags);

diff  --git a/libclc/generic/include/clc/synchronization/barrier.h b/libclc/generic/include/clc/synchronization/barrier.h
index 7167a3d3f093..63e3ac58e900 100644
--- a/libclc/generic/include/clc/synchronization/barrier.h
+++ b/libclc/generic/include/clc/synchronization/barrier.h
@@ -1 +1 @@
-_CLC_DECL void barrier(cl_mem_fence_flags flags);
+_CLC_DECL _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags);

diff  --git a/libclc/generic/include/clc/workitem/get_global_id.h b/libclc/generic/include/clc/workitem/get_global_id.h
index 92759f146894..3bbace022951 100644
--- a/libclc/generic/include/clc/workitem/get_global_id.h
+++ b/libclc/generic/include/clc/workitem/get_global_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_id(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_global_offset.h b/libclc/generic/include/clc/workitem/get_global_offset.h
index 7f4f6032abe6..ad7b441cf716 100644
--- a/libclc/generic/include/clc/workitem/get_global_offset.h
+++ b/libclc/generic/include/clc/workitem/get_global_offset.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_offset(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_offset(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_global_size.h b/libclc/generic/include/clc/workitem/get_global_size.h
index 2f8370585397..1b7ccf75643d 100644
--- a/libclc/generic/include/clc/workitem/get_global_size.h
+++ b/libclc/generic/include/clc/workitem/get_global_size.h
@@ -1 +1 @@
-_CLC_DECL size_t get_global_size(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_global_size(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_group_id.h b/libclc/generic/include/clc/workitem/get_group_id.h
index 346c82c6c316..b71fbc1990bc 100644
--- a/libclc/generic/include/clc/workitem/get_group_id.h
+++ b/libclc/generic/include/clc/workitem/get_group_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_group_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_group_id(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_local_id.h b/libclc/generic/include/clc/workitem/get_local_id.h
index 169aeed86786..60aa1ec68427 100644
--- a/libclc/generic/include/clc/workitem/get_local_id.h
+++ b/libclc/generic/include/clc/workitem/get_local_id.h
@@ -1 +1 @@
-_CLC_DECL size_t get_local_id(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_local_id(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_local_size.h b/libclc/generic/include/clc/workitem/get_local_size.h
index 040ec58a3d8b..808730fbc38e 100644
--- a/libclc/generic/include/clc/workitem/get_local_size.h
+++ b/libclc/generic/include/clc/workitem/get_local_size.h
@@ -1 +1 @@
-_CLC_DECL size_t get_local_size(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_local_size(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_num_groups.h b/libclc/generic/include/clc/workitem/get_num_groups.h
index e555c7efc2d2..8657eb7f6eaf 100644
--- a/libclc/generic/include/clc/workitem/get_num_groups.h
+++ b/libclc/generic/include/clc/workitem/get_num_groups.h
@@ -1 +1 @@
-_CLC_DECL size_t get_num_groups(uint dim);
+_CLC_DECL _CLC_OVERLOAD size_t get_num_groups(uint dim);

diff  --git a/libclc/generic/include/clc/workitem/get_work_dim.h b/libclc/generic/include/clc/workitem/get_work_dim.h
index ae08ba9a5150..8781b2a974d4 100644
--- a/libclc/generic/include/clc/workitem/get_work_dim.h
+++ b/libclc/generic/include/clc/workitem/get_work_dim.h
@@ -1 +1 @@
-_CLC_DECL uint get_work_dim(void);
+_CLC_DECL _CLC_OVERLOAD uint get_work_dim(void);

diff  --git a/libclc/generic/lib/async/wait_group_events.cl b/libclc/generic/lib/async/wait_group_events.cl
index 05c9d58db45e..5f4eec325a04 100644
--- a/libclc/generic/lib/async/wait_group_events.cl
+++ b/libclc/generic/lib/async/wait_group_events.cl
@@ -1,5 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF void wait_group_events(int num_events, event_t *event_list) {
+_CLC_DEF _CLC_OVERLOAD void wait_group_events(int num_events,
+                                              event_t *event_list) {
   barrier(CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE);
 }

diff  --git a/libclc/generic/lib/workitem/get_global_id.cl b/libclc/generic/lib/workitem/get_global_id.cl
index b6c2ea1d2cca..ccd84d9d8330 100644
--- a/libclc/generic/lib/workitem/get_global_id.cl
+++ b/libclc/generic/lib/workitem/get_global_id.cl
@@ -1,5 +1,5 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_global_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
   return get_group_id(dim) * get_local_size(dim) + get_local_id(dim) + get_global_offset(dim);
 }

diff  --git a/libclc/generic/lib/workitem/get_global_size.cl b/libclc/generic/lib/workitem/get_global_size.cl
index 5ae649e10d51..9bc260782530 100644
--- a/libclc/generic/lib/workitem/get_global_size.cl
+++ b/libclc/generic/lib/workitem/get_global_size.cl
@@ -1,5 +1,5 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_global_size(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
   return get_num_groups(dim)*get_local_size(dim);
 }

diff  --git a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
index 16b039176ece..de078b5e8c19 100644
--- a/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
+++ b/libclc/ptx-nvidiacl/lib/mem_fence/fence.cl
@@ -1,15 +1,15 @@
 #include <clc/clc.h>
 
-_CLC_DEF void mem_fence(cl_mem_fence_flags flags) {
-   if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
-     __nvvm_membar_cta();
+_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
+  if (flags & (CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE))
+    __nvvm_membar_cta();
 }
 
 // We do not have separate mechanism for read and write fences.
-_CLC_DEF void read_mem_fence(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
   mem_fence(flags);
 }
 
-_CLC_DEF void write_mem_fence(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
   mem_fence(flags);
 }

diff  --git a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
index 930e36a2853e..b3d99d797edf 100644
--- a/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
+++ b/libclc/ptx-nvidiacl/lib/synchronization/barrier.cl
@@ -1,6 +1,5 @@
 #include <clc/clc.h>
 
-_CLC_DEF void barrier(cl_mem_fence_flags flags) {
+_CLC_DEF _CLC_OVERLOAD void barrier(cl_mem_fence_flags flags) {
   __syncthreads();
 }
-

diff  --git a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
index 19bc195312cf..a7f5f59a3702 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_global_id.cl
@@ -1,5 +1,5 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_global_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_global_id(uint dim) {
   return get_group_id(dim) * get_local_size(dim) + get_local_id(dim);
 }

diff  --git a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
index dbc47847f9e3..bbbf1068e69e 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_group_id.cl
@@ -1,6 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_group_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_group_id(uint dim) {
   switch (dim) {
   case 0:  return __nvvm_read_ptx_sreg_ctaid_x();
   case 1:  return __nvvm_read_ptx_sreg_ctaid_y();

diff  --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
index f31581a19a3c..a6770f2b9155 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_id.cl
@@ -1,6 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_local_id(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_local_id(uint dim) {
   switch (dim) {
   case 0:  return __nvvm_read_ptx_sreg_tid_x();
   case 1:  return __nvvm_read_ptx_sreg_tid_y();

diff  --git a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
index d00b0d6c9fba..5960d5d79932 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_local_size.cl
@@ -1,6 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_local_size(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
   switch (dim) {
   case 0:  return __nvvm_read_ptx_sreg_ntid_x();
   case 1:  return __nvvm_read_ptx_sreg_ntid_y();

diff  --git a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
index d7abf3f29070..f0e52f1fdbc0 100644
--- a/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
+++ b/libclc/ptx-nvidiacl/lib/workitem/get_num_groups.cl
@@ -1,6 +1,6 @@
 #include <clc/clc.h>
 
-_CLC_DEF size_t get_num_groups(uint dim) {
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
   switch (dim) {
   case 0:  return __nvvm_read_ptx_sreg_nctaid_x();
   case 1:  return __nvvm_read_ptx_sreg_nctaid_y();

diff  --git a/libclc/r600/lib/synchronization/barrier.cl b/libclc/r600/lib/synchronization/barrier.cl
index 98200e7eda92..6a28ee3201de 100644
--- a/libclc/r600/lib/synchronization/barrier.cl
+++ b/libclc/r600/lib/synchronization/barrier.cl
@@ -2,8 +2,7 @@
 
 _CLC_DEF void __clc_r600_barrier(void) __asm("llvm.r600.group.barrier");
 
-_CLC_DEF void barrier(uint flags)
-{
+_CLC_DEF _CLC_OVERLOAD void barrier(uint flags) {
   // We should call mem_fence here, but that is not implemented for r600 yet
   __clc_r600_barrier();
 }

diff  --git a/libclc/r600/lib/workitem/get_global_offset.cl b/libclc/r600/lib/workitem/get_global_offset.cl
index b38ae3377570..7c2e403ea6ec 100644
--- a/libclc/r600/lib/workitem/get_global_offset.cl
+++ b/libclc/r600/lib/workitem/get_global_offset.cl
@@ -1,11 +1,10 @@
 #include <clc/clc.h>
 
-_CLC_DEF uint get_global_offset(uint dim)
-{
-	__attribute__((address_space(7))) uint * ptr =
-		(__attribute__((address_space(7))) uint *)
-		__builtin_r600_implicitarg_ptr();
-	if (dim < 3)
-		return ptr[dim + 1];
-	return 0;
+_CLC_DEF _CLC_OVERLOAD uint get_global_offset(uint dim) {
+  __attribute__((address_space(7))) uint *ptr =
+      (__attribute__((address_space(7)))
+       uint *)__builtin_r600_implicitarg_ptr();
+  if (dim < 3)
+    return ptr[dim + 1];
+  return 0;
 }

diff  --git a/libclc/r600/lib/workitem/get_global_size.cl b/libclc/r600/lib/workitem/get_global_size.cl
index d356929c4948..628136150d84 100644
--- a/libclc/r600/lib/workitem/get_global_size.cl
+++ b/libclc/r600/lib/workitem/get_global_size.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_global_size_x(void) __asm("llvm.r600.read.global.size.x");
 uint __clc_r600_get_global_size_y(void) __asm("llvm.r600.read.global.size.y");
 uint __clc_r600_get_global_size_z(void) __asm("llvm.r600.read.global.size.z");
 
-_CLC_DEF size_t get_global_size(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_r600_get_global_size_x();
-	case 1: return __clc_r600_get_global_size_y();
-	case 2: return __clc_r600_get_global_size_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_global_size(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_r600_get_global_size_x();
+  case 1:
+    return __clc_r600_get_global_size_y();
+  case 2:
+    return __clc_r600_get_global_size_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/r600/lib/workitem/get_group_id.cl b/libclc/r600/lib/workitem/get_group_id.cl
index e5efc0a85778..1fb993ace72e 100644
--- a/libclc/r600/lib/workitem/get_group_id.cl
+++ b/libclc/r600/lib/workitem/get_group_id.cl
@@ -1,11 +1,14 @@
 #include <clc/clc.h>
 
-_CLC_DEF uint get_group_id(uint dim)
-{
-	switch(dim) {
-	case 0: return __builtin_r600_read_tgid_x();
-	case 1: return __builtin_r600_read_tgid_y();
-	case 2: return __builtin_r600_read_tgid_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD uint get_group_id(uint dim) {
+  switch (dim) {
+  case 0:
+    return __builtin_r600_read_tgid_x();
+  case 1:
+    return __builtin_r600_read_tgid_y();
+  case 2:
+    return __builtin_r600_read_tgid_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/r600/lib/workitem/get_local_id.cl b/libclc/r600/lib/workitem/get_local_id.cl
index a871a5d77f0c..80fdc344193b 100644
--- a/libclc/r600/lib/workitem/get_local_id.cl
+++ b/libclc/r600/lib/workitem/get_local_id.cl
@@ -1,11 +1,14 @@
 #include <clc/clc.h>
 
-_CLC_DEF uint get_local_id(uint dim)
-{
-	switch(dim) {
-	case 0: return __builtin_r600_read_tidig_x();
-	case 1: return __builtin_r600_read_tidig_y();
-	case 2: return __builtin_r600_read_tidig_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD uint get_local_id(uint dim) {
+  switch (dim) {
+  case 0:
+    return __builtin_r600_read_tidig_x();
+  case 1:
+    return __builtin_r600_read_tidig_y();
+  case 2:
+    return __builtin_r600_read_tidig_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/r600/lib/workitem/get_local_size.cl b/libclc/r600/lib/workitem/get_local_size.cl
index 89e2612786e4..6edab7c46c2f 100644
--- a/libclc/r600/lib/workitem/get_local_size.cl
+++ b/libclc/r600/lib/workitem/get_local_size.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_local_size_x(void) __asm("llvm.r600.read.local.size.x");
 uint __clc_r600_get_local_size_y(void) __asm("llvm.r600.read.local.size.y");
 uint __clc_r600_get_local_size_z(void) __asm("llvm.r600.read.local.size.z");
 
-_CLC_DEF size_t get_local_size(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_r600_get_local_size_x();
-	case 1: return __clc_r600_get_local_size_y();
-	case 2: return __clc_r600_get_local_size_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_local_size(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_r600_get_local_size_x();
+  case 1:
+    return __clc_r600_get_local_size_y();
+  case 2:
+    return __clc_r600_get_local_size_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/r600/lib/workitem/get_num_groups.cl b/libclc/r600/lib/workitem/get_num_groups.cl
index dfe6cef22f8e..ab4f5f629c27 100644
--- a/libclc/r600/lib/workitem/get_num_groups.cl
+++ b/libclc/r600/lib/workitem/get_num_groups.cl
@@ -4,12 +4,15 @@ uint __clc_r600_get_num_groups_x(void) __asm("llvm.r600.read.ngroups.x");
 uint __clc_r600_get_num_groups_y(void) __asm("llvm.r600.read.ngroups.y");
 uint __clc_r600_get_num_groups_z(void) __asm("llvm.r600.read.ngroups.z");
 
-_CLC_DEF size_t get_num_groups(uint dim)
-{
-	switch (dim) {
-	case 0: return __clc_r600_get_num_groups_x();
-	case 1: return __clc_r600_get_num_groups_y();
-	case 2: return __clc_r600_get_num_groups_z();
-	default: return 1;
-	}
+_CLC_DEF _CLC_OVERLOAD size_t get_num_groups(uint dim) {
+  switch (dim) {
+  case 0:
+    return __clc_r600_get_num_groups_x();
+  case 1:
+    return __clc_r600_get_num_groups_y();
+  case 2:
+    return __clc_r600_get_num_groups_z();
+  default:
+    return 1;
+  }
 }

diff  --git a/libclc/r600/lib/workitem/get_work_dim.cl b/libclc/r600/lib/workitem/get_work_dim.cl
index fccf716cf7c9..e18a83b8dd95 100644
--- a/libclc/r600/lib/workitem/get_work_dim.cl
+++ b/libclc/r600/lib/workitem/get_work_dim.cl
@@ -1,9 +1,8 @@
 #include <clc/clc.h>
 
-_CLC_DEF uint get_work_dim(void)
-{
-	__attribute__((address_space(7))) uint * ptr =
-		(__attribute__((address_space(7))) uint *)
-		__builtin_r600_implicitarg_ptr();
-	return ptr[0];
+_CLC_DEF _CLC_OVERLOAD uint get_work_dim(void) {
+  __attribute__((address_space(7))) uint *ptr =
+      (__attribute__((address_space(7)))
+       uint *)__builtin_r600_implicitarg_ptr();
+  return ptr[0];
 }


        


More information about the cfe-commits mailing list