[Libclc-dev] [PATCH 1/1] Rework atomic ops to use clang builtins rather than llvm asm

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Tue Sep 19 23:46:17 PDT 2017


No change in Piglit pass rates on either Turks or Carrizo/Iceland.

Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
 amdgpu/lib/SOURCES                   |   1 -
 amdgpu/lib/atomic/atomic.cl          |  65 -----------------
 generic/lib/SOURCES                  |   9 ++-
 generic/lib/atomic/atomic_add.cl     |  12 ++++
 generic/lib/atomic/atomic_and.cl     |  12 ++++
 generic/lib/atomic/atomic_cmpxchg.cl |  12 ++++
 generic/lib/atomic/atomic_impl.ll    | 133 -----------------------------------
 generic/lib/atomic/atomic_max.cl     |  12 ++++
 generic/lib/atomic/atomic_min.cl     |  12 ++++
 generic/lib/atomic/atomic_or.cl      |  12 ++++
 generic/lib/atomic/atomic_sub.cl     |  12 ++++
 generic/lib/atomic/atomic_xchg.cl    |  15 +++-
 generic/lib/atomic/atomic_xor.cl     |  12 ++++
 13 files changed, 117 insertions(+), 202 deletions(-)
 delete mode 100644 amdgpu/lib/atomic/atomic.cl
 create mode 100644 generic/lib/atomic/atomic_add.cl
 create mode 100644 generic/lib/atomic/atomic_and.cl
 create mode 100644 generic/lib/atomic/atomic_cmpxchg.cl
 delete mode 100644 generic/lib/atomic/atomic_impl.ll
 create mode 100644 generic/lib/atomic/atomic_max.cl
 create mode 100644 generic/lib/atomic/atomic_min.cl
 create mode 100644 generic/lib/atomic/atomic_or.cl
 create mode 100644 generic/lib/atomic/atomic_sub.cl
 create mode 100644 generic/lib/atomic/atomic_xor.cl

diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES
index f817538..4414621 100644
--- a/amdgpu/lib/SOURCES
+++ b/amdgpu/lib/SOURCES
@@ -1,4 +1,3 @@
-atomic/atomic.cl
 math/nextafter.cl
 math/sqrt.cl
 image/get_image_width.cl
diff --git a/amdgpu/lib/atomic/atomic.cl b/amdgpu/lib/atomic/atomic.cl
deleted file mode 100644
index 5bfe07b..0000000
--- a/amdgpu/lib/atomic/atomic.cl
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <clc/clc.h>
-
-#define ATOMIC_FUNC_DEFINE(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE val) { \
-	return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)val); \
-}
-
-/* For atomic functions that don't need different bitcode dependending on argument signedness */
-#define ATOMIC_FUNC_SIGN(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-	_CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE); \
-	ATOMIC_FUNC_DEFINE(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-	ATOMIC_FUNC_DEFINE(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
-
-#define ATOMIC_FUNC_ADDRSPACE(TYPE, FUNCTION) \
-	ATOMIC_FUNC_SIGN(TYPE, FUNCTION, global, 1) \
-	ATOMIC_FUNC_SIGN(TYPE, FUNCTION, local, 3)
-
-#define ATOMIC_FUNC(FUNCTION) \
-	ATOMIC_FUNC_ADDRSPACE(int, FUNCTION)
-
-#define ATOMIC_FUNC_DEFINE_3_ARG(RET_SIGN, ARG_SIGN, TYPE, CL_FUNCTION, CLC_FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-_CLC_OVERLOAD _CLC_DEF RET_SIGN TYPE CL_FUNCTION (volatile CL_ADDRSPACE RET_SIGN TYPE *p, RET_SIGN TYPE cmp, RET_SIGN TYPE val) { \
-	return (RET_SIGN TYPE)__clc_##CLC_FUNCTION##_addr##LLVM_ADDRSPACE((volatile CL_ADDRSPACE ARG_SIGN TYPE*)p, (ARG_SIGN TYPE)cmp, (ARG_SIGN TYPE)val); \
-}
-
-/* For atomic functions that don't need different bitcode dependending on argument signedness */
-#define ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-	_CLC_DECL signed TYPE __clc_##FUNCTION##_addr##LLVM_ADDRSPACE(volatile CL_ADDRSPACE signed TYPE*, signed TYPE, signed TYPE); \
-	ATOMIC_FUNC_DEFINE_3_ARG(signed, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE) \
-	ATOMIC_FUNC_DEFINE_3_ARG(unsigned, signed, TYPE, FUNCTION, FUNCTION, CL_ADDRSPACE, LLVM_ADDRSPACE)
-
-#define ATOMIC_FUNC_ADDRSPACE_3_ARG(TYPE, FUNCTION) \
-	ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, global, 1) \
-	ATOMIC_FUNC_SIGN_3_ARG(TYPE, FUNCTION, local, 3)
-
-#define ATOMIC_FUNC_3_ARG(FUNCTION) \
-	ATOMIC_FUNC_ADDRSPACE_3_ARG(int, FUNCTION)
-
-ATOMIC_FUNC(atomic_add)
-ATOMIC_FUNC(atomic_and)
-ATOMIC_FUNC(atomic_or)
-ATOMIC_FUNC(atomic_sub)
-ATOMIC_FUNC(atomic_xchg)
-ATOMIC_FUNC(atomic_xor)
-ATOMIC_FUNC_3_ARG(atomic_cmpxchg)
-
-_CLC_DECL signed int __clc_atomic_max_addr1(volatile global signed int*, signed int);
-_CLC_DECL signed int __clc_atomic_max_addr3(volatile local signed int*, signed int);
-_CLC_DECL uint __clc_atomic_umax_addr1(volatile global uint*, uint);
-_CLC_DECL uint __clc_atomic_umax_addr3(volatile local uint*, uint);
-
-ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, global, 1)
-ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_max, atomic_max, local, 3)
-ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, global, 1)
-ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_max, atomic_umax, local, 3)
-
-_CLC_DECL signed int __clc_atomic_min_addr1(volatile global signed int*, signed int);
-_CLC_DECL signed int __clc_atomic_min_addr3(volatile local signed int*, signed int);
-_CLC_DECL uint __clc_atomic_umin_addr1(volatile global uint*, uint);
-_CLC_DECL uint __clc_atomic_umin_addr3(volatile local uint*, uint);
-
-ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, global, 1)
-ATOMIC_FUNC_DEFINE(signed, signed, int, atomic_min, atomic_min, local, 3)
-ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, global, 1)
-ATOMIC_FUNC_DEFINE(unsigned, unsigned, int, atomic_min, atomic_umin, local, 3)
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index 23905c4..f919bc7 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -4,8 +4,15 @@ async/async_work_group_copy.cl
 async/async_work_group_strided_copy.cl
 async/prefetch.cl
 async/wait_group_events.cl
+atomic/atomic_add.cl
+atomic/atomic_and.cl
+atomic/atomic_cmpxchg.cl
+atomic/atomic_max.cl
+atomic/atomic_min.cl
+atomic/atomic_or.cl
+atomic/atomic_sub.cl
+atomic/atomic_xor.cl
 atomic/atomic_xchg.cl
-atomic/atomic_impl.ll
 cl_khr_global_int32_base_atomics/atom_add.cl
 cl_khr_global_int32_base_atomics/atom_cmpxchg.cl
 cl_khr_global_int32_base_atomics/atom_dec.cl
diff --git a/generic/lib/atomic/atomic_add.cl b/generic/lib/atomic/atomic_add.cl
new file mode 100644
index 0000000..f7d81f2
--- /dev/null
+++ b/generic/lib/atomic/atomic_add.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_add(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_and.cl b/generic/lib/atomic/atomic_and.cl
new file mode 100644
index 0000000..556d22a
--- /dev/null
+++ b/generic/lib/atomic/atomic_and.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_and(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_cmpxchg.cl b/generic/lib/atomic/atomic_cmpxchg.cl
new file mode 100644
index 0000000..fcf2e0c
--- /dev/null
+++ b/generic/lib/atomic/atomic_cmpxchg.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp, TYPE val) { \
+  return __sync_val_compare_and_swap(p, cmp, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_impl.ll b/generic/lib/atomic/atomic_impl.ll
deleted file mode 100644
index 019147f..0000000
--- a/generic/lib/atomic/atomic_impl.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-define i32 @__clc_atomic_add_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_add_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile add i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_and_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_and_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile and i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_cmpxchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %compare, i32 %value seq_cst seq_cst
-  %1 = extractvalue { i32, i1 } %0, 0
-  ret i32 %1
-}
-
-define i32 @__clc_atomic_cmpxchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %compare, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = cmpxchg volatile i32 addrspace(3)* %ptr, i32 %compare, i32 %value seq_cst seq_cst
-  %1 = extractvalue { i32, i1 } %0, 0
-  ret i32 %1
-}
-
-define i32 @__clc_atomic_max_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_max_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile max i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_min_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_min_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile min i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_or_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_or_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile or i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_umax_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_umax_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile umax i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_umin_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_umin_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile umin i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_sub_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_sub_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile sub i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_xchg_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_xchg_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile xchg i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_xor_addr1(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
-
-define i32 @__clc_atomic_xor_addr3(i32 addrspace(3)* nocapture %ptr, i32 %value) nounwind alwaysinline {
-entry:
-  %0 = atomicrmw volatile xor i32 addrspace(3)* %ptr, i32 %value seq_cst
-  ret i32 %0
-}
diff --git a/generic/lib/atomic/atomic_max.cl b/generic/lib/atomic/atomic_max.cl
new file mode 100644
index 0000000..afd86c2
--- /dev/null
+++ b/generic/lib/atomic/atomic_max.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS, OP) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_##OP(p, val); \
+}
+
+IMPL(int, global, max)
+IMPL(unsigned int, global, umax)
+IMPL(int, local, max)
+IMPL(unsigned int, local, umax)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_min.cl b/generic/lib/atomic/atomic_min.cl
new file mode 100644
index 0000000..a6099d5
--- /dev/null
+++ b/generic/lib/atomic/atomic_min.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS, OP) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_##OP(p, val); \
+}
+
+IMPL(int, global, min)
+IMPL(unsigned int, global, umin)
+IMPL(int, local, min)
+IMPL(unsigned int, local, umin)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_or.cl b/generic/lib/atomic/atomic_or.cl
new file mode 100644
index 0000000..75ef51d
--- /dev/null
+++ b/generic/lib/atomic/atomic_or.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_or(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_sub.cl b/generic/lib/atomic/atomic_sub.cl
new file mode 100644
index 0000000..49098ff
--- /dev/null
+++ b/generic/lib/atomic/atomic_sub.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_sub(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_xchg.cl b/generic/lib/atomic/atomic_xchg.cl
index 9aee595..9c4e404 100644
--- a/generic/lib/atomic/atomic_xchg.cl
+++ b/generic/lib/atomic/atomic_xchg.cl
@@ -1,9 +1,20 @@
 #include <clc/clc.h>
 
 _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile global float *p, float val) {
-  return as_float(atomic_xchg((volatile global int *)p, as_int(val)));
+  return as_float(atomic_xchg((volatile global uint *)p, as_uint(val)));
 }
 
 _CLC_OVERLOAD _CLC_DEF float atomic_xchg(volatile local float *p, float val) {
-  return as_float(atomic_xchg((volatile local int *)p, as_int(val)));
+  return as_float(atomic_xchg((volatile local uint *)p, as_uint(val)));
 }
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) { \
+  return __sync_swap_4(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
diff --git a/generic/lib/atomic/atomic_xor.cl b/generic/lib/atomic/atomic_xor.cl
new file mode 100644
index 0000000..fcbe481
--- /dev/null
+++ b/generic/lib/atomic/atomic_xor.cl
@@ -0,0 +1,12 @@
+#include <clc/clc.h>
+
+#define IMPL(TYPE, AS) \
+_CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) { \
+  return __sync_fetch_and_xor(p, val); \
+}
+
+IMPL(int, global)
+IMPL(unsigned int, global)
+IMPL(int, local)
+IMPL(unsigned int, local)
+#undef IMPL
-- 
2.13.5



More information about the Libclc-dev mailing list