[libclc] libclc: Fix missing overloads for atomic_fetch_add/sub (PR #188478)

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 26 06:50:05 PDT 2026


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/188478

>From 073ed409c6154efe99c9dfd430a10145ea015174 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 25 Mar 2026 13:45:19 +0100
Subject: [PATCH 1/4] libclc: Fix missing overloads for atomic_fetch_add/sub

Follow up to #185263, which missed the overloads which take a memory
order.
---
 .../lib/generic/atomic/atomic_fetch_add.cl    | 19 +++++++++++++++++++
 .../lib/generic/atomic/atomic_fetch_sub.cl    | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
index ff2115f4c4b41..f4f3c322975ac 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
@@ -27,12 +27,24 @@ atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
+    volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 _CLC_OVERLOAD _CLC_DEF uintptr_t
 atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
+    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 #if _CLC_GENERIC_AS_SUPPORTED
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
@@ -41,6 +53,13 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 #endif // _CLC_GENERIC_AS_SUPPORTED
 
 #endif // defined(__opencl_c_atomic_order_seq_cst) &&
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
index 9740ff7c036db..1d8811335e810 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
@@ -27,12 +27,24 @@ atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
+    volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 _CLC_OVERLOAD _CLC_DEF uintptr_t
 atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
+    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 #if _CLC_GENERIC_AS_SUPPORTED
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
@@ -41,6 +53,13 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
 #endif // _CLC_GENERIC_AS_SUPPORTED
 
 #endif // defined(__opencl_c_atomic_order_seq_cst) &&

>From 85fe32bce491360ff7cdcaf859987d9af71cee36 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 25 Mar 2026 14:46:47 +0100
Subject: [PATCH 2/4] Address comments

---
 .../lib/generic/atomic/atomic_fetch_add.cl    | 49 ++++++++++++++-----
 .../lib/generic/atomic/atomic_fetch_sub.cl    | 49 ++++++++++++++-----
 2 files changed, 72 insertions(+), 26 deletions(-)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
index f4f3c322975ac..e399afef9f0d6 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
@@ -18,8 +18,9 @@
 #define __CLC_BODY "atomic_def.inc"
 #include "clc/math/gentype.inc"
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
+#ifdef __opencl_c_atomic_scope_device
+
+#ifdef __opencl_c_atomic_order_seq_cst
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t
 atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
@@ -27,6 +28,21 @@ atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t
+atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
+  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v,
+                                   __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+}
+
+#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v) {
+  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+#endif // _CLC_GENERIC_AS_SUPPORTED
+#endif // __opencl_c_atomic_order_seq_cst
+
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v, order,
@@ -34,9 +50,10 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t
-atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
-  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v,
-                                   __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
+                 memory_order order, memory_scope scope) {
+  return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v, order,
+                                   __opencl_get_clang_memory_scope(scope));
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
@@ -45,22 +62,28 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_OVERLOAD _CLC_DEF uintptr_t
+atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
+                 memory_order order, memory_scope scope) {
+  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
+                                   __opencl_get_clang_memory_scope(scope));
+}
 
+#if _CLC_GENERIC_AS_SUPPORTED
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
-                                                  ptrdiff_t v) {
-  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
-                                                  memory_order order) {
+                                                  memory_order order,
+                                                  memory_scope scope) {
   return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
+                                   __opencl_get_clang_memory_scope(scope));
 }
 
 #endif // _CLC_GENERIC_AS_SUPPORTED
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
+#endif // __opencl_c_atomic_scope_device
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
index 1d8811335e810..f639a71794639 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
@@ -18,8 +18,9 @@
 #define __CLC_BODY "atomic_def.inc"
 #include "clc/math/gentype.inc"
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
+#ifdef __opencl_c_atomic_scope_device
+
+#ifdef __opencl_c_atomic_order_seq_cst
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t
 atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
@@ -27,6 +28,21 @@ atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_OVERLOAD _CLC_DEF uintptr_t
+atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
+  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v,
+                                   __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+}
+
+#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v) {
+  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+#endif // _CLC_GENERIC_AS_SUPPORTED
+#endif // __opencl_c_atomic_order_seq_cst
+
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v, order,
@@ -34,9 +50,10 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t
-atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
-  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v,
-                                   __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
+atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
+                 memory_order order, memory_scope scope) {
+  return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v, order,
+                                   __opencl_get_clang_memory_scope(scope));
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
@@ -45,22 +62,28 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_OVERLOAD _CLC_DEF uintptr_t
+atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
+                 memory_order order, memory_scope scope) {
+  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
+                                   __opencl_get_clang_memory_scope(scope));
+}
 
+#if _CLC_GENERIC_AS_SUPPORTED
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
-                                                  ptrdiff_t v) {
-  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
 _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
-                                                  memory_order order) {
+                                                  memory_order order,
+                                                  memory_scope scope) {
   return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
+                                   __opencl_get_clang_memory_scope(scope));
 }
 
 #endif // _CLC_GENERIC_AS_SUPPORTED
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
+#endif // __opencl_c_atomic_scope_device

>From e5cbd91fe859a0ab952d46630d89606fdae711f4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 25 Mar 2026 14:55:13 +0100
Subject: [PATCH 3/4] Fix formatting

---
 .../lib/generic/atomic/atomic_fetch_add.cl     | 18 +++++++++---------
 .../lib/generic/atomic/atomic_fetch_sub.cl     | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
index e399afef9f0d6..d63101fced189 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
@@ -22,20 +22,20 @@
 
 #ifdef __opencl_c_atomic_order_seq_cst
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v) {
   return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
                                    __MEMORY_SCOPE_DEVICE);
@@ -43,26 +43,26 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
 #endif // _CLC_GENERIC_AS_SUPPORTED
 #endif // __opencl_c_atomic_order_seq_cst
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
   return __scoped_atomic_fetch_add((volatile __local uintptr_t *)p, v, order,
                                    __opencl_get_clang_memory_scope(scope));
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
     volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
   return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
@@ -70,14 +70,14 @@ atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
                                                   memory_order order) {
   return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
                                                   memory_order order,
                                                   memory_scope scope) {
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
index f639a71794639..c97b9cb247d51 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
@@ -22,20 +22,20 @@
 
 #ifdef __opencl_c_atomic_order_seq_cst
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v) {
   return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v,
                                    __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v) {
   return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, __ATOMIC_SEQ_CST,
                                    __MEMORY_SCOPE_DEVICE);
@@ -43,26 +43,26 @@ _CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
 #endif // _CLC_GENERIC_AS_SUPPORTED
 #endif // __opencl_c_atomic_order_seq_cst
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
   return __scoped_atomic_fetch_sub((volatile __local uintptr_t *)p, v, order,
                                    __opencl_get_clang_memory_scope(scope));
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
     volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
   return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t
+_CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
   return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
@@ -70,14 +70,14 @@ atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
                                                   memory_order order) {
   return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
                                    __MEMORY_SCOPE_DEVICE);
 }
 
-_CLC_OVERLOAD _CLC_DEF uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
                                                   memory_order order,
                                                   memory_scope scope) {

>From 8a7a7a5f8a319b16845ae45f2fad62296e858aa7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Mar 2026 14:48:31 +0100
Subject: [PATCH 4/4] Macro fixes

---
 .../lib/generic/atomic/atomic_fetch_add.cl    | 47 ++++++++++++-------
 .../lib/generic/atomic/atomic_fetch_sub.cl    | 47 ++++++++++++-------
 2 files changed, 62 insertions(+), 32 deletions(-)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
index d63101fced189..12b6f37b3fdf5 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
@@ -18,9 +18,15 @@
 #define __CLC_BODY "atomic_def.inc"
 #include "clc/math/gentype.inc"
 
-#ifdef __opencl_c_atomic_scope_device
+// If the device address space is 64-bits, the data types atomic_intptr_t,
+// atomic_uintptr_t, atomic_size_t and atomic_ptrdiff_t are supported if the
+// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions are
+// supported and have been enabled.
+#if __SIZEOF_POINTER__ < 8 || (defined(cl_khr_int64_base_atomics) &&           \
+                               defined(cl_khr_int64_extended_atomics))
 
-#ifdef __opencl_c_atomic_order_seq_cst
+#if defined(__opencl_c_atomic_scope_device) &&                                 \
+    defined(__opencl_c_atomic_order_seq_cst)
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
@@ -41,7 +47,10 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                    __MEMORY_SCOPE_DEVICE);
 }
 #endif // _CLC_GENERIC_AS_SUPPORTED
-#endif // __opencl_c_atomic_order_seq_cst
+#endif // defined(__opencl_c_atomic_scope_device) &&
+       // defined(__opencl_c_atomic_order_seq_cst)
+
+#ifdef __opencl_c_atomic_scope_device
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
@@ -49,6 +58,22 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
                                    __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
+    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+#endif // _CLC_GENERIC_AS_SUPPORTED
+#endif // __opencl_c_atomic_scope_device
+
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
@@ -56,12 +81,6 @@ atomic_fetch_add(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                                    __opencl_get_clang_memory_scope(scope));
 }
 
-_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(
-    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
-  return __scoped_atomic_fetch_add((volatile __global uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
@@ -70,12 +89,6 @@ atomic_fetch_add(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
-                                                  ptrdiff_t v,
-                                                  memory_order order) {
-  return __scoped_atomic_fetch_add((volatile uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
-}
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
@@ -86,4 +99,6 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_add(volatile atomic_uintptr_t *p,
 }
 
 #endif // _CLC_GENERIC_AS_SUPPORTED
-#endif // __opencl_c_atomic_scope_device
+
+#endif // __SIZEOF_POINTER__ < 8 || (defined(cl_khr_int64_base_atomics) &&
+       // defined(cl_khr_int64_extended_atomics))
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
index c97b9cb247d51..6dfcdde207efc 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
@@ -18,9 +18,15 @@
 #define __CLC_BODY "atomic_def.inc"
 #include "clc/math/gentype.inc"
 
-#ifdef __opencl_c_atomic_scope_device
+// If the device address space is 64-bits, the data types atomic_intptr_t,
+// atomic_uintptr_t, atomic_size_t and atomic_ptrdiff_t are supported if the
+// cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics extensions are
+// supported and have been enabled.
+#if __SIZEOF_POINTER__ < 8 || (defined(cl_khr_int64_base_atomics) &&           \
+                               defined(cl_khr_int64_extended_atomics))
 
-#ifdef __opencl_c_atomic_order_seq_cst
+#if defined(__opencl_c_atomic_scope_device) &&                                 \
+    defined(__opencl_c_atomic_order_seq_cst)
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v) {
@@ -41,7 +47,10 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                    __MEMORY_SCOPE_DEVICE);
 }
 #endif // _CLC_GENERIC_AS_SUPPORTED
-#endif // __opencl_c_atomic_order_seq_cst
+#endif // defined(__opencl_c_atomic_scope_device) &&
+       // defined(__opencl_c_atomic_order_seq_cst)
+
+#ifdef __opencl_c_atomic_scope_device
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
     volatile __local atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
@@ -49,6 +58,22 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
                                    __MEMORY_SCOPE_DEVICE);
 }
 
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
+    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+
+#if _CLC_GENERIC_AS_SUPPORTED
+_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
+                                                  ptrdiff_t v,
+                                                  memory_order order) {
+  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
+                                   __MEMORY_SCOPE_DEVICE);
+}
+#endif // _CLC_GENERIC_AS_SUPPORTED
+#endif // __opencl_c_atomic_scope_device
+
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
@@ -56,12 +81,6 @@ atomic_fetch_sub(volatile __local atomic_uintptr_t *p, ptrdiff_t v,
                                    __opencl_get_clang_memory_scope(scope));
 }
 
-_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(
-    volatile __global atomic_uintptr_t *p, ptrdiff_t v, memory_order order) {
-  return __scoped_atomic_fetch_sub((volatile __global uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
-}
-
 _CLC_DEF _CLC_OVERLOAD uintptr_t
 atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
                  memory_order order, memory_scope scope) {
@@ -70,12 +89,6 @@ atomic_fetch_sub(volatile __global atomic_uintptr_t *p, ptrdiff_t v,
 }
 
 #if _CLC_GENERIC_AS_SUPPORTED
-_CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
-                                                  ptrdiff_t v,
-                                                  memory_order order) {
-  return __scoped_atomic_fetch_sub((volatile uintptr_t *)p, v, order,
-                                   __MEMORY_SCOPE_DEVICE);
-}
 
 _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
                                                   ptrdiff_t v,
@@ -86,4 +99,6 @@ _CLC_DEF _CLC_OVERLOAD uintptr_t atomic_fetch_sub(volatile atomic_uintptr_t *p,
 }
 
 #endif // _CLC_GENERIC_AS_SUPPORTED
-#endif // __opencl_c_atomic_scope_device
+
+#endif // __SIZEOF_POINTER__ < 8 || (defined(cl_khr_int64_base_atomics) &&
+       // defined(cl_khr_int64_extended_atomics))



More information about the cfe-commits mailing list