[libclc] [libclc] Add OpenCL atomic_*_explicit builtins (PR #168318)

Wenju He via cfe-commits cfe-commits at lists.llvm.org
Sun Nov 23 23:13:07 PST 2025


https://github.com/wenju-he updated https://github.com/llvm/llvm-project/pull/168318

>From 5965c4bd306c1a227188ea6eeb9c3d247e9ab2f3 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 17 Nov 2025 06:52:38 +0100
Subject: [PATCH 1/4] [libclc] Add OpenCL atomic_*_explicit builtins

Implement atomic_*_explicit (e.g. atomic_store_explicit) with
memory_order plus optional memory_scope.

OpenCL memory_order maps 1:1 to Clang (e.g. OpenCL memory_order_relaxed
== Clang __ATOMIC_RELAXED), so we pass it unchanged to the __clc_atomic_*
functions, which forward to Clang __scoped_atomic_* builtins.

Other changes:
* Add __opencl_get_clang_memory_scope helper in opencl/utils.h
  (OpenCL scope -> Clang scope).
* Correct atomic_compare_exchange return type to bool.
* Fix atomic_compare_exchange to return true when the value stored at
  the pointer equals the expected value.
---
 .../atomic/atomic_compare_exchange_strong.h   |   3 +
 .../atomic/atomic_compare_exchange_weak.h     |   3 +
 .../include/clc/opencl/atomic/atomic_decl.inc |  91 ++++++++++--
 .../clc/opencl/atomic/atomic_exchange.h       |   3 +
 .../clc/opencl/atomic/atomic_fetch_add.h      |   3 +
 .../clc/opencl/atomic/atomic_fetch_and.h      |   3 +
 .../clc/opencl/atomic/atomic_fetch_max.h      |   3 +
 .../clc/opencl/atomic/atomic_fetch_min.h      |   3 +
 .../clc/opencl/atomic/atomic_fetch_or.h       |   3 +
 .../clc/opencl/atomic/atomic_fetch_sub.h      |   3 +
 .../clc/opencl/atomic/atomic_fetch_xor.h      |   3 +
 .../include/clc/opencl/atomic/atomic_load.h   |   3 +
 .../include/clc/opencl/atomic/atomic_store.h  |   3 +
 libclc/opencl/include/clc/opencl/types.h      |  48 +++++++
 libclc/opencl/include/clc/opencl/utils.h      |  33 +++++
 .../atomic/atomic_compare_exchange_strong.cl  |   7 +-
 .../atomic/atomic_compare_exchange_weak.cl    |   7 +-
 .../opencl/lib/generic/atomic/atomic_def.inc  | 131 +++++++++++++++---
 .../lib/generic/atomic/atomic_exchange.cl     |   7 +-
 .../lib/generic/atomic/atomic_fetch_add.cl    |   7 +-
 .../lib/generic/atomic/atomic_fetch_and.cl    |   7 +-
 .../lib/generic/atomic/atomic_fetch_max.cl    |   7 +-
 .../lib/generic/atomic/atomic_fetch_min.cl    |   7 +-
 .../lib/generic/atomic/atomic_fetch_or.cl     |   7 +-
 .../lib/generic/atomic/atomic_fetch_sub.cl    |   7 +-
 .../lib/generic/atomic/atomic_fetch_xor.cl    |   7 +-
 .../opencl/lib/generic/atomic/atomic_load.cl  |   7 +-
 .../opencl/lib/generic/atomic/atomic_store.cl |   7 +-
 28 files changed, 325 insertions(+), 98 deletions(-)
 create mode 100644 libclc/opencl/include/clc/opencl/types.h
 create mode 100644 libclc/opencl/include/clc/opencl/utils.h

diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_strong.h b/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_strong.h
index 59bfa0e87dd8f..4870b13329e4f 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_strong.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_strong.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_COMPARE_EXCHANGE_STRONG_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_COMPARE_EXCHANGE_STRONG_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_compare_exchange_strong
 #define __CLC_COMPARE_EXCHANGE
 
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_weak.h b/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_weak.h
index 7106c3e061d65..103d4f5504d71 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_weak.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_compare_exchange_weak.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_COMPARE_EXCHANGE_WEAK_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_COMPARE_EXCHANGE_WEAK_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_compare_exchange_weak
 #define __CLC_COMPARE_EXCHANGE
 
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_decl.inc b/libclc/opencl/include/clc/opencl/atomic/atomic_decl.inc
index 38d250f0693f7..a36e68bca86a2 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_decl.inc
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_decl.inc
@@ -25,32 +25,105 @@
 
 #define __CLC_ATOMIC_GENTYPE __CLC_XCONCAT(atomic_, __CLC_GENTYPE)
 
+#define __CLC_FUNCTION_EXPLICIT __CLC_XCONCAT(__CLC_FUNCTION, _explicit)
+
+#ifdef __CLC_NO_VALUE_ARG // e.g. atomic_load: pointer only, returns the value
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(               \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, memory_order Order,        \
+      memory_scope Scope);
+#elif defined(__CLC_RETURN_VOID) // e.g. atomic_store: value in, returns void
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION_EXPLICIT(                        \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order, memory_scope Scope);
+#elif defined(__CLC_COMPARE_EXCHANGE) // two orders: success and failure
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL bool __CLC_FUNCTION_EXPLICIT(                        \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
+      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired,                \
+      memory_order Success, memory_order Failure, memory_scope Scope);
+#else // e.g. atomic_exchange, atomic_fetch_*: value in, __CLC_GENTYPE out
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(               \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order, memory_scope Scope);
+#endif
+
+__CLC_DECL_ATOMIC(global)
+__CLC_DECL_ATOMIC(local)
+#if _CLC_GENERIC_AS_SUPPORTED
+__CLC_DECL_ATOMIC()
+#endif
+
+#undef __CLC_DECL_ATOMIC
+
+#if defined(__opencl_c_atomic_scope_device)
+
+#ifdef __CLC_NO_VALUE_ARG // scope-less overloads; here: pointer and order only
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(               \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, memory_order Order);
+#elif defined(__CLC_RETURN_VOID) // value in, nothing returned
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION_EXPLICIT(                        \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order);
+#elif defined(__CLC_COMPARE_EXCHANGE) // two orders: success and failure
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL bool __CLC_FUNCTION_EXPLICIT(                        \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
+      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired,                \
+      memory_order Success, memory_order Failure);
+#else // remaining builtins: value in, __CLC_GENTYPE returned
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(               \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order);
+#endif
+
+__CLC_DECL_ATOMIC(global)
+__CLC_DECL_ATOMIC(local)
+#if _CLC_GENERIC_AS_SUPPORTED
+__CLC_DECL_ATOMIC()
+#endif
+
+#undef __CLC_DECL_ATOMIC
+
+#endif // defined(__opencl_c_atomic_scope_device)
+
+#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
+    defined(__opencl_c_atomic_scope_device)
+
 #ifdef __CLC_NO_VALUE_ARG
-#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
   _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr);
 #elif defined(__CLC_RETURN_VOID)
-#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
   _CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION(                                 \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value);
 #elif defined(__CLC_COMPARE_EXCHANGE)
-#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
-  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
+  _CLC_OVERLOAD _CLC_DECL bool __CLC_FUNCTION(                                 \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
       ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired);
 #else
-#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+#define __CLC_DECL_ATOMIC(ADDRSPACE)                                           \
   _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value);
 #endif
 
-__CLC_DEFINE_ATOMIC(global)
-__CLC_DEFINE_ATOMIC(local)
+__CLC_DECL_ATOMIC(global)
+__CLC_DECL_ATOMIC(local)
 #if _CLC_GENERIC_AS_SUPPORTED
-__CLC_DEFINE_ATOMIC()
+__CLC_DECL_ATOMIC()
 #endif
 
-#undef __CLC_DEFINE_ATOMIC
+#undef __CLC_DECL_ATOMIC
+
+#endif // defined(__opencl_c_atomic_order_seq_cst) &&
+       // defined(__opencl_c_atomic_scope_device)
 
 #endif // __CLC_HAVE_FP_ATOMIC || __CLC_HAVE_INT_ATOMIC
 
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_exchange.h b/libclc/opencl/include/clc/opencl/atomic/atomic_exchange.h
index 9d949825b58c3..d47691b373eb0 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_exchange.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_exchange.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_EXCHANGE_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_EXCHANGE_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_exchange
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_add.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_add.h
index bae5a7a7e19bb..9ec29e1a553da 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_add.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_add.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_ADD_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_ADD_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_add
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_and.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_and.h
index 9f9d2225f910e..fb51102911228 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_and.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_and.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_AND_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_AND_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_and
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_max.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_max.h
index bef102dc82f48..8902e000a1024 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_max.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_max.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_MAX_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_MAX_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_max
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_min.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_min.h
index d7e346dc44368..0b79b5d9f9d18 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_min.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_min.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_MIN_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_MIN_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_min
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_or.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_or.h
index aa00982e15a56..5928e15cc3f53 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_or.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_or.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_OR_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_OR_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_or
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_sub.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_sub.h
index 3d04ed7ba34f8..76e519f933121 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_sub.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_sub.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_SUB_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_SUB_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_sub
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_xor.h b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_xor.h
index 2cdff08069025..c0befd44eae20 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_xor.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_fetch_xor.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_XOR_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_FETCH_XOR_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_fetch_xor
 
 #define __CLC_BODY <clc/opencl/atomic/atomic_decl.inc>
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_load.h b/libclc/opencl/include/clc/opencl/atomic/atomic_load.h
index 7db259b136ec8..1aaa26bdecc9e 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_load.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_load.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_LOAD_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_LOAD_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_load
 #define __CLC_NO_VALUE_ARG
 
diff --git a/libclc/opencl/include/clc/opencl/atomic/atomic_store.h b/libclc/opencl/include/clc/opencl/atomic/atomic_store.h
index b3cdfc6ffaeae..f754314918f82 100644
--- a/libclc/opencl/include/clc/opencl/atomic/atomic_store.h
+++ b/libclc/opencl/include/clc/opencl/atomic/atomic_store.h
@@ -9,6 +9,9 @@
 #ifndef __CLC_OPENCL_ATOMIC_ATOMIC_STORE_H__
 #define __CLC_OPENCL_ATOMIC_ATOMIC_STORE_H__
 
+#include <clc/opencl/opencl-base.h>
+#include <clc/opencl/types.h>
+
 #define __CLC_FUNCTION atomic_store
 #define __CLC_RETURN_VOID
 
diff --git a/libclc/opencl/include/clc/opencl/types.h b/libclc/opencl/include/clc/opencl/types.h
new file mode 100644
index 0000000000000..b1be88f21bdaa
--- /dev/null
+++ b/libclc/opencl/include/clc/opencl/types.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_OPENCL_TYPES_H__
+#define __CLC_OPENCL_TYPES_H__
+
+// Copied from clang/lib/Headers/opencl-c-base.h
+
+typedef enum memory_scope {
+  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
+  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
+  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
+#if defined(__opencl_c_atomic_scope_all_devices)
+  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
+#if (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >= 202100)
+  memory_scope_all_devices = memory_scope_all_svm_devices,
+#endif // (__OPENCL_C_VERSION__ >= CL_VERSION_3_0 || __OPENCL_CPP_VERSION__ >=
+       // 202100)
+#endif // defined(__opencl_c_atomic_scope_all_devices)
+/**
+ * Subgroups have different requirements on forward progress, so just test
+ * all the relevant macros.
+ * CL 3.0 sub-groups "they are not guaranteed to make independent forward
+ * progress" KHR subgroups "Subgroups within a workgroup are independent, make
+ * forward progress with respect to each other"
+ */
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
+    defined(__opencl_c_subgroups)
+  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
+#endif
+} memory_scope;
+
+typedef enum memory_order {
+  memory_order_relaxed = __ATOMIC_RELAXED,
+  memory_order_acquire = __ATOMIC_ACQUIRE,
+  memory_order_release = __ATOMIC_RELEASE,
+  memory_order_acq_rel = __ATOMIC_ACQ_REL,
+#if defined(__opencl_c_atomic_order_seq_cst)
+  memory_order_seq_cst = __ATOMIC_SEQ_CST
+#endif
+} memory_order;
+
+#endif // __CLC_OPENCL_TYPES_H__
diff --git a/libclc/opencl/include/clc/opencl/utils.h b/libclc/opencl/include/clc/opencl/utils.h
new file mode 100644
index 0000000000000..c677f82ebb67d
--- /dev/null
+++ b/libclc/opencl/include/clc/opencl/utils.h
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_OPENCL_UTILS_H__
+#define __CLC_OPENCL_UTILS_H__
+
+#include <clc/internal/clc.h>
+#include <clc/opencl/types.h>
+
+static _CLC_INLINE int __opencl_get_clang_memory_scope(memory_scope scope) {
+  // Translate an OpenCL memory_scope enumerator into the matching Clang
+  // __MEMORY_SCOPE_* value. Any scope that is not tested explicitly below
+  // is treated as system scope.
+  if (scope == memory_scope_work_item)
+    return __MEMORY_SCOPE_SINGLE;
+#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) ||                \
+    defined(__opencl_c_subgroups)
+  if (scope == memory_scope_sub_group)
+    return __MEMORY_SCOPE_WVFRNT;
+#endif
+  if (scope == memory_scope_work_group)
+    return __MEMORY_SCOPE_WRKGRP;
+  if (scope == memory_scope_device)
+    return __MEMORY_SCOPE_DEVICE;
+  return __MEMORY_SCOPE_SYSTEM;
+}
+
+#endif // __CLC_OPENCL_UTILS_H__
diff --git a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
index 2c1f07d8ca485..c0ca3f8f7d332 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_compare_exchange.h>
 #include <clc/opencl/atomic/atomic_compare_exchange_strong.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_compare_exchange_strong
 #define __CLC_COMPARE_EXCHANGE
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
index 69bdf37250f70..39768fb345714 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_compare_exchange.h>
 #include <clc/opencl/atomic/atomic_compare_exchange_weak.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_compare_exchange_weak
 #define __CLC_COMPARE_EXCHANGE
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc
index a4ccab5990888..99fb778a8b342 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_def.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc
@@ -12,7 +12,8 @@
                                  defined(cl_khr_int64_extended_atomics))
 #define __CLC_HAVE_64_ATOMIC
 #endif
-#if defined(__CLC_FPSIZE) && (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC)
+#if defined(__CLC_FPSIZE) &&                                                   \
+    (__CLC_FPSIZE < 64 || defined(__CLC_HAVE_64_ATOMIC))
 #define __CLC_HAVE_FP_ATOMIC
 #endif
 #if defined(__CLC_GENSIZE) &&                                                  \
@@ -24,41 +25,134 @@
 
 #define __CLC_ATOMIC_GENTYPE __CLC_XCONCAT(atomic_, __CLC_GENTYPE)
 
+#define __CLC_FUNCTION_EXPLICIT __CLC_XCONCAT(__CLC_FUNCTION, _explicit)
+
 #ifdef __CLC_NO_VALUE_ARG
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
-  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
-      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr) {                          \
-    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr,        \
-                               __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);       \
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, memory_order Order,        \
+      memory_scope Scope) {                                                    \
+    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Order, \
+                               __opencl_get_clang_memory_scope(Scope));        \
   }
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
-  _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  \
-      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value,        \
-                        __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);              \
+  _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION_EXPLICIT(                         \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order, memory_scope Scope) {                                \
+    __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value, Order, \
+                        __opencl_get_clang_memory_scope(Scope));               \
   }
 #elif defined(__CLC_COMPARE_EXCHANGE)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
-  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
+  _CLC_OVERLOAD _CLC_DEF bool __CLC_FUNCTION_EXPLICIT(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
-      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired) {              \
+      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired,                \
+      memory_order Success, memory_order Failure, memory_scope Scope) {        \
     __CLC_GENTYPE Comparator = *Expected;                                      \
     __CLC_GENTYPE RetValue = __clc_atomic_compare_exchange(                    \
-        (volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Comparator, Desired,          \
-        __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);            \
+        (volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Comparator, Desired, Success, \
+        Failure, __opencl_get_clang_memory_scope(Scope));                      \
     if (Comparator != RetValue) {                                              \
       *Expected = RetValue;                                                    \
-      return true;                                                             \
+      return false;                                                            \
     }                                                                          \
-    return false;                                                              \
+    return true;                                                               \
+  }
+#else
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order, memory_scope Scope) {                                \
+    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value, \
+                               Order, __opencl_get_clang_memory_scope(Scope)); \
+  }
+#endif
+
+__CLC_DEFINE_ATOMIC(global)
+__CLC_DEFINE_ATOMIC(local)
+#if _CLC_GENERIC_AS_SUPPORTED
+__CLC_DEFINE_ATOMIC()
+#endif
+
+#undef __CLC_DEFINE_ATOMIC
+
+#if defined(__opencl_c_atomic_scope_device)
+
+#ifdef __CLC_NO_VALUE_ARG // scope-less overloads forward with device scope
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, memory_order Order) {      \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Order, __OPENCL_MEMORY_SCOPE_DEVICE);  \
+  }
+#elif defined(__CLC_RETURN_VOID) // value in, nothing returned
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION_EXPLICIT(                         \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order) {                                                    \
+    __CLC_FUNCTION_EXPLICIT(Ptr, Value, Order, __OPENCL_MEMORY_SCOPE_DEVICE);  \
+  }
+#elif defined(__CLC_COMPARE_EXCHANGE) // passes both orders through unchanged
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF bool __CLC_FUNCTION_EXPLICIT(                         \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
+      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired,                \
+      memory_order Success, memory_order Failure) {                            \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, Success, Failure,   \
+                                   __OPENCL_MEMORY_SCOPE_DEVICE);              \
+  }
+#else // remaining builtins: value in, __CLC_GENTYPE returned
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
+      memory_order Order) {                                                    \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, Order,                          \
+                                   __OPENCL_MEMORY_SCOPE_DEVICE);              \
+  }
+#endif
+
+__CLC_DEFINE_ATOMIC(global)
+__CLC_DEFINE_ATOMIC(local)
+#if _CLC_GENERIC_AS_SUPPORTED
+__CLC_DEFINE_ATOMIC()
+#endif
+
+#undef __CLC_DEFINE_ATOMIC
+
+#endif // defined(__opencl_c_atomic_scope_device)
+
+#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
+    defined(__opencl_c_atomic_scope_device)
+
+#ifdef __CLC_NO_VALUE_ARG
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr) {                          \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, __ATOMIC_SEQ_CST,                      \
+                                   __OPENCL_MEMORY_SCOPE_DEVICE);              \
+  }
+#elif defined(__CLC_RETURN_VOID)
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
+    __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,                      \
+                            __OPENCL_MEMORY_SCOPE_DEVICE);                     \
+  }
+#elif defined(__CLC_COMPARE_EXCHANGE)
+#define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
+  _CLC_OVERLOAD _CLC_DEF bool __CLC_FUNCTION(                                  \
+      volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
+      ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired) {              \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, __ATOMIC_SEQ_CST,   \
+                                   __ATOMIC_SEQ_CST,                           \
+                                   __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #else
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value, \
-                               __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);       \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,               \
+                                   __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #endif
 
@@ -70,6 +164,9 @@ __CLC_DEFINE_ATOMIC()
 
 #undef __CLC_DEFINE_ATOMIC
 
+#endif // defined(__opencl_c_atomic_order_seq_cst) &&
+       // defined(__opencl_c_atomic_scope_device)
+
 #endif // __CLC_HAVE_FP_ATOMIC || __CLC_HAVE_INT_ATOMIC
 
 #undef __CLC_HAVE_INT_ATOMIC
diff --git a/libclc/opencl/lib/generic/atomic/atomic_exchange.cl b/libclc/opencl/lib/generic/atomic/atomic_exchange.cl
index 5f7e2fa593e3f..f7568f6ace38c 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_exchange.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_exchange.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_exchange.h>
 #include <clc/opencl/atomic/atomic_exchange.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_exchange
 #define __CLC_IMPL_FUNCTION __clc_atomic_exchange
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
index 0362ff89d1d78..d27cc7120ccce 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_add.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_add.h>
 #include <clc/opencl/atomic/atomic_fetch_add.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_add
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_add
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_and.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_and.cl
index a1796f20c6e44..b8531722911cf 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_and.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_and.cl
@@ -6,17 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_and.h>
 #include <clc/opencl/atomic/atomic_fetch_and.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_and
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_and
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/integer/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_max.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_max.cl
index 03b5d1d8ae7bd..b644ca336437a 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_max.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_max.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_max.h>
 #include <clc/opencl/atomic/atomic_fetch_max.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_max
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_max
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_min.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_min.cl
index 60ffeff04cc6a..f24fcf329b6f2 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_min.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_min.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_min.h>
 #include <clc/opencl/atomic/atomic_fetch_min.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_min
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_min
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_or.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_or.cl
index 8f4100bb150e3..1f6fe4cac090a 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_or.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_or.cl
@@ -6,17 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_or.h>
 #include <clc/opencl/atomic/atomic_fetch_or.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_or
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_or
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/integer/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
index ecb5b4315ee86..94323a2c0fcb6 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_sub.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_sub.h>
 #include <clc/opencl/atomic/atomic_fetch_sub.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_sub
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_sub
@@ -20,6 +18,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_fetch_xor.cl b/libclc/opencl/lib/generic/atomic/atomic_fetch_xor.cl
index c49a55820c8d4..13e1db1124f9a 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_fetch_xor.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_fetch_xor.cl
@@ -6,17 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_fetch_xor.h>
 #include <clc/opencl/atomic/atomic_fetch_xor.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_fetch_xor
 #define __CLC_IMPL_FUNCTION __clc_atomic_fetch_xor
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/integer/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_load.cl b/libclc/opencl/lib/generic/atomic/atomic_load.cl
index e904330be0064..1b93ce84ea863 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_load.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_load.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_load.h>
 #include <clc/opencl/atomic/atomic_load.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_load
 #define __CLC_IMPL_FUNCTION __clc_atomic_load
@@ -21,6 +19,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_store.cl b/libclc/opencl/lib/generic/atomic/atomic_store.cl
index 584e29ef99a5f..fcaa4d3128f7d 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_store.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_store.cl
@@ -6,11 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#if defined(__opencl_c_atomic_order_seq_cst) &&                                \
-    defined(__opencl_c_atomic_scope_device)
-
 #include <clc/atomic/clc_atomic_store.h>
 #include <clc/opencl/atomic/atomic_store.h>
+#include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_store
 #define __CLC_IMPL_FUNCTION __clc_atomic_store
@@ -21,6 +19,3 @@
 
 #define __CLC_BODY <atomic_def.inc>
 #include <clc/math/gentype.inc>
-
-#endif // defined(__opencl_c_atomic_order_seq_cst) &&
-       // defined(__opencl_c_atomic_scope_device)

>From 857b7951eca428ea5e1a5f3fa60109b53a45fd5d Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 18 Nov 2025 02:32:52 +0100
Subject: [PATCH 2/4] change default order to __ATOMIC_RELAXED

---
 libclc/opencl/lib/generic/atomic/atomic_def.inc     | 10 +++++-----
 libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc
index 99fb778a8b342..059f00f528425 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_def.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc
@@ -128,14 +128,14 @@ __CLC_DEFINE_ATOMIC()
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr) {                          \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, __ATOMIC_SEQ_CST,                      \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, __ATOMIC_RELAXED,                      \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,                      \
+    __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_RELAXED,                      \
                             __OPENCL_MEMORY_SCOPE_DEVICE);                     \
   }
 #elif defined(__CLC_COMPARE_EXCHANGE)
@@ -143,15 +143,15 @@ __CLC_DEFINE_ATOMIC()
   _CLC_OVERLOAD _CLC_DEF bool __CLC_FUNCTION(                                  \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
       ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired) {              \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, __ATOMIC_SEQ_CST,   \
-                                   __ATOMIC_SEQ_CST,                           \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, __ATOMIC_RELAXED,   \
+                                   __ATOMIC_RELAXED,                           \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #else
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,               \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_RELAXED,               \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #endif
diff --git a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
index 03eb5d1b33057..dd8dafb38c883 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
@@ -13,7 +13,7 @@
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_GENTYPE *Ptr) {                                 \
-    return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);  \
+    return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);  \
   }
 
 __CLC_DEFINE_ATOMIC(global)

>From 5e2939eb1e6ababd7e45ef359dc62893a8f9f555 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Tue, 18 Nov 2025 02:38:49 +0100
Subject: [PATCH 3/4] Revert "change default order to __ATOMIC_RELAXED"

This reverts commit 857b7951eca428ea5e1a5f3fa60109b53a45fd5d.
---
 libclc/opencl/lib/generic/atomic/atomic_def.inc     | 10 +++++-----
 libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc
index 059f00f528425..99fb778a8b342 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_def.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc
@@ -128,14 +128,14 @@ __CLC_DEFINE_ATOMIC()
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr) {                          \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, __ATOMIC_RELAXED,                      \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, __ATOMIC_SEQ_CST,                      \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_RELAXED,                      \
+    __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,                      \
                             __OPENCL_MEMORY_SCOPE_DEVICE);                     \
   }
 #elif defined(__CLC_COMPARE_EXCHANGE)
@@ -143,15 +143,15 @@ __CLC_DEFINE_ATOMIC()
   _CLC_OVERLOAD _CLC_DEF bool __CLC_FUNCTION(                                  \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr,                            \
       ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired) {              \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, __ATOMIC_RELAXED,   \
-                                   __ATOMIC_RELAXED,                           \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Expected, Desired, __ATOMIC_SEQ_CST,   \
+                                   __ATOMIC_SEQ_CST,                           \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #else
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value) {     \
-    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_RELAXED,               \
+    return __CLC_FUNCTION_EXPLICIT(Ptr, Value, __ATOMIC_SEQ_CST,               \
                                    __OPENCL_MEMORY_SCOPE_DEVICE);              \
   }
 #endif
diff --git a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
index dd8dafb38c883..03eb5d1b33057 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
@@ -13,7 +13,7 @@
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_GENTYPE *Ptr) {                                 \
-    return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);  \
+    return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_SEQ_CST, __MEMORY_SCOPE_DEVICE);  \
   }
 
 __CLC_DEFINE_ATOMIC(global)

>From 808f3d8c358ed97706054c878969d71a801edba2 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he at intel.com>
Date: Mon, 24 Nov 2025 08:12:43 +0100
Subject: [PATCH 4/4] remove volatile from CLC functions

---
 libclc/clc/include/clc/atomic/atomic_decl.inc      | 13 ++++++-------
 .../generic/atomic/clc_atomic_compare_exchange.inc |  4 ++--
 libclc/clc/lib/generic/atomic/clc_atomic_def.inc   | 14 ++++++--------
 libclc/opencl/lib/generic/atomic/atom_add.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_and.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl   |  8 ++++----
 libclc/opencl/lib/generic/atomic/atom_dec.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_inc.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_max.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_min.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_or.cl        |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_sub.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_xchg.cl      |  6 +++---
 libclc/opencl/lib/generic/atomic/atom_xor.cl       |  6 +++---
 libclc/opencl/lib/generic/atomic/atomic_add.cl     |  2 +-
 libclc/opencl/lib/generic/atomic/atomic_and.cl     |  2 +-
 libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl |  4 ++--
 .../atomic/atomic_compare_exchange_strong.cl       |  1 +
 .../generic/atomic/atomic_compare_exchange_weak.cl |  1 +
 libclc/opencl/lib/generic/atomic/atomic_def.inc    | 14 +++++++-------
 .../opencl/lib/generic/atomic/atomic_inc_dec.inc   |  3 ++-
 libclc/opencl/lib/generic/atomic/atomic_max.cl     | 12 ++++++------
 libclc/opencl/lib/generic/atomic/atomic_min.cl     | 12 ++++++------
 libclc/opencl/lib/generic/atomic/atomic_or.cl      |  2 +-
 libclc/opencl/lib/generic/atomic/atomic_sub.cl     |  2 +-
 libclc/opencl/lib/generic/atomic/atomic_xchg.cl    |  2 +-
 libclc/opencl/lib/generic/atomic/atomic_xor.cl     |  2 +-
 27 files changed, 79 insertions(+), 79 deletions(-)

diff --git a/libclc/clc/include/clc/atomic/atomic_decl.inc b/libclc/clc/include/clc/atomic/atomic_decl.inc
index 5e0f456e34009..87ebe2b355e69 100644
--- a/libclc/clc/include/clc/atomic/atomic_decl.inc
+++ b/libclc/clc/include/clc/atomic/atomic_decl.inc
@@ -15,24 +15,23 @@
 #ifdef __CLC_NO_VALUE_ARG
 #define __CLC_DECLARE_ATOMIC(ADDRSPACE)                                        \
   _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder,                  \
-      int MemoryScope);
+      ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, int MemoryScope);
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DECLARE_ATOMIC(ADDRSPACE)                                        \
   _CLC_OVERLOAD _CLC_DECL void __CLC_FUNCTION(                                 \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              \
-      int MemoryOrder, int MemoryScope);
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, int MemoryOrder,      \
+      int MemoryScope);
 #elif defined(__CLC_COMPARE_EXCHANGE)
 #define __CLC_DECLARE_ATOMIC(ADDRSPACE)                                        \
   _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,         \
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,                  \
       __CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal,       \
       int MemoryScope);
 #else
 #define __CLC_DECLARE_ATOMIC(ADDRSPACE)                                        \
   _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(                        \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              \
-      int MemoryOrder, int MemoryScope);
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, int MemoryOrder,      \
+      int MemoryScope);
 #endif
 
 __CLC_DECLARE_ATOMIC(global)
diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc b/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
index 74284fd61024c..07ef69d426768 100644
--- a/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
+++ b/libclc/clc/lib/generic/atomic/clc_atomic_compare_exchange.inc
@@ -25,7 +25,7 @@
 
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange(          \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,         \
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,                  \
       __CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal,       \
       int MemoryScope) {                                                       \
     __CLC_U_GENTYPE Comp = __CLC_AS_U_GENTYPE(Comparator);                     \
@@ -39,7 +39,7 @@
 
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_atomic_compare_exchange(          \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,         \
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Comparator,                  \
       __CLC_GENTYPE Value, int MemoryOrderEqual, int MemoryOrderUnequal,       \
       int MemoryScope) {                                                       \
     __scoped_atomic_compare_exchange_n(Ptr, &Comparator, Value, false,         \
diff --git a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
index 75561430b33ad..ab56cc783da00 100644
--- a/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
+++ b/libclc/clc/lib/generic/atomic/clc_atomic_def.inc
@@ -36,32 +36,30 @@
 #ifdef __CLC_NO_VALUE_ARG
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder,                  \
-      int MemoryScope) {                                                       \
+      ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, int MemoryScope) {        \
     return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION(                               \
         (ADDRSPACE __CLC_CASTTYPE *)Ptr, MemoryOrder, MemoryScope));           \
   }
 #elif defined(__CLC_INC_DEC)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder,                  \
-      int MemoryScope) {                                                       \
+      ADDRSPACE __CLC_GENTYPE *Ptr, int MemoryOrder, int MemoryScope) {        \
     return __CLC_IMPL_FUNCTION(Ptr, (__CLC_GENTYPE)1, MemoryOrder,             \
                                MemoryScope);                                   \
   }
 #elif defined(__CLC_RETURN_VOID)
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION(                                  \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              \
-      int MemoryOrder, int MemoryScope) {                                      \
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, int MemoryOrder,      \
+      int MemoryScope) {                                                       \
     __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_CASTTYPE *)Ptr,                       \
                         __CLC_AS_CASTTYPE(Value), MemoryOrder, MemoryScope);   \
   }
 #else
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
-      volatile ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value,              \
-      int MemoryOrder, int MemoryScope) {                                      \
+      ADDRSPACE __CLC_GENTYPE *Ptr, __CLC_GENTYPE Value, int MemoryOrder,      \
+      int MemoryScope) {                                                       \
     return __CLC_AS_RETTYPE(__CLC_IMPL_FUNCTION(                               \
         (ADDRSPACE __CLC_CASTTYPE *)Ptr, __CLC_AS_CASTTYPE(Value),             \
         MemoryOrder, MemoryScope));                                            \
diff --git a/libclc/opencl/lib/generic/atomic/atom_add.cl b/libclc/opencl/lib/generic/atomic/atom_add.cl
index 368bbb790fd88..1628ea39de434 100644
--- a/libclc/opencl/lib/generic/atomic/atom_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_add.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_add(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_add(AS TYPE *p, TYPE val) {                 \
-    return atom_add((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_add(volatile AS TYPE *p, TYPE val) {        \
+    return atom_add((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_and.cl b/libclc/opencl/lib/generic/atomic/atom_and.cl
index ffcc5bffaafac..bad85046c12e5 100644
--- a/libclc/opencl/lib/generic/atomic/atom_and.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_and.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_and(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_and(AS TYPE *p, TYPE val) {                 \
-    return atom_and((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_and(volatile AS TYPE *p, TYPE val) {        \
+    return atom_and((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl b/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
index 2e72ec529c45e..41ae9328e9480 100644
--- a/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_cmpxchg.cl
@@ -12,14 +12,14 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp,      \
-                                           TYPE val) {                         \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(AS TYPE *p, TYPE cmp, TYPE val) {   \
     return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED,        \
                                          __ATOMIC_RELAXED,                     \
                                          __MEMORY_SCOPE_DEVICE);               \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(AS TYPE *p, TYPE cmp, TYPE val) {   \
-    return atom_cmpxchg((volatile AS TYPE *)p, cmp, val);                      \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_cmpxchg(volatile AS TYPE *p, TYPE cmp,      \
+                                           TYPE val) {                         \
+    return atom_cmpxchg((AS TYPE *)p, cmp, val);                               \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_dec.cl b/libclc/opencl/lib/generic/atomic/atom_dec.cl
index a1c7e58ef9e03..1f81f1d37b510 100644
--- a/libclc/opencl/lib/generic/atomic/atom_dec.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_dec.cl
@@ -12,11 +12,11 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) {                  \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(AS TYPE *p) {                           \
     return __clc_atomic_dec(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);       \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(AS TYPE *p) {                           \
-    return atom_dec((volatile AS TYPE *)p);                                    \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_dec(volatile AS TYPE *p) {                  \
+    return atom_dec((AS TYPE *)p);                                             \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_inc.cl b/libclc/opencl/lib/generic/atomic/atom_inc.cl
index f3636d85693b8..5ae5bbb67e791 100644
--- a/libclc/opencl/lib/generic/atomic/atom_inc.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_inc.cl
@@ -12,11 +12,11 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) {                  \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(AS TYPE *p) {                           \
     return __clc_atomic_inc(p, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);       \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(AS TYPE *p) {                           \
-    return atom_inc((volatile AS TYPE *)p);                                    \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_inc(volatile AS TYPE *p) {                  \
+    return atom_inc((AS TYPE *)p);                                             \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_max.cl b/libclc/opencl/lib/generic/atomic/atom_max.cl
index c2095ec36ba1e..249f5efbd4491 100644
--- a/libclc/opencl/lib/generic/atomic/atom_max.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_max.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_max(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_max(AS TYPE *p, TYPE val) {                 \
-    return atom_max((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_max(volatile AS TYPE *p, TYPE val) {        \
+    return atom_max((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_min.cl b/libclc/opencl/lib/generic/atomic/atom_min.cl
index 6360d018d1e90..029c601dd53af 100644
--- a/libclc/opencl/lib/generic/atomic/atom_min.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_min.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_min(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_min(AS TYPE *p, TYPE val) {                 \
-    return atom_min((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_min(volatile AS TYPE *p, TYPE val) {        \
+    return atom_min((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_or.cl b/libclc/opencl/lib/generic/atomic/atom_or.cl
index ad28aa436de8c..91f745bdda61a 100644
--- a/libclc/opencl/lib/generic/atomic/atom_or.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_or.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) {         \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_or(AS TYPE *p, TYPE val) {                  \
     return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED,                     \
                                  __MEMORY_SCOPE_DEVICE);                       \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_or(AS TYPE *p, TYPE val) {                  \
-    return atom_or((volatile AS TYPE *)p, val);                                \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_or(volatile AS TYPE *p, TYPE val) {         \
+    return atom_or((AS TYPE *)p, val);                                         \
   }
 
 #ifdef cl_khr_global_int32_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_sub.cl b/libclc/opencl/lib/generic/atomic/atom_sub.cl
index 9daaa1b3ce154..7eeabd51dad48 100644
--- a/libclc/opencl/lib/generic/atomic/atom_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_sub.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(AS TYPE *p, TYPE val) {                 \
-    return atom_sub((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_sub(volatile AS TYPE *p, TYPE val) {        \
+    return atom_sub((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_xchg.cl b/libclc/opencl/lib/generic/atomic/atom_xchg.cl
index 5b75873f29760..770e4366de834 100644
--- a/libclc/opencl/lib/generic/atomic/atom_xchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_xchg.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) {       \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(AS TYPE *p, TYPE val) {                \
     return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED,                     \
                                  __MEMORY_SCOPE_DEVICE);                       \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(AS TYPE *p, TYPE val) {                \
-    return atom_xchg((volatile AS TYPE *)p, val);                              \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_xchg(volatile AS TYPE *p, TYPE val) {       \
+    return atom_xchg((AS TYPE *)p, val);                                       \
   }
 
 #ifdef cl_khr_global_int32_base_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atom_xor.cl b/libclc/opencl/lib/generic/atomic/atom_xor.cl
index 21aba01267e18..3fcf54e1c8832 100644
--- a/libclc/opencl/lib/generic/atomic/atom_xor.cl
+++ b/libclc/opencl/lib/generic/atomic/atom_xor.cl
@@ -12,12 +12,12 @@
 // Non-volatile overloads are for backward compatibility with OpenCL 1.0.
 
 #define __CLC_IMPL(AS, TYPE)                                                   \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) {        \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(AS TYPE *p, TYPE val) {                 \
     return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED,                    \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }                                                                            \
-  _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(AS TYPE *p, TYPE val) {                 \
-    return atom_xor((volatile AS TYPE *)p, val);                               \
+  _CLC_OVERLOAD _CLC_DEF TYPE atom_xor(volatile AS TYPE *p, TYPE val) {        \
+    return atom_xor((AS TYPE *)p, val);                                        \
   }
 
 #ifdef cl_khr_global_int32_extended_atomics
diff --git a/libclc/opencl/lib/generic/atomic/atomic_add.cl b/libclc/opencl/lib/generic/atomic/atomic_add.cl
index 5501d30544e7c..2f6606eb33338 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_add.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_add.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_add(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_add(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_add((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_and.cl b/libclc/opencl/lib/generic/atomic/atomic_and.cl
index ce1adbb6f8235..3bb1f7a6af273 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_and.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_and.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_and(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_and(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_and((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl b/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
index 16a8db43e9374..135813b8b879c 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_cmpxchg.cl
@@ -12,8 +12,8 @@
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_cmpxchg(volatile AS TYPE *p, TYPE cmp,    \
                                              TYPE val) {                       \
-    return __clc_atomic_compare_exchange(p, cmp, val, __ATOMIC_RELAXED,        \
-                                         __ATOMIC_RELAXED,                     \
+    return __clc_atomic_compare_exchange((AS TYPE *)p, cmp, val,               \
+                                         __ATOMIC_RELAXED, __ATOMIC_RELAXED,   \
                                          __MEMORY_SCOPE_DEVICE);               \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
index c0ca3f8f7d332..f113e82285a02 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_strong.cl
@@ -11,6 +11,7 @@
 #include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_compare_exchange_strong
+#define __CLC_IMPL_FUNCTION __clc_atomic_compare_exchange
 #define __CLC_COMPARE_EXCHANGE
 
 #define __CLC_BODY <atomic_def.inc>
diff --git a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
index 39768fb345714..f5c2899bb18ed 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_compare_exchange_weak.cl
@@ -11,6 +11,7 @@
 #include <clc/opencl/utils.h>
 
 #define __CLC_FUNCTION atomic_compare_exchange_weak
+#define __CLC_IMPL_FUNCTION __clc_atomic_compare_exchange
 #define __CLC_COMPARE_EXCHANGE
 
 #define __CLC_BODY <atomic_def.inc>
diff --git a/libclc/opencl/lib/generic/atomic/atomic_def.inc b/libclc/opencl/lib/generic/atomic/atomic_def.inc
index 99fb778a8b342..d13ec24db087d 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_def.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_def.inc
@@ -32,7 +32,7 @@
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, memory_order Order,        \
       memory_scope Scope) {                                                    \
-    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Order, \
+    return __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_GENTYPE *)Ptr, Order,          \
                                __opencl_get_clang_memory_scope(Scope));        \
   }
 #elif defined(__CLC_RETURN_VOID)
@@ -40,7 +40,7 @@
   _CLC_OVERLOAD _CLC_DEF void __CLC_FUNCTION_EXPLICIT(                         \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
       memory_order Order, memory_scope Scope) {                                \
-    __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value, Order, \
+    __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_GENTYPE *)Ptr, Value, Order,          \
                         __opencl_get_clang_memory_scope(Scope));               \
   }
 #elif defined(__CLC_COMPARE_EXCHANGE)
@@ -50,9 +50,9 @@
       ADDRSPACE __CLC_GENTYPE *Expected, __CLC_GENTYPE Desired,                \
       memory_order Success, memory_order Failure, memory_scope Scope) {        \
     __CLC_GENTYPE Comparator = *Expected;                                      \
-    __CLC_GENTYPE RetValue = __clc_atomic_compare_exchange(                    \
-        (volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Comparator, Desired, Success, \
-        Failure, __opencl_get_clang_memory_scope(Scope));                      \
+    __CLC_GENTYPE RetValue = __CLC_IMPL_FUNCTION(                              \
+        (ADDRSPACE __CLC_GENTYPE *)Ptr, Comparator, Desired, Success, Failure, \
+        __opencl_get_clang_memory_scope(Scope));                               \
     if (Comparator != RetValue) {                                              \
       *Expected = RetValue;                                                    \
       return false;                                                            \
@@ -64,8 +64,8 @@
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION_EXPLICIT(                \
       volatile ADDRSPACE __CLC_ATOMIC_GENTYPE *Ptr, __CLC_GENTYPE Value,       \
       memory_order Order, memory_scope Scope) {                                \
-    return __CLC_IMPL_FUNCTION((volatile ADDRSPACE __CLC_GENTYPE *)Ptr, Value, \
-                               Order, __opencl_get_clang_memory_scope(Scope)); \
+    return __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_GENTYPE *)Ptr, Value, Order,   \
+                               __opencl_get_clang_memory_scope(Scope));        \
   }
 #endif
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
index dd8dafb38c883..fe517ef7a7de7 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
+++ b/libclc/opencl/lib/generic/atomic/atomic_inc_dec.inc
@@ -13,7 +13,8 @@
 #define __CLC_DEFINE_ATOMIC(ADDRSPACE)                                         \
   _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __CLC_FUNCTION(                         \
       volatile ADDRSPACE __CLC_GENTYPE *Ptr) {                                 \
-    return __CLC_IMPL_FUNCTION(Ptr, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);  \
+    return __CLC_IMPL_FUNCTION((ADDRSPACE __CLC_GENTYPE *)Ptr,                 \
+                               __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);       \
   }
 
 __CLC_DEFINE_ATOMIC(global)
diff --git a/libclc/opencl/lib/generic/atomic/atomic_max.cl b/libclc/opencl/lib/generic/atomic/atomic_max.cl
index 362a0ed90ca0e..fe0efdbb55c82 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_max.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_max.cl
@@ -9,14 +9,14 @@
 #include <clc/atomic/clc_atomic_fetch_max.h>
 #include <clc/opencl/atomic/atomic_max.h>
 
-#define __CLC_IMPL(TYPE, AS, OP)                                               \
+#define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_max(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_max(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_max((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 
-__CLC_IMPL(int, global, max)
-__CLC_IMPL(unsigned int, global, umax)
-__CLC_IMPL(int, local, max)
-__CLC_IMPL(unsigned int, local, umax)
+__CLC_IMPL(int, global)
+__CLC_IMPL(unsigned int, global)
+__CLC_IMPL(int, local)
+__CLC_IMPL(unsigned int, local)
 #undef __CLC_IMPL
diff --git a/libclc/opencl/lib/generic/atomic/atomic_min.cl b/libclc/opencl/lib/generic/atomic/atomic_min.cl
index 1976be0014d70..6a4586b5d26b6 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_min.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_min.cl
@@ -9,14 +9,14 @@
 #include <clc/atomic/clc_atomic_fetch_min.h>
 #include <clc/opencl/atomic/atomic_min.h>
 
-#define __CLC_IMPL(TYPE, AS, OP)                                               \
+#define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_min(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_min(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_min((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 
-__CLC_IMPL(int, global, min)
-__CLC_IMPL(unsigned int, global, umin)
-__CLC_IMPL(int, local, min)
-__CLC_IMPL(unsigned int, local, umin)
+__CLC_IMPL(int, global)
+__CLC_IMPL(unsigned int, global)
+__CLC_IMPL(int, local)
+__CLC_IMPL(unsigned int, local)
 #undef __CLC_IMPL
diff --git a/libclc/opencl/lib/generic/atomic/atomic_or.cl b/libclc/opencl/lib/generic/atomic/atomic_or.cl
index ef8bc00f45593..1720b66b87fff 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_or.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_or.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_or(volatile AS TYPE *p, TYPE val) {       \
-    return __clc_atomic_fetch_or(p, val, __ATOMIC_RELAXED,                     \
+    return __clc_atomic_fetch_or((AS TYPE *)p, val, __ATOMIC_RELAXED,          \
                                  __MEMORY_SCOPE_DEVICE);                       \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_sub.cl b/libclc/opencl/lib/generic/atomic/atomic_sub.cl
index 397737d113c0d..ddb7cc29d3bb0 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_sub.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_sub.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_sub(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_sub(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_sub((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_xchg.cl b/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
index 2b4bbf06d9400..8e0b87cc9343c 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_xchg.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_xchg(volatile AS TYPE *p, TYPE val) {     \
-    return __clc_atomic_exchange(p, val, __ATOMIC_RELAXED,                     \
+    return __clc_atomic_exchange((AS TYPE *)p, val, __ATOMIC_RELAXED,          \
                                  __MEMORY_SCOPE_DEVICE);                       \
   }
 
diff --git a/libclc/opencl/lib/generic/atomic/atomic_xor.cl b/libclc/opencl/lib/generic/atomic/atomic_xor.cl
index 1f200c58edbff..46dc6c3e9111f 100644
--- a/libclc/opencl/lib/generic/atomic/atomic_xor.cl
+++ b/libclc/opencl/lib/generic/atomic/atomic_xor.cl
@@ -11,7 +11,7 @@
 
 #define __CLC_IMPL(TYPE, AS)                                                   \
   _CLC_OVERLOAD _CLC_DEF TYPE atomic_xor(volatile AS TYPE *p, TYPE val) {      \
-    return __clc_atomic_fetch_xor(p, val, __ATOMIC_RELAXED,                    \
+    return __clc_atomic_fetch_xor((AS TYPE *)p, val, __ATOMIC_RELAXED,         \
                                   __MEMORY_SCOPE_DEVICE);                      \
   }
 



More information about the cfe-commits mailing list