[llvm-branch-commits] [libclc] libclc: Add amdgpu subgroup functions (PR #184845)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Mar 5 15:06:35 PST 2026


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/184845

>From 1813143a1403ec4d0175f4e163e658a593713e3a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 5 Mar 2026 15:53:22 +0100
Subject: [PATCH] libclc: Add amdgpu subgroup functions

---
 libclc/opencl/lib/amdgcn/SOURCES              |  1 +
 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl | 74 +++++++++++++++++++
 2 files changed, 75 insertions(+)
 create mode 100644 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl

diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 0522e13f5d3db..d73fc1c666af5 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,4 +1,5 @@
 mem_fence/fence.cl
+subgroup/subgroup.cl
 synchronization/barrier.cl
 workitem/get_global_offset.cl
 workitem/get_group_id.cl
diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
new file mode 100644
index 0000000000000..d56a873407711
--- /dev/null
+++ b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/opencl/opencl-base.h>
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+static uint wavesize_log2() {
+  return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5;
+}
+
+static uint workgroup_size() {
+  return mul24((uint)get_local_size(2),
+               mul24((uint)get_local_size(1), (uint)get_local_size(0)));
+}
+
+static uint enqueued_workgroup_size() {
+  return mul24((uint)get_enqueued_local_size(2),
+               mul24((uint)get_enqueued_local_size(1),
+                     (uint)get_enqueued_local_size(0)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  uint wavesize = __builtin_amdgcn_wavefrontsize();
+  uint lid = (uint)get_local_linear_id();
+  return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size());
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+         wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+         wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return (uint)get_local_linear_id() >> wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags,
+                                              memory_scope scope) {
+  __builtin_amdgcn_wave_barrier();
+
+  if (flags)
+    atomic_work_item_fence(flags, memory_order_acq_rel, scope);
+}
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags) {
+  sub_group_barrier(flags, memory_scope_sub_group);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
+  return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) {
+  return __builtin_amdgcn_ballot_w64(x) != 0;
+}



More information about the llvm-branch-commits mailing list