[llvm-branch-commits] [libclc] libclc: Add amdgpu subgroup functions (PR #184845)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Mar 5 15:06:35 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/184845
>From 1813143a1403ec4d0175f4e163e658a593713e3a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 5 Mar 2026 15:53:22 +0100
Subject: [PATCH] libclc: Add amdgpu subgroup functions
---
libclc/opencl/lib/amdgcn/SOURCES | 1 +
libclc/opencl/lib/amdgcn/subgroup/subgroup.cl | 74 +++++++++++++++++++
2 files changed, 75 insertions(+)
create mode 100644 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 0522e13f5d3db..d73fc1c666af5 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,4 +1,5 @@
mem_fence/fence.cl
+subgroup/subgroup.cl
synchronization/barrier.cl
workitem/get_global_offset.cl
workitem/get_group_id.cl
diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
new file mode 100644
index 0000000000000..d56a873407711
--- /dev/null
+++ b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/opencl/opencl-base.h>
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+static uint wavesize_log2() {
+ return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5;
+}
+
+static uint workgroup_size() {
+ return mul24((uint)get_local_size(2),
+ mul24((uint)get_local_size(1), (uint)get_local_size(0)));
+}
+
+static uint enqueued_workgroup_size() {
+ return mul24((uint)get_enqueued_local_size(2),
+ mul24((uint)get_enqueued_local_size(1),
+ (uint)get_enqueued_local_size(0)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+ uint wavesize = __builtin_amdgcn_wavefrontsize();
+ uint lid = (uint)get_local_linear_id();
+ return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1)));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+ return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size());
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+ return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+ wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+ return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
+ wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+ return (uint)get_local_linear_id() >> wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags,
+ memory_scope scope) {
+ __builtin_amdgcn_wave_barrier();
+
+ if (flags)
+ atomic_work_item_fence(flags, memory_order_acq_rel, scope);
+}
+
+_CLC_DEF _CLC_OVERLOAD void sub_group_barrier(cl_mem_fence_flags flags) {
+ sub_group_barrier(flags, memory_scope_sub_group);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
+ return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) {
+ return __builtin_amdgcn_ballot_w64(x) != 0;
+}
More information about the llvm-branch-commits
mailing list