[libclc] libclc: Move subgroup functions into clc (PR #185220)

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Sun Mar 8 00:13:18 PST 2026


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/185220

>From 363d14cf976ccc9ea844297c15cab370ba3f936c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 7 Mar 2026 19:14:18 +0100
Subject: [PATCH] libclc: Move subgroup functions into clc

It turns out there was a generic implementation of the id and sizes.
The practice of splitting every single function into its own file is
kind of a pain here, so introduce a utility header for amdgpu.
---
 libclc/clc/include/clc/amdgpu/amdgpu_utils.h  | 27 +++++++++
 .../clc/include/clc/subgroup/clc_subgroup.h   | 23 +++++++
 libclc/clc/lib/amdgcn/SOURCES                 |  4 ++
 libclc/clc/lib/amdgcn/subgroup/subgroup.cl    | 28 +++++++++
 .../workitem/clc_get_max_sub_group_size.cl    |  7 ++-
 .../amdgcn/workitem/clc_get_num_sub_groups.cl | 16 +++++
 .../amdgcn/workitem/clc_get_sub_group_id.cl   | 15 +++++
 .../amdgcn/workitem/clc_get_sub_group_size.cl | 18 ++++++
 libclc/opencl/lib/amdgcn/SOURCES              |  1 -
 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl | 60 -------------------
 libclc/opencl/lib/generic/SOURCES             |  1 +
 .../opencl/lib/generic/subgroup/subgroup.cl   | 41 +++++++++++++
 12 files changed, 178 insertions(+), 63 deletions(-)
 create mode 100644 libclc/clc/include/clc/amdgpu/amdgpu_utils.h
 create mode 100644 libclc/clc/include/clc/subgroup/clc_subgroup.h
 create mode 100644 libclc/clc/lib/amdgcn/subgroup/subgroup.cl
 create mode 100644 libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl
 create mode 100644 libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl
 create mode 100644 libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl
 delete mode 100644 libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
 create mode 100644 libclc/opencl/lib/generic/subgroup/subgroup.cl

diff --git a/libclc/clc/include/clc/amdgpu/amdgpu_utils.h b/libclc/clc/include/clc/amdgpu/amdgpu_utils.h
new file mode 100644
index 0000000000000..40c5d770c7bde
--- /dev/null
+++ b/libclc/clc/include/clc/amdgpu/amdgpu_utils.h
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/integer/clc_mul24.h"
+#include "clc/workitem/clc_get_enqueued_local_size.h"
+#include "clc/workitem/clc_get_local_size.h"
+
+static inline uint __clc_amdgpu_workgroup_size() {
+  return __clc_mul24((uint)__clc_get_local_size(2),
+                     __clc_mul24((uint)__clc_get_local_size(1),
+                                 (uint)__clc_get_local_size(0)));
+}
+
+static inline uint __clc_amdgpu_enqueued_workgroup_size() {
+  return __clc_mul24((uint)__clc_get_enqueued_local_size(2),
+                     __clc_mul24((uint)__clc_get_enqueued_local_size(1),
+                                 (uint)__clc_get_enqueued_local_size(0)));
+}
+
+static inline uint __clc_amdgpu_wavesize_log2() {
+  return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5;
+}
diff --git a/libclc/clc/include/clc/subgroup/clc_subgroup.h b/libclc/clc/include/clc/subgroup/clc_subgroup.h
new file mode 100644
index 0000000000000..f0a2a11d48445
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_subgroup.h
@@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__
+#define __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__
+
+#include "clc/internal/clc.h"
+
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_max_sub_group_size(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x);
+_CLC_DECL _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x);
+
+#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_SUBGROUP_H__
diff --git a/libclc/clc/lib/amdgcn/SOURCES b/libclc/clc/lib/amdgcn/SOURCES
index a280461b1664a..a0b6c168b207e 100644
--- a/libclc/clc/lib/amdgcn/SOURCES
+++ b/libclc/clc/lib/amdgcn/SOURCES
@@ -1,6 +1,7 @@
 address_space/qualifier.cl
 math/clc_ldexp.cl
 mem_fence/clc_mem_fence.cl
+subgroup/subgroup.cl
 subgroup/sub_group_broadcast.cl
 synchronization/clc_sub_group_barrier.cl
 synchronization/clc_work_group_barrier.cl
@@ -12,4 +13,7 @@ workitem/clc_get_local_id.cl
 workitem/clc_get_local_size.cl
 workitem/clc_get_max_sub_group_size.cl
 workitem/clc_get_num_groups.cl
+workitem/clc_get_num_sub_groups.cl
+workitem/clc_get_sub_group_id.cl
+workitem/clc_get_sub_group_size.cl
 workitem/clc_get_work_dim.cl
diff --git a/libclc/clc/lib/amdgcn/subgroup/subgroup.cl b/libclc/clc/lib/amdgcn/subgroup/subgroup.cl
new file mode 100644
index 0000000000000..71f4abc42e895
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/subgroup/subgroup.cl
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/subgroup/clc_subgroup.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_enqueued_num_sub_groups(void) {
+  return (__clc_amdgpu_enqueued_workgroup_size() +
+          __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_local_id(void) {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_all(int x) {
+  return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int __clc_sub_group_any(int x) {
+  return __builtin_amdgcn_ballot_w64(x) != 0;
+}
diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl
index cc56f8d9c325d..7df7f21d9098f 100644
--- a/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_max_sub_group_size.cl
@@ -6,8 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <clc/workitem/clc_get_max_sub_group_size.h>
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/shared/clc_min.h"
+#include "clc/workitem/clc_get_max_sub_group_size.h"
 
 _CLC_OVERLOAD _CLC_DEF uint __clc_get_max_sub_group_size() {
-  return __builtin_amdgcn_wavefrontsize();
+  return __clc_min(__builtin_amdgcn_wavefrontsize(),
+                   __clc_amdgpu_enqueued_workgroup_size());
 }
diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl
new file mode 100644
index 0000000000000..cb71ef282466b
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_num_sub_groups.cl
@@ -0,0 +1,16 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/subgroup/clc_subgroup.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_num_sub_groups(void) {
+  uint group_size = __clc_amdgpu_workgroup_size();
+  return (group_size + __builtin_amdgcn_wavefrontsize() - 1) >>
+         __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl
new file mode 100644
index 0000000000000..ba3baf98bda14
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_id.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/workitem/clc_get_local_linear_id.h"
+#include "clc/workitem/clc_get_sub_group_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_id(void) {
+  return (uint)__clc_get_local_linear_id() >> __clc_amdgpu_wavesize_log2();
+}
diff --git a/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl
new file mode 100644
index 0000000000000..77c9f8e91d8ee
--- /dev/null
+++ b/libclc/clc/lib/amdgcn/workitem/clc_get_sub_group_size.cl
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/amdgpu/amdgpu_utils.h"
+#include "clc/shared/clc_min.h"
+#include "clc/workitem/clc_get_local_linear_id.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint __clc_get_sub_group_size(void) {
+  uint wavesize = __builtin_amdgcn_wavefrontsize();
+  uint lid = (uint)__clc_get_local_linear_id();
+  return __clc_min(wavesize,
+                   __clc_amdgpu_workgroup_size() - (lid & ~(wavesize - 1)));
+}
diff --git a/libclc/opencl/lib/amdgcn/SOURCES b/libclc/opencl/lib/amdgcn/SOURCES
index 7010953d28100..78877425504d6 100644
--- a/libclc/opencl/lib/amdgcn/SOURCES
+++ b/libclc/opencl/lib/amdgcn/SOURCES
@@ -1,3 +1,2 @@
 async/wait_group_events.cl
 printf/__printf_alloc.cl
-subgroup/subgroup.cl
diff --git a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl b/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
deleted file mode 100644
index d67d84e763b4f..0000000000000
--- a/libclc/opencl/lib/amdgcn/subgroup/subgroup.cl
+++ /dev/null
@@ -1,60 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <clc/opencl/opencl-base.h>
-
-static uint wavesize_log2() {
-  return __builtin_amdgcn_wavefrontsize() == 64 ? 6 : 5;
-}
-
-static uint workgroup_size() {
-  return mul24((uint)get_local_size(2),
-               mul24((uint)get_local_size(1), (uint)get_local_size(0)));
-}
-
-static uint enqueued_workgroup_size() {
-  return mul24((uint)get_enqueued_local_size(2),
-               mul24((uint)get_enqueued_local_size(1),
-                     (uint)get_enqueued_local_size(0)));
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
-  uint wavesize = __builtin_amdgcn_wavefrontsize();
-  uint lid = (uint)get_local_linear_id();
-  return min(wavesize, workgroup_size() - (lid & ~(wavesize - 1)));
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
-  return min(__builtin_amdgcn_wavefrontsize(), enqueued_workgroup_size());
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
-  return (workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
-         wavesize_log2();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
-  return (enqueued_workgroup_size() + __builtin_amdgcn_wavefrontsize() - 1) >>
-         wavesize_log2();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
-  return (uint)get_local_linear_id() >> wavesize_log2();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
-  return __builtin_amdgcn_ballot_w64(x) == __builtin_amdgcn_read_exec();
-}
-
-_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) {
-  return __builtin_amdgcn_ballot_w64(x) != 0;
-}
diff --git a/libclc/opencl/lib/generic/SOURCES b/libclc/opencl/lib/generic/SOURCES
index 8e2df4a3e910a..f735c66548c30 100644
--- a/libclc/opencl/lib/generic/SOURCES
+++ b/libclc/opencl/lib/generic/SOURCES
@@ -201,6 +201,7 @@ shared/max.cl
 shared/min.cl
 shared/vload.cl
 shared/vstore.cl
+subgroup/subgroup.cl
 subgroup/sub_group_broadcast.cl
 synchronization/sub_group_barrier.cl
 synchronization/work_group_barrier.cl
diff --git a/libclc/opencl/lib/generic/subgroup/subgroup.cl b/libclc/opencl/lib/generic/subgroup/subgroup.cl
new file mode 100644
index 0000000000000..fd552ada4afaf
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/subgroup.cl
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/clc_subgroup.h"
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_size(void) {
+  return __clc_get_sub_group_size();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_max_sub_group_size(void) {
+  return __clc_get_max_sub_group_size();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_num_sub_groups(void) {
+  return __clc_get_num_sub_groups();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_enqueued_num_sub_groups(void) {
+  return __clc_get_enqueued_num_sub_groups();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_id(void) {
+  return __clc_get_sub_group_id();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST uint get_sub_group_local_id(void) {
+  return __clc_get_sub_group_local_id();
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_all(int x) {
+  return __clc_sub_group_all(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD _CLC_CONST int sub_group_any(int x) {
+  return __clc_sub_group_any(x);
+}



More information about the cfe-commits mailing list