[libclc] Refine __clc_get_sub_group_size with fast full sub-group path (PR #163149)
Wenju He via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 19 19:15:53 PDT 2025
================
@@ -13,14 +13,11 @@
#include <clc/workitem/clc_get_sub_group_size.h>
_CLC_OVERLOAD _CLC_DEF uint __clc_get_sub_group_size() {
- if (__clc_get_sub_group_id() != __clc_get_num_sub_groups() - 1) {
- return __clc_get_max_sub_group_size();
- }
- size_t size_x = __clc_get_local_size(0);
- size_t size_y = __clc_get_local_size(1);
- size_t size_z = __clc_get_local_size(2);
- size_t linear_size = size_z * size_y * size_x;
- size_t uniform_groups = __clc_get_num_sub_groups() - 1;
- size_t uniform_size = __clc_get_max_sub_group_size() * uniform_groups;
- return linear_size - uniform_size;
+ size_t linear_size = __clc_get_local_size(0) * __clc_get_local_size(1) *
+ __clc_get_local_size(2);
+ uint remainder = linear_size % __clc_get_max_sub_group_size();
----------------
wenju-he wrote:
kindly ping
https://github.com/llvm/llvm-project/pull/163149
More information about the cfe-commits
mailing list