[libclc] 1a32a41 - libclc: Add subgroup scan functions (#188828)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 27 01:37:31 PDT 2026
Author: Matt Arsenault
Date: 2026-03-27T09:37:27+01:00
New Revision: 1a32a4185b84bba30474305cd915d1fbcfa1a352
URL: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352
DIFF: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352.diff
LOG: libclc: Add subgroup scan functions (#188828)
Add the base implementation using ds_swizzle which should work
on all subtargets. There are at least 2 more paths available for
newer targets.
Added:
libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc
Modified:
libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
libclc/clc/lib/amdgpu/CMakeLists.txt
libclc/opencl/lib/generic/CMakeLists.txt
Removed:
################################################################################
diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
index d61f57860fe5b..e9fb566c54ef8 100644
--- a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
@@ -19,4 +19,6 @@
#define __CLC_BODY "clc/subgroup/clc_subgroup_broadcast.inc"
#include "clc/math/gentype.inc"
+#undef __CLC_FUNCTION
+
#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__
diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
new file mode 100644
index 0000000000000..a849ffa792758
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__
+#define __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__
+
+#include "clc/internal/clc.h"
+
+#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc"
+
+#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__
diff --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
new file mode 100644
index 0000000000000..924da27782f32
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifdef __CLC_SCALAR // Scan entry points are declared for scalar gentypes only.
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_add(__CLC_GENTYPE x); // Inclusive scan, add.
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_min(__CLC_GENTYPE x); // Inclusive scan, min.
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_max(__CLC_GENTYPE x); // Inclusive scan, max.
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_add(__CLC_GENTYPE x); // Exclusive scan, add.
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_min(__CLC_GENTYPE x); // Exclusive scan, min.
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_max(__CLC_GENTYPE x); // Exclusive scan, max.
+#endif
diff --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt
index ea79b2294d991..186b6ab0b85b1 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -30,6 +30,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
subgroup/clc_subgroup.cl
subgroup/clc_sub_group_broadcast.cl
subgroup/clc_sub_group_reduce.cl
+ subgroup/clc_sub_group_scan.cl
synchronization/clc_sub_group_barrier.cl
synchronization/clc_work_group_barrier.cl
workitem/clc_get_enqueued_local_size.cl
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
new file mode 100644
index 0000000000000..2455b0698aae1
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR)
+
+#if (defined(__CLC_GENSIZE) && __CLC_GENSIZE <= 32) || \
+ defined(__CLC_FPSIZE) && __CLC_FPSIZE <= 32
+
+#define COERCE_FUNC(func, mask) /* ds_swizzle wrapper for <= 32-bit types */ \
+ static _CLC_OVERLOAD __CLC_GENTYPE func(__CLC_GENTYPE x) { \
+ __CLC_U_GENTYPE bitcast = __CLC_AS_U_GENTYPE(x); /* view as unsigned */ \
+ uint ext = __clc_convert_uint(bitcast); /* widen to uint for the builtin */ \
+ uint swizzle = __builtin_amdgcn_ds_swizzle(ext, mask); /* cross-lane read */ \
+ __CLC_U_GENTYPE trunc = __CLC_CONVERT_U_GENTYPE(swizzle); /* narrow back */ \
+ return __CLC_AS_GENTYPE(trunc); /* restore the original type */ \
+ }
+
+//------------------------------------------------------------------------------
+// Swizzle masks used in inclusive scan
+//------------------------------------------------------------------------------
+
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx2_lane0, SWIZZLE_BCASTX2_LANE0);
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx4_lane1, SWIZZLE_BCASTX4_LANE1);
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx8_lane3, SWIZZLE_BCASTX8_LANE3);
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx16_lane7, SWIZZLE_BCASTX16_LANE7);
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx32_lane15, SWIZZLE_BCASTX32_LANE15);
+
+//------------------------------------------------------------------------------
+// Swizzle masks used in exclusive scan adjustment
+//------------------------------------------------------------------------------
+
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1,
+ SWIZZLE_QUAD_PERM(0, 0, 1, 2));
+
+#elif defined(__CLC_GENSIZE) && __CLC_GENSIZE == 64 || \
+ defined(__CLC_FPSIZE) && __CLC_FPSIZE == 64
+
+#define SPLIT_FUNC_64(func_name) /* 64-bit overload built from two uint calls */ \
+ static _CLC_OVERLOAD __CLC_GENTYPE func_name(__CLC_GENTYPE x) { \
+ uint2 vec = __clc_as_uint2(x); /* split into two uint halves */ \
+ uint2 r = {func_name(vec.lo), func_name(vec.hi)}; /* uint overload per half */ \
+ return __CLC_AS_GENTYPE(r); /* reassemble as the original type */ \
+ }
+
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx2_lane0)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx4_lane1)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx8_lane3)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx16_lane7)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx32_lane15)
+
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1)
+
+#endif
+
+#endif // __CLC_SCALAR
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
new file mode 100644
index 0000000000000..3ef735aac2aae
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/clc_convert.h"
+#include "clc/math/clc_fmax.h"
+#include "clc/math/clc_fmin.h"
+#include "clc/shared/clc_max.h"
+#include "clc/shared/clc_min.h"
+#include "clc/subgroup/clc_sub_group_broadcast.h"
+#include "clc/subgroup/clc_sub_group_scan.h"
+#include "clc/subgroup/clc_subgroup.h"
+
+#define QUAD_PERM (1 << 15) // Offset bit 15 selects quad-permute mode.
+
+// Quad-permute mode: within each group of 4 consecutive lanes, lane i reads
+// from the lane selected by Si, so any intra-quad data sharing is expressible.
+#define SWIZZLE_QUAD_PERM(S0, S1, S2, S3) \
+ ((uint)(QUAD_PERM | ((S3) << 6) | ((S2) << 4) | ((S1) << 2) | (S0)))
+
+#define SWIZZLE_PAIRWISE(XOR_MASK, OR_MASK, AND_MASK) /* three 5-bit masks */ \
+ ((uint)(((XOR_MASK) << 10) | ((OR_MASK) << 5) | (AND_MASK)))
+
+#define SWIZZLE_BCASTX2_LANE0 SWIZZLE_PAIRWISE(0x00, 0x00, 0x1e) // lane 0 of 2
+#define SWIZZLE_BCASTX4_LANE1 SWIZZLE_PAIRWISE(0x00, 0x01, 0x1c) // lane 1 of 4
+#define SWIZZLE_BCASTX8_LANE3 SWIZZLE_PAIRWISE(0x00, 0x03, 0x18) // lane 3 of 8
+#define SWIZZLE_BCASTX16_LANE7 SWIZZLE_PAIRWISE(0x00, 0x07, 0x10) // lane 7 of 16
+#define SWIZZLE_BCASTX32_LANE15 SWIZZLE_PAIRWISE(0x00, 0x0f, 0x00) // lane 15 of 32
+
+#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc"
+#include "clc/math/gentype.inc"
+
+//------------------------------------------------------------------------------
+// Integer and fp add
+//------------------------------------------------------------------------------
+
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_add
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_add
+#define __CLC_FUNCTION_IMPL(x, y) ((x) + (y)) // Binary op combined by the scan.
+#define __CLC_SUBGROUP_SCAN_ID_VAL (__CLC_GENTYPE)0 // Identity of addition.
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc" // Instantiate for the integer types.
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc" // Instantiate for the floating-point types.
+
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_SUBGROUP_SCAN_ID_VAL
+
+//------------------------------------------------------------------------------
+// Integer and fp min
+//------------------------------------------------------------------------------
+
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_min
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_min
+#define __CLC_FUNCTION_IMPL(x, y) __clc_min(x, y) // Binary op combined by the scan.
+#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MAX // Identity of min: the type's maximum.
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc" // Instantiate for the integer types.
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc" // Instantiate for the floating-point types.
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_SUBGROUP_SCAN_ID_VAL
+
+//------------------------------------------------------------------------------
+// Integer and fp max
+//------------------------------------------------------------------------------
+
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_max
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_max
+#define __CLC_FUNCTION_IMPL(x, y) __clc_max(x, y) // Binary op combined by the scan.
+#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MIN // Identity of max: the type's minimum.
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc" // Instantiate for the integer types.
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc" // Instantiate for the floating-point types.
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_SUBGROUP_SCAN_ID_VAL
diff --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
new file mode 100644
index 0000000000000..af0b3a30a0bfd
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR)
+
+#if defined(__CLC_GEN_S) // Signed: all-ones unsigned >> 1; no overflowing shift.
+#define __CLC_GEN_MAX (__CLC_GENTYPE)((__CLC_U_GENTYPE)~(__CLC_U_GENTYPE)0 >> 1)
+#define __CLC_GEN_MIN (__CLC_GENTYPE)(-__CLC_GEN_MAX - 1)
+#elif defined(__CLC_GEN_U) // Unsigned: all-ones; never shift by the full width.
+#define __CLC_GEN_MAX (__CLC_GENTYPE)~(__CLC_GENTYPE)0
+#define __CLC_GEN_MIN (__CLC_GENTYPE)0
+#elif defined(__CLC_FPSIZE) // Floating point: the infinities are the identities.
+#define __CLC_GEN_MIN -INFINITY
+#define __CLC_GEN_MAX INFINITY
+#endif
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_INCLUSIVE(__CLC_GENTYPE x) { // Inclusive scan of x over the subgroup.
+ uint l = __clc_get_sub_group_local_id(); // This lane's index in the subgroup.
+
+ __CLC_GENTYPE v = __clc_amdgpu_ds_swizzle_bcastx2_lane0(x); // Lane 0 of each pair.
+ v = (l & 1) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only odd lanes combine it.
+ __CLC_GENTYPE s = __CLC_FUNCTION_IMPL(x, v); // Scan now complete per pair.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx4_lane1(s); // Result of each quad's low pair.
+ v = (l & 2) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only the upper pair combines it.
+ s = __CLC_FUNCTION_IMPL(s, v); // Scan now complete per quad.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx8_lane3(s); // Result of the low quad of each 8.
+ v = (l & 4) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only the upper quad combines it.
+ s = __CLC_FUNCTION_IMPL(s, v); // Scan now complete per 8 lanes.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx16_lane7(s); // Result of the low 8 of each 16.
+ v = (l & 8) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only the upper 8 combine it.
+ s = __CLC_FUNCTION_IMPL(s, v); // Scan now complete per 16 lanes.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx32_lane15(s); // Result of the low 16 of each 32.
+ v = (l & 16) ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only the upper 16 combine it.
+ s = __CLC_FUNCTION_IMPL(s, v); // Scan now complete per 32 lanes.
+
+ if (__builtin_amdgcn_wavefrontsize() == 64) { // Join the two 32-lane halves.
+ v = __clc_sub_group_broadcast(s, 31); // Result covering lanes 0..31.
+ v = l > 31 ? v : __CLC_SUBGROUP_SCAN_ID_VAL; // Only lanes above 31 combine it.
+ s = __CLC_FUNCTION_IMPL(s, v);
+ }
+
+ return s;
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_EXCLUSIVE(__CLC_GENTYPE x) { // Exclusive scan: lane l gets the inclusive result of lane l - 1.
+ __CLC_GENTYPE s = __CLC_FUNCTION_INCLUSIVE(x);
+ __CLC_GENTYPE t = s; // Inclusive results; the source of every shift below.
+
+ s = __clc_amdgpu_ds_swizzle_quad_perm_shift_right1(t); // Correct for lanes 1..3 of each quad.
+
+ __CLC_GENTYPE v = __clc_amdgpu_ds_swizzle_bcastx8_lane3(t);
+
+ uint l = __clc_get_sub_group_local_id();
+
+ s = ((l & 0x7) == 0x4) ? v : s; // Lane 4 of each 8 takes lane 3.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx16_lane7(t);
+ s = ((l & 0xf) == 0x8) ? v : s; // Lane 8 of each 16 takes lane 7.
+
+ v = __clc_amdgpu_ds_swizzle_bcastx32_lane15(t);
+ s = ((l & 0x1f) == 0x10) ? v : s; // Lane 16 of each 32 takes lane 15.
+
+ if (__builtin_amdgcn_wavefrontsize() == 64) {
+ v = __clc_sub_group_broadcast(t, 31);
+ s = (l == 32) ? v : s; // Lane 32 takes lane 31 across the 32-lane boundary.
+ }
+
+ return (l == 0) ? __CLC_SUBGROUP_SCAN_ID_VAL : s; // Lane 0 has no predecessor.
+}
+
+#undef __CLC_GEN_MIN
+#undef __CLC_GEN_MAX
+
+#endif // __CLC_SCALAR
diff --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt
index f30af80e9c65e..1b8beb57e34de 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -207,6 +207,8 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
shared/vstore.cl
subgroup/sub_group_broadcast.cl
subgroup/sub_group_reduce.cl
+ subgroup/sub_group_scan_exclusive.cl
+ subgroup/sub_group_scan_inclusive.cl
subgroup/subgroup.cl
synchronization/sub_group_barrier.cl
synchronization/work_group_barrier.cl
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
new file mode 100644
index 0000000000000..8825a838f3392
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/clc_sub_group_scan.h"
+
+#define __CLC_BODY "sub_group_scan_exclusive.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "sub_group_scan_exclusive.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
new file mode 100644
index 0000000000000..243637d8d824f
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR) && /* scalar types only */ \
+ ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) || \
+ defined(cl_khr_subgroup_extended_types)) // Other sizes need the extension.
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_add(__CLC_GENTYPE x) { // Exclusive add scan.
+ return __clc_sub_group_scan_exclusive_add(x); // Forward to the CLC layer.
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_min(__CLC_GENTYPE x) { // Exclusive min scan.
+ return __clc_sub_group_scan_exclusive_min(x); // Forward to the CLC layer.
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_max(__CLC_GENTYPE x) { // Exclusive max scan.
+ return __clc_sub_group_scan_exclusive_max(x); // Forward to the CLC layer.
+}
+
+#endif
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
new file mode 100644
index 0000000000000..15e8120443c61
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/clc_sub_group_scan.h"
+
+#define __CLC_BODY "sub_group_scan_inclusive.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "sub_group_scan_inclusive.inc"
+#include "clc/math/gentype.inc"
diff --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc
new file mode 100644
index 0000000000000..7f0ad6d01b153
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR) && /* scalar types only */ \
+ ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) || \
+ defined(cl_khr_subgroup_extended_types)) // Other sizes need the extension.
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_add(__CLC_GENTYPE x) { // Inclusive add scan.
+ return __clc_sub_group_scan_inclusive_add(x); // Forward to the CLC layer.
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_min(__CLC_GENTYPE x) { // Inclusive min scan.
+ return __clc_sub_group_scan_inclusive_min(x); // Forward to the CLC layer.
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_max(__CLC_GENTYPE x) { // Inclusive max scan.
+ return __clc_sub_group_scan_inclusive_max(x); // Forward to the CLC layer.
+}
+
+#endif
More information about the cfe-commits
mailing list