[libclc] 1a32a41 - libclc: Add subgroup scan functions (#188828)

via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 27 01:37:31 PDT 2026


Author: Matt Arsenault
Date: 2026-03-27T09:37:27+01:00
New Revision: 1a32a4185b84bba30474305cd915d1fbcfa1a352

URL: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352
DIFF: https://github.com/llvm/llvm-project/commit/1a32a4185b84bba30474305cd915d1fbcfa1a352.diff

LOG: libclc: Add subgroup scan functions (#188828)

Add the base implementation using ds_swizzle, which should work
on all subtargets. There are at least two more paths available for
newer targets.

Added: 
    libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
    libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
    libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
    libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
    libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
    libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
    libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
    libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
    libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc

Modified: 
    libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
    libclc/clc/lib/amdgpu/CMakeLists.txt
    libclc/opencl/lib/generic/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
index d61f57860fe5b..e9fb566c54ef8 100644
--- a/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_broadcast.h
@@ -19,4 +19,6 @@
 #define __CLC_BODY "clc/subgroup/clc_subgroup_broadcast.inc"
 #include "clc/math/gentype.inc"
 
+#undef __CLC_FUNCTION
+
 #endif // __CLC_SUBGROUP_CLC_SUB_GROUP_BROADCAST_H__

diff  --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
new file mode 100644
index 0000000000000..a849ffa792758
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__
+#define __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__
+
+#include "clc/internal/clc.h"
+
+#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc/subgroup/clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc"
+
+#endif // __CLC_SUBGROUP_CLC_SUB_GROUP_SCAN_H__

diff  --git a/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
new file mode 100644
index 0000000000000..924da27782f32
--- /dev/null
+++ b/libclc/clc/include/clc/subgroup/clc_sub_group_scan.inc
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Declarations of the CLC subgroup scan entry points for the current
+// __CLC_GENTYPE. They are only emitted when __CLC_SCALAR is defined, so this
+// body declares nothing for non-scalar instantiations.
+#ifdef __CLC_SCALAR
+// Inclusive scans (add / min / max).
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_add(__CLC_GENTYPE x);
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_min(__CLC_GENTYPE x);
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_inclusive_max(__CLC_GENTYPE x);
+
+// Exclusive scans (add / min / max).
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_add(__CLC_GENTYPE x);
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_min(__CLC_GENTYPE x);
+
+_CLC_DECL _CLC_OVERLOAD __CLC_GENTYPE
+__clc_sub_group_scan_exclusive_max(__CLC_GENTYPE x);
+#endif

diff  --git a/libclc/clc/lib/amdgpu/CMakeLists.txt b/libclc/clc/lib/amdgpu/CMakeLists.txt
index ea79b2294d991..186b6ab0b85b1 100644
--- a/libclc/clc/lib/amdgpu/CMakeLists.txt
+++ b/libclc/clc/lib/amdgpu/CMakeLists.txt
@@ -30,6 +30,7 @@ libclc_configure_source_list(CLC_AMDGPU_SOURCES
   subgroup/clc_subgroup.cl
   subgroup/clc_sub_group_broadcast.cl
   subgroup/clc_sub_group_reduce.cl
+  subgroup/clc_sub_group_scan.cl
   synchronization/clc_sub_group_barrier.cl
   synchronization/clc_work_group_barrier.cl
   workitem/clc_get_enqueued_local_size.cl

diff  --git a/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
new file mode 100644
index 0000000000000..2455b0698aae1
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_amdgpu_ds_swizzle.inc
@@ -0,0 +1,60 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR)
+
+#if (defined(__CLC_GENSIZE) && __CLC_GENSIZE <= 32) ||                         \
+    defined(__CLC_FPSIZE) && __CLC_FPSIZE <= 32
+
+// Defines a file-local overload `func` for the current scalar __CLC_GENTYPE
+// (this branch is only taken for types of at most 32 bits). The argument is
+// reinterpreted as the unsigned type of the same width, widened to uint,
+// passed through __builtin_amdgcn_ds_swizzle with the swizzle pattern `mask`,
+// then narrowed and reinterpreted back to __CLC_GENTYPE.
+#define COERCE_FUNC(func, mask)                                                \
+  static _CLC_OVERLOAD __CLC_GENTYPE func(__CLC_GENTYPE x) {                   \
+    __CLC_U_GENTYPE bitcast = __CLC_AS_U_GENTYPE(x);                           \
+    uint ext = __clc_convert_uint(bitcast);                                    \
+    uint swizzle = __builtin_amdgcn_ds_swizzle(ext, mask);                     \
+    __CLC_U_GENTYPE trunc = __CLC_CONVERT_U_GENTYPE(swizzle);                  \
+    return __CLC_AS_GENTYPE(trunc);                                            \
+  }
+
+//------------------------------------------------------------------------------
+// Swizzle masks used in inclusive scan
+//------------------------------------------------------------------------------
+
+// COERCE_FUNC expands to a complete function definition, so its uses take no
+// trailing ';' (that would be an empty declaration at file scope). This also
+// matches how SPLIT_FUNC_64 is used for the 64-bit types.
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx2_lane0, SWIZZLE_BCASTX2_LANE0)
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx4_lane1, SWIZZLE_BCASTX4_LANE1)
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx8_lane3, SWIZZLE_BCASTX8_LANE3)
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx16_lane7, SWIZZLE_BCASTX16_LANE7)
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_bcastx32_lane15, SWIZZLE_BCASTX32_LANE15)
+
+//------------------------------------------------------------------------------
+// Swizzle masks used in exclusive scan adjustment
+//------------------------------------------------------------------------------
+
+// Within each group of 4 lanes, lanes 1..3 read the lane below them and lane 0
+// reads itself.
+COERCE_FUNC(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1,
+            SWIZZLE_QUAD_PERM(0, 0, 1, 2))
+
+#elif defined(__CLC_GENSIZE) && __CLC_GENSIZE == 64 ||                         \
+    defined(__CLC_FPSIZE) && __CLC_FPSIZE == 64
+
+// Defines the overload of `func_name` for the current 64-bit scalar
+// __CLC_GENTYPE. The value is viewed as a uint2, `func_name` is applied to
+// each uint half separately, and the two results are reinterpreted back as
+// __CLC_GENTYPE.
+#define SPLIT_FUNC_64(func_name)                                               \
+  static _CLC_OVERLOAD __CLC_GENTYPE func_name(__CLC_GENTYPE x) {              \
+    uint2 vec = __clc_as_uint2(x);                                             \
+    uint2 r = {func_name(vec.lo), func_name(vec.hi)};                          \
+    return __CLC_AS_GENTYPE(r);                                                \
+  }
+
+// 64-bit variants of the helpers used by the inclusive scan.
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx2_lane0)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx4_lane1)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx8_lane3)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx16_lane7)
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_bcastx32_lane15)
+
+// 64-bit variant of the helper used by the exclusive scan adjustment.
+SPLIT_FUNC_64(__clc_amdgpu_ds_swizzle_quad_perm_shift_right1)
+
+#endif
+
+#endif // __CLC_SCALAR

diff  --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
new file mode 100644
index 0000000000000..3ef735aac2aae
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.cl
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/clc_convert.h"
+#include "clc/math/clc_fmax.h"
+#include "clc/math/clc_fmin.h"
+#include "clc/shared/clc_max.h"
+#include "clc/shared/clc_min.h"
+#include "clc/subgroup/clc_sub_group_broadcast.h"
+#include "clc/subgroup/clc_sub_group_scan.h"
+#include "clc/subgroup/clc_subgroup.h"
+
+// Bit 15 of the ds_swizzle offset selects the quad-permute mode.
+#define QUAD_PERM (1 << 15)
+
+// The first basic swizzle mode (when offset[15] == 1) allows full data sharing
+// among a group of 4 consecutive threads. Lane i of each group reads from lane
+// Si of the same group; each selector takes two bits of the offset, S0 in the
+// lowest position.
+#define SWIZZLE_QUAD_PERM(S0, S1, S2, S3)                                      \
+  ((uint)(QUAD_PERM | ((S3) << 6) | ((S2) << 4) | ((S1) << 2) | (S0)))
+
+// Bit-mask swizzle mode (offset[15] == 0). The offset packs AND_MASK into bits
+// [4:0], OR_MASK into bits [9:5] and XOR_MASK into bits [14:10]. Per the AMD
+// ISA description of ds_swizzle, the source lane is computed from the lane id
+// by applying the AND, then the OR, then the XOR mask.
+#define SWIZZLE_PAIRWISE(XOR_MASK, OR_MASK, AND_MASK)                          \
+  ((uint)(((XOR_MASK) << 10) | ((OR_MASK) << 5) | (AND_MASK)))
+
+// SWIZZLE_BCASTXn_LANEk: the AND mask clears the low log2(n) bits of the lane
+// id and the OR mask sets them to k, so every lane of an aligned group of n
+// lanes reads lane k of that group.
+#define SWIZZLE_BCASTX2_LANE0 SWIZZLE_PAIRWISE(0x00, 0x00, 0x1e)
+#define SWIZZLE_BCASTX4_LANE1 SWIZZLE_PAIRWISE(0x00, 0x01, 0x1c)
+#define SWIZZLE_BCASTX8_LANE3 SWIZZLE_PAIRWISE(0x00, 0x03, 0x18)
+#define SWIZZLE_BCASTX16_LANE7 SWIZZLE_PAIRWISE(0x00, 0x07, 0x10)
+#define SWIZZLE_BCASTX32_LANE15 SWIZZLE_PAIRWISE(0x00, 0x0f, 0x00)
+
+#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc_amdgpu_ds_swizzle.inc"
+#include "clc/math/gentype.inc"
+
+//------------------------------------------------------------------------------
+//  Integer and fp add
+//------------------------------------------------------------------------------
+
+// Parameters consumed by clc_sub_group_scan.inc: the names of the two
+// functions to define, the binary operation combining two partial results,
+// and the identity value substituted for lanes that have nothing to fold in
+// (0 for add). The body is then instantiated through the integer and the
+// floating-point gentype lists.
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_add
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_add
+#define __CLC_FUNCTION_IMPL(x, y) ((x) + (y))
+#define __CLC_SUBGROUP_SCAN_ID_VAL (__CLC_GENTYPE)0
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc"
+
+// Clear the parameters so the next operation can redefine them.
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_SUBGROUP_SCAN_ID_VAL
+
+//------------------------------------------------------------------------------
+//  Integer and fp min
+//------------------------------------------------------------------------------
+
+// Same instantiation scheme as for add, with __clc_min as the combining
+// operation. The identity is __CLC_GEN_MAX, which is not defined in this file;
+// it is expanded inside the included body.
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_min
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_min
+#define __CLC_FUNCTION_IMPL(x, y) __clc_min(x, y)
+#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MAX
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc"
+// Clear the parameters so the next operation can redefine them.
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_SUBGROUP_SCAN_ID_VAL
+
+//------------------------------------------------------------------------------
+//  Integer and fp max
+//------------------------------------------------------------------------------
+
+// Same instantiation scheme as for add, with __clc_max as the combining
+// operation. The identity is __CLC_GEN_MIN, which is not defined in this file;
+// it is expanded inside the included body.
+#define __CLC_FUNCTION_INCLUSIVE __clc_sub_group_scan_inclusive_max
+#define __CLC_FUNCTION_EXCLUSIVE __clc_sub_group_scan_exclusive_max
+#define __CLC_FUNCTION_IMPL(x, y) __clc_max(x, y)
+#define __CLC_SUBGROUP_SCAN_ID_VAL __CLC_GEN_MIN
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "clc_sub_group_scan.inc"
+#include "clc/math/gentype.inc"
+// Clear the parameters defined for this operation.
+#undef __CLC_FUNCTION_IMPL
+#undef __CLC_FUNCTION_INCLUSIVE
+#undef __CLC_FUNCTION_EXCLUSIVE
+#undef __CLC_SUBGROUP_SCAN_ID_VAL

diff  --git a/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
new file mode 100644
index 0000000000000..af0b3a30a0bfd
--- /dev/null
+++ b/libclc/clc/lib/amdgpu/subgroup/clc_sub_group_scan.inc
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__CLC_SCALAR)
+
+// Largest and smallest value of the current scalar type. The min and max scans
+// name these as their identity value.
+//
+// The integer limits are derived by shifting an all-ones 64-bit pattern to the
+// right, so every shift count stays below 64 and no signed arithmetic
+// overflows. Building them from `1 << __CLC_GENSIZE` or
+// `1 << (__CLC_GENSIZE - 1)` breaks for the 64-bit types: the shift count
+// reaches the type width or the subsequent subtraction/negation overflows.
+#if defined(__CLC_GEN_S)
+#define __CLC_GEN_MAX ((__CLC_GENTYPE)(~0ull >> (65 - __CLC_GENSIZE)))
+#define __CLC_GEN_MIN ((__CLC_GENTYPE)(-__CLC_GEN_MAX - 1))
+#elif defined(__CLC_GEN_U)
+#define __CLC_GEN_MAX ((__CLC_GENTYPE)(~0ull >> (64 - __CLC_GENSIZE)))
+#define __CLC_GEN_MIN ((__CLC_GENTYPE)0)
+#elif defined(__CLC_FPSIZE)
+#define __CLC_GEN_MIN (-INFINITY)
+#define __CLC_GEN_MAX (INFINITY)
+#endif
+
+// Inclusive scan over the subgroup, built up by doubling the group size. After
+// the step for group size N, each lane holds the combination of the values
+// from the first lane of its aligned N-lane group up to and including itself.
+// In every step the upper half of each group folds in the result held by the
+// last lane of the lower half; lanes in the lower half fold in the identity.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_INCLUSIVE(__CLC_GENTYPE x) {
+  const uint lane = __clc_get_sub_group_local_id();
+
+  // Groups of 2: odd lanes fold in lane 0 of their pair.
+  __CLC_GENTYPE carry = __clc_amdgpu_ds_swizzle_bcastx2_lane0(x);
+  if ((lane & 1) == 0)
+    carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+  __CLC_GENTYPE acc = __CLC_FUNCTION_IMPL(x, carry);
+
+  // Groups of 4: lanes 2-3 fold in lane 1.
+  carry = __clc_amdgpu_ds_swizzle_bcastx4_lane1(acc);
+  if ((lane & 2) == 0)
+    carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+  acc = __CLC_FUNCTION_IMPL(acc, carry);
+
+  // Groups of 8: lanes 4-7 fold in lane 3.
+  carry = __clc_amdgpu_ds_swizzle_bcastx8_lane3(acc);
+  if ((lane & 4) == 0)
+    carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+  acc = __CLC_FUNCTION_IMPL(acc, carry);
+
+  // Groups of 16: lanes 8-15 fold in lane 7.
+  carry = __clc_amdgpu_ds_swizzle_bcastx16_lane7(acc);
+  if ((lane & 8) == 0)
+    carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+  acc = __CLC_FUNCTION_IMPL(acc, carry);
+
+  // Groups of 32: lanes 16-31 fold in lane 15.
+  carry = __clc_amdgpu_ds_swizzle_bcastx32_lane15(acc);
+  if ((lane & 16) == 0)
+    carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+  acc = __CLC_FUNCTION_IMPL(acc, carry);
+
+  // 64-wide waves only: lanes 32-63 fold in the result held by lane 31.
+  if (__builtin_amdgcn_wavefrontsize() == 64) {
+    carry = __clc_sub_group_broadcast(acc, 31);
+    if (lane <= 31)
+      carry = __CLC_SUBGROUP_SCAN_ID_VAL;
+    acc = __CLC_FUNCTION_IMPL(acc, carry);
+  }
+
+  return acc;
+}
+
+// Exclusive scan: every lane returns the inclusive result of the lane directly
+// below it, and lane 0 returns the identity. The shift by one lane is done
+// inside each group of 4 first; the first lane of every group of 4 is then
+// patched with the last lane of the preceding group.
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE __CLC_FUNCTION_EXCLUSIVE(__CLC_GENTYPE x) {
+  const __CLC_GENTYPE incl = __CLC_FUNCTION_INCLUSIVE(x);
+  const uint lane = __clc_get_sub_group_local_id();
+
+  // Lanes 1..3 of each group of 4 take the inclusive result of the lane below.
+  __CLC_GENTYPE prev = __clc_amdgpu_ds_swizzle_quad_perm_shift_right1(incl);
+
+  // Lane 4 of each group of 8 takes lane 3.
+  __CLC_GENTYPE edge = __clc_amdgpu_ds_swizzle_bcastx8_lane3(incl);
+  if ((lane & 0x7) == 0x4)
+    prev = edge;
+
+  // Lane 8 of each group of 16 takes lane 7.
+  edge = __clc_amdgpu_ds_swizzle_bcastx16_lane7(incl);
+  if ((lane & 0xf) == 0x8)
+    prev = edge;
+
+  // Lane 16 of each group of 32 takes lane 15.
+  edge = __clc_amdgpu_ds_swizzle_bcastx32_lane15(incl);
+  if ((lane & 0x1f) == 0x10)
+    prev = edge;
+
+  // 64-wide waves only: lane 32 takes lane 31.
+  if (__builtin_amdgcn_wavefrontsize() == 64) {
+    edge = __clc_sub_group_broadcast(incl, 31);
+    if (lane == 32)
+      prev = edge;
+  }
+
+  // Lane 0 has no predecessor.
+  if (lane == 0)
+    return __CLC_SUBGROUP_SCAN_ID_VAL;
+  return prev;
+}
+
+#undef __CLC_GEN_MIN
+#undef __CLC_GEN_MAX
+
+#endif // __CLC_SCALAR

diff  --git a/libclc/opencl/lib/generic/CMakeLists.txt b/libclc/opencl/lib/generic/CMakeLists.txt
index f30af80e9c65e..1b8beb57e34de 100644
--- a/libclc/opencl/lib/generic/CMakeLists.txt
+++ b/libclc/opencl/lib/generic/CMakeLists.txt
@@ -207,6 +207,8 @@ libclc_configure_source_list(OPENCL_GENERIC_SOURCES
   shared/vstore.cl
   subgroup/sub_group_broadcast.cl
   subgroup/sub_group_reduce.cl
+  subgroup/sub_group_scan_exclusive.cl
+  subgroup/sub_group_scan_inclusive.cl
   subgroup/subgroup.cl
   synchronization/sub_group_barrier.cl
   synchronization/work_group_barrier.cl

diff  --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
new file mode 100644
index 0000000000000..8825a838f3392
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/clc_sub_group_scan.h"
+
+#define __CLC_BODY "sub_group_scan_exclusive.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "sub_group_scan_exclusive.inc"
+#include "clc/math/gentype.inc"

diff  --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
new file mode 100644
index 0000000000000..243637d8d824f
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_exclusive.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// OpenCL-level exclusive scan entry points; each one forwards to the
+// __clc_-prefixed function of the same name. They are emitted for scalar types
+// only: every floating-point type, 32- and 64-bit integers, and the remaining
+// integer widths when cl_khr_subgroup_extended_types is defined.
+#if defined(__CLC_SCALAR) &&                                                   \
+    ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) ||  \
+     defined(cl_khr_subgroup_extended_types))
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_add(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_exclusive_add(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_min(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_exclusive_min(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_exclusive_max(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_exclusive_max(x);
+}
+
+#endif

diff  --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
new file mode 100644
index 0000000000000..15e8120443c61
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.cl
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clc/subgroup/clc_sub_group_scan.h"
+
+#define __CLC_BODY "sub_group_scan_inclusive.inc"
+#include "clc/integer/gentype.inc"
+
+#define __CLC_BODY "sub_group_scan_inclusive.inc"
+#include "clc/math/gentype.inc"

diff  --git a/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc
new file mode 100644
index 0000000000000..7f0ad6d01b153
--- /dev/null
+++ b/libclc/opencl/lib/generic/subgroup/sub_group_scan_inclusive.inc
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// OpenCL-level inclusive scan entry points; each one forwards to the
+// __clc_-prefixed function of the same name. They are emitted for scalar types
+// only: every floating-point type, 32- and 64-bit integers, and the remaining
+// integer widths when cl_khr_subgroup_extended_types is defined.
+#if defined(__CLC_SCALAR) &&                                                   \
+    ((defined(__CLC_FPSIZE) || __CLC_GENSIZE == 32 || __CLC_GENSIZE == 64) ||  \
+     defined(cl_khr_subgroup_extended_types))
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_add(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_inclusive_add(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_min(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_inclusive_min(x);
+}
+
+_CLC_DEF _CLC_OVERLOAD __CLC_GENTYPE
+sub_group_scan_inclusive_max(__CLC_GENTYPE x) {
+  return __clc_sub_group_scan_inclusive_max(x);
+}
+
+#endif


        


More information about the cfe-commits mailing list